jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/364435 )
Change subject: [Breaking] [IMPR] Make WikidataBot more abstract ......................................................................
[Breaking] [IMPR] Make WikidataBot more abstract
The motivation for this change is to make creating Wikidata bots simpler.
Major changes:
- the abstract method to be implemented in subclasses is now treat_page_and_item (with the same signature as the previous treat)
- WikidataBot is now a subclass of ExistingPageBot (and CurrentPageBot), which is the reason for the method change
- the kwarg use_from_page is now a class variable. The behaviour wasn't changed.
- treat_missing_item is now a class variable as well. It wasn't even documented before.
Other changes:
- the new method WikidataBot.create_item_for_page was factored out of newitem.py
- subclasses can set create_missing_item to True to create missing items (using the new method)
- if the generator yields items, they are now left untouched
If this change breaks too much, we could create a new class WikibaseBot instead and deprecate WikidataBot.
Change-Id: Ib43948d70d6c4e957043a862bdbf3c9d27eee03a --- M pywikibot/bot.py M scripts/claimit.py M scripts/coordinate_import.py M scripts/harvest_template.py M scripts/illustrate_wikidata.py M scripts/newitem.py 6 files changed, 108 insertions(+), 87 deletions(-)
Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved
diff --git a/pywikibot/bot.py b/pywikibot/bot.py index c2ba886..e90f2fa 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -1825,26 +1825,35 @@ 'exception was raised.'.format(page.title(), page.site))
-class WikidataBot(Bot): +class WikidataBot(Bot, ExistingPageBot):
""" Generic Wikidata Bot to be subclassed.
- Source claims (P143) can be created for specific sites. + Source claims (P143) can be created for specific sites + + @cvar use_from_page: If True (default) it will apply ItemPage.fromPage + for every item. If False it assumes that the pages are actually + already ItemPage (page in treat_page_and_item will be None). + If None it'll use ItemPage.fromPage when the page is not in the site's + item namespace. + @type use_from_page: bool, None + @cvar treat_missing_item: Whether pages without items should be treated. + Note that this is checked after create_missing_item. + @type treat_missing_item: bool + @ivar create_missing_item: If True, new items will be created if the current + page doesn't have one. Subclasses should override this in the + constructor with a bool value or using self.getOption. + @type create_missing_item: bool """
- def __init__(self, **kwargs): - """ - Constructor of the WikidataBot. + use_from_page = True + treat_missing_item = False
- @kwarg use_from_page: If True (default) it will apply ItemPage.fromPage - for every item. If False it assumes that the pages are actually - already ItemPage (page in treat will be None). If None it'll use - ItemPage.fromPage when the page is not in the site's item - namespace. - @kwtype use_from_page: bool, None - """ - self.use_from_page = kwargs.pop('use_from_page', True) + @deprecated_args(use_from_page=None) + def __init__(self, **kwargs): + """Constructor of the WikidataBot.""" + self.create_missing_item = False super(WikidataBot, self).__init__(**kwargs) self.site = pywikibot.Site() self.repo = self.site.data_repository() @@ -1989,18 +1998,61 @@ source.setTarget(item) return source
- def run(self): - """Process all pages in generator.""" - if not hasattr(self, 'generator'): - raise NotImplementedError('Variable %s.generator not set.' - % self.__class__.__name__) + def create_item_for_page(self, page, data=None, summary=None, **kwargs): + """ + Create an ItemPage with the provided page as the sitelink.
- treat_missing_item = hasattr(self, 'treat_missing_item') + @param page: the page for which the item will be created + @type page: pywikibot.Page + @param data: additional data to be included in the new item (optional). + Note that data created from the page have higher priority. + @type data: dict + @param summary: optional edit summary to replace the default one + @type summary: str
- try: - for page in self.generator: - if not page.exists(): - pywikibot.output('%s doesn't exist.' % page) + @return: pywikibot.ItemPage or None + """ + if not summary: + # FIXME: i18n + summary = ('Bot: New item with sitelink from %s' + % page.title(asLink=True, insite=self.repo)) + + if data is None: + data = {} + data.setdefault('sitelinks', {}).update({ + page.site.dbName(): { + 'site': page.site.dbName(), + 'title': page.title() + } + }) + data.setdefault('labels', {}).update({ + page.site.lang: { + 'language': page.site.lang, + 'value': page.title() + } + }) + pywikibot.output('Creating item for %s...' % page) + item = pywikibot.ItemPage(page.site.data_repository()) + kwargs.setdefault('show_diff', False) + result = self.user_edit_entity(item, data, summary=summary, **kwargs) + if result: + return item + else: + return None + + def treat_page(self): + """Treat a page.""" + page = self.current_page + if self.use_from_page is True: + try: + item = pywikibot.ItemPage.fromPage(page) + except pywikibot.NoPage: + item = None + else: + if isinstance(page, pywikibot.ItemPage): + item = page + page = None + else: # FIXME: Hack because 'is_data_repository' doesn't work if # site is the APISite. See T85483 data_site = page.site.data_repository() @@ -2009,33 +2061,30 @@ is_item = page.namespace() == data_site.item_namespace.id else: is_item = False - if self.use_from_page is not True and is_item: + if is_item: item = pywikibot.ItemPage(data_site, page.title()) - item.get() - elif self.use_from_page is False: - pywikibot.error('{0} is not in the item namespace but ' - 'must be an item.'.format(page)) - continue + page = None else: - try: - item = pywikibot.ItemPage.fromPage(page) - except pywikibot.NoPage: - item = None - if not item: - if not treat_missing_item: - pywikibot.output( - '%s doesn't have a wikidata item.' 
% page) - # TODO: Add an option to create the item - continue - self.treat(page, item) - except QuitKeyboardInterrupt: - pywikibot.output('\nUser quit %s bot run...' % - self.__class__.__name__) - except KeyboardInterrupt: - if config.verbose_output: - raise - else: - pywikibot.output('\nKeyboardInterrupt during %s bot run...' % - self.__class__.__name__) - except Exception as e: - pywikibot.exception(msg=e, tb=True) + item = None + if self.use_from_page is False: + pywikibot.error('{0} is not in the item namespace but ' + 'must be an item.'.format(page)) + return + + if not item and self.create_missing_item: + item = self.create_item_for_page(page, asynchronous=False) + + if not item and not self.treat_missing_item: + pywikibot.output('%s doesn't have a Wikidata item.' % page) + return + + self.treat_page_and_item(page, item) + + def treat_page_and_item(self, page, item): + """ + Treat page together with its item (if it exists). + + Must be implemented in subclasses. + """ + raise NotImplementedError('Method %s.treat_page_and_item() not ' + 'implemented.' % self.__class__.__name__) diff --git a/scripts/claimit.py b/scripts/claimit.py index cdf8256..25796f5 100755 --- a/scripts/claimit.py +++ b/scripts/claimit.py @@ -90,9 +90,8 @@ if self.exists_arg: pywikibot.output("'exists' argument set to '%s'" % self.exists_arg)
- def treat(self, page, item): + def treat_page_and_item(self, page, item): """Treat each page.""" - self.current_page = page # The generator might yield pages from multiple sites source = self.getSource(page.site)
diff --git a/scripts/coordinate_import.py b/scripts/coordinate_import.py index 26694d1..41f6b47 100755 --- a/scripts/coordinate_import.py +++ b/scripts/coordinate_import.py @@ -67,10 +67,8 @@ if self.prop in claim.qualifiers: return prop
- def treat(self, page, item): + def treat_page_and_item(self, page, item): """Treat page/item.""" - self.current_page = page - coordinate = page.coordinates(primary_only=True)
if not coordinate: diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py index 775bf17..4bec597 100755 --- a/scripts/harvest_template.py +++ b/scripts/harvest_template.py @@ -198,11 +198,10 @@ local = handler.getOption(option) return default is not local
- def treat(self, page, item): + def treat_page_and_item(self, page, item): """Process a single page/item.""" if willstop: raise KeyboardInterrupt - self.current_page = page item.get() if set(val[0] for val in self.fields.values()) <= set( item.claims.keys()): diff --git a/scripts/illustrate_wikidata.py b/scripts/illustrate_wikidata.py index b10b585..9056295 100755 --- a/scripts/illustrate_wikidata.py +++ b/scripts/illustrate_wikidata.py @@ -51,10 +51,8 @@ raise ValueError(u'%s is of type %s, should be commonsMedia' % (self.wdproperty, claim.type))
- def treat(self, page, item): + def treat_page_and_item(self, page, item): """Treat a page / item.""" - self.current_page = page - pywikibot.output(u'Found %s' % item.title()) imagename = page.properties().get('page_image_free')
diff --git a/scripts/newitem.py b/scripts/newitem.py index ad1e0bf..3ebb54d 100755 --- a/scripts/newitem.py +++ b/scripts/newitem.py @@ -37,6 +37,8 @@
"""A bot to create new items."""
+ treat_missing_item = True + def __init__(self, generator, **kwargs): """Only accepts options defined in availableOptions.""" self.availableOptions.update({ @@ -54,7 +56,6 @@ days=self.pageAge) self.lastEditBefore = self.repo.getcurrenttime() - timedelta( days=self.lastEdit) - self.treat_missing_item = True pywikibot.output('Page age is set to %s days so only pages created' '\nbefore %s will be considered.' % (self.pageAge, self.pageAgeBefore.isoformat())) @@ -81,7 +82,7 @@ if exc is None: self._touch_page(page)
- def treat(self, page, item): + def treat_page_and_item(self, page, item): """Treat page/item.""" if item and item.exists(): pywikibot.output(u'%s already has an item: %s.' % (page, item)) @@ -90,11 +91,6 @@ self._touch_page(page) return
- self.current_page = page - - if not page.exists(): - pywikibot.output('%s does not exist. Skipping.' % page) - return if page.isRedirectPage(): pywikibot.output(u'%s is a redirect page. Skipping.' % page) return @@ -120,25 +116,7 @@ "Haven't implemented that yet so skipping.") return
- # FIXME: i18n - summary = (u'Bot: New item with sitelink from %s' - % page.title(asLink=True, insite=self.repo)) - - data = {'sitelinks': - {page.site.dbName(): - {'site': page.site.dbName(), - 'title': page.title()} - }, - 'labels': - {page.site.lang: - {'language': page.site.lang, - 'value': page.title()} - } - } - - item = pywikibot.ItemPage(page.site.data_repository()) - self.user_edit_entity(item, data, summary=summary, - callback=self._callback) + self.create_item_for_page(page, callback=self._callback)
def main(*args):
pywikibot-commits@lists.wikimedia.org