jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/364435 )
Change subject: [Breaking] [IMPR] Make WikidataBot more abstract
......................................................................
[Breaking] [IMPR] Make WikidataBot more abstract
The motivation for this change is to make creating
Wikidata bots simpler.
Major changes:
- the abstract method to be implemented in subclasses
is now treat_page_and_item (with the same signature
as previous treat)
- WikidataBot is now a subclass of ExistingPageBot (and
CurrentPageBot), which is the reason for the method
change
- kwarg use_from_page is now a class variable. The
behaviour wasn't changed.
- treat_missing_item is now a class variable as well.
It wasn't even documented.
Other changes:
- new method WikidataBot.create_item_for_page was factored
out of newitem.py
- subclasses can set create_missing_item to True to create
missing items (using the new method)
- if the generator yields items, they are now kept untouched
If this change is too breaking, we could create a new class
WikibaseBot instead and deprecate WikidataBot.
Change-Id: Ib43948d70d6c4e957043a862bdbf3c9d27eee03a
---
M pywikibot/bot.py
M scripts/claimit.py
M scripts/coordinate_import.py
M scripts/harvest_template.py
M scripts/illustrate_wikidata.py
M scripts/newitem.py
6 files changed, 108 insertions(+), 87 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/pywikibot/bot.py b/pywikibot/bot.py
index c2ba886..e90f2fa 100644
--- a/pywikibot/bot.py
+++ b/pywikibot/bot.py
@@ -1825,26 +1825,35 @@
'exception was raised.'.format(page.title(), page.site))
-class WikidataBot(Bot):
+class WikidataBot(Bot, ExistingPageBot):
"""
Generic Wikidata Bot to be subclassed.
- Source claims (P143) can be created for specific sites.
+ Source claims (P143) can be created for specific sites
+
+ @cvar use_from_page: If True (default) it will apply ItemPage.fromPage
+ for every item. If False it assumes that the pages are actually
+ already ItemPage (page in treat_page_and_item will be None).
+ If None it'll use ItemPage.fromPage when the page is not in the site's
+ item namespace.
+ @type use_from_page: bool, None
+ @cvar treat_missing_item: Whether pages without items should be treated.
+ Note that this is checked after create_missing_item.
+ @type treat_missing_item: bool
+ @ivar create_missing_item: If True, new items will be created if the current
+ page doesn't have one. Subclasses should override this in the
+ constructor with a bool value or using self.getOption.
+ @type create_missing_item: bool
"""
- def __init__(self, **kwargs):
- """
- Constructor of the WikidataBot.
+ use_from_page = True
+ treat_missing_item = False
- @kwarg use_from_page: If True (default) it will apply ItemPage.fromPage
- for every item. If False it assumes that the pages are actually
- already ItemPage (page in treat will be None). If None it'll use
- ItemPage.fromPage when the page is not in the site's item
- namespace.
- @kwtype use_from_page: bool, None
- """
- self.use_from_page = kwargs.pop('use_from_page', True)
+ @deprecated_args(use_from_page=None)
+ def __init__(self, **kwargs):
+ """Constructor of the WikidataBot."""
+ self.create_missing_item = False
super(WikidataBot, self).__init__(**kwargs)
self.site = pywikibot.Site()
self.repo = self.site.data_repository()
@@ -1989,18 +1998,61 @@
source.setTarget(item)
return source
- def run(self):
- """Process all pages in generator."""
- if not hasattr(self, 'generator'):
- raise NotImplementedError('Variable %s.generator not set.'
- % self.__class__.__name__)
+ def create_item_for_page(self, page, data=None, summary=None, **kwargs):
+ """
+ Create an ItemPage with the provided page as the sitelink.
- treat_missing_item = hasattr(self, 'treat_missing_item')
+ @param page: the page for which the item will be created
+ @type page: pywikibot.Page
+ @param data: additional data to be included in the new item (optional).
+ Note that data created from the page have higher priority.
+ @type data: dict
+ @param summary: optional edit summary to replace the default one
+ @type summary: str
- try:
- for page in self.generator:
- if not page.exists():
- pywikibot.output('%s doesn\'t exist.' % page)
+ @return: pywikibot.ItemPage or None
+ """
+ if not summary:
+ # FIXME: i18n
+ summary = ('Bot: New item with sitelink from %s'
+ % page.title(asLink=True, insite=self.repo))
+
+ if data is None:
+ data = {}
+ data.setdefault('sitelinks', {}).update({
+ page.site.dbName(): {
+ 'site': page.site.dbName(),
+ 'title': page.title()
+ }
+ })
+ data.setdefault('labels', {}).update({
+ page.site.lang: {
+ 'language': page.site.lang,
+ 'value': page.title()
+ }
+ })
+ pywikibot.output('Creating item for %s...' % page)
+ item = pywikibot.ItemPage(page.site.data_repository())
+ kwargs.setdefault('show_diff', False)
+ result = self.user_edit_entity(item, data, summary=summary, **kwargs)
+ if result:
+ return item
+ else:
+ return None
+
+ def treat_page(self):
+ """Treat a page."""
+ page = self.current_page
+ if self.use_from_page is True:
+ try:
+ item = pywikibot.ItemPage.fromPage(page)
+ except pywikibot.NoPage:
+ item = None
+ else:
+ if isinstance(page, pywikibot.ItemPage):
+ item = page
+ page = None
+ else:
# FIXME: Hack because 'is_data_repository' doesn't work if
# site is the APISite. See T85483
data_site = page.site.data_repository()
@@ -2009,33 +2061,30 @@
is_item = page.namespace() == data_site.item_namespace.id
else:
is_item = False
- if self.use_from_page is not True and is_item:
+ if is_item:
item = pywikibot.ItemPage(data_site, page.title())
- item.get()
- elif self.use_from_page is False:
- pywikibot.error('{0} is not in the item namespace but '
- 'must be an item.'.format(page))
- continue
+ page = None
else:
- try:
- item = pywikibot.ItemPage.fromPage(page)
- except pywikibot.NoPage:
- item = None
- if not item:
- if not treat_missing_item:
- pywikibot.output(
- '%s doesn\'t have a wikidata item.' % page)
- # TODO: Add an option to create the item
- continue
- self.treat(page, item)
- except QuitKeyboardInterrupt:
- pywikibot.output('\nUser quit %s bot run...' %
- self.__class__.__name__)
- except KeyboardInterrupt:
- if config.verbose_output:
- raise
- else:
- pywikibot.output('\nKeyboardInterrupt during %s bot run...' %
- self.__class__.__name__)
- except Exception as e:
- pywikibot.exception(msg=e, tb=True)
+ item = None
+ if self.use_from_page is False:
+ pywikibot.error('{0} is not in the item namespace but '
+ 'must be an item.'.format(page))
+ return
+
+ if not item and self.create_missing_item:
+ item = self.create_item_for_page(page, asynchronous=False)
+
+ if not item and not self.treat_missing_item:
+ pywikibot.output('%s doesn\'t have a Wikidata item.' % page)
+ return
+
+ self.treat_page_and_item(page, item)
+
+ def treat_page_and_item(self, page, item):
+ """
+ Treat page together with its item (if it exists).
+
+ Must be implemented in subclasses.
+ """
+ raise NotImplementedError('Method %s.treat_page_and_item() not '
+ 'implemented.' % self.__class__.__name__)
diff --git a/scripts/claimit.py b/scripts/claimit.py
index cdf8256..25796f5 100755
--- a/scripts/claimit.py
+++ b/scripts/claimit.py
@@ -90,9 +90,8 @@
if self.exists_arg:
pywikibot.output("'exists' argument set to '%s'" %
self.exists_arg)
- def treat(self, page, item):
+ def treat_page_and_item(self, page, item):
"""Treat each page."""
- self.current_page = page
# The generator might yield pages from multiple sites
source = self.getSource(page.site)
diff --git a/scripts/coordinate_import.py b/scripts/coordinate_import.py
index 26694d1..41f6b47 100755
--- a/scripts/coordinate_import.py
+++ b/scripts/coordinate_import.py
@@ -67,10 +67,8 @@
if self.prop in claim.qualifiers:
return prop
- def treat(self, page, item):
+ def treat_page_and_item(self, page, item):
"""Treat page/item."""
- self.current_page = page
-
coordinate = page.coordinates(primary_only=True)
if not coordinate:
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py
index 775bf17..4bec597 100755
--- a/scripts/harvest_template.py
+++ b/scripts/harvest_template.py
@@ -198,11 +198,10 @@
local = handler.getOption(option)
return default is not local
- def treat(self, page, item):
+ def treat_page_and_item(self, page, item):
"""Process a single page/item."""
if willstop:
raise KeyboardInterrupt
- self.current_page = page
item.get()
if set(val[0] for val in self.fields.values()) <= set(
item.claims.keys()):
diff --git a/scripts/illustrate_wikidata.py b/scripts/illustrate_wikidata.py
index b10b585..9056295 100755
--- a/scripts/illustrate_wikidata.py
+++ b/scripts/illustrate_wikidata.py
@@ -51,10 +51,8 @@
raise ValueError(u'%s is of type %s, should be commonsMedia'
% (self.wdproperty, claim.type))
- def treat(self, page, item):
+ def treat_page_and_item(self, page, item):
"""Treat a page / item."""
- self.current_page = page
-
pywikibot.output(u'Found %s' % item.title())
imagename = page.properties().get('page_image_free')
diff --git a/scripts/newitem.py b/scripts/newitem.py
index ad1e0bf..3ebb54d 100755
--- a/scripts/newitem.py
+++ b/scripts/newitem.py
@@ -37,6 +37,8 @@
"""A bot to create new items."""
+ treat_missing_item = True
+
def __init__(self, generator, **kwargs):
"""Only accepts options defined in
availableOptions."""
self.availableOptions.update({
@@ -54,7 +56,6 @@
days=self.pageAge)
self.lastEditBefore = self.repo.getcurrenttime() - timedelta(
days=self.lastEdit)
- self.treat_missing_item = True
pywikibot.output('Page age is set to %s days so only pages created'
'\nbefore %s will be considered.'
% (self.pageAge, self.pageAgeBefore.isoformat()))
@@ -81,7 +82,7 @@
if exc is None:
self._touch_page(page)
- def treat(self, page, item):
+ def treat_page_and_item(self, page, item):
"""Treat page/item."""
if item and item.exists():
pywikibot.output(u'%s already has an item: %s.' % (page, item))
@@ -90,11 +91,6 @@
self._touch_page(page)
return
- self.current_page = page
-
- if not page.exists():
- pywikibot.output('%s does not exist. Skipping.' % page)
- return
if page.isRedirectPage():
pywikibot.output(u'%s is a redirect page. Skipping.' % page)
return
@@ -120,25 +116,7 @@
"Haven't implemented that yet so skipping.")
return
- # FIXME: i18n
- summary = (u'Bot: New item with sitelink from %s'
- % page.title(asLink=True, insite=self.repo))
-
- data = {'sitelinks':
- {page.site.dbName():
- {'site': page.site.dbName(),
- 'title': page.title()}
- },
- 'labels':
- {page.site.lang:
- {'language': page.site.lang,
- 'value': page.title()}
- }
- }
-
- item = pywikibot.ItemPage(page.site.data_repository())
- self.user_edit_entity(item, data, summary=summary,
- callback=self._callback)
+ self.create_item_for_page(page, callback=self._callback)
def main(*args):
--
To view, visit
https://gerrit.wikimedia.org/r/364435
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib43948d70d6c4e957043a862bdbf3c9d27eee03a
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Magul <tomasz.magulski(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>