jenkins-bot has submitted this change and it was merged.
Change subject: Implement item-centric WikidataBot.run ......................................................................
Implement item-centric WikidataBot.run
All wikidata scripts need to get the item for a page. Convert them to using run() and treat() semantics. Adds 'treat_missing_item' keyword, so newitem.py can indicate that it wants to perform custom handling of missing items for a page.
Also fix bug 66523 for all scripts, including category.py. The bug was introduced by 431cb77.
And fix all pep257 errors except missing docstrings.
Bug: 66523 Change-Id: Iaaaf3fa583a3e299899197ac9c67530f1972a861 --- M pywikibot/bot.py M scripts/category.py M scripts/claimit.py M scripts/coordinate_import.py M scripts/harvest_template.py M scripts/illustrate_wikidata.py M scripts/newitem.py M tox.ini 8 files changed, 232 insertions(+), 189 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 2be8a2f..941f4b0 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -980,3 +980,38 @@ source = pywikibot.Claim(self.repo, 'P143') source.setTarget(self.source_values.get(site.family.name).get(site.code)) return source + + def run(self): + """Process all pages in generator.""" + if not hasattr(self, 'generator'): + raise NotImplementedError('Variable %s.generator not set.' + % self.__class__.__name__) + + treat_missing_item = hasattr(self, 'treat_missing_item') + + try: + for page in self.generator: + if not page.exists(): + pywikibot.output('%s doesn't exist.' % page) + try: + item = pywikibot.ItemPage.fromPage(page) + except pywikibot.NoPage: + item = None + if not item: + if not treat_missing_item: + pywikibot.output( + '%s doesn't have a wikidata item.' % page) + #TODO FIXME: Add an option to create the item + continue + self.treat(page, item) + except QuitKeyboardInterrupt: + pywikibot.output('\nUser quit %s bot run...' % + self.__class__.__name__) + except KeyboardInterrupt: + if config.verbose_output: + raise + else: + pywikibot.output('\nKeyboardInterrupt during %s bot run...' % + self.__class__.__name__) + except Exception as e: + pywikibot.exception(msg=e, tb=True) diff --git a/scripts/category.py b/scripts/category.py index 09f960f..12beae3 100755 --- a/scripts/category.py +++ b/scripts/category.py @@ -697,8 +697,11 @@ Do not use this function from outside the class. 
""" if self.oldcat.exists(): - item = pywikibot.ItemPage.fromPage(self.oldcat) - if item.exists(): + try: + item = pywikibot.ItemPage.fromPage(self.oldcat) + except pywikibot.NoPage: + item = None + if item and item.exists(): comment = i18n.twtranslate(self.site, 'category-was-moved', {'newcat': self.newcat.title(), 'title': self.newcat.title()}) diff --git a/scripts/claimit.py b/scripts/claimit.py index 39fcf11..d389c93 100755 --- a/scripts/claimit.py +++ b/scripts/claimit.py @@ -1,7 +1,7 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ -This script adds claims to Wikidata items based on categories. +A script that adds claims to Wikidata items based on categories.
------------------------------------------------------------------------------
@@ -68,11 +68,13 @@
class ClaimRobot(WikidataBot): - """ - A bot to add Wikidata claims - """ + + """A bot to add Wikidata claims.""" + def __init__(self, generator, claims, exists_arg=''): """ + Constructor. + Arguments: * generator - A generator that yields Page objects. * claims - A list of wikidata claims @@ -84,18 +86,14 @@ self.exists_arg = exists_arg self.repo = pywikibot.Site().data_repository() self.cacheSources() - - def run(self): - """Starts the robot.""" if self.exists_arg: pywikibot.output(''exists' argument set to '%s'' % self.exists_arg) - for page in self.generator: - self.current_page = page - item = pywikibot.ItemPage.fromPage(page) - if not item.exists(): - # TODO FIXME: We should provide an option to create the page - pywikibot.output('%s doesn't have a wikidata item :(' % page) - continue + + def treat(self, page, item): + """Treat each page.""" + self.current_page = page + + if item: for claim in self.claims: skip = False # If claim with same property already exists... @@ -137,7 +135,8 @@
def listsEqual(list1, list2): """ - Returns true if the lists are probably equal, ignoring order. + Return true if the lists are probably equal, ignoring order. + Works for lists of unhashable items (like dictionaries). """ if len(list1) != len(list2): diff --git a/scripts/coordinate_import.py b/scripts/coordinate_import.py index 322db55..71cef9c 100644 --- a/scripts/coordinate_import.py +++ b/scripts/coordinate_import.py @@ -1,6 +1,8 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ +Coordinate importing script. + Usage:
python coordinate_import.py -lang:en -family:wikipedia -cat:Category:Coordinates_not_on_Wikidata @@ -30,11 +32,13 @@
class CoordImportRobot(WikidataBot): - """ - A bot to import coordinates to Wikidata - """ + + """A bot to import coordinates to Wikidata.""" + def __init__(self, generator): """ + Constructor. + Arguments: * generator - A generator that yields Page objects.
@@ -60,41 +64,41 @@ if self.prop in claim.qualifiers: return prop
- def run(self): - """Start the robot.""" - for page in self.generator: - self.current_page = page - item = pywikibot.ItemPage.fromPage(page) + def treat(self, page, item): + """Treat page/item.""" + self.current_page = page
- if item.exists(): - pywikibot.output(u'Found %s' % item.title()) - coordinate = page.coordinates(primary_only=True) + coordinate = page.coordinates(primary_only=True)
- if coordinate: - claims = item.get().get('claims') - if self.prop in claims: - pywikibot.output(u'Item %s already contains coordinates (%s)' - % (item.title(), self.prop)) - else: - prop = self.has_coord_qualifier(claims) - if prop: - pywikibot.output(u'Item %s already contains coordinates' - u' (%s) as qualifier for %s' - % (item.title(), self.prop, prop)) - else: - newclaim = pywikibot.Claim(self.repo, self.prop) - newclaim.setTarget(coordinate) - pywikibot.output(u'Adding %s, %s to %s' % (coordinate.lat, - coordinate.lon, - item.title())) - try: - item.addClaim(newclaim) + if not coordinate: + return
- source = self.getSource(page.site) - if source: - newclaim.addSource(source, bot=True) - except CoordinateGlobeUnknownException as e: - pywikibot.output(u'Skipping unsupported globe: %s' % e.args) + claims = item.get().get('claims') + if self.prop in claims: + pywikibot.output(u'Item %s already contains coordinates (%s)' + % (item.title(), self.prop)) + return + + prop = self.has_coord_qualifier(claims) + if prop: + pywikibot.output(u'Item %s already contains coordinates' + u' (%s) as qualifier for %s' + % (item.title(), self.prop, prop)) + return + + newclaim = pywikibot.Claim(self.repo, self.prop) + newclaim.setTarget(coordinate) + pywikibot.output(u'Adding %s, %s to %s' % (coordinate.lat, + coordinate.lon, + item.title())) + try: + item.addClaim(newclaim) + + source = self.getSource(page.site) + if source: + newclaim.addSource(source, bot=True) + except CoordinateGlobeUnknownException as e: + pywikibot.output(u'Skipping unsupported globe: %s' % e.args)
def main(): diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py index d26245b..cdb1275 100755 --- a/scripts/harvest_template.py +++ b/scripts/harvest_template.py @@ -1,6 +1,8 @@ #!/usr/bin/python # -*- coding: utf-8 -*- """ +Template harvesting script. + Usage:
python harvest_template.py -transcludes:"..." template_parameter PID [template_parameter PID] @@ -38,11 +40,13 @@
class HarvestRobot(WikidataBot): - """ - A bot to add Wikidata claims - """ + + """A bot to add Wikidata claims.""" + def __init__(self, generator, templateTitle, fields): """ + Constructor. + Arguments: * generator - A generator that yields Page objects. * templateTitle - The template to work on @@ -55,18 +59,10 @@ self.fields = fields self.repo = pywikibot.Site().data_repository() self.cacheSources() - - def run(self): - """Starts the robot.""" self.templateTitles = self.getTemplateSynonyms(self.templateTitle) - for page in self.generator: - try: - self.processPage(page) - except Exception as e: - pywikibot.exception(msg=e, tb=True)
def getTemplateSynonyms(self, title): - """Fetches redirects of the title, so we can check against them.""" + """Fetch redirects of the title, so we can check against them.""" temp = pywikibot.Page(pywikibot.Site(), title, ns=10) if not temp.exists(): pywikibot.error(u'Template %s does not exist.' % temp.title()) @@ -94,9 +90,12 @@ if linked_page.isRedirectPage(): linked_page = linked_page.getRedirectTarget()
- linked_item = pywikibot.ItemPage.fromPage(linked_page) + try: + linked_item = pywikibot.ItemPage.fromPage(linked_page) + except pywikibot.NoPage: + linked_item = None
- if not linked_item.exists(): + if not item or not linked_item.exists(): pywikibot.output(u'%s doesn't have a wikidata item to link with. Skipping' % (linked_page)) return
@@ -106,35 +105,31 @@
return linked_item
- def processPage(self, page): - """Process a single page.""" - item = pywikibot.ItemPage.fromPage(page) + def treat(self, page, item): + """Process a single page/item.""" self.current_page = page - if not item.exists(): - pywikibot.output('%s doesn't have a wikidata item :(' % page) - #TODO FIXME: We should provide an option to create the page - return item.get() if set(self.fields.values()) <= set(item.claims.keys()): pywikibot.output(u'%s item %s has claims for all properties. Skipping' % (page, item.title())) - else: - pagetext = page.get() - templates = textlib.extract_templates_and_params(pagetext) - for (template, fielddict) in templates: - # Clean up template - try: - template = pywikibot.Page(page.site, template, - ns=10).title(withNamespace=False) - except pywikibot.exceptions.InvalidTitle: - pywikibot.error(u"Failed parsing template; '%s' should be the template name." % template) - continue - # We found the template we were looking for - if template in self.templateTitles: - for field, value in fielddict.items(): - field = field.strip() - value = value.strip() - if not field or not value: - continue + return + + pagetext = page.get() + templates = textlib.extract_templates_and_params(pagetext) + for (template, fielddict) in templates: + # Clean up template + try: + template = pywikibot.Page(page.site, template, + ns=10).title(withNamespace=False) + except pywikibot.exceptions.InvalidTitle: + pywikibot.error(u"Failed parsing template; '%s' should be the template name." % template) + return + # We found the template we were looking for + if template in self.templateTitles: + for field, value in fielddict.items(): + field = field.strip() + value = value.strip() + if not field or not value: + return
# This field contains something useful for us if field in self.fields: @@ -153,12 +148,12 @@ match = re.search(pywikibot.link_regex, value) if not match: pywikibot.output(u'%s field %s value %s isnt a wikilink. Skipping' % (claim.getID(), field, value)) - continue + return
link_text = match.group(1) linked_item = self._template_link_target(item, link_text) if not linked_item: - continue + return
claim.setTarget(linked_item) elif claim.type == 'string': @@ -171,11 +166,11 @@ image = pywikibot.FilePage(image.getRedirectTarget()) if not image.exists(): pywikibot.output('[[%s]] doesn\'t exist so I can\'t link to it' % (image.title(),)) - continue + return claim.setTarget(image) else: pywikibot.output("%s is not a supported datatype." % claim.type) - continue + return
pywikibot.output('Adding %s --> %s' % (claim.getID(), claim.getTarget())) item.addClaim(claim) diff --git a/scripts/illustrate_wikidata.py b/scripts/illustrate_wikidata.py index 21b1a36..0a4dc1c 100644 --- a/scripts/illustrate_wikidata.py +++ b/scripts/illustrate_wikidata.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """ Bot to add images to Wikidata items. The image is extracted from the page_props. + For this to be available the PageImages extension (https://www.mediawiki.org/wiki/Extension:PageImages) needs to be installed
@@ -27,11 +28,13 @@
class IllustrateRobot(WikidataBot): - """ - A bot to add Wikidata image claims - """ + + """A bot to add Wikidata image claims.""" + def __init__(self, generator, wdproperty=u'P18'): """ + Constructor. + Arguments: * generator - A generator that yields Page objects. * wdproperty - The property to add. Should be of type commonsMedia @@ -47,38 +50,40 @@ raise ValueError(u'%s is of type %s, should be commonsMedia' % (self.wdproperty, claim.type))
- def run(self): - """Starts the bot.""" - for page in self.generator: - self.current_page = page - item = pywikibot.ItemPage.fromPage(page) + def treat(self, page, item): + """Treat a page / item.""" + self.current_page = page
- if item.exists(): - pywikibot.output(u'Found %s' % item.title()) - imagename = page.properties().get('page_image') + pywikibot.output(u'Found %s' % item.title()) + imagename = page.properties().get('page_image')
- if imagename: - claims = item.get().get('claims') - if self.wdproperty in claims: - pywikibot.output(u'Item %s already contains image (%s)' % (item.title(), self.wdproperty)) - else: - newclaim = pywikibot.Claim(self.repo, self.wdproperty) - commonssite = pywikibot.Site("commons", "commons") - imagelink = pywikibot.Link(imagename, source=commonssite, defaultNamespace=6) - image = pywikibot.FilePage(imagelink) - if image.isRedirectPage(): - image = pywikibot.FilePage(image.getRedirectTarget()) - if not image.exists(): - pywikibot.output('[[%s]] doesn't exist so I can't link to it' % (image.title(),)) - continue - newclaim.setTarget(image) - pywikibot.output('Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) - item.addClaim(newclaim) + if not imagename: + return
- # A generator might yield pages from multiple sites - source = self.getSource(page.site) - if source: - newclaim.addSource(source, bot=True) + claims = item.get().get('claims') + if self.wdproperty in claims: + pywikibot.output(u'Item %s already contains image (%s)' % (item.title(), self.wdproperty)) + return + + newclaim = pywikibot.Claim(self.repo, self.wdproperty) + commonssite = pywikibot.Site("commons", "commons") + imagelink = pywikibot.Link(imagename, source=commonssite, defaultNamespace=6) + image = pywikibot.FilePage(imagelink) + if image.isRedirectPage(): + image = pywikibot.FilePage(image.getRedirectTarget()) + + if not image.exists(): + pywikibot.output('[[%s]] doesn't exist so I can't link to it' % (image.title(),)) + return + + newclaim.setTarget(image) + pywikibot.output('Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) + item.addClaim(newclaim) + + # A generator might yield pages from multiple sites + source = self.getSource(page.site) + if source: + newclaim.addSource(source, bot=True)
def main(): diff --git a/scripts/newitem.py b/scripts/newitem.py index 7210d2a..b6ea11f 100644 --- a/scripts/newitem.py +++ b/scripts/newitem.py @@ -2,6 +2,7 @@ # -*- coding: utf-8 -*- """ This script creates new items on Wikidata based on certain criteria. + * When was the (Wikipedia) page created? * When was the last edit on the page? * Does the page contain interwiki's? @@ -27,12 +28,13 @@ #
import pywikibot -from pywikibot import pagegenerators +from pywikibot import pagegenerators, WikidataBot from datetime import timedelta
-class NewItemRobot(pywikibot.Bot): - """ A bot to create new items """ +class NewItemRobot(WikidataBot): + + """ A bot to create new items. """
def __init__(self, generator, **kwargs): """Only accepts options defined in availableOptions.""" @@ -49,9 +51,7 @@ self.lastEdit = self.getOption('lastedit') self.pageAgeBefore = self.repo.getcurrenttime() - timedelta(days=self.pageAge) self.lastEditBefore = self.repo.getcurrenttime() - timedelta(days=self.lastEdit) - - def run(self): - """ Start the bot. """ + self.treat_missing_item = True pywikibot.output('Page age is set to %s days so only pages created' '\nbefore %s will be considered.' % (self.pageAge, self.pageAgeBefore.isoformat())) @@ -59,65 +59,63 @@ '\nbefore %s will be considered.' % (self.lastEdit, self.lastEditBefore.isoformat()))
- for page in self.generator: - self.current_page = page - if not page.exists(): - pywikibot.output(u'%s does not exist anymore. Skipping...' - % page) - continue - try: - item = pywikibot.ItemPage.fromPage(page) - except pywikibot.NoPage: - pass - else: - pywikibot.output(u'%s already has an item: %s.' % (page, item)) - if self.getOption('touch'): - pywikibot.output(u'Doing a null edit on the page.') - page.put(page.text) - continue + def treat(self, page, item): + """ Treat page/item. """ + if item and item.exists(): + pywikibot.output(u'%s already has an item: %s.' % (page, item)) + if self.getOption('touch'): + pywikibot.output(u'Doing a null edit on the page.') + page.put(page.text) + return
- if page.isRedirectPage(): - pywikibot.output(u'%s is a redirect page. Skipping.' % page) - elif page.editTime() > self.lastEditBefore: - pywikibot.output( - u'Last edit on %s was on %s.\nToo recent. Skipping.' - % (page, page.editTime().isoformat())) - else: - (revId, revTimestamp, revUser, - revComment) = page.getVersionHistory(reverseOrder=True, - total=1)[0] - if revTimestamp > self.pageAgeBefore: - pywikibot.output( - u'Page creation of %s on %s is too recent. Skipping.' - % (page, page.editTime().isoformat())) - elif page.langlinks(): - # FIXME: Implement this - pywikibot.output( - "Found language links (interwiki links).\n" - "Haven't implemented that yet so skipping.") - else: - # FIXME: i18n - summary = (u'Bot: New item with sitelink from %s' - % page.title(asLink=True, insite=self.repo)) + self.current_page = page
- data = {'sitelinks': - {page.site.dbName(): - {'site': page.site.dbName(), - 'title': page.title()} - }, - 'labels': - {page.site.lang: - {'language': page.site.lang, - 'value': page.title()} - } - } - pywikibot.output(summary) + if page.isRedirectPage(): + pywikibot.output(u'%s is a redirect page. Skipping.' % page) + return + if page.editTime() > self.lastEditBefore: + pywikibot.output( + u'Last edit on %s was on %s.\nToo recent. Skipping.' + % (page, page.editTime().isoformat())) + return
- # Create empty item object and add 'data' - item = pywikibot.ItemPage(page.site.data_repository()) - item.editEntity(data, summary=summary) - # And do a null edit to force update - page.put(page.text) + (revId, revTimestamp, revUser, + revComment) = page.getVersionHistory(reverseOrder=True, total=1)[0] + if revTimestamp > self.pageAgeBefore: + pywikibot.output( + u'Page creation of %s on %s is too recent. Skipping.' + % (page, page.editTime().isoformat())) + return + + if page.langlinks(): + # FIXME: Implement this + pywikibot.output( + "Found language links (interwiki links).\n" + "Haven't implemented that yet so skipping.") + return + + # FIXME: i18n + summary = (u'Bot: New item with sitelink from %s' + % page.title(asLink=True, insite=self.repo)) + + data = {'sitelinks': + {page.site.dbName(): + {'site': page.site.dbName(), + 'title': page.title()} + }, + 'labels': + {page.site.lang: + {'language': page.site.lang, + 'value': page.title()} + } + } + + pywikibot.output(summary) + + item = pywikibot.ItemPage(page.site.data_repository()) + item.editEntity(data, summary=summary) + # And do a null edit to force update + page.put(page.text)
def main(): diff --git a/tox.ini b/tox.ini index 3772be9..9877c43 100644 --- a/tox.ini +++ b/tox.ini @@ -36,7 +36,11 @@ ./pywikibot/data/__init__.py \ ./pywikibot/compat/userlib.py ./pywikibot/compat/catlib.py \ ./pywikibot/compat/query.py \ + ./scripts/claimit.py ./scripts/coordinate_import.py \ + ./scripts/harvest_template.py ./scripts/illustrate_wikidata.py \ + ./scripts/newitem.py \ ./tests/aspects.py + deps = flake8-docstrings
[testenv:nose]
pywikibot-commits@lists.wikimedia.org