[Gerrit] improvements to reflinks.py - change (pywikibot/core) - Pywikibot-commits

22 Jun 2014

jenkins-bot has submitted this change and it was merged.

Change subject: improvements to reflinks.py
......................................................................


improvements to reflinks.py

- ReferencesRobot extends pywikibot.Bot
  and uses self.userPut()
  (with built-in 'always' option)

- use core-like page.text when possible

- pep8-compliant comments

Change-Id: Idd851f6e1e2e6f81888854713324f602d44c0b54
---
M scripts/reflinks.py
1 file changed, 54 insertions(+), 81 deletions(-)

Approvals:
  Ladsgroup: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/scripts/reflinks.py b/scripts/reflinks.py
index e9fd399..998b745 100644
--- a/scripts/reflinks.py
+++ b/scripts/reflinks.py
@@ -54,10 +54,8 @@
 import StringIO
 
 import pywikibot
-from pywikibot import pagegenerators
-from pywikibot import xmlreader
+from pywikibot import i18n, pagegenerators, xmlreader, Bot
 import noreferences
-from pywikibot import i18n
 
 docuReplacements = {
     '&params;': pagegenerators.parameterHelp
@@ -233,25 +231,25 @@
 
     def transform(self, ispdf=False):
         """Normalize the title"""
-        #convert html entities
+        # convert html entities
         if not ispdf:
             self.title = pywikibot.html2unicode(self.title)
         self.title = re.sub(r'-+', '-', self.title)
-        #remove formatting, i.e long useless strings
+        # remove formatting, i.e long useless strings
         self.title = re.sub(r'[\.+\-=]{4,}', ' ', self.title)
-        #remove \n and \r and Unicode spaces from titles
+        # remove \n and \r and Unicode spaces from titles
         self.title = re.sub(r'(?u)\s', ' ', self.title)
         self.title = re.sub(r'[\n\r\t]', ' ', self.title)
-        #remove extra whitespaces
-        #remove leading and trailing ./;/,/-/_/+/ /
+        # remove extra whitespaces
+        # remove leading and trailing ./;/,/-/_/+/ /
         self.title = re.sub(r' +', ' ', self.title.strip(r'=.;,-+_
'))
 
         self.avoid_uppercase()
-        #avoid closing the link before the end
+        # avoid closing the link before the end
         self.title = self.title.replace(']', '&#93;')
-        #avoid multiple } being interpreted as a template inclusion
+        # avoid multiple } being interpreted as a template inclusion
         self.title = self.title.replace('}}', '}&#125;')
-        #prevent multiple quotes being interpreted as '' or '''
+        # prevent multiple quotes being interpreted as '' or '''
         self.title = self.title.replace('\'\'',
'\'&#39;')
         self.title = pywikibot.unicode2html(self.title, self.site.encoding())
         # TODO : remove HTML when both opening and closing tags are included
@@ -386,21 +384,21 @@
         return text
 
 
-class ReferencesRobot:
+class ReferencesRobot(Bot):
 
-    def __init__(self, generator, acceptall=False, limit=None, ignorepdf=False,
-                 summary=None):
+    def __init__(self, generator, **kwargs):
         """
         - generator : Page generator
-        - acceptall : boolean, is -always on ?
-        - limit : int, stop after n modified pages
-        - ignorepdf : boolean
 
         """
+        self.availableOptions.update({
+            'ignorepdf': False,  # boolean
+            'limit': None,  # int, stop after n modified pages
+            'summary': None,
+        })
+
+        super(ReferencesRobot, self).__init__(**kwargs)
         self.generator = generator
-        self.acceptall = acceptall
-        self.limit = limit
-        self.ignorepdf = ignorepdf
         self.site = pywikibot.Site()
         # Check
         manual = 'mw:Manual:Pywikibot/refLinks'
@@ -411,10 +409,10 @@
                 break
         if code:
             manual += '/%s' % code
-        if summary is None:
+        if self.getOption('summary') is None:
             self.msg = i18n.twtranslate(self.site, 'reflinks-msg', locals())
         else:
-            self.msg = summary
+            self.msg = self.getOption('summary')
         self.stopPage = pywikibot.Page(self.site,
                                        pywikibot.translate(self.site, stopPage))
 
@@ -446,42 +444,6 @@
         # Authorized mime types for HTML pages
         self.MIME = re.compile(
             r'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
-
-    def put_page(self, page, new):
-        """ Print diffs between orginal and new (text), put new text for
page
-
-        """
-        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default}
<<<"
-                         % page.title())
-        pywikibot.showDiff(page.get(), new)
-        if not self.acceptall:
-            choice = pywikibot.inputChoice(u'Do you want to accept ' +
-                                           u'these changes?',
-                                           ['Yes', 'No', 'All'],
-                                           ['y', 'N', 'a'],
'N')
-            if choice == 'a':
-                self.acceptall = True
-            if choice == 'y':
-                page.text = new
-                page.save(self.msg, async=True)
-        if self.acceptall:
-            try:
-                page.text = new
-                page.save(self.msg)
-            except pywikibot.EditConflict:
-                pywikibot.output(u'Skipping %s because of edit conflict'
-                                  % (page.title(),))
-            except pywikibot.SpamfilterError as e:
-                pywikibot.output(
-                    u'Cannot change %s because of blacklist entry %s'
-                    % (page.title(), e.url))
-            except pywikibot.PageNotSaved as error:
-                pywikibot.error(u'putting page: %s' % (error.args,))
-            except pywikibot.LockedPage:
-                pywikibot.output(u'Skipping %s (locked page)'
-                                  % (page.title(),))
-            except pywikibot.ServerError as e:
-                pywikibot.output(u'Server Error : %s' % e)
 
     def httpError(self, err_num, link, pagetitleaslink):
         """Log HTTP Error"""
@@ -553,10 +515,10 @@
                     pywikibot.removeDisabledParts(page.get())):
 
                 link = match.group(u'url')
-                #debugging purpose
-                #print link
+                # debugging purpose
+                # print link
                 if u'jstor.org' in link:
-                    #TODO: Clean URL blacklist
+                    # TODO: Clean URL blacklist
                     continue
 
                 ref = RefLink(link, match.group('name'))
@@ -568,12 +530,12 @@
                     except UnicodeError:
                         ref.url = urllib2.quote(ref.url.encode("utf8"),
"://")
                         f = urllib2.urlopen(ref.url)
-                    #Try to get Content-Type from server
+                    # Try to get Content-Type from server
                     headers = f.info()
                     contentType = headers.getheader('Content-Type')
                     if contentType and not self.MIME.search(contentType):
                         if ref.link.lower().endswith('.pdf') and \
-                           not self.ignorepdf:
+                           not self.getOption('ignorepdf'):
                             # If file has a PDF suffix
                             self.getPDFTitle(ref, f)
                         else:
@@ -659,7 +621,7 @@
                     if f:
                         f.close()
 
-                #remove <script>/<style>/comments/CDATA tags
+                # remove <script>/<style>/comments/CDATA tags
                 linkedpagetext = self.NON_HTML.sub('', linkedpagetext)
 
                 meta_content = self.META_CONTENT.search(linkedpagetext)
@@ -765,16 +727,30 @@
 
             new_text = self.deduplicator.process(new_text)
 
-            if new_text == page.get():
-                pywikibot.output('No changes were necessary in %s'
-                                 % page.title(asLink=True))
+            try:
+                self.userPut(page, page.text, new_text, comment=self.msg)
+            except pywikibot.EditConflict:
+                pywikibot.output(u'Skipping %s because of edit conflict'
+                                 % page.title())
+            except pywikibot.SpamfilterError as e:
+                pywikibot.output(
+                    u'Cannot change %s because of blacklist entry %s'
+                    % (page.title(), e.url))
+            except pywikibot.PageNotSaved as error:
+                pywikibot.error(u'putting page: %s' % (error.args,))
+            except pywikibot.LockedPage:
+                pywikibot.output(u'Skipping %s (locked page)'
+                                 % page.title())
+            except pywikibot.ServerError as e:
+                pywikibot.output(u'Server Error : %s' % e)
+
+            if new_text == page.text:
                 continue
+            else:
+                editedpages += 1
 
-            editedpages += 1
-            self.put_page(page, new_text)
-
-            if self.limit and editedpages >= self.limit:
-                pywikibot.output('Edited %s pages, stopping.' % self.limit)
+            if self.getOption('limit') and editedpages >=
self.getOption('limit'):
+                pywikibot.output('Edited %s pages, stopping.' %
self.getOption('limit'))
                 return
 
             if editedpages % 20 == 0:
@@ -790,12 +766,9 @@
 
 def main():
     xmlFilename = None
-    always = False
-    ignorepdf = False
-    limit = None
+    options = {}
     namespaces = []
     generator = None
-    summary = None
 
     # Process global args and prepare generator args parser
     local_args = pywikibot.handleArgs()
@@ -808,13 +781,13 @@
             except ValueError:
                 namespaces.append(arg[11:])
         elif arg.startswith('-summary:'):
-            summary = arg[9:]
+            options['summary'] = arg[9:]
         elif arg == '-always':
-            always = True
+            options['always'] = True
         elif arg == '-ignorepdf':
-            ignorepdf = True
+            options['ignorepdf'] = True
         elif arg.startswith('-limit:'):
-            limit = int(arg[7:])
+            options['limit'] = int(arg[7:])
         elif arg.startswith('-xmlstart'):
             if len(arg) == 9:
                 xmlStart = pywikibot.input(
@@ -844,7 +817,7 @@
         return
     generator = pagegenerators.PreloadingGenerator(generator, step=50)
     generator = pagegenerators.RedirectFilterPageGenerator(generator)
-    bot = ReferencesRobot(generator, always, limit, ignorepdf, summary)
+    bot = ReferencesRobot(generator, **options)
     bot.run()
 
 if __name__ == "__main__":

-- 
To view, visit https://gerrit.wikimedia.org/r/138219
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idd851f6e1e2e6f81888854713324f602d44c0b54
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ricordisamoa &lt;ricordisamoa(a)openmailbox.org&gt;
Gerrit-Reviewer: John Vandenberg &lt;jayvdb(a)gmail.com&gt;
Gerrit-Reviewer: Ladsgroup &lt;ladsgroup(a)gmail.com&gt;
Gerrit-Reviewer: Merlijn van Deen &lt;valhallasw(a)arctus.nl&gt;
Gerrit-Reviewer: Ricordisamoa &lt;ricordisamoa(a)openmailbox.org&gt;
Gerrit-Reviewer: Xqt &lt;info(a)gno.de&gt;
Gerrit-Reviewer: jenkins-bot <>