jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes, code improvements
......................................................................
[PEP8] changes, code improvements
Change-Id: Ibfa8741849c0c59d38963afac94ba92a2765bdf7
---
M standardize_notes.py
1 file changed, 447 insertions(+), 308 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/standardize_notes.py b/standardize_notes.py
index fec9af1..2682e21 100644
--- a/standardize_notes.py
+++ b/standardize_notes.py
@@ -35,8 +35,9 @@
 """
 # Derived from replace.py
 #
-# (C) Daniel Herding, 2004
-# Copyright Scot E. Wilcoxon 2005
+# (c) Daniel Herding, 2004
+# (c) Scot E. Wilcoxon, 2005
+# (c) pywikibot team, 2006-2013
 #
 # Distributed under the terms of the MIT license.
 #
@@ -49,11 +50,19 @@
 # doFindAllCitationSections(). (SEWilco)
 #
-import subprocess, sys, re, random
-import socket, urllib, robotparser
+import subprocess
+import sys
+import re
+import random
+import socket
+import urllib
+import robotparser
 from datetime import date
+import string
+
 import wikipedia as pywikibot
-import pagegenerators, config
+import pagegenerators
+import config
 # httpcache is optional
 have_httpcache = True
@@ -64,38 +73,38 @@
 # Summary messages in different languages
 msg = {
-    'ar':u'روبوت: معالجة مراجع تلقائية %s',
-    'de':u'Bot: Automatisierte Textersetzung %s',
-    'en':u'Robot: Automated reference processing %s',
-    'es':u'Robot: Reemplazo automático de texto %s',
-    'fr':u'Robot : Remplacement de texte automatisé %s',
-    'he':u'בוט: הופך את הערת השוליים %s לאוטומטית',
-    'hu':u'Robot: Automatikus szövegcsere %s',
-    'ia':u'Robot: Reimplaciamento automatic de texto %s',
-    'is':u'Vélmenni: breyti texta %s',
-    'nl':u'Bot: geautomatiseerde verwerking van referenties %s',
-    'pl':u'Robot automatycznie przetwarza źródła %s',
-    'pt':u'Bot: Mudança automática %s',
-    }
+    'ar': u'روبوت: معالجة مراجع تلقائية %s',
+    'de': u'Bot: Automatisierte Textersetzung %s',
+    'en': u'Robot: Automated reference processing %s',
+    'es': u'Robot: Reemplazo automático de texto %s',
+    'fr': u'Robot : Remplacement de texte automatisé %s',
+    'he': u'בוט: הופך את הערת השוליים %s לאוטומטית',
+    'hu': u'Robot: Automatikus szövegcsere %s',
+    'ia': u'Robot: Reimplaciamento automatic de texto %s',
+    'is': u'Vélmenni: breyti texta %s',
+    'nl': u'Bot: geautomatiseerde verwerking van referenties %s',
+    'pl': u'Robot automatycznie przetwarza źródła %s',
+    'pt': u'Bot: Mudança automática %s',
+}
 fixes = {
-    # These replacements will convert alternate reference formats to format used
-    # by this tool.
+    # These replacements will convert alternate reference formats to format
+    # used by this tool.
     'ALTREFS': {
         'regex': True,
         # We don't want to mess up pages which discuss HTML tags, so we skip
         # all pages which contain nowiki tags.
         'exceptions': ['<nowiki>[^<]{3,}</nowiki>'],
         'msg': {
-            'ar':u'روبوت: إضافة/ترتيب المراجع.',
-            'en':u'Robot: Adding/sorting references.',
-            'ar':u'روبوت: إضافة/ترتيب المراجع.',
-            'fr':u'Robot : Ajoute/trie les références.',
-            'he':u'בוט: מוסיף/מסדר הערות שוליים',
-            'ia':u'Robot: Addition/assortimento de referentias',
-            'nl':u'Bot: referenties toegevoegd/gesorteerd',
-            'pl':u'Robot dodaje/sortuje źródła',
-        },
+            'ar': u'روبوت: إضافة/ترتيب المراجع.',
+            'en': u'Robot: Adding/sorting references.',
+            'ar': u'روبوت: إضافة/ترتيب المراجع.',
+            'fr': u'Robot : Ajoute/trie les références.',
+            'he': u'בוט: מוסיף/מסדר הערות שוליים',
+            'ia': u'Robot: Addition/assortimento de referentias',
+            'nl': u'Bot: referenties toegevoegd/gesorteerd',
+            'pl': u'Robot dodaje/sortuje źródła',
+        },
         'replacements': [
             # Everything case-insensitive (?i)
             # These translate variations of footnote templates to ref|note
@@ -141,7 +150,7 @@
     'references',
     'source',
     'sources',
-    ]
+]
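Note that the 'msg' dict above carries the 'ar' key twice, before and after this change. Python dict literals accept duplicate keys without complaint and simply keep the last value, so one of the two 'ar' entries is dead. A minimal sketch of the behavior:

    # Duplicate keys in a dict literal do not raise; the last one wins.
    d = {'ar': u'first', 'en': u'second', 'ar': u'third'}
    assert d['ar'] == u'third'
    assert len(d) == 2  # only 'ar' and 'en' survive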
 # news sites for which to generate 'news reference' citations, the org name, and prefix to strip
 newssites = [
@@ -218,7 +227,9 @@
"""
-    def __init__(self, source, replacements, exceptions, regex = False, namespace = -1, textfilename = None, sqlfilename = None, categoryname = None, pagenames = None):
+    def __init__(self, source, replacements, exceptions, regex=False,
+                 namespace=-1, textfilename=None, sqlfilename=None,
+                 categoryname=None, pagenames=None):
         self.source = source
         self.replacements = replacements
         self.exceptions = exceptions
@@ -286,14 +297,14 @@
         """
         import catlib
         category = catlib.Category(pywikibot.getSite(), self.categoryname)
-        for page in category.articles(recurse = False):
+        for page in category.articles(recurse=False):
             yield page
     def read_pages_from_text_file(self):
         """
-        Generator which will yield pages that are listed in a text file created by
-        the bot operator. Will regard everything inside [[double brackets]] as a
-        page name, and yield Pages for these pages.
+        Generator which will yield pages that are listed in a text file created
+        by the bot operator. Will regard everything inside [[double brackets]]
+        as a page name, and yield Pages for these pages.
         Arguments:
             * textfilename - the textfile's path, either absolute or relative
@@ -306,13 +317,13 @@
         for line in f.readlines():
             # BUG: this will only find one link per line.
             # TODO: use findall() instead.
-            m=R.match(line)
+            m = R.match(line)
            if m:
                 yield pywikibot.Page(pywikibot.getSite(), m.group(1))
         f.close()
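The BUG/TODO comments above survive the cleanup. A minimal sketch of the findall() fix they suggest, with a simplified bracket pattern standing in for the script's actual R (the real pattern is not shown in this hunk):

    R = re.compile(r'\[\[(.+?)\]\]')  # assumed stand-in for the title pattern
    for line in f.readlines():
        # findall() returns every bracketed title on the line, not just the first
        for title in R.findall(line):
            yield pywikibot.Page(pywikibot.getSite(), title)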
     def read_pages_from_wiki_page(self):
-        '''
+        """
         Generator which will yield pages that are listed in a wiki page. Will
         regard everything inside [[double brackets]] as a page name, except for
         interwiki and category links, and yield Pages for these pages.
@@ -320,16 +331,14 @@
         Arguments:
             * pagetitle - the title of a page on the home wiki
-        '''
+        """
         listpage = pywikibot.Page(pywikibot.getSite(), self.pagetitle)
         list = pywikibot.get(listpage)
         # TODO - UNFINISHED
     # TODO: Make MediaWiki's search feature available.
     def __iter__(self):
-        '''
-        Starts the generator.
-        '''
+        """ Starts the generator. """
         if self.source == 'sqldump':
             for pl in self.read_pages_from_sql_dump():
                 yield pl
@@ -343,10 +352,11 @@
             for pagename in self.pagenames:
                 yield pywikibot.Page(pywikibot.getSite(), pagename)
+
 class ReplaceRobot:
     def __init__(self, generator, replacements, refsequence, references,
-                 refusage, exceptions = [], regex = False, acceptall = False,
-                 summary = ''):
+                 refusage, exceptions=[], regex=False, acceptall=False,
+                 summary=''):
         self.generator = generator
         self.replacements = replacements
         self.exceptions = exceptions
@@ -361,6 +371,7 @@
         """
         If one of the exceptions applies for the given text, returns the
         substring. which matches the exception. Otherwise it returns None.
+
         """
         for exception in self.exceptions:
             if self.regex:
@@ -372,12 +383,12 @@
                 hit = original_text.find(exception)
                 if hit != -1:
                     return original_text[hit:hit + len(exception)]
-        return None
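The reformatting keeps the pre-existing exceptions=[] signature. Default values are evaluated once, at function definition time, so a mutable list default is shared by every call that omits the argument. The usual defensive idiom, should it ever be adopted here, is a sketch like:

    class Example:
        # None sentinel avoids sharing one list object across instances
        def __init__(self, exceptions=None):
            self.exceptions = exceptions if exceptions is not None else []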
     def doReplacements(self, new_text):
         """
         Returns the text which is generated by applying all replacements to
         the given text.
+
         """
         # For any additional replacements, loop through them
@@ -396,15 +407,18 @@
                                              refsectionname)
             # Read existing Notes section contents into references list
             pywikibot.output(u"Reading existing Notes section")
-            self.doReadReferencesSection( new_text, refsectionname )
-            while self.references and self.references[len(self.references)-1] == u'\n':
-                del self.references[len(self.references)-1]  # delete trailing empty lines
+            self.doReadReferencesSection(new_text, refsectionname)
+            while self.references and \
+                  self.references[len(self.references) - 1] == u'\n':
+                # delete trailing empty lines
+                del self.references[len(self.references) - 1]
             # Convert any external links to footnote references
-            pywikibot.output(u"Converting external links" )
+            pywikibot.output(u"Converting external links")
             new_text = self.doConvertExternalLinks(new_text)
             # Accumulate ordered list of all references
             pywikibot.output(u"Collecting references")
-            (duplicatefound, self.refusage) = self.doBuildSequenceListOfReferences( new_text )
+            (duplicatefound,
+             self.refusage) = self.doBuildSequenceListOfReferences(new_text)
             # Rewrite references, including dealing with duplicates.
             pywikibot.output(u"Rewriting references")
             new_text = self.doRewriteReferences(new_text, self.refusage,
@@ -414,7 +428,7 @@
             self.references = self.doReorderReferences(self.references,
                                                        self.refusage)
             # Rebuild Notes section
-            pywikibot.output(u"Rebuilding References section" )
+            pywikibot.output(u"Rebuilding References section")
             new_text = self.doUpdateReferencesSection(new_text, self.refusage,
                                                       refsectionname)
         return new_text
@@ -424,71 +438,93 @@
         References. Adds References to reference list.
""" - new_text = '' # Default is no text + new_text = '' skipsection = False - for text_line in original_text.splitlines(True): # Scan all text line by line + # Scan all text line by line + for text_line in original_text.splitlines(True): # Check for protected sections m = re.search("== *(?P<sectionname>[^]|=]*) *==", text_line) # TODO: support subheadings within Notes section # TODO: support Notes in alphabetic order # TODO: support Notes in other orders - if m: # if in a section, check if should skip this section - if m.group('sectionname').lower().strip() in referencesectionnames: - skipsection = True # skipsection left True so no further links converted + if m: # if in a section, check if should skip this section + if m.group('sectionname').lower().strip() in \ + referencesectionnames: + # skipsection left True so no further links converted + skipsection = True if skipsection: - new_text = new_text + text_line # skip section, so retain text. + new_text += text_line # skip section, so retain text. else: - # TODO: recognize {{inline}} invisible footnotes when something can be done with them - # + # TODO: recognize {{inline}} invisible footnotes when something + # can be done with them + # Ignore lines within comments - if not text_line.startswith( u'<!--'): + if not text_line.startswith(u'<!--'): # Fix erroneous external links in double brackets - Rextlink = re.compile(r'(?i)[[(?P<linkname>http://%5B%5E%5C%5D%5D+?)%5C%5D%5C]') + Rextlink = re.compile( + r'(?i)[[(?P<linkname>http://%5B%5E%5C%5D%5D+?)%5C%5D%5C]') # TODO: compiling the regex each time might be inefficient text_lineR = re.compile(Rextlink) MOextlink = text_lineR.search(text_line) - while MOextlink: # find all links on line + while MOextlink: # find all links on line extlink_linkname = MOextlink.group('linkname') # Rewrite double brackets to single ones - text_line=text_line[:MOextlink.start()] + '[%s]' % extlink_linkname + text_line[MOextlink.end(0):] - MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1) - # Regular expression to look for external link [linkname linktext] - linktext is optional. + text_line = text_line[:MOextlink.start()] + \ + '[%s]' % extlink_linkname + \ + text_line[MOextlink.end(0):] + MOextlink = text_lineR.search(text_line, + MOextlink.start(0) + 1) + # Regular expression to look for external link + # [linkname linktext] - linktext is optional. # Also accepts erroneous pipe symbol as separator. 
                     # Accepts wikilinks within <linktext>
-                    #Rextlink = re.compile(r'[^\[]\[(?P<linkname>[h]*[ft]+tp:[^ \[\]|]+?)(?P<linktext>[ |]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\][^\]]')
-                    #Rextlink = re.compile(r'\[(?P<linkname>[h]*[ft]+tp:[^ \[\]|]+?)(?P<linktext>[ |]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
-                    Rextlink = re.compile(r'(?i)\[(?P<linkname>[h]*[ft]+tp:[^ \[\]|]+?)(?P<linktext>[ |]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
+                    Rextlink = re.compile(
+                        r'(?i)\[(?P<linkname>[h]*[ft]+tp:[^ \[\]|]+?)(?P<linktext>[ |]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
                     # TODO: compiling the regex each time might be inefficient
                     text_lineR = re.compile(Rextlink)
                     MOextlink = text_lineR.search(text_line)
-                    while MOextlink:  # find all links on line
+                    while MOextlink:  # find all links on line
                         extlink_linkname = MOextlink.group('linkname')
                         extlink_linktext = MOextlink.group('linktext')
                         self.refsequence += 1
-                        ( refname, reftext ) = self.doConvertLinkTextToReference(self.refsequence, extlink_linkname, extlink_linktext)
-                        self.references.append( reftext )  # append new entry to References
+                        (refname, reftext) = self.doConvertLinkTextToReference(
+                            self.refsequence, extlink_linkname,
+                            extlink_linktext)
+                        # append new entry to References
+                        self.references.append(reftext)
                         if extlink_linktext:
-                            # If there was text as part of link, reinsert text before footnote.
-                            text_line=text_line[:MOextlink.start(0)] + '%s{{ref|%s}}' % (extlink_linktext, refname) + text_line[MOextlink.end(0):]
+                            # If there was text as part of link, reinsert text
+                            # before footnote.
+                            text_line = (text_line[:MOextlink.start(0)] +
                                          '%s{{ref|%s}}' % (extlink_linktext,
+                                                           refname) +
+                                         text_line[MOextlink.end(0):])
                         else:
-                            text_line=text_line[:MOextlink.start(0)] + '{{ref|%s}}' % refname + text_line[MOextlink.end(0):]
-                        MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1)
+                            text_line = (text_line[:MOextlink.start(0)] +
                                          '{{ref|%s}}' % refname +
+                                         text_line[MOextlink.end(0):])
+                        MOextlink = text_lineR.search(text_line,
+                                                      MOextlink.start(0) + 1)
                     # Search for {{doi}}
                     Rdoi = re.compile(r'(?i){{doi\|(?P<doilink>[^}|]*)}}')
                     # TODO: compiling the regex each time might be inefficient
                     doiR = re.compile(Rdoi)
                     MOdoi = doiR.search(text_line)
-                    while MOdoi:  # find all doi on line
+                    while MOdoi:  # find all doi on line
                         doi_link = MOdoi.group('doilink')
                         if doi_link:
                             self.refsequence += 1
-                            ( refname, reftext ) = self.doConvertDOIToReference( self.refsequence, doi_link )
-                            self.references.append( reftext )  # append new entry to References
-                            text_line=text_line[:MOdoi.start(0)] + '{{ref|%s}}' % refname + text_line[MOdoi.end(0):]
-                        MOdoi = doiR.search(text_line, MOdoi.start(0)+1)
-            new_text = new_text + text_line  # append new line to new text
+                            (refname, reftext) = self.doConvertDOIToReference(
+                                self.refsequence, doi_link)
+                            # append new entry to References
+                            self.references.append(reftext)
+                            text_line = text_line[:MOdoi.start(0)] + \
+                                        '{{ref|%s}}' % refname + \
+                                        text_line[MOdoi.end(0):]
+                        MOdoi = doiR.search(text_line, MOdoi.start(0) + 1)
+            new_text += text_line  # append new line to new text
         if new_text == '':
-            new_text = original_text  # If somehow no new text, return original text
+            new_text = original_text  # If no new text, return original text
         return new_text
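Both scanning loops above resume with search(text_line, last_start + 1) and recompile their pattern on every line, which the TODOs already flag. For comparison, a sketch of the same scan with a module-level compiled pattern and finditer(), replacing right-to-left so earlier match offsets stay valid (the pattern and make_refname() helper are simplified stand-ins, not the script's own):

    import re

    # compiled once at import time instead of per line
    REF_RE = re.compile(r'\[(?P<linkname>https?://\S+?)(?: (?P<linktext>[^\]]*))?\]')

    def tag_links(text_line, make_refname):
        # iterate matches in reverse so replacements do not shift
        # the offsets of matches still to be processed
        for mo in reversed(list(REF_RE.finditer(text_line))):
            text_line = (text_line[:mo.start()] +
                         '{{ref|%s}}' % make_refname(mo.group('linkname')) +
                         text_line[mo.end():])
        return text_line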
     def doFindRefSection(self, original_text):
@@ -498,121 +534,157 @@
         """
         refsectionname = ''
         sectionname = ''
-        for text_line in original_text.splitlines(True):  # Scan all text line by line
-            if refsectionname == '':  # if ref section not found
+        # Scan all text line by line
+        for text_line in original_text.splitlines(True):
+            if not refsectionname:
                 # Check if line has a section name
-                m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
-                if m:  # if in a section, remember section name
+                m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+                if m:  # if in a section, remember section name
                     sectionname = m.group('sectionname').strip()
-                    pywikibot.output( u'Section: %s' % sectionname )
-                else:  # else not a section name so look for reference
-                    n = re.search( r'(i?){{(note|ibid)[|]', text_line )
-                    if n:  # if reference found
-                        refsectionname = sectionname  # found reference section
-                        pywikibot.output( u'Ref section: %s' % refsectionname )
-                        break  # stop looking
+                    pywikibot.output(u'Section: %s' % sectionname)
+                else:  # else not a section name so look for reference
+                    n = re.search(r'(i?){{(note|ibid)[|]', text_line)
+                    if n:
+                        refsectionname = sectionname  # found reference section
+                        pywikibot.output(u'Ref section: %s' % refsectionname)
+                        break
         return refsectionname
     def doFindAllCitationSections(self, original_text, refsectionname):
         """
         Returns list of sections which may contain citations.
+
         """
-        refsectionlist = [ ( refsectionname) ]
+        refsectionlist = [refsectionname]
         sectionname = ''
-        for text_line in original_text.splitlines(True):  # Scan all text line by line
+        # Scan all text line by line
+        for text_line in original_text.splitlines(True):
             # Check if line has a section name
-            m = re.search( "==[ ]*(?P<sectionname>[^=]+)[ ]*==", text_line )
-            if m:  # if in a section, remember section name
+            m = re.search("==[ ]*(?P<sectionname>[^=]+)[ ]*==", text_line)
+            if m:
                 sectionname = m.group('sectionname').strip()
                 if sectionname.lower().strip() in referencesectionnames:
-                    if sectionname not in refsectionlist:  # if not already in list, add to list.
-                        refsectionlist.extend( sectionname )
+                    # if not already in list, add to list.
+                    if sectionname not in refsectionlist:
+                        refsectionlist.extend(sectionname)
         return refsectionlist
     def doRewriteReferences(self, original_text, refusage, refsectionname):
         """
-        Returns the text which is generated by rewriting references, including duplicate refs.
+        Returns the text which is generated by rewriting references, including
+        duplicate refs.
+
         """
-        new_text = ''  # Default is no text
+        new_text = ''  # Default is no text
         skipsection = False
-        for text_line in original_text.splitlines(True):  # Scan all text line by line
+        # Scan all text line by line
+        for text_line in original_text.splitlines(True):
             # Check for protected sections
-            m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
-            if m:  # if in a section, check if should skip this section
-                if refsectionname != '':  # if a certain section name has been identified
+            m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+            if m:  # if in a section, check if should skip this section
+                # if a certain section name has been identified
+                if refsectionname != '':
                     m_section = m.group('sectionname')
-                    pywikibot.output( u'Looking for "%s": "%s"' % (refsectionname,unicode(m_section)) )
+                    pywikibot.output(u'Looking for "%s": "%s"'
+                                     % (refsectionname, unicode(m_section)))
                     if unicode(m_section.strip()) == unicode(refsectionname):
-                        pywikibot.output( u'Found Ref section.')
-                        skipsection = True  # skipsection left True so no further links converted
-                else:  # else grab all possible sections
-                    if m.group('sectionname').lower().strip() in referencesectionnames:
-                        pywikibot.output('RefSection found by default names: %s' % m.group('sectionname') )
-                        skipsection = True  # skipsection left True so no further links converted
+                        pywikibot.output(u'Found Ref section.')
+                        skipsection = True
+                else:  # else grab all possible sections
+                    if m.group('sectionname').lower().strip() in \
+                       referencesectionnames:
+                        pywikibot.output(
+                            'RefSection found by default names: %s'
+                            % m.group('sectionname'))
+                        skipsection = True
             if skipsection:
-                new_text = new_text + text_line  # skip section, so retain text.
+                new_text += text_line
             else:
-                # TODO: recognize {{inline}} invisible footnotes when something can be done with them
+                # TODO: recognize {{inline}} invisible footnotes when something
+                # can be done with them
                 #
-                # Data structure: refusage[reference_key] = [ sequence_in_document, count, count_during_dup_handling ]
+                # Data structure:
+                #     refusage[reference_key] = [sequence_in_document,
+                #                                count, count_during_dup_handling]
                 # Check for various references
                 # TODO: compiling the regex each time might be inefficient
-                Rtext_line = re.compile(r'(?i){{(?P<reftype>ref|ref_num|ref_label)\|(?P<refname>[^}|]+?)}}')
-                m = Rtext_line.search( text_line )
-                alphabet26 = u'abcdefghijklmnopqrstuvwxyz'
+                Rtext_line = re.compile(
+                    r'(?i){{(?P<reftype>ref|ref_num|ref_label)\|(?P<refname>[^}|]+?)}}')
+                m = Rtext_line.search(text_line)
                 while m:  # if found a reference
-                    if m.group('reftype').lower() in ('ref', 'ref_num', 'ref_label'):  # confirm ref
+                    if m.group('reftype').lower() in ('ref', 'ref_num',
+                                                      'ref_label'):
                         refkey = m.group('refname').strip()
                         if refkey != '':
                             if refkey in refusage:
-                                # pywikibot.output( u'refusage[%s] = %s' % (refkey,refusage[refkey]) )
-                                if refusage[refkey][2] == 0:  # if first use of reference
-                                    text_line=text_line[:m.start(0)] + '{{ref|%s}}' % (refkey) + text_line[m.end(0):]
-                                    refusage[refkey][2] += 1  # count use of reference
-                                else:  # else not first use of reference
-                                    text_line=text_line[:m.start(0)] + '{{ref_label|%s|%d|%s}}' % (refkey,(refusage[refkey][0])+1,alphabet26[((refusage[refkey][2])-1)%26]) + text_line[m.end(0):]
-                                    refusage[refkey][2] += 1  # count use of reference
+                                # pywikibot.output(u'refusage[%s] = %s' % (refkey, refusage[refkey]))
+                                # if first use of reference
+                                if refusage[refkey][2] == 0:
+                                    text_line = (
+                                        text_line[:m.start(0)] +
+                                        '{{ref|%s}}' % (refkey) +
+                                        text_line[m.end(0):])
+                                    # count use of reference
+                                    refusage[refkey][2] += 1
+                                else:  # else not first use of reference
+                                    text_line = (
+                                        text_line[:m.start(0)] +
+                                        '{{ref_label|%s|%d|%s}}'
+                                        % (refkey, (refusage[refkey][0]) + 1,
+                                           string.ascii_lowercase[
+                                               ((refusage[refkey][2]) - 1) % 26
+                                           ]) +
+                                        text_line[m.end(0):])
+                                    # count use of reference
+                                    refusage[refkey][2] += 1
                             else:
-                                # Odd, because refusage list is populated the key should exist already.
-                                refusage[refkey] = [len(refusage),1,1]  # remember this reference
-                                text_line=text_line[:m.start(0)] + '{{ref|%s}}' % refkey + text_line[m.end(0):]
+                                # Odd, because refusage list is populated the
+                                # key should exist already.
+
+                                # remember this reference
+                                refusage[refkey] = [len(refusage), 1, 1]
+                                text_line = (text_line[:m.start(0)] +
+                                             '{{ref|%s}}' % refkey +
+                                             text_line[m.end(0):])
-                    m = Rtext_line.search( text_line, m.start(0)+1 )
-                new_text = new_text + text_line  # append new line to new text
+                    m = Rtext_line.search(text_line, m.start(0) + 1)
+                new_text += text_line
         if new_text == '':
-            new_text = original_text  # If somehow no new text, return original text
+            # If somehow no new text, return original text
+            new_text = original_text
         return new_text
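The change drops the hand-rolled alphabet26 string in favor of string.ascii_lowercase, which is the same 26 characters. The duplicate-use suffix simply cycles through the alphabet and wraps after z:

    import string
    # the nth duplicate use of one citation gets suffix a..z, repeating after 26
    labels = [string.ascii_lowercase[n % 26] for n in range(30)]
    # labels[:3] == ['a', 'b', 'c'] and labels[26] == 'a'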
-    def doGetTitleFromURL(self, extlink_linkname ):
+    def doGetTitleFromURL(self, extlink_linkname):
         """
         Returns text derived from between <title>...</title> tags through a URL.
         Obeys robots.txt restrictions.
+
         """
         # if no descriptive text get from web site, if not PDF
         urltitle = u''
         urlfile = None
         urlheaders = None
         if len(extlink_linkname) > 5:
-            socket.setdefaulttimeout(20)  # timeout in seconds
-            pywikibot.get_throttle()  # throttle down to Wikipedia rate
+            socket.setdefaulttimeout(20)  # timeout in seconds
+            pywikibot.get_throttle()  # throttle down to Wikipedia rate
             # Obey robots.txt restrictions
             rp = robotparser.RobotFileParser()
-            rp.set_url( extlink_linkname )
+            rp.set_url(extlink_linkname)
             try:
-                rp.read()  # read robots.txt
+                rp.read()  # read robots.txt
             except (IOError, socket.timeout):
                 pywikibot.output(u'Error accessing URL: %s'
                                  % unicode(extlink_linkname))
             else:
                 urlobj = None
-                if not rp.can_fetch( "*", extlink_linkname ):
+                if not rp.can_fetch("*", extlink_linkname):
                     pywikibot.output(u'Robot prohibited: %s'
                                      % unicode(extlink_linkname))
                 else:  # else access allowed
                     try:
                         if have_httpcache:
                             cache = HTTPCache(extlink_linkname)
-                            urlfile = cache.filename()  # filename of cached date
+                            # filename of cached date
+                            urlfile = cache.filename()
                             urlheaders = cache.info()
                         else:
-                            (urlfile, urlheaders) = urllib.urlretrieve(extlink_linkname)
+                            (urlfile,
+                             urlheaders) = urllib.urlretrieve(extlink_linkname)
                     except IOError:
                         pywikibot.output(u'Error accessing URL. %s'
                                          % unicode(extlink_linkname))
@@ -626,13 +698,17 @@
                                    unicode(extlink_linkname)))
                     except:  # Ignore other errors
                         pass
-                if urlfile != None:
-                    urlobj = open( urlfile )
+                if urlfile:
+                    urlobj = open(urlfile)
                     if extlink_linkname.lower().endswith('.pdf'):
                         # If file has a PDF suffix
-                        pywikibot.output( u'PDF file.')
+                        pywikibot.output(u'PDF file.')
                         try:
-                            pdfinfo_out = subprocess.Popen([r"pdfinfo","/dev/stdin"], stdin=urlobj, stdout=subprocess.PIPE, shell=False).communicate()[0]
+                            pdfinfo_out = subprocess.Popen(
+                                [r"pdfinfo", "/dev/stdin"],
+                                stdin=urlobj,
+                                stdout=subprocess.PIPE,
+                                shell=False).communicate()[0]
                             for aline in pdfinfo_out.splitlines():
                                 if aline.lower().startswith('title'):
                                     urltitle = aline.split(None)[1:]
@@ -646,66 +722,78 @@
                                         urlauthor = ' '.join(urlauthor)
                                         if urlauthor:
                                             pywikibot.output(u'author: %s'
-                                                             % urlauthor )
+                                                             % urlauthor)
                         except ValueError:
-                            pywikibot.output( u'pdfinfo value error.')
+                            pywikibot.output(u'pdfinfo value error.')
                         except OSError:
-                            pywikibot.output( u'pdfinfo OS error.')
-                        except:  # Ignore errors
-                            pywikibot.output( u'PDF processing error.')
+                            pywikibot.output(u'pdfinfo OS error.')
+                        except:  # Ignore errors
+                            pywikibot.output(u'PDF processing error.')
                             pass
-                        pywikibot.output( u'PDF done.')
+                        pywikibot.output(u'PDF done.')
                         if urlobj:
                             urlobj.close()
                     else:
                         # urlinfo = urlobj.info()
                         aline = urlobj.read()
                         maxalines = 100
-                        while maxalines > 0 and aline and urltitle == '':
-                            maxalines -= 1  # reduce number of lines left to consider
-                            titleRE = re.search("(?i)<title>(?P<HTMLtitle>[^<>]+)", aline)
+                        while maxalines > 0 and aline and not urltitle:
+                            # reduce number of lines left to consider
+                            maxalines -= 1
+                            titleRE = re.search(
+                                "(?i)<title>(?P<HTMLtitle>[^<>]+)", aline)
                             if titleRE:
                                 try:
-                                    urltitle = unicode(titleRE.group('HTMLtitle'), 'utf-8')
+                                    urltitle = unicode(titleRE.group('HTMLtitle'),
+                                                       'utf-8')
                                 except:
-                                    urltitle = u' '  # error, no title
-                                urltitle = u' '.join(urltitle.split())  # merge whitespace
-                                pywikibot.output( u'::::Title: %s' % urltitle )
-                                break  # found a title so stop looking
+                                    urltitle = u' '  # error, no title
+                                urltitle = u' '.join(urltitle.split())
+                                pywikibot.output(u'::::Title: %s' % urltitle)
+                                break  # found a title so stop looking
                             else:
                                 if maxalines < 1:
                                     pywikibot.output(
                                         u'No title in URL. %s'
-                                        % unicode(extlink_linkname) )
+                                        % unicode(extlink_linkname))
                 else:
-                    if urlobj != None:
-                        pywikibot.output( u'::+URL: ' + extlink_linkname )
+                    if urlobj:
+                        pywikibot.output(u'::+URL: ' + extlink_linkname)
                         # urlinfo = urlobj.info()
                         aline = urlobj.read()
                         full_page = ''
                         # while aline and urltitle == '':
                         while aline:
                             full_page = full_page + aline
-                            titleRE = re.search("(?i)<title>(?P<HTMLtitle>[^<>]+)", aline)
+                            titleRE = re.search(
+                                "(?i)<title>(?P<HTMLtitle>[^<>]+)",
+                                aline)
                             if titleRE:
                                 if titleRE.group('HTMLtitle'):
                                     urltitle = u''
                                     try:
-                                        urltitle = unicode(titleRE.group('HTMLtitle'), 'utf-8')
-                                        urltitle = u' '.join(urltitle.split())  # merge whitespace
-                                        pywikibot.output( u'::::Title: %s' % urltitle )
+                                        urltitle = unicode(
+                                            titleRE.group('HTMLtitle'),
+                                            'utf-8')
+                                        urltitle = u' '.join(
+                                            urltitle.split())
+                                        pywikibot.output(
+                                            u'::::Title: %s'
+                                            % urltitle)
                                     except:
                                         aline = urlobj.read()
                                         continue
                                 else:
                                     aline = urlobj.read()
                                     continue
-                                break  # found a title so stop looking
+                                # found a title so stop looking
+                                break
                             else:
                                 aline = urlobj.read()
                     else:
                         aline = urlobj.read()
-                if urltitle != '': pywikibot.output( u'title: ' + urltitle )
+                if urltitle:
+                    pywikibot.output(u'title: ' + urltitle)
                 # Try a more advanced search
                 ##from nltk.parser.probabilistic import *
                 ##from nltk.tokenizer import *
@@ -734,37 +822,43 @@
                 ##WhitespaceTokenizer(SUBTOKENS='WORDS').tokenize(text_token)
                 #unitagger.tag(text_token)
                 #britagger.tag(text_token)
-                ### pywikibot.output( unicode(text_token) )
+                ### pywikibot.output(unicode(text_token))
         else:
-            pywikibot.output( u'No data retrieved.')
+            pywikibot.output(u'No data retrieved.')
         socket.setdefaulttimeout(200)
-        urltitle = urltitle.replace(u'|',u':')
+        urltitle = urltitle.replace(u'|', u':')
         return urltitle.strip()
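For reference, the fetch path above follows the stdlib robotparser protocol: set the URL, read(), then gate the request on can_fetch(). One pre-existing quirk: the script passes the target page URL straight to set_url(), while the stdlib documents set_url() as taking the URL of the robots.txt file itself. The canonical sequence looks like this (Python 2 module name, matching the imports used here):

    import robotparser  # urllib.robotparser on Python 3

    rp = robotparser.RobotFileParser()
    rp.set_url('http://example.org/robots.txt')  # hypothetical site
    rp.read()
    if rp.can_fetch('*', 'http://example.org/some/page.html'):
        pass  # fetching the page is allowed for user-agent '*'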
-    def doConvertLinkTextToReference(self, refsequence, extlink_linkname, extlink_linktext):
-        """
-        Returns the text which is generated by converting a link to
+    def doConvertLinkTextToReference(self, refsequence, extlink_linkname,
+                                     extlink_linktext):
+        """ Returns the text which is generated by converting a link to
         a format suitable for the References section.
+
         """
         refname = u'refbot.%d' % refsequence
-        m = re.search("[\w]+://([\w].)*(?P<siteend>[\w.]+)[/\Z]", extlink_linkname)
+        m = re.search("[\w]+://([\w].)*(?P<siteend>[\w.]+)[/\Z]",
+                      extlink_linkname)
         if m:
-            refname = m.group('siteend') + u'.%d' % refsequence  # use end of site URL as reference name
-        new_text = u'# {{note|%s}} %s' % (refname, self.doConvertRefToCitation( extlink_linktext, extlink_linkname, refname ) ) + '\n'
+            # use end of site URL as reference name
+            refname = m.group('siteend') + u'.%d' % refsequence
+        new_text = u'# {{note|%s}} %s' % (refname,
+                                          self.doConvertRefToCitation(
+                                              extlink_linktext,
+                                              extlink_linkname,
+                                              refname)) + '\n'
         return (refname, new_text)
-    def doConvertRefToCitation(self, extlink_linktext, extlink_linkname, refname ):
-        """
-        Returns text with a citation created from link information
-        """
+    def doConvertRefToCitation(self, extlink_linktext, extlink_linkname,
+                               refname):
+        """ Returns text with a citation created from link information """
         new_text = u''
         now = date.today()
-        if extlink_linktext == None or len(extlink_linktext.strip()) < 20:
-            pywikibot.output( u'Fetching URL: %s' % unicode(extlink_linkname) )
-            urltitle = self.doGetTitleFromURL( extlink_linkname )  # try to get title from URL
-            if urltitle == None or urltitle == '':
+        if not extlink_linktext or len(extlink_linktext.strip()) < 20:
+            pywikibot.output(u'Fetching URL: %s' % unicode(extlink_linkname))
+            urltitle = self.doGetTitleFromURL(extlink_linkname)  # try to get title from URL
+            if not urltitle:
                 urltitle = extlink_linkname
-            pywikibot.output( u'Title is: %s' % urltitle )
+            pywikibot.output(u'Title is: %s' % urltitle)
             extlink_linktext = urltitle
         for newref in self.references:  # scan through all references
             if extlink_linkname in newref:  # if undescribed linkname same as a previous entry
@@ -773,27 +867,29 @@
                 else:
                     extlink_linktext = extlink_linkname + ' (See above)'
                 break  # found a matching previous linkname so stop looking
-        if extlink_linktext == None or len(extlink_linktext) < 20:
+        if not extlink_linktext or len(extlink_linktext) < 20:
             exlink_linktext = urltitle
         # Look for a news web site
         for (sitename, newscompany, stripprefix) in newssites:
-            if refname.startswith( sitename ):
+            if refname.startswith(sitename):
                 # If there is a prefix to strip from the title
                 if stripprefix and extlink_linktext.startswith(stripprefix):
                     extlink_linktext = extlink_linktext[len(stripprefix):]
-                new_text = u'{{news reference | title=%s | url=%s | urldate=%s | org=%s }}' % ( extlink_linktext, extlink_linkname, now.isoformat(), newscompany ) + '\n'
+                new_text = u'{{news reference | title=%s | url=%s | urldate=%s | org=%s }}' % (extlink_linktext, extlink_linkname, now.isoformat(), newscompany) + '\n'
                 break
         else:  # else no special site found
-            new_text = u'{{web reference | title=%s | url=%s | date=%s }}' % ( extlink_linktext, extlink_linkname, now.isoformat() )
+            new_text = u'{{web reference | title=%s | url=%s | date=%s }}' % (extlink_linktext, extlink_linkname, now.isoformat())
         return (new_text)
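One pre-existing oddity survives the cleanup: in the len(extlink_linktext) < 20 branch, the assignment targets exlink_linktext (note the missing "t"), which binds a throwaway local instead of updating the link text, so the fallback to urltitle never takes effect. Presumably the intent was:

    if not extlink_linktext or len(extlink_linktext) < 20:
        extlink_linktext = urltitle  # the body above writes 'exlink_linktext'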
     def doConvertDOIToReference(self, refsequence, doi_linktext):
         """
         Returns the text which is generated by converting a DOI reference to
         a format suitable for the Notes section.
+
         """
         # TODO: look up DOI info and create full reference
-        urltitle = self.doGetTitleFromURL('http://dx.doi.org/' + doi_linktext )  # try to get title from URL
+        # try to get title from URL
+        urltitle = self.doGetTitleFromURL('http://dx.doi.org/' + doi_linktext)
         refname = 'refbot%d' % refsequence
         if urltitle:
             new_text = '# {{note|%s}} %s {{doi|%s}}\n' \
@@ -806,6 +902,7 @@
     def doBuildSequenceListOfReferences(self, original_text):
         """
         Returns a list with all found references and sequence numbers.
+
         """
         duplicatefound = False
         refusage = {}
@@ -813,18 +910,20 @@
         for text_line in original_text.splitlines(True):  # Scan all text line by line
             # Check for various references
             Rtext_line = re.compile(r'(?i){{(?P<reftype>ref|ref_num|ref_label)\|(?P<refname>[^}|]+?)}}')
-            m = Rtext_line.search( text_line )
+            m = Rtext_line.search(text_line)
             while m:  # if found a reference
-                if m.group('reftype').lower() in ('ref', 'ref_num', 'ref_label'):  # confirm ref
+                if m.group('reftype').lower() in ('ref', 'ref_num', 'ref_label'):
                     refkey = m.group('refname').strip()
                     if refkey != '':
                         if refkey in refusage:
-                            refusage[refkey][1] += 1  # duplicate use of reference
+                            # duplicate use of reference
+                            refusage[refkey][1] += 1
                             duplicatefound = True
                         else:
-                            refusage[refkey] = [len(refusage),0,0]  # remember this reference
-                m = Rtext_line.search( text_line, m.end() )
-        pywikibot.output( u'Number of refs: %d' % (len(refusage)) )
+                            # remember this reference
+                            refusage[refkey] = [len(refusage), 0, 0]
+                m = Rtext_line.search(text_line, m.end())
+        pywikibot.output(u'Number of refs: %d' % (len(refusage)))
         return (duplicatefound, refusage)
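A tiny worked illustration of the refusage structure this method builds, using hypothetical reference keys, for text that cites {{ref|smith}}, then {{ref|jones}}, then {{ref|smith}} again:

    refusage = {}
    duplicatefound = False
    for refkey in ['smith', 'jones', 'smith']:  # refs in document order
        if refkey in refusage:
            refusage[refkey][1] += 1            # duplicate use of reference
            duplicatefound = True
        else:
            refusage[refkey] = [len(refusage), 0, 0]
    # refusage == {'smith': [0, 1, 0], 'jones': [1, 0, 0]}
    # duplicatefound is True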
     def doReadReferencesSection(self, original_text, refsectionname):
@@ -832,180 +931,212 @@
         Returns the text which is generated by reading the Notes section.
         Also appends references to self.references.
         Contents of all Notes sections will be read.
+
         """
         # TODO: support subsections within Notes
         new_text = ''
         intargetsection = False
         for text_line in original_text.splitlines(True):
             # Check for target section
-            m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
-            if m:  # if in a section, check if Notes section
-                if refsectionname != '':  # if a certain section name has been identified
+            m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+            if m:  # if in a section, check if Notes section
+                # if a certain section name has been identified
+                if refsectionname != '':
                     m_section = m.group('sectionname')
                     pywikibot.output(u'Looking for "%s": "%s"'
-                                     % (refsectionname,m_section) )
+                                     % (refsectionname, m_section))
                     if unicode(m_section.strip()) == unicode(refsectionname):
                         pywikibot.output(u'Read Ref section.')
                         intargetsection = True
-                        new_text = new_text + text_line
+                        new_text += text_line
                     else:
                         intargetsection = False
-                else:  # else grab all possible sections
-                    if m.group('sectionname').lower().strip() in referencesectionnames:
+                else:  # else grab all possible sections
+                    if m.group('sectionname').lower().strip() in \
+                       referencesectionnames:
                         intargetsection = True
-                        new_text = new_text + text_line
+                        new_text += text_line
                     else:
                         intargetsection = False
             else:
-                if intargetsection:  # if inside target section, remember this reference line
-                    if text_line.strip() != '':
-                        if text_line.lstrip()[0] in u'[{':  # if line starts with non-Ref WikiSyntax
-                            intargetsection = False  # flag as not being in section
+                # if inside target section, remember this reference line
+                if intargetsection:
+                    if text_line.strip():
+                        if text_line.lstrip()[0] in u'[{':  # if line starts with non-Ref WikiSyntax
+                            intargetsection = False
                         # TODO: need better way to handle special cases at end of refs
                         if text_line.strip() == u'<!--READ ME!! PLEASE DO NOT JUST ADD NEW NOTES AT THE BOTTOM. See the instructions above on ordering. -->':
                             # This line ends some Notes sections
-                            intargetsection = False  # flag as not being in section
-                        if text_line.strip() == u'</div>':  # This line ends some Notes sections
-                            intargetsection = False  # flag as not being in section
+                            intargetsection = False
+                        if text_line.strip() == u'</div>':  # This line ends some Notes sections
+                            intargetsection = False
                     if intargetsection:  # if still inside target section
                         # Convert any # wiki list to *; will be converted later if a reference
                         if text_line[0] == '#':
                             text_line = '*' + text_line[1:]
                         self.references.append(text_line.rstrip() + u'\n')
-                        new_text = new_text + text_line.rstrip() + u'\n'
+                        new_text += text_line.rstrip() + u'\n'
         return new_text
     def doReorderReferences(self, references, refusage):
         """
         Returns the new references list after reordering to match refusage list
         Non-references are moved to top, unused references to bottom.
+
         """
         # TODO: add tests for duplicate references/Ibid handling.
         newreferences = references
-        if references != [] and refusage != {}:
+        if references and refusage:
             newreferences = []
-            for i in range(len(references)):  # move nonrefs to top of list
+            for i in xrange(len(references)):  # move nonrefs to top of list
                 text_line = references[i]
                 # TODO: compile search?
-                m = re.search(r'(?i)[*#][\s]*{{(?P<reftype>note)\|(?P<refname>[^}|]+?)}}', text_line)
+                m = re.search(
+                    r'(?i)[*#][\s]*{{(?P<reftype>note)\|(?P<refname>[^}|]+?)}}',
+                    text_line)
                 # Special test to ignore Footnote instructions comment.
                 text_line_stripped = text_line.strip()
-                if text_line_stripped.startswith(u'4) Add ') or not m:  # if no ref found
-                    newreferences.append(text_line)  # add nonref to new list
+                # if no ref found
+                if text_line_stripped.startswith(u'4) Add ') or not m:
+                    newreferences.append(text_line)  # add nonref to new list
                     references[i] = None
             refsort = {}
-            for refkey in refusage.keys():  # build list of keys in document order
-                refsort[ refusage[refkey][0] ] = refkey  # refsort contains reference key names
-            alphabet26 = u'abcdefghijklmnopqrstuvwxyz'
-            for i in range(len(refsort)):  # collect references in document order
-                for search_num in range(len(references)):  # find desired entry
+            # build list of keys in document order
+            for refkey in refusage.keys():
+                # refsort contains reference key names
+                refsort[refusage[refkey][0]] = refkey
+            # collect references in document order
+            for i in xrange(len(refsort)):
+                for search_num in range(len(references)):  # find desired entry
                     search_line = references[search_num]
                     if search_line:
                         # TODO: compile search?
-                        # Note that the expression finds all neighboring note|note_label expressions.
+                        # Note that the expression finds all neighboring
+                        # note|note_label expressions.
+                        m2 = re.search(
+                            r'(?i)[*#]([\s]*{{(?P<reftype>note|note_label)\|(?P<refname>[^}|]+?)}})+',
+                            search_line)
                         if m2:
                             refkey = m2.group('refname').strip()
-                            if refkey == refsort[i]:  # if expected ref found
+                            if refkey == refsort[i]:  # if expected ref found
                                 # Rewrite references
-                                note_text = '# {{note|%s}}' % refkey  # rewrite note tag
-                                if refusage[refkey][1] > 1:  # if more than one reference to citation
-                                    for n in range(refusage[refkey][1]):  # loop through all repetitions
-                                        note_text = note_text + '{{note_label|%s|%d|%s}}' % (refkey,(refusage[refkey][0])+1,alphabet26[n%26])
-                                search_line=search_line[:m2.start(0)] + note_text + search_line[m2.end(0):]
-                                newreferences.append(search_line)  # found, add entry
-                                del references[search_num]  # delete used reference
-                                break  # stop the search loop after entry found
-            newreferences = newreferences + references  # append any unused references
+                                note_text = '# {{note|%s}}' % refkey
+                                # if more than one reference to citation
+                                if refusage[refkey][1] > 1:
+                                    # loop through all repetitions
+                                    for n in xrange(refusage[refkey][1]):
+                                        note_text += (
+                                            '{{note_label|%s|%d|%s}}'
+                                            % (refkey,
+                                               (refusage[refkey][0]) + 1,
+                                               string.ascii_lowercase[n % 26]))
+                                search_line = search_line[:m2.start(0)] + \
+                                              note_text + \
+                                              search_line[m2.end(0):]
+                                newreferences.append(search_line)
+                                # delete used reference
+                                del references[search_num]
+                                break  # stop the search loop after entry found
+            newreferences += references  # append any unused references
         return newreferences
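Worth noting about the loop above: del references[search_num] inside a range(len(references)) scan is safe here only because the loop breaks immediately after the deletion; the first pass instead nulls entries (references[i] = None), which keeps indices stable. A minimal sketch of the two patterns:

    items = ['keep', 'drop', 'keep']
    # pattern 1: null out, indices stay valid for later scans
    items[0] = None
    # pattern 2: delete, but stop scanning right away, as the code above does
    for i in range(len(items)):
        if items[i] == 'drop':
            del items[i]  # indices after i have now shifted...
            break         # ...so do not keep iterating over them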
-    def doUpdateReferencesSection(self, original_text, refusage, refsectionname):
+    def doUpdateReferencesSection(self, original_text, refusage,
+                                  refsectionname):
         """
         Returns the text which is generated by rebuilding the Notes section.
         Rewrite Notes section from references list.
+
         """
         new_text = ''
         intargetsection = False
-        for text_line in original_text.splitlines(True):  # Scan all text line by line
+        # Scan all text line by line
+        for text_line in original_text.splitlines(True):
             # Check for target section
-            m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
-            if m:  # if in a section, check if Notes section
-                if refsectionname != '':  # if a certain section name has been identified
+            m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+            if m:
+                if refsectionname != '':
                     m_section = m.group('sectionname')
-                    pywikibot.output( u'Looking for "%s": "%s"' % (refsectionname,m_section) )
+                    pywikibot.output(u'Looking for "%s": "%s"'
+                                     % (refsectionname, m_section))
                     if unicode(m_section.strip()) == unicode(refsectionname):
-                        pywikibot.output( u'Updating Ref section.')
-                        intargetsection = True  # flag as being in section
+                        pywikibot.output(u'Updating Ref section.')
+                        intargetsection = True
                     else:
-                        intargetsection = False  # flag as not being in section
-                else:  # else grab all possible sections
-                    if m.group('sectionname').lower().strip() in referencesectionnames:
-                        intargetsection = True  # flag as being in section
-                    else:
-                        intargetsection = False  # flag as not being in section
+                        intargetsection = False
+                else:  # else grab all possible sections
+                    intargetsection = (m.group('sectionname').lower().strip()
+                                       in referencesectionnames)
                 if intargetsection:
-                    new_text = new_text + text_line  # append new line to new text
+                    new_text += text_line
                     if self.references != []:
-                        for newref in self.references:  # scan through all references
-                            if newref != None:
-                                new_text = new_text + newref.rstrip() + u'\n'  # insert references
-                        new_text = new_text + u'\n'  # one trailing blank line
-                        self.references = []  # empty references
+                        # scan through all references
+                        for newref in self.references:
+                            if newref:
+                                # insert references
+                                new_text += newref.rstrip() + u'\n'
+                        new_text += u'\n'
+                        self.references = []
                 else:
-                    new_text = new_text + text_line  # copy section headline
+                    new_text += text_line  # copy section headline
             else:
                 if intargetsection:
-                    if text_line.strip() != '':
-                        if text_line.lstrip()[0] in u'[{':  # if line starts with non-Ref WikiSyntax
-                            intargetsection = False  # flag as not being in section
+                    if text_line.strip():
+                        # if line starts with non-Ref WikiSyntax
+                        if text_line.lstrip()[0] in u'[{':
+                            # flag as not being in section
+                            intargetsection = False
                         # TODO: need better way to handle special cases at end of refs
-                        if text_line.strip() == u'<!--READ ME!! PLEASE DO NOT JUST ADD NEW NOTES AT THE BOTTOM. See the instructions above on ordering. -->':  # This line ends some Notes sections
-                            intargetsection = False  # flag as not being in section
-                        if text_line.strip() == u'</div>':  # This line ends some Notes sections
-                            intargetsection = False  # flag as not being in section
-                    if not intargetsection:  # if not in Notes section, remember line
-                        new_text = new_text + text_line  # append new line to new text
+                        if text_line.strip() == u'<!--READ ME!! PLEASE DO NOT JUST ADD NEW NOTES AT THE BOTTOM. See the instructions above on ordering. -->':
+                            intargetsection = False
+                        if text_line.strip() == u'</div>':
+                            intargetsection = False
+                    if not intargetsection:
+                        new_text += text_line
         # If references list not emptied, there was no Notes section found
-        if self.references != []:
+        if self.references:
             # New Notes section needs to be created at bottom.
-            text_line_counter = 0  # current line
-            last_text_line_counter_value = 0  # number of last line of possible text
-            for text_line in original_text.splitlines(True):  # Search for last normal text line
-                text_line_counter += 1  # count this line
-                if text_line.strip() != '':
-                    if text_line.lstrip()[0].isalnum():  # if line starts with alphanumeric
-                        last_text_line_counter = text_line_counter  # number of last line of possible text
+            text_line_counter = 0
+            # number of last line of possible text
+            last_text_line_counter_value = 0
+            # Search for last normal text line
+            for text_line in original_text.splitlines(True):
+                text_line_counter += 1
+                if text_line.strip():
+                    if text_line.lstrip()[0].isalnum():
+                        # number of last line of possible text
+                        last_text_line_counter = text_line_counter
                     else:
-                        if text_line.lstrip()[0] in u'<=!|*#':  # if line starts with recognized wiki char
-                            if not text_line.startswith(u'<!--'):  # if line not start with a comment
-                                last_text_line_counter = text_line_counter  # number of last line of possible content
-            new_text = ''  # erase previous new_text
-            text_line_counter = 0  # current line
-            for text_line in original_text.splitlines(True):  # Search for last normal text line
-                text_line_counter += 1  # count this line
-                if last_text_line_counter == text_line_counter:  # if found insertion point
-                    new_text = new_text + text_line  # append new line to new text
-                    new_text = new_text + '\n== Notes ==\n'  # set to standard name
-                    new_text = new_text + u'{{subst:Footnote3text}}\n'
-                    if self.references != []:
-                        for newref in self.references:  # scan through all references
+                        # if line starts with recognized wiki char
+                        if text_line.lstrip()[0] in u'<=!|*#':
+                            if not text_line.startswith(u'<!--'):
+                                # number of last line of possible content
+                                last_text_line_counter = text_line_counter
+            new_text = ''
+            text_line_counter = 0
+            # Search for last normal text line
+            for text_line in original_text.splitlines(True):
+                text_line_counter += 1
+                # if found insertion point
+                if last_text_line_counter == text_line_counter:
+                    new_text += text_line
+                    new_text += '\n== Notes ==\n'  # set to standard name
+                    new_text += u'{{subst:Footnote3text}}\n'
+                    if self.references:
+                        for newref in self.references:
                             if newref is not None:
-                                new_text = new_text + newref  # insert references
-                        new_text = new_text + u'\n'  # one trailing blank line
-                        self.references = []  # empty references
+                                new_text += newref
+                        new_text += u'\n'
+                        self.references = []
                 else:
-                    new_text = new_text + text_line  # append new line to new text
-        if new_text == '':
-            new_text = original_text  # If somehow no new text, return original text
+                    new_text += text_line
+        if not new_text:
+            new_text = original_text
         return new_text
     def run(self):
-        """
-        Starts the robot.
-        """
+        """ Starts the robot. """
         # Run the generator which will yield Pages to pages which might need to be
         # changed.
         for pl in self.generator:
-            print ''
             try:
                 # Load the page's text from the wiki
                 original_text = pl.get()
@@ -1038,6 +1169,7 @@
             if self.acceptall or choice in ['y', 'Y']:
                 pl.put(new_text, self.summary)
+
 def main():
     # How we want to retrieve information on which pages need to be changed.
     # Can either be 'sqldump', 'textfile' or 'userinput'.
@@ -1045,15 +1177,16 @@
     # Array which will collect commandline parameters.
     # First element is original text, second element is replacement text.
     commandline_replacements = []
-    # A dictionary where keys are original texts and values are replacement texts.
+    # A dictionary where keys are original texts and values are replacement
+    # texts.
     replacements = {}
     # Don't edit pages which contain certain texts.
     exceptions = []
     # Should the elements of 'replacements' and 'exceptions' be interpreted
     # as regular expressions?
     regex = False
-    # the dump's path, either absolute or relative, which will be used when source
-    # is 'sqldump'.
+    # the dump's path, either absolute or relative, which will be used when
+    # source is 'sqldump'.
     sqlfilename = None
     # the textfile's path, either absolute or relative, which will be used when
     # source is 'textfile'.
     textfilename = None
@@ -1062,8 +1195,8 @@
     categoryname = None
     # a list of pages which will be used when source is 'userinput'.
     pagenames = []
-    # will become True when the user presses a ('yes to all') or uses the -always
-    # commandline paramater.
+    # will become True when the user presses a ('yes to all') or uses the
+    # -always commandline paramater.
     acceptall = False
     # Which namespace should be processed when using a SQL dump
     # default to -1 which means all namespaces will be processed
@@ -1120,20 +1253,26 @@
         else:
             commandline_replacements.append(arg)
-    if source == None or len(commandline_replacements) not in [0, 2]:
-        # syntax error, show help text from the top of this file
-        pywikibot.output(__doc__, 'utf-8')
+    if not (source and len(commandline_replacements) in (0, 2)):
+        # show help text from the top of this file
+        pywikibot.showHelp()
         return
-    if (len(commandline_replacements) == 2):
+
+    if len(commandline_replacements) == 2:
         replacements[commandline_replacements[0]] = commandline_replacements[1]
-        editSummary = pywikibot.translate(pywikibot.getSite(), msg) % ' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')'
+        editSummary = pywikibot.translate(pywikibot.getSite(), msg,
+                                          ' (-' + commandline_replacements[0] +
+                                          ' +' + commandline_replacements[1] +
+                                          ')')
     else:
         change = ''
-        default_summary_message = pywikibot.translate(pywikibot.getSite(), msg) % change
+        default_summary_message = pywikibot.translate(pywikibot.getSite(), msg,
+                                                      change)
         pywikibot.output(u'The summary message will default to: %s'
                          % default_summary_message)
         summary_message = pywikibot.input(
-            u'Press Enter to use this default message, or enter a description of the changes your bot will make:')
+            u'Press Enter to use this default message, or enter a\n'
+            u'description of the changes your bot will make:')
         if summary_message == '':
             summary_message = default_summary_message
         editSummary = summary_message
@@ -1156,7 +1295,7 @@
     gen = ReplacePageGenerator(source, replacements, exceptions, regex,
                                namespace, textfilename, sqlfilename,
                                categoryname, pagenames)
-    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
+    preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20)
     bot = ReplaceRobot(preloadingGen, replacements, refsequence, references,
                        refusage, exceptions, regex, acceptall, editSummary)
     bot.run()