jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/462283 )
Change subject: [cleanup] cleanup scripts/weblinkchecker.py ......................................................................
[cleanup] cleanup scripts/weblinkchecker.py
- remove leading "u" prefixes from strings - use single quotes for string literals - use str.format(...) instead of modulo for changed lines.
Change-Id: I8f7922e60ae95bf316ea323df5f8ce792f30d76e --- M scripts/weblinkchecker.py 1 file changed, 67 insertions(+), 63 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index a46fe34..6923a6f 100755 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -355,7 +355,7 @@ if not self.serverEncoding: try: pywikibot.output( - u'Contacting server %s to find out its default encoding...' + 'Contacting server %s to find out its default encoding...' % self.host) conn = self.getConnection() conn.request('HEAD', '/', None, self.header) @@ -366,7 +366,7 @@ if not self.serverEncoding: # TODO: We might also load a page, then check for an encoding # definition in a HTML meta tag. - pywikibot.output('Error retrieving server's default charset. ' + pywikibot.output("Error retrieving server's default charset. " 'Using ISO 8859-1.') # most browsers use ISO 8859-1 (Latin-1) as the default. self.serverEncoding = 'iso8859-1' @@ -447,8 +447,9 @@ self.changeUrl(redirTarget) return True elif redirTarget.startswith('/'): - self.changeUrl(u'%s://%s%s' - % (self.protocol, self.host, redirTarget)) + self.changeUrl('{0}://{1}{2}' + .format(self.protocol, self.host, + redirTarget)) return True else: # redirect to relative position # cut off filename @@ -462,9 +463,9 @@ # change /foo/bar/ to /foo/ directory = directory[:-1] directory = directory[:directory.rindex('/') + 1] - self.changeUrl('%s://%s%s%s' - % (self.protocol, self.host, directory, - redirTarget)) + self.changeUrl('{0}://{1}{2}{3}' + .format(self.protocol, self.host, directory, + redirTarget)) return True else: return False # not a redirect @@ -478,10 +479,10 @@ try: wasRedirected = self.resolveRedirect(useHEAD=useHEAD) except UnicodeError as error: - return False, u'Encoding Error: %s (%s)' % ( + return False, 'Encoding Error: {0} ({1})'.format( error.__class__.__name__, error) except httplib.error as error: - return False, u'HTTP Error: %s' % error.__class__.__name__ + return False, 'HTTP Error: {}'.format(error.__class__.__name__) except socket.error as error: # https://docs.python.org/2/library/socket.html : # 
socket.error : @@ -500,7 +501,7 @@ # TODO: decode msg. On Linux, it's encoded in UTF-8. # How is it encoded in Windows? Or can we somehow just # get the English message? - return False, u'Socket Error: %s' % repr(msg) + return False, 'Socket Error: {}'.format(repr(msg)) if wasRedirected: if self.url in self.redirectChain: if useHEAD: @@ -514,10 +515,11 @@ HTTPignore=self.HTTPignore) return redirChecker.check(useHEAD=False) else: - urlList = ['[%s]' % url + urlList = ['[{0}]'.format(url) for url in self.redirectChain + [self.url]] return (False, - u'HTTP Redirect Loop: %s' % ' -> '.join(urlList)) + 'HTTP Redirect Loop: {0}'.format( + ' -> '.join(urlList))) elif len(self.redirectChain) >= 19: if useHEAD: # Some servers don't seem to handle HEAD requests properly, @@ -530,11 +532,11 @@ HTTPignore=self.HTTPignore) return redirChecker.check(useHEAD=False) else: - urlList = ['[%s]' % url + urlList = ['[{0}]'.format(url) for url in self.redirectChain + [self.url]] return (False, - u'Long Chain of Redirects: %s' - % ' -> '.join(urlList)) + 'Long Chain of Redirects: {0}' + .format(' -> '.join(urlList))) else: redirChecker = LinkChecker(self.url, self.redirectChain, self.serverEncoding, @@ -544,24 +546,25 @@ try: conn = self.getConnection() except httplib.error as error: - return False, u'HTTP Error: %s' % error.__class__.__name__ + return False, 'HTTP Error: {0}'.format( + error.__class__.__name__) try: - conn.request('GET', '%s%s' - % (self.path, self.query), None, self.header) + conn.request('GET', '{0}{1}'.format(self.path, self.query), + None, self.header) except socket.error as error: - return False, u'Socket Error: %s' % repr(error[1]) + return False, 'Socket Error: {0}'.format(repr(error[1])) try: self.response = conn.getresponse() except Exception as error: - return False, u'Error: %s' % error + return False, 'Error: {0}'.format(error) # read the server's encoding, in case we need it later self.readEncodingFromResponse(self.response) # site down if the server status 
is between 400 and 499 alive = self.response.status not in range(400, 500) if self.response.status in self.HTTPignore: alive = False - return alive, '%s %s' % (self.response.status, - self.response.reason) + return alive, '{0} {1}'.format(self.response.status, + self.response.reason)
class LinkCheckThread(threading.Thread): @@ -586,8 +589,8 @@ 'Connection': 'keep-alive', } # identification for debugging purposes - self.setName((u'%s - %s' % (page.title(), url)).encode('utf-8', - 'replace')) + self.setName(('{0} - {1}'.format(page.title(), + url().encode('utf-8', 'replace')))) self.HTTPignore = HTTPignore self._use_fake_user_agent = config.fake_user_agent_default.get( 'weblinkchecker', False) @@ -606,8 +609,8 @@ 'weblinkchecker-badurl_msg', {'URL': self.url}) except Exception: - pywikibot.output('Exception while processing URL %s in page %s' - % (self.url, self.page.title())) + pywikibot.output('Exception while processing URL {0} in page {1}' + .format(self.url, self.page.title())) raise if (r.status == requests.codes.ok and str(r.status) not in self.HTTPignore): @@ -616,11 +619,11 @@ message = '{0}'.format(r.status) if ok: if self.history.setLinkAlive(self.url): - pywikibot.output('*Link to %s in [[%s]] is back alive.' - % (self.url, self.page.title())) + pywikibot.output('*Link to {0} in [[{1}]] is back alive.' + .format(self.url, self.page.title())) else: - pywikibot.output('*[[%s]] links to %s - %s.' - % (self.page.title(), self.url, message)) + pywikibot.output('*[[{0}]] links to {1} - {2}.' + .format(self.page.title(), self.url, message)) self.history.setLinkDead(self.url, message, self.page, config.weblink_dead_days)
@@ -658,8 +661,8 @@ self.site = site self.semaphore = threading.Semaphore() self.datfilename = pywikibot.config.datafilepath( - 'deadlinks', 'deadlinks-%s-%s.dat' % (self.site.family.name, - self.site.code)) + 'deadlinks', 'deadlinks-{0}-{1}.dat'.format(self.site.family.name, + self.site.code)) # Count the number of logged links, so that we can insert captions # from time to time self.logCount = 0 @@ -673,19 +676,20 @@ def log(self, url, error, containingPage, archiveURL): """Log an error report to a text file in the deadlinks subdirectory.""" if archiveURL: - errorReport = u'* %s ([%s archive])\n' % (url, archiveURL) + errorReport = '* {0} ([{1} archive])\n'.format(url, archiveURL) else: - errorReport = u'* %s\n' % url + errorReport = '* {0}\n'.format(url) for (pageTitle, date, error) in self.historyDict[url]: # ISO 8601 formulation isoDate = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(date)) - errorReport += "** In [[%s]] on %s, %s\n" % (pageTitle, isoDate, - error) - pywikibot.output(u"** Logging link for deletion.") + errorReport += '** In [[{0}]] on {1}, {2}\n'.format( + pageTitle, isoDate, error) + pywikibot.output('** Logging link for deletion.') txtfilename = pywikibot.config.datafilepath('deadlinks', - 'results-%s-%s.txt' - % (self.site.family.name, - self.site.lang)) + 'results-{0}-{1}.txt' + .format( + self.site.family.name, + self.site.lang)) with codecs.open(txtfilename, 'a', 'utf-8') as txtfile: self.logCount += 1 if self.logCount % 30 == 0: @@ -832,21 +836,20 @@ count = '' # Check if there is already such a caption on # the talk page. 
- while re.search('= *%s%s *=' % (caption, count), + while re.search('= *{0}{1} *='.format(caption, count), content) is not None: i += 1 count = ' ' + str(i) caption += count - content += '== %s ==\n\n%s\n\n%s%s\n--~~~~' % \ - (caption, - i18n.twtranslate(containingPage.site, - 'weblinkchecker-report'), - errorReport, - archiveMsg) - comment = '[[%s#%s|→]] %s' % \ - (talkPage.title(), caption, - i18n.twtranslate(containingPage.site, - 'weblinkchecker-summary')) + content += '== {0} ==\n\n{1}\n\n{2}{3}\n--~~~~'.format( + caption, i18n.twtranslate(containingPage.site, + 'weblinkchecker-report'), + errorReport, archiveMsg) + + comment = '[[{0}#{1}|→]] {2}'.format( + talkPage.title(), caption, + i18n.twtranslate(containingPage.site, + 'weblinkchecker-summary')) try: talkPage.put(content, comment) except pywikibot.SpamfilterError as error: @@ -870,7 +873,7 @@ generator=generator, site=site)
if config.report_dead_links_on_talk: - pywikibot.log("Starting talk page thread") + pywikibot.log('Starting talk page thread') reportThread = DeadLinkReportThread() # thread dies when program terminates # reportThread.setDaemon(True) @@ -960,7 +963,8 @@ HTTPignore = []
if isinstance(memento_client, ImportError): - warn('memento_client not imported: %s' % memento_client, ImportWarning) + warn('memento_client not imported: {0}'.format(memento_client), + ImportWarning)
# Process global args and prepare generator args parser local_args = pywikibot.handle_args(args) @@ -980,7 +984,7 @@ elif arg.startswith('-xmlstart'): if len(arg) == 9: xmlStart = pywikibot.input( - u'Please enter the dumped article to start with:') + 'Please enter the dumped article to start with:') else: xmlStart = arg[10:] elif arg.startswith('-xml'): @@ -1016,18 +1020,18 @@ # Don't wait longer than 30 seconds for threads to finish. while countLinkCheckThreads() > 0 and waitTime < 30: try: - pywikibot.output(u"Waiting for remaining %i threads to " - u"finish, please wait..." - % countLinkCheckThreads()) + pywikibot.output('Waiting for remaining {0} threads to ' + 'finish, please wait...' + .format(countLinkCheckThreads())) # wait 1 second time.sleep(1) waitTime += 1 except KeyboardInterrupt: - pywikibot.output(u'Interrupted.') + pywikibot.output('Interrupted.') break if countLinkCheckThreads() > 0: - pywikibot.output(u'Remaining %i threads will be killed.' - % countLinkCheckThreads()) + pywikibot.output('Remaining {0} threads will be killed.' + .format(countLinkCheckThreads())) # Threads will die automatically because they are daemonic. if bot.history.reportThread: bot.history.reportThread.shutdown() @@ -1037,9 +1041,9 @@ while bot.history.reportThread.isAlive(): time.sleep(0.1) except KeyboardInterrupt: - pywikibot.output(u'Report thread interrupted.') + pywikibot.output('Report thread interrupted.') bot.history.reportThread.kill() - pywikibot.output(u'Saving history...') + pywikibot.output('Saving history...') bot.history.save() return True else: @@ -1047,5 +1051,5 @@ return False
-if __name__ == "__main__": +if __name__ == '__main__': main()
pywikibot-commits@lists.wikimedia.org