jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/675844 )
Change subject: [IMPR] Improvements for interwiki.py ......................................................................
[IMPR] Improvements for interwiki.py
- simplify InterwikiBotConfig arg parsing for two items - [bugfix] exceptions must derive from BaseException - shorten comparing len - decrease nested flow statements - use defaultdict in interwiki_graph.Subject
Change-Id: Ie559eb9063b52acf4d531819a4474c908b7d032e --- M scripts/interwiki.py 1 file changed, 162 insertions(+), 156 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/interwiki.py b/scripts/interwiki.py index 8cac302..0096eab 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -516,12 +516,10 @@ self.parenthesesonly = True elif arg == 'localright': self.followinterwiki = False - elif arg == 'array': - if value.isdigit(): - self.minsubjects = int(value) - elif arg == 'query': - if value.isdigit(): - self.maxquerysize = int(value) + elif arg == 'array' and value.isdigit(): + self.minsubjects = int(value) + elif arg == 'query' and value.isdigit(): + self.maxquerysize = int(value) elif arg == 'back': self.nobackonly = True elif arg == 'async': @@ -811,8 +809,9 @@ # Bug-check: Isn't there any work still in progress? We can't work on # different sites at a time! if self.pending: - raise "BUG: Can't start to work on {}; still working on {}".format( - site, self.pending) + raise RuntimeError( + "BUG: Can't start to work on {}; still working on {}" + .format(site, self.pending)) # Prepare a list of suitable pages result = [] for page in self.todo.filter(site): @@ -1262,7 +1261,7 @@ break
if self.origin == page: - self.untranslated = (len(iw) == 0) + self.untranslated = not iw if self.conf.untranslatedonly: # Ignore the interwiki links. iw = () @@ -1307,13 +1306,12 @@
for link in iw: linkedPage = pywikibot.Page(link) - if self.conf.hintsareright: - if linkedPage.site in self.hintedsites: - pywikibot.output( - 'NOTE: {}: {} extra interwiki on hinted site ' - 'ignored {}' - .format(self.origin, page, linkedPage)) - break + if self.conf.hintsareright \ + and linkedPage.site in self.hintedsites: + pywikibot.output( + 'NOTE: {}: {} extra interwiki on hinted site ' + 'ignored {}'.format(self.origin, page, linkedPage)) + break
if not self.skipPage(page, linkedPage, counter): if self.conf.followinterwiki or page == self.origin: @@ -1372,7 +1370,7 @@ errorCount = self.problemfound # Build up a dictionary of all pages found, with the site as key. # Each value will be a list of pages. - new = {} + new = defaultdict(list) for page in self.done: if page.exists() and not page.isRedirectPage() \ and not page.isCategoryRedirect(): @@ -1382,16 +1380,13 @@
# inhibit the forwarding families pages to be updated. continue - if site == self.origin.site: - if page != self.origin: - self.problem('Found link to ' + page) - self.whereReport(page) - errorCount += 1 - else: - if site in new: - new[site].append(page) - else: - new[site] = [page] + + if site != self.origin.site: + new[site].append(page) + elif page != self.origin: + self.problem('Found link to ' + page) + self.whereReport(page) + errorCount += 1
# See if new{} contains any problematic values result = {} @@ -1519,10 +1514,10 @@ # processing # TODO: should be move to assemble() # replaceLinks will skip the site it's working on. - if self.origin.site not in new: # TODO: make this possible as well. - if not self.origin.site.family.interwiki_forward: - new[self.origin.site] = self.origin + if self.origin.site not in new \ + and not self.origin.site.family.interwiki_forward: + new[self.origin.site] = self.origin
updatedSites = [] notUpdatedSites = [] @@ -1607,21 +1602,23 @@
def replaceLinks(self, page, newPages): """Return True if saving was successful.""" - if self.conf.localonly: - # In this case only continue on the Page we started with - if page != self.origin: - raise SaveError('-localonly and page != origin') + # In this case only continue on the Page we started with + if self.conf.localonly and page != self.origin: + raise SaveError('-localonly and page != origin') + if page.section(): # This is not a page, but a subpage. Do not edit it. pywikibot.output('Not editing {}: not doing interwiki on subpages' .format(page)) raise SaveError('Link has a #section') + try: pagetext = page.get() except pywikibot.NoPage: pywikibot.output('Not editing {}: page does not exist' .format(page)) raise SaveError("Page doesn't exist") + if page_empty_check(page): pywikibot.output('Not editing {}: page is empty'.format(page)) raise SaveError('Page is empty.') @@ -1667,9 +1664,9 @@ # Do not add interwiki links to foreign families that page.site() does # not forward to for stmp in new.keys(): - if stmp.family != page.site.family: - if stmp.family.name != page.site.family.interwiki_forward: - del new[stmp] + if stmp.family != page.site.family \ + and stmp.family.name != page.site.family.interwiki_forward: + del new[stmp]
# Put interwiki links into a map old = {} @@ -1751,87 +1748,86 @@ ask = False if self.conf.confirm and not self.conf.always: ask = True - # If we need to ask, do so - if ask: - if self.conf.autonomous: - # If we cannot ask, deny permission - answer = 'n' - else: - answer = pywikibot.input_choice('Submit?', - [('Yes', 'y'), ('No', 'n'), - ('open in Browser', 'b'), - ('Give up', 'g'), - ('Always', 'a')], - automatic_quit=False) - if answer == 'b': - pywikibot.bot.open_webbrowser(page) - return True - if answer == 'a': - # don't ask for the rest of this subject - self.conf.always = True - answer = 'y' - else: + + if not ask: # If we do not need to ask, allow answer = 'y' - - # If we got permission to submit, do so - if answer == 'y': - self.conf.note('Updating live wiki...') - timeout = 60 - page.text = newtext - while True: - try: - page.save(summary=mcomment, - asynchronous=self.conf.asynchronous, - nocreate=True) - except pywikibot.NoCreateError: - pywikibot.exception() - return False - except pywikibot.LockedPage: - pywikibot.output('Page {} is locked. Skipping.' - .format(page)) - raise SaveError('Locked') - except pywikibot.EditConflict: - pywikibot.output( - 'ERROR putting page: An edit conflict occurred. ' - 'Giving up.') - raise SaveError('Edit conflict') - except pywikibot.SpamblacklistError as error: - pywikibot.output( - 'ERROR putting page: {0} blacklisted by spamfilter. ' - 'Giving up.'.format(error.url)) - raise SaveError('Spam filter') - except pywikibot.PageSaveRelatedError as error: - pywikibot.output('ERROR putting page: {}' - .format(error.args,)) - raise SaveError('PageSaveRelatedError') - except (socket.error, IOError) as error: - if timeout > 3600: - raise - pywikibot.output('ERROR putting page: {}' - .format(error.args,)) - pywikibot.output('Sleeping {} seconds before trying again.' 
- .format(timeout,)) - timeout *= 2 - pywikibot.sleep(timeout) - except pywikibot.ServerError: - if timeout > 3600: - raise - pywikibot.output('ERROR putting page: ServerError.') - pywikibot.output('Sleeping {} seconds before trying again.' - .format(timeout,)) - timeout *= 2 - pywikibot.sleep(timeout) - else: - break - return True + elif self.conf.autonomous: + # If we cannot ask, deny permission + answer = 'n' + else: # If we need to ask, do so + answer = pywikibot.input_choice('Submit?', + [('Yes', 'y'), ('No', 'n'), + ('open in Browser', 'b'), + ('Give up', 'g'), + ('Always', 'a')], + automatic_quit=False) + if answer == 'b': + pywikibot.bot.open_webbrowser(page) + return True + if answer == 'a': + # don't ask for the rest of this subject + self.conf.always = True + answer = 'y'
if answer == 'g': raise GiveUpOnPage('User asked us to give up')
- raise LinkMustBeRemoved('Found incorrect link to {} in {}' - .format(', '.join(x.code for x in removing), - page)) + # If we got permission to submit, do so + if answer != 'y': + raise LinkMustBeRemoved( + 'Found incorrect link to {} in {}' + .format(', '.join(x.code for x in removing), page)) + + self.conf.note('Updating live wiki...') + timeout = 60 + page.text = newtext + while True: + try: + page.save(summary=mcomment, + asynchronous=self.conf.asynchronous, + nocreate=True) + except pywikibot.NoCreateError: + pywikibot.exception() + return False + except pywikibot.LockedPage: + pywikibot.output('Page {} is locked. Skipping.' + .format(page)) + raise SaveError('Locked') + except pywikibot.EditConflict: + pywikibot.output( + 'ERROR putting page: An edit conflict occurred. ' + 'Giving up.') + raise SaveError('Edit conflict') + except pywikibot.SpamblacklistError as error: + pywikibot.output( + 'ERROR putting page: {0} blacklisted by spamfilter. ' + 'Giving up.'.format(error.url)) + raise SaveError('Spam filter') + except pywikibot.PageSaveRelatedError as error: + pywikibot.output('ERROR putting page: {}' + .format(error.args,)) + raise SaveError('PageSaveRelatedError') + except (socket.error, IOError) as error: + if timeout > 3600: + raise + pywikibot.output('ERROR putting page: {}' + .format(error.args,)) + pywikibot.output('Sleeping {} seconds before trying again.' + .format(timeout,)) + timeout *= 2 + pywikibot.sleep(timeout) + except pywikibot.ServerError: + if timeout > 3600: + raise + pywikibot.output('ERROR putting page: ServerError.') + pywikibot.output('Sleeping {} seconds before trying again.' + .format(timeout,)) + timeout *= 2 + pywikibot.sleep(timeout) + else: + break + return True
def reportBacklinks(self, new, updatedSites): """ @@ -1847,45 +1843,49 @@ try: for site in expectedSites - set(updatedSites): page = new[site] - if not page.section(): + if page.section(): + continue + + try: + linkedPages = {pywikibot.Page(link) + for link in page.iterlanglinks()} + except pywikibot.NoPage: + pywikibot.warning( + 'Page {} does no longer exist?!'.format(page)) + break + + # To speed things up, create a dictionary which maps sites + # to pages. This assumes that there is only one interwiki + # link per language. + linkedPagesDict = {} + for linkedPage in linkedPages: + linkedPagesDict[linkedPage.site] = linkedPage + for expectedPage in expectedPages - linkedPages: + if expectedPage == page: + continue try: - linkedPages = {pywikibot.Page(link) - for link in page.iterlanglinks()} - except pywikibot.NoPage: + linkedPage = linkedPagesDict[expectedPage.site] pywikibot.warning( - 'Page {} does no longer exist?!'.format(page)) - break - # To speed things up, create a dictionary which maps sites - # to pages. This assumes that there is only one interwiki - # link per language. - linkedPagesDict = {} - for linkedPage in linkedPages: - linkedPagesDict[linkedPage.site] = linkedPage - for expectedPage in expectedPages - linkedPages: - if expectedPage != page: - try: - linkedPage = linkedPagesDict[expectedPage.site] - pywikibot.warning( - '{}: {} does not link to {} but to {}' - .format(page.site.family.name, - page, expectedPage, linkedPage)) - except KeyError: - if not expectedPage.site.is_data_repository(): - pywikibot.warning( - '{}: {} does not link to {}' - .format(page.site.family.name, - page, expectedPage)) - # Check for superfluous links - for linkedPage in linkedPages: - if linkedPage not in expectedPages: - # Check whether there is an alternative page on - # that language. - # In this case, it was already reported above. 
- if linkedPage.site not in expectedSites: - pywikibot.warning( - '{}: {} links to incorrect {}' - .format(page.site.family.name, - page, linkedPage)) + '{}: {} does not link to {} but to {}' + .format(page.site.family.name, + page, expectedPage, linkedPage)) + except KeyError: + if not expectedPage.site.is_data_repository(): + pywikibot.warning('{}: {} does not link to {}' + .format(page.site.family.name, + page, expectedPage)) + # Check for superfluous links + for linkedPage in linkedPages: + if linkedPage in expectedPages: + continue + # Check whether there is an alternative page on + # that language. + # In this case, it was already reported above. + if linkedPage.site not in expectedSites: + pywikibot.warning('{}: {} links to incorrect {}' + .format(page.site.family.name, + page, linkedPage)) + except (socket.error, IOError): pywikibot.output('ERROR: could not report backlinks')
@@ -2006,6 +2006,7 @@ """Return the first subject that is still being worked on.""" if self.subjects: return self.subjects[0] + return None
def maxOpenSite(self): """ @@ -2014,7 +2015,7 @@ If there is nothing left, return None. Only languages that are TODO for the first Subject are returned. """ - max = 0 + max_ = 0 maxlang = None if not self.firstSubject(): return None @@ -2024,12 +2025,15 @@ # because we have to wait before submitting another modification to # go live. Select any language from counts. oc = self.counts - if pywikibot.Site() in oc: - return pywikibot.Site() + + default_site = pywikibot.Site() + if default_site in oc: + return default_site + for lang in oc: count = self.counts[lang] - if count > max: - max = count + if count > max_: + max_ = count maxlang = lang return maxlang
@@ -2091,9 +2095,11 @@ if len(pageGroup) >= self.conf.maxquerysize: # We have found enough pages to fill the bandwidth. break - if len(pageGroup) == 0: + + if not pageGroup: pywikibot.output('NOTE: Nothing left to do 2') return False + # Get the content of the assembled list in one blow gen = site.preloadpages(pageGroup, templates=True, langlinks=True, pageprops=True) @@ -2117,7 +2123,7 @@
 def isDone(self): """Check whether there is still more work to do.""" - return len(self) == 0 and self.pageGenerator is None + return not self and self.pageGenerator is None
def plus(self, site, count=1): """Helper routine that the Subject class expects in a counter."""
pywikibot-commits@lists.wikimedia.org