jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/756153 )
Change subject: [IMPR] move page checking code from batchLoaded to check_page ......................................................................
[IMPR] move page checking code from batchLoaded to check_page
- decrease nested frow statements - decrease cyclomatic complexity of batchLoaded method by 10 %
Change-Id: Ic51f2571cbffd47930df5bb50e6f0b8dbecefab6 --- M scripts/interwiki.py 1 file changed, 179 insertions(+), 181 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/interwiki.py b/scripts/interwiki.py index 1f222fe..edfb329 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -1098,6 +1098,184 @@ if self.conf.hintsareright: self.hintedsites.add(page.site)
+ def check_page(self, page, counter): + """Check whether any iw links should be added to the todo list.""" + if not page.exists(): + self.conf.remove.append(str(page)) + self.conf.note('{} does not exist. Skipping.'.format(page)) + if page == self.origin: + # The page we are working on is the page that does not + # exist. No use in doing any work on it in that case. + for site, count in self.todo.iter_values_len(): + counter.minus(site, count) + self.todo.clear() + # In some rare cases it might be we already did check some + # 'automatic' links + self.done.clear() + return + + if page.isRedirectPage(): + redirectTargetPage = page.getRedirectTarget() + redir = '' + elif page.isCategoryRedirect(): + redirectTargetPage = page.getCategoryRedirectTarget() + redir = 'category ' + else: + redir = None + + if redir is not None: + self.conf.note('{} is {}redirect to {}' + .format(page, redir, redirectTargetPage)) + if self.origin is None or page == self.origin: + # the 1st existig page becomes the origin page, if none was + # supplied + if self.conf.initialredirect: + # don't follow another redirect; it might be a self + # loop + if not redirectTargetPage.isRedirectPage() \ + and not redirectTargetPage.isCategoryRedirect(): + self.origin = redirectTargetPage + self.todo.append(redirectTargetPage) + counter.plus(redirectTargetPage.site) + else: + # This is a redirect page to the origin. We don't need + # to follow the redirection. + # In this case we can also stop all hints! + for site, count in self.todo.iter_values_len(): + counter.minus(site, count) + self.todo.clear() + elif not self.conf.followredirect: + self.conf.note('not following {}redirects.'.format(redir)) + elif page.isStaticRedirect(): + self.conf.note('not following static {}redirects.' + .format(redir)) + elif (page.site.family == redirectTargetPage.site.family + and not self.skipPage(page, redirectTargetPage, counter)): + if self.addIfNew(redirectTargetPage, counter, page): + if config.interwiki_shownew: + pywikibot.output('{}: {} gives new {}redirect {}' + .format(self.origin, page, redir, + redirectTargetPage)) + return + + # must be behind the page.isRedirectPage() part + # otherwise a redirect error would be raised + if page_empty_check(page): + self.conf.remove.append(str(page)) + self.conf.note('{} is empty. Skipping.'.format(page)) + if page == self.origin: + for site, count in self.todo.iter_values_len(): + counter.minus(site, count) + self.todo.clear() + self.done.clear() + self.origin = None + return + + if page.section(): + self.conf.note('{} is a page section. Skipping.'.format(page)) + return + + # Page exists, isn't a redirect, and is a plain link (no section) + if self.origin is None: + # the 1st existig page becomes the origin page, if none was + # supplied + self.origin = page + + try: + iw = page.langlinks() + except UnknownSiteError: + self.conf.note('site {} does not exist.'.format(page.site)) + return + + (skip, alternativePage) = self.disambigMismatch(page, counter) + if skip: + pywikibot.output('NOTE: ignoring {} and its interwiki links' + .format(page)) + self.done.remove(page) + iw = () + if alternativePage: + # add the page that was entered by the user + self.addIfNew(alternativePage, counter, None) + + duplicate = None + for p in self.done.filter(page.site): + if p != page and p.exists() \ + and not p.isRedirectPage() and not p.isCategoryRedirect(): + duplicate = p + break + + if self.origin == page: + self.untranslated = not iw + if self.conf.untranslatedonly: + # Ignore the interwiki links. + iw = () + if self.conf.lacklanguage: + if self.conf.lacklanguage in (link.site.lang for link in iw): + iw = () + self.workonme = False + if len(iw) < self.conf.minlinks: + iw = () + self.workonme = False + + elif self.conf.autonomous and duplicate and not skip: + pywikibot.output('Stopping work on {} because duplicate pages' + ' {} and {} are found' + .format(self.originP, duplicate, page)) + self.makeForcedStop(counter) + try: + with codecs.open( + pywikibot.config.datafilepath('autonomous_problems.dat'), + 'a', 'utf-8') as f: + f.write('* {} {{Found more than one link for {}}}' + .format(self.origin, page.site)) + if config.interwiki_graph and config.interwiki_graph_url: + filename = interwiki_graph.getFilename( + self.origin, + extension=config.interwiki_graph_formats[0]) + f.write(' [{}{} graph]' + .format(config.interwiki_graph_url, filename)) + f.write('\n') + # FIXME: What errors are we catching here? + except Exception: + pywikibot.output( + 'File autonomous_problems.dat open or corrupted! ' + 'Try again with -restore.') + sys.exit() + iw = () + + for link in iw: + linkedPage = pywikibot.Page(link) + if self.conf.hintsareright and linkedPage.site in self.hintedsites: + pywikibot.output( + 'NOTE: {}: {} extra interwiki on hinted site ignored {}' + .format(self.origin, page, linkedPage)) + break + + if not self.skipPage(page, linkedPage, counter): + if self.conf.followinterwiki or page == self.origin: + if self.addIfNew(linkedPage, counter, page): + # It is new. Also verify whether it is the second + # on the same site + lpsite = linkedPage.site + for prevPage in self.found_in: + if prevPage != linkedPage and \ + prevPage.site == lpsite: + # Still, this could be "no problem" as + # either may be a redirect to the other. + # No way to find out quickly! + pywikibot.output( + 'NOTE: {}: {} gives duplicate ' + 'interwiki on same site {}' + .format(self.origin, page, linkedPage)) + break + else: + if config.interwiki_shownew: + pywikibot.output( + '{}: {} gives new interwiki {}' + .format(self.origin, page, linkedPage)) + if self.forcedStop: + break + def batchLoaded(self, counter): """ Notify that the promised batch of pages was loaded. @@ -1136,187 +1314,7 @@
# Now check whether any interwiki links should be added to the # todo list. - - if not page.exists(): - self.conf.remove.append(str(page)) - self.conf.note('{} does not exist. Skipping.'.format(page)) - if page == self.origin: - # The page we are working on is the page that does not - # exist. No use in doing any work on it in that case. - for site, count in self.todo.iter_values_len(): - counter.minus(site, count) - self.todo.clear() - # In some rare cases it might be we already did check some - # 'automatic' links - self.done.clear() - continue - - if page.isRedirectPage() or page.isCategoryRedirect(): - if page.isRedirectPage(): - redirectTargetPage = page.getRedirectTarget() - redir = '' - else: - redirectTargetPage = page.getCategoryRedirectTarget() - redir = 'category ' - self.conf.note('{} is {}redirect to {}' - .format(page, redir, redirectTargetPage)) - if self.origin is None or page == self.origin: - # the 1st existig page becomes the origin page, if none was - # supplied - if self.conf.initialredirect: - # don't follow another redirect; it might be a self - # loop - if not redirectTargetPage.isRedirectPage() \ - and not redirectTargetPage.isCategoryRedirect(): - self.origin = redirectTargetPage - self.todo.append(redirectTargetPage) - counter.plus(redirectTargetPage.site) - else: - # This is a redirect page to the origin. We don't need - # to follow the redirection. - # In this case we can also stop all hints! - for site, count in self.todo.iter_values_len(): - counter.minus(site, count) - self.todo.clear() - elif not self.conf.followredirect: - self.conf.note('not following {}redirects.'.format(redir)) - elif page.isStaticRedirect(): - self.conf.note('not following static {}redirects.' - .format(redir)) - elif (page.site.family == redirectTargetPage.site.family - and not self.skipPage(page, redirectTargetPage, - counter)): - if self.addIfNew(redirectTargetPage, counter, page): - if config.interwiki_shownew: - pywikibot.output('{}: {} gives new {}redirect {}' - .format(self.origin, - page, redir, - redirectTargetPage)) - continue - - # must be behind the page.isRedirectPage() part - # otherwise a redirect error would be raised - if page_empty_check(page): - self.conf.remove.append(str(page)) - self.conf.note('{} is empty. Skipping.'.format(page)) - if page == self.origin: - for site, count in self.todo.iter_values_len(): - counter.minus(site, count) - self.todo.clear() - self.done.clear() - self.origin = None - continue - - if page.section(): - self.conf.note('{} is a page section. Skipping.'.format(page)) - continue - - # Page exists, isn't a redirect, and is a plain link (no section) - if self.origin is None: - # the 1st existig page becomes the origin page, if none was - # supplied - self.origin = page - - try: - iw = page.langlinks() - except UnknownSiteError: - self.conf.note('site {} does not exist.'.format(page.site)) - continue - - (skip, alternativePage) = self.disambigMismatch(page, counter) - if skip: - pywikibot.output('NOTE: ignoring {} and its interwiki links' - .format(page)) - self.done.remove(page) - iw = () - if alternativePage: - # add the page that was entered by the user - self.addIfNew(alternativePage, counter, None) - - duplicate = None - for p in self.done.filter(page.site): - if p != page and p.exists() and \ - not p.isRedirectPage() and not p.isCategoryRedirect(): - duplicate = p - break - - if self.origin == page: - self.untranslated = not iw - if self.conf.untranslatedonly: - # Ignore the interwiki links. - iw = () - if self.conf.lacklanguage: - if self.conf.lacklanguage in (link.site.lang - for link in iw): - iw = () - self.workonme = False - if len(iw) < self.conf.minlinks: - iw = () - self.workonme = False - - elif self.conf.autonomous and duplicate and not skip: - pywikibot.output('Stopping work on {} because duplicate pages' - ' {} and {} are found' - .format(self.originP, duplicate, page)) - self.makeForcedStop(counter) - try: - with codecs.open( - pywikibot.config.datafilepath( - 'autonomous_problems.dat'), - 'a', 'utf-8') as f: - f.write('* {} {{Found more than one link for {}}}' - .format(self.origin, page.site)) - if config.interwiki_graph \ - and config.interwiki_graph_url: - filename = interwiki_graph.getFilename( - self.origin, - extension=config.interwiki_graph_formats[0]) - f.write(' [{}{} graph]' - .format(config.interwiki_graph_url, - filename)) - f.write('\n') - # FIXME: What errors are we catching here? - except Exception: - # raise - pywikibot.output( - 'File autonomous_problems.dat open or corrupted! ' - 'Try again with -restore.') - sys.exit() - iw = () - - for link in iw: - linkedPage = pywikibot.Page(link) - if self.conf.hintsareright \ - and linkedPage.site in self.hintedsites: - pywikibot.output( - 'NOTE: {}: {} extra interwiki on hinted site ' - 'ignored {}'.format(self.origin, page, linkedPage)) - break - - if not self.skipPage(page, linkedPage, counter): - if self.conf.followinterwiki or page == self.origin: - if self.addIfNew(linkedPage, counter, page): - # It is new. Also verify whether it is the second - # on the same site - lpsite = linkedPage.site - for prevPage in self.found_in: - if prevPage != linkedPage and \ - prevPage.site == lpsite: - # Still, this could be "no problem" as - # either may be a redirect to the other. - # No way to find out quickly! - pywikibot.output( - 'NOTE: {}: {} gives duplicate ' - 'interwiki on same site {}' - .format(self.origin, page, linkedPage)) - break - else: - if config.interwiki_shownew: - pywikibot.output( - '{}: {} gives new interwiki {}' - .format(self.origin, page, linkedPage)) - if self.forcedStop: - break + self.check_page(page, counter)
# These pages are no longer 'in progress' self.pending.clear()
pywikibot-commits@lists.wikimedia.org