jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/299449 )
Change subject: solve_disambiguation.py: Detect redirects and resolve them
......................................................................
solve_disambiguation.py: Detect redirects and resolve them
1. The ReferringPageGeneratorWithIgnore class should also return pages that refer to a disambiguation page via a redirect.
2. The `treat` function should also treat those redirects. To achieve this, rename the old `treat` function to `treat_disamb_only` (with minor changes). The new `treat` function loops over the disambiguation page and all of its redirects and passes each of them to the old function.
3. The old `treat` function never returned False under any circumstances. Correct the documentation and the code accordingly.
Bug: T118777
Change-Id: I18df4fdf6aa137a6a99e3edb533870fdce622ca7
---
M scripts/solve_disambiguation.py
1 file changed, 45 insertions(+), 18 deletions(-)
Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved
diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py index 74ad11b..6b0fce0 100755 --- a/scripts/solve_disambiguation.py +++ b/scripts/solve_disambiguation.py @@ -81,6 +81,7 @@ #
import codecs +from itertools import chain import os import re
@@ -397,7 +398,6 @@ # TODO: start yielding before all referring pages have been found refs = [ page for page in self.disambPage.getReferences( - follow_redirects=False, withTemplateInclusion=False, namespaces=0 if self.main_only else None ) @@ -638,11 +638,12 @@ """ Check if the text matches any of the ignore regexes.
- For a given text, returns False if none of the regular - expressions given in the dictionary at the top of this class - matches a substring of the text. - Otherwise returns the substring which is matched by one of - the regular expressions. + @param text: wikitext of a page + @type text: str + @return: None if none of the regular expressions + given in the dictionary at the top of this class matches + a substring of the text, otherwise the matched substring + @rtype: str or None """ for ig in self.ignore_contents_regexes: match = ig.search(text) @@ -697,16 +698,44 @@ flags=re.X)
def treat(self, refPage, disambPage): - """Treat a page. + """Resolve the links to disambPage or its redirects.
@param disambPage: the disambiguation page or redirect we don't want anything to link to @type disambPage: pywikibot.Page @param refPage: a page linking to disambPage @type refPage: pywikibot.Page - @return: False if the user pressed q to completely quit the program, - True otherwise - @rtype: bool + @rtype: None + + """ + nochange = True + + for page in chain( + (disambPage,), disambPage.getReferences(redirectsOnly=True) + ): + treat_result = self.treat_disamb_only(refPage, page) + if treat_result == 'nextpage': + return + elif treat_result in ('nochange', 'done'): + continue + elif nochange: + nochange = False + + if nochange: + pywikibot.output('No changes necessary in ' + refPage.title()) + + def treat_disamb_only(self, refPage, disambPage): + """Resolve the links to disambPage but don't look for its redirects. + + @param disambPage: the disambiguation page or redirect we don't want + anything to link to + @type disambPage: pywikibot.Page + @param refPage: a page linking to disambPage + @type refPage: pywikibot.Page + @return: "nextpage" if the user enters "n" to skip this page, + "nochange" if the page needs no change, and + "done" if the page is processed successfully + @rtype: str
""" # TODO: break this function up into subroutines! @@ -750,8 +779,7 @@ preloadingGen = pagegenerators.PreloadingGenerator(gen) for refPage2 in preloadingGen: # run until the user selected 'quit' - if not self.treat(refPage2, refPage): - break + self.treat(refPage2, refPage) elif choice == 'c': text = refPage.get(get_redirect=True) include = "redirect" @@ -772,9 +800,8 @@ m = self.linkR.search(text, pos=curpos) if not m: if n == 0: - pywikibot.output(u"No changes necessary in %s" - % refPage.title()) - return True + # No changes necessary for this disambiguation title. + return 'nochange' else: # stop loop and save page break @@ -854,7 +881,7 @@ # If run with the -primary argument, skip this # occurrence next time. self.primaryIgnoreManager.ignore(refPage) - return True + return 'nextpage'
# The link looks like this: # [[page_title|link_text]]trailing_chars @@ -939,7 +966,7 @@ % (new_page_title, section, link_text) text = text[:m.start()] + newlink + text[m.end():] continue - + # Todo: This line is unreachable (T155337) pywikibot.output(text[max(0, m.start() - 30):m.end() + 30]) if text == original_text: pywikibot.output(u'\nNo changes have been made:\n') @@ -956,7 +983,7 @@ pywikibot.output(u'Page not saved: page is locked') except pywikibot.PageNotSaved as error: pywikibot.output(u'Page not saved: %s' % error.args) - return True + return 'done'
def findAlternatives(self, disambPage): """Extend self.alternatives using correctcap of disambPage.linkedPages.
pywikibot-commits@lists.wikimedia.org