jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/458792 )
Change subject: [IMPR] Improvements fo MakeCatBot.include() ......................................................................
[IMPR] Improvements fo MakeCatBot.include()
- set sortKey directly in workingcat - improved link checking in checklinks() which uses chained generators and print progress dots for each page - introduce print_dot method for that - simplify changing category by using change_category method - "checked" container becomes a set - removes checkforward which is always True
6th step detached from Id7ca3461d for easier reviewing
Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f --- M scripts/makecat.py 1 file changed, 50 insertions(+), 47 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/makecat.py b/scripts/makecat.py index 7d72ca5..76c31bf 100755 --- a/scripts/makecat.py +++ b/scripts/makecat.py @@ -50,6 +50,7 @@ # Distributed under the terms of the MIT license. # import codecs +from itertools import chain from textwrap import fill
import pywikibot @@ -77,7 +78,6 @@ }) super().__init__(**kwargs) self.skipdates = self.opt.nodate - self.checkforward = True self.checkbackward = not self.opt.forward self.checkbroken = not (self.opt.forward and self.opt.exist) @@ -107,6 +107,12 @@ color_format('\n>>> {lightpurple}{0}{default} <<<', page.title()))
+ @staticmethod + def print_dot(condition=True): + """Print a single dot if conditon is True.""" + if condition: + pywikibot.output('.', newline=False) + def needcheck(self, page): """Verify whether the current page may be processed.""" global checked @@ -114,53 +120,50 @@ or page in checked or self.skipdates and page.autoFormat()[0] is not None)
- def change_category(self, page, catlist): + def change_category(self, page, categories, summary): """Change the category of page.""" - pass + global workingcat, parentcats + for category in categories: + if self.removeparent and category in parentcats: + page.change_category(workingcat, summary=summary) + return True + return False
- def include(self, pl, checklinks=True, realinclude=True, linkterm=None, + def include(self, page, checklinks=True, realinclude=True, linkterm=None, summary=''): """Include the current page to the working category.""" global workingcat, parentcats - global checked, tocheck - cl = checklinks - mysite = self.site + actualworkingcat = workingcat if linkterm: - actualworkingcat = pywikibot.Category(mysite, workingcat.title(), - sort_key=linkterm) - else: - actualworkingcat = workingcat - if realinclude: - try: - text = pl.get() - except pywikibot.NoPage: - pass - except pywikibot.IsRedirectPage: - cl = True + actualworkingcat.sortKey = linkterm + if realinclude and page.exists(): + if page.isRedirectPage(): + checklinks = True else: - cats = list(pl.categories()) - if workingcat not in cats: - for c in cats: - if c in parentcats: - if self.removeparent: - pl.change_category(actualworkingcat, - summary=summary) - break - else: - pl.put(textlib.replaceCategoryLinks( - text, cats + [actualworkingcat], site=pl.site), - summary=summary) - if cl: - if self.checkforward: - for page2 in pl.linkedPages(): - if self.needcheck(page2): - tocheck.append(page2) - checked[page2] = page2 - if self.checkbackward: - for ref_page in pl.getReferences(): - if self.needcheck(ref_page): - tocheck.append(ref_page) - checked[ref_page] = ref_page + cats = list(page.categories()) + if workingcat not in cats \ + and not self.change_category(page, cats, summary): + newtext = textlib.replaceCategoryLinks( + page.text, cats + [actualworkingcat], + site=page.site) + page.put(newtext, summary=summary) + + if checklinks: + self.checklinks(page) + + def checklinks(self, page): + """Check whether the page has to be added to the tocheck deque.""" + global checked, tocheck + pywikibot.output('\nChecking links for "{}"...' + .format(page.title()), newline=False) + generators = [page.linkedPages()] + if self.checkbackward: + generators.append(page.getReferences()) + for i, linked_page in enumerate(chain(*generators)): + self.print_dot(not i % 25) + if self.needcheck(linked_page): + tocheck.append(linked_page) + checked.add(linked_page)
def skip_page(self, page): """Check whether the page is to be skipped.""" @@ -177,7 +180,7 @@ pl2 = pl.getRedirectTarget() if self.needcheck(pl2): tocheck.append(pl2) - checked[pl2] = pl2 + checked.add(pl2) return ctoshow = 500 pywikibot.output('') @@ -225,7 +228,7 @@ elif answer == 'o': pagetitle = pywikibot.input('Specify page to add:') page = pywikibot.Page(pywikibot.Site(), pagetitle) - if page not in checked.keys(): + if page not in checked: self.include(page, summary=summary) elif answer == 's': if not pl.exists(): @@ -268,7 +271,7 @@ global checked, tocheck global excludefile
- checked = {} + checked = set() tocheck = DequeGenerator()
workingcatname = '' @@ -311,7 +314,7 @@ if not line: continue pl = pywikibot.Page(mysite, line) - checked[pl] = pl + checked.add(pl)
excludefile = codecs.open(filename, 'a', encoding=mysite.encoding()) except IOError: @@ -331,7 +334,7 @@ for cat in subcatlist: artlist = list(cat.articles()) for page in artlist: - checked[page] = page + checked.add(page)
# Fetch articles in category, and mark as already checked (seen) # If category is empty, ask user if they want to look for pages @@ -349,7 +352,7 @@ articles = [pl]
for pl in articles: - checked[pl] = pl + checked.add(pl) bot.include(pl, summary=summary)
gen = pagegenerators.DequePreloadingGenerator(tocheck)