jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
[IMPR] Improvements fo MakeCatBot.include()

- set sortKey directly in workingcat
- improved link checking in checklinks() which uses chained generators
and print progress dots for each page
- introduce print_dot method for that
- simplify changing category by using change_category method
- "checked" container becomes a set
- removes checkforward which is always True

6th step detached from Id7ca3461d for easier reviewing

Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f
---
M scripts/makecat.py
1 file changed, 50 insertions(+), 47 deletions(-)

diff --git a/scripts/makecat.py b/scripts/makecat.py
index 7d72ca5..76c31bf 100755
--- a/scripts/makecat.py
+++ b/scripts/makecat.py
@@ -50,6 +50,7 @@
# Distributed under the terms of the MIT license.
#
import codecs
+from itertools import chain
from textwrap import fill

import pywikibot
@@ -77,7 +78,6 @@
})
super().__init__(**kwargs)
self.skipdates = self.opt.nodate
- self.checkforward = True
self.checkbackward = not self.opt.forward
self.checkbroken = not (self.opt.forward
and self.opt.exist)
@@ -107,6 +107,12 @@
color_format('\n>>> {lightpurple}{0}{default} <<<',
page.title()))

+ @staticmethod
+ def print_dot(condition=True):
+ """Print a single dot if conditon is True."""
+ if condition:
+ pywikibot.output('.', newline=False)
+
def needcheck(self, page):
"""Verify whether the current page may be processed."""
global checked
@@ -114,53 +120,50 @@
or page in checked
or self.skipdates and page.autoFormat()[0] is not None)

- def change_category(self, page, catlist):
+ def change_category(self, page, categories, summary):
"""Change the category of page."""
- pass
+ global workingcat, parentcats
+ for category in categories:
+ if self.removeparent and category in parentcats:
+ page.change_category(workingcat, summary=summary)
+ return True
+ return False

- def include(self, pl, checklinks=True, realinclude=True, linkterm=None,
+ def include(self, page, checklinks=True, realinclude=True, linkterm=None,
summary=''):
"""Include the current page to the working category."""
global workingcat, parentcats
- global checked, tocheck
- cl = checklinks
- mysite = self.site
+ actualworkingcat = workingcat
if linkterm:
- actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
- sort_key=linkterm)
- else:
- actualworkingcat = workingcat
- if realinclude:
- try:
- text = pl.get()
- except pywikibot.NoPage:
- pass
- except pywikibot.IsRedirectPage:
- cl = True
+ actualworkingcat.sortKey = linkterm
+ if realinclude and page.exists():
+ if page.isRedirectPage():
+ checklinks = True
else:
- cats = list(pl.categories())
- if workingcat not in cats:
- for c in cats:
- if c in parentcats:
- if self.removeparent:
- pl.change_category(actualworkingcat,
- summary=summary)
- break
- else:
- pl.put(textlib.replaceCategoryLinks(
- text, cats + [actualworkingcat], site=pl.site),
- summary=summary)
- if cl:
- if self.checkforward:
- for page2 in pl.linkedPages():
- if self.needcheck(page2):
- tocheck.append(page2)
- checked[page2] = page2
- if self.checkbackward:
- for ref_page in pl.getReferences():
- if self.needcheck(ref_page):
- tocheck.append(ref_page)
- checked[ref_page] = ref_page
+ cats = list(page.categories())
+ if workingcat not in cats \
+ and not self.change_category(page, cats, summary):
+ newtext = textlib.replaceCategoryLinks(
+ page.text, cats + [actualworkingcat],
+ site=page.site)
+ page.put(newtext, summary=summary)
+
+ if checklinks:
+ self.checklinks(page)
+
+ def checklinks(self, page):
+ """Check whether the page has to be added to the tocheck deque."""
+ global checked, tocheck
+ pywikibot.output('\nChecking links for "{}"...'
+ .format(page.title()), newline=False)
+ generators = [page.linkedPages()]
+ if self.checkbackward:
+ generators.append(page.getReferences())
+ for i, linked_page in enumerate(chain(*generators)):
+ self.print_dot(not i % 25)
+ if self.needcheck(linked_page):
+ tocheck.append(linked_page)
+ checked.add(linked_page)

def skip_page(self, page):
"""Check whether the page is to be skipped."""
@@ -177,7 +180,7 @@
pl2 = pl.getRedirectTarget()
if self.needcheck(pl2):
tocheck.append(pl2)
- checked[pl2] = pl2
+ checked.add(pl2)
return
ctoshow = 500
pywikibot.output('')
@@ -225,7 +228,7 @@
elif answer == 'o':
pagetitle = pywikibot.input('Specify page to add:')
page = pywikibot.Page(pywikibot.Site(), pagetitle)
- if page not in checked.keys():
+ if page not in checked:
self.include(page, summary=summary)
elif answer == 's':
if not pl.exists():
@@ -268,7 +271,7 @@
global checked, tocheck
global excludefile

- checked = {}
+ checked = set()
tocheck = DequeGenerator()

workingcatname = ''
@@ -311,7 +314,7 @@
if not line:
continue
pl = pywikibot.Page(mysite, line)
- checked[pl] = pl
+ checked.add(pl)

excludefile = codecs.open(filename, 'a', encoding=mysite.encoding())
except IOError:
@@ -331,7 +334,7 @@
for cat in subcatlist:
artlist = list(cat.articles())
for page in artlist:
- checked[page] = page
+ checked.add(page)

# Fetch articles in category, and mark as already checked (seen)
# If category is empty, ask user if they want to look for pages
@@ -349,7 +352,7 @@
articles = [pl]

for pl in articles:
- checked[pl] = pl
+ checked.add(pl)
bot.include(pl, summary=summary)

gen = pagegenerators.DequePreloadingGenerator(tocheck)

To view, visit change 458792. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I32238bdb412783be2e84c04be7c09bd2c652b51f
Gerrit-Change-Number: 458792
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Dalba <dalba.wiki@gmail.com>
Gerrit-MessageType: merged