jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Scripts: Use more intelligent generators ......................................................................
[FEAT] Scripts: Use more intelligent generators
Instead of using the pagegenerators as often as possible, this only uses them when they provide additional functionality. Otherwise it uses the normal methods of a page. This removes all usages of the NamespaceFilterPageGenerator in the scripts, which only filters out pages in unwanted namespaces but still requests them.
Change-Id: Iddc36b040ff010467559ea8fd7523056a511cb6f --- M pywikibot/pagegenerators.py M scripts/add_text.py M scripts/checkimages.py M scripts/commonscat.py M scripts/disambredir.py M scripts/fixing_redirects.py M scripts/nowcommons.py M scripts/solve_disambiguation.py M scripts/spamremove.py M scripts/templatecount.py M scripts/unlink.py M scripts/weblinkchecker.py 12 files changed, 33 insertions(+), 80 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 792e092..601f809 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -1010,7 +1010,8 @@
def CategorizedPageGenerator(category, recurse=False, start=None, - step=None, total=None, content=False): + step=None, total=None, content=False, + namespaces=None): """Yield all pages in a specific category.
If recurse is True, pages in subcategories are included as well; if @@ -1026,7 +1027,7 @@
""" kwargs = dict(recurse=recurse, step=step, total=total, - content=content) + content=content, namespaces=namespaces) if start: kwargs['sortby'] = 'sortkey' kwargs['startsort'] = start @@ -1473,7 +1474,7 @@ ImageGenerator = FileGenerator
-def PageWithTalkPageGenerator(generator): +def PageWithTalkPageGenerator(generator, return_talk_only=False): """Yield pages and associated talk pages from another generator.
Only yields talk pages if the original generator yields a non-talk page, @@ -1481,7 +1482,8 @@
""" for page in generator: - yield page + if not return_talk_only or page.isTalkPage(): + yield page if not page.isTalkPage(): yield page.toggleTalkPage()
diff --git a/scripts/add_text.py b/scripts/add_text.py index 3bfc2b9..ae21071 100644 --- a/scripts/add_text.py +++ b/scripts/add_text.py @@ -288,7 +288,6 @@ textfile = None talkPage = False reorderEnabled = True - namespaces = []
# Put the text above or below the text? up = False @@ -346,14 +345,7 @@ pywikibot.error("The text to add wasn't given.") return if talkPage: - generator = pagegenerators.PageWithTalkPageGenerator(generator) - site = pywikibot.Site() - for namespace in site.namespaces(): - index = site.getNamespaceIndex(namespace) - if index % 2 == 1 and index > 0: - namespaces += [index] - generator = pagegenerators.NamespaceFilterPageGenerator( - generator, namespaces, site) + generator = pagegenerators.PageWithTalkPageGenerator(generator, True) for page in generator: (text, newtext, always) = add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True, diff --git a/scripts/checkimages.py b/scripts/checkimages.py index 315c606..19f07e6 100644 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -1770,7 +1770,7 @@ firstPageTitle = arg[7:] firstPageTitle = firstPageTitle.split(":")[1:] generator = pywikibot.Site().allpages(start=firstPageTitle, - namespace=6) + namespace=6) repeat = False elif arg.startswith('-page'): if len(arg) == 5: @@ -1804,7 +1804,7 @@ catName = str(arg[5:]) catSelected = pywikibot.Category(pywikibot.Site(), 'Category:%s' % catName) - generator = pg.CategorizedPageGenerator(catSelected) + generator = catSelected.articles(namespaces=[6]) repeat = False elif arg.startswith('-ref'): if len(arg) == 4: @@ -1812,8 +1812,8 @@ u'The references of what page should I parse?')) elif len(arg) > 4: refName = str(arg[5:]) - generator = pg.ReferringPageGenerator( - pywikibot.Page(pywikibot.Site(), refName)) + ref = pywikibot.Page(pywikibot.Site(), refName) + generator = ref.getReferences(namespaces=[6]) repeat = False
if not generator: @@ -1862,7 +1862,6 @@ Bot.takesettings() if waitTime: generator = Bot.wait(waitTime, generator, normal, limit) - generator = pg.NamespaceFilterPageGenerator(generator, 6, site) for image in generator: # Setting the image for the main class Bot.setParameters(image.title(withNamespace=False)) diff --git a/scripts/commonscat.py b/scripts/commonscat.py index cc9fb19..6df29f7 100755 --- a/scripts/commonscat.py +++ b/scripts/commonscat.py @@ -500,10 +500,7 @@ @type args: list of unicode """ options = {} - generator = None checkcurrent = False - ns = [] - ns.append(14)
# Process global args and prepare generator args parser local_args = pywikibot.handle_args(args) @@ -527,14 +524,10 @@ primaryCommonscat, commonscatAlternatives = \ CommonscatBot.getCommonscatTemplate( site.code) - generator = pagegenerators.NamespaceFilterPageGenerator( - pagegenerators.ReferringPageGenerator( - pywikibot.Page(site, u'Template:' + primaryCommonscat), - onlyTemplateInclusion=True), - ns, - site) - - if not generator: + template_page = pywikibot.Page(site, u'Template:' + primaryCommonscat) + generator = template_page.getReferences(namespaces=14, + onlyTemplateInclusion=True) + else: generator = genFactory.getCombinedGenerator()
if generator: diff --git a/scripts/disambredir.py b/scripts/disambredir.py index 4f3cdcf..8739f3a 100644 --- a/scripts/disambredir.py +++ b/scripts/disambredir.py @@ -156,7 +156,6 @@ """ local_args = pywikibot.handle_args(args)
- generator = None start = local_args[0] if local_args else '!'
mysite = pywikibot.Site() @@ -164,17 +163,13 @@ mysite.disambcategory() except pywikibot.Error as e: pywikibot.output(e) - else: - generator = pagegenerators.CategorizedPageGenerator( - mysite.disambcategory(), start=start) - - if not generator: pywikibot.showHelp() return
+ generator = pagegenerators.CategorizedPageGenerator( + mysite.disambcategory(), start=start, content=True, namespaces=[0]) + # only work on articles - generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0]) - generator = pagegenerators.PreloadingGenerator(generator) pagestodo = [] pagestoload = [] for page in generator: diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py index 10de73d..180163e 100644 --- a/scripts/fixing_redirects.py +++ b/scripts/fixing_redirects.py @@ -217,8 +217,7 @@ if featured: featuredList = i18n.translate(mysite, featured_articles) ref = pywikibot.Page(pywikibot.Site(), featuredList) - gen = pagegenerators.ReferringPageGenerator(ref) - gen = pagegenerators.NamespaceFilterPageGenerator(gen, [0]) + gen = ref.getReferences(namespaces=[0]) if not gen: gen = genFactory.getCombinedGenerator() if gen: diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py index ebdf184..3125fdb 100644 --- a/scripts/nowcommons.py +++ b/scripts/nowcommons.py @@ -271,11 +271,10 @@ nowCommonsTemplates = [pywikibot.Page(self.site, title, ns=10) for title in self.ncTemplates()] - gens = [pg.ReferringPageGenerator(t, followRedirects=True, - onlyTemplateInclusion=True) + gens = [t.getReferences(followRedirects=True, namespaces=[6], + onlyTemplateInclusion=True) for t in nowCommonsTemplates] gen = pg.CombinedPageGenerator(gens) - gen = pg.NamespaceFilterPageGenerator(gen, [6]) gen = pg.DuplicateFilterPageGenerator(gen) gen = pg.PreloadingGenerator(gen) return gen diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py index 83d1050..e5c07e2 100644 --- a/scripts/solve_disambiguation.py +++ b/scripts/solve_disambiguation.py @@ -1090,15 +1090,9 @@ minimum = int(arg[5:]) elif arg.startswith('-start'): try: - if len(arg) <= len('-start:'): - generator = pagegenerators.CategorizedPageGenerator( - pywikibot.Site().disambcategory()) - else: - generator = pagegenerators.CategorizedPageGenerator( - 
pywikibot.Site().disambcategory(), - start=arg[7:]) - generator = pagegenerators.NamespaceFilterPageGenerator( - generator, [0]) + generator = pagegenerators.CategorizedPageGenerator( + pywikibot.Site().disambcategory(), + start=arg[7:], namespaces=[0]) except pywikibot.NoPage: pywikibot.output("Disambiguation category for your wiki is not known.") raise diff --git a/scripts/spamremove.py b/scripts/spamremove.py index 92f70a2..0f8303e 100755 --- a/scripts/spamremove.py +++ b/scripts/spamremove.py @@ -34,7 +34,7 @@ #
import pywikibot -from pywikibot import pagegenerators, i18n +from pywikibot import i18n from pywikibot.editor import TextEditor
@@ -67,10 +67,7 @@ return
mysite = pywikibot.Site() - pages = mysite.exturlusage(spamSite) - if namespaces: - pages = pagegenerators.NamespaceFilterPageGenerator(pages, namespaces) - pages = pagegenerators.PreloadingGenerator(pages) + pages = mysite.exturlusage(spamSite, namespaces=namespaces, content=True)
summary = i18n.twtranslate(mysite, 'spamremove-remove', {'url': spamSite}) diff --git a/scripts/templatecount.py b/scripts/templatecount.py index 036b012..8e36b94 100644 --- a/scripts/templatecount.py +++ b/scripts/templatecount.py @@ -40,7 +40,6 @@
import datetime import pywikibot -from pywikibot import pagegenerators
templates = ['ref', 'note', 'ref label', 'note label', 'reflist']
@@ -100,12 +99,8 @@ mytpl = mysite.ns_index(mysite.template_namespace()) for template in templates: transcludingArray = [] - gen = pagegenerators.ReferringPageGenerator( - pywikibot.Page(mysite, template, ns=mytpl), - onlyTemplateInclusion=True) - if namespaces: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, - namespaces) + gen = pywikibot.Page(mysite, template, ns=mytpl).getReferences( + namespaces=namespaces, onlyTemplateInclusion=True) for page in gen: transcludingArray.append(page) yield template, transcludingArray diff --git a/scripts/unlink.py b/scripts/unlink.py index ea24f7c..de05fdd 100755 --- a/scripts/unlink.py +++ b/scripts/unlink.py @@ -31,7 +31,7 @@ import re import pywikibot from pywikibot.editor import TextEditor -from pywikibot import pagegenerators, i18n, Bot +from pywikibot import i18n, Bot
class UnlinkBot(Bot): @@ -49,10 +49,8 @@ self.pageToUnlink = pageToUnlink linktrail = self.pageToUnlink.site.linktrail()
- gen = pagegenerators.ReferringPageGenerator(pageToUnlink) - if self.getOption('namespaces') != []: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, self.getOption('namespaces')) - self.generator = pagegenerators.PreloadingGenerator(gen) + self.generator = pageToUnlink.getReferences( + namespaces=self.getOption('namespaces'), content=True) # The regular expression which finds links. Results consist of four # groups: # diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index a94bb79..ba33030 100644 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -214,7 +214,7 @@ continue self.skipping = False page = pywikibot.Page(self.site, entry.title) - if not self.namespaces == []: + if self.namespaces: if page.namespace() not in self.namespaces: continue found = False @@ -852,9 +852,6 @@ """ gen = None xmlFilename = None - # Which namespaces should be processed? - # default to [] which means all namespaces will be processed - namespaces = [] HTTPignore = [] day = 7
@@ -867,11 +864,6 @@ config.report_dead_links_on_talk = True elif arg == '-notalk': config.report_dead_links_on_talk = False - elif arg.startswith('-namespace:'): - try: - namespaces.append(int(arg[11:])) - except ValueError: - namespaces.append(arg[11:]) elif arg == '-repeat': gen = RepeatPageGenerator() elif arg.startswith('-ignore:'): @@ -897,13 +889,11 @@ xmlStart except NameError: xmlStart = None - gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces) + gen = XmlDumpPageGenerator(xmlFilename, xmlStart, genFactory.namespaces)
if not gen: gen = genFactory.getCombinedGenerator() if gen: - if namespaces != []: - gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) # fetch at least 240 pages simultaneously from the wiki, but more if # a high thread number is set. pageNumber = max(240, config.max_external_links * 2)
pywikibot-commits@lists.wikimedia.org