jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/724000 )
Change subject: [IMPR] Derive blockpageschecker.py from CurrentPageBot
......................................................................
[IMPR] Derive blockpageschecker.py from CurrentPageBot
- rename showQuest() function to invoke_editor() method
- move the template handling inside the remove_templates() coroutine,
which is started in BaseBot.setup() and closed in BaseBot.teardown()
- move the understandBlock() function into the coroutine as the private understand_block() function
- use a generator chain for categories instead of creating a list of pages
- use BaseBot.userPut() method instead of save_page function
Bug: T196851
Bug: T171713
Change-Id: Id23491f8e5d30685787a5e78cecab97b921e78d6
---
M scripts/blockpageschecker.py
M tox.ini
2 files changed, 301 insertions(+), 322 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/blockpageschecker.py b/scripts/blockpageschecker.py
index 4516561..759ead7 100755
--- a/scripts/blockpageschecker.py
+++ b/scripts/blockpageschecker.py
@@ -45,22 +45,17 @@
# Distributed under the terms of the MIT license.
#
import re
-import time
import webbrowser
+from collections import namedtuple
+from itertools import chain
+
import pywikibot
-from pywikibot import config, i18n, pagegenerators
-from pywikibot.exceptions import (
- EditConflictError,
- Error,
- IsRedirectPageError,
- LockedPageError,
- NoPageError,
- PageSaveRelatedError,
- ServerError,
- SpamblacklistError,
-)
-from pywikibot.tools.formatter import color_format
+
+from pywikibot import i18n, pagegenerators
+from pywikibot.bot import ExistingPageBot, SingleSiteBot
+from pywikibot.editor import TextEditor
+from pywikibot.exceptions import Error
# This is required for the text that is shown when you run this script
@@ -170,45 +165,273 @@
project_inserted = ['ar', 'cs', 'fr', 'it', 'ja', 'pt', 'sr', 'ur', 'zh']
# END PREFERENCES
+ParsedTemplate = namedtuple('ParsedTemplate', 'blocktype, regex')
-def understandBlock(text, TTP, TSP, TSMP, TTMP, TU):
- """Understand if the page is blocked and if it has the right template."""
- results = 'sysop-total', 'autoconfirmed-total', 'unique'
- for index, template in enumerate((TTP, TSP, TU)):
- if not template:
- continue
- for catchRegex in template:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return results[index], catchRegex
+class CheckerBot(ExistingPageBot, SingleSiteBot):
- if TSMP and TTMP and TTP != TTMP and TSP != TSMP:
- for catchRegex in TTMP:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return ('sysop-move', catchRegex)
- for catchRegex in TSMP:
- resultCatch = re.findall(catchRegex, text)
- if resultCatch:
- return ('autoconfirmed-move', catchRegex)
- # If editable means that we have no regex, won't change anything with this
- # regex
- return ('editable', r'\A\n')
+ """Bot to remove stale protection templates from unprotected pages."""
+ update_options = {
+ 'show': False,
+ 'move': False,
+ }
-def showQuest(page):
- """Ask for an editor and invoke it."""
- quest = pywikibot.input_choice(
- 'Do you want to open the page?',
- [('with browser', 'b'), ('with gui', 'g'), ('no', 'n')], 'n',
- automatic_quit=False)
- if quest == 'b':
- webbrowser.open('{}?redirect=no'.format(page.full_url()))
- elif quest == 'g':
- from pywikibot import editor as editarticle
- editor = editarticle.TextEditor()
- editor.edit(page.text)
+ def invoke_editor(self, page):
+ """Ask for an editor and invoke it."""
+ choice = pywikibot.input_choice(
+ 'Do you want to open the page?',
+ [('with browser', 'b'), ('with gui', 'g'), ('no', 'n')], 'n')
+ if choice == 'b':
+ webbrowser.open('{}?redirect=no'.format(page.full_url()))
+ elif choice == 'g':
+ editor = TextEditor()
+ editor.edit(page.text)
+
+ def setup(self):
+ """Initialize the coroutine for parsing templates."""
+ self.parse_tempates = self.remove_templates()
+ self.parse_tempates.send(None)
+
+ def teardown(self):
+ """Close the coroutine."""
+ self.parse_tempates.close()
+
+ def treat_page(self):
+ """Load the given page, do some changes, and save it."""
+ page = self.current_page
+ if page.isRedirectPage():
+ if self.opt.always:
+ pywikibot.warning('{} is a redirect; skipping'.format(page))
+ elif self.opt.show:
+ self.invoke_editor(page)
+ return
+
+ newtext = self.parse_tempates.send((page.text, page.protection()))
+ next(self.parse_tempates)
+
+ commentUsed = i18n.twtranslate(self.site, 'blockpageschecker-summary')
+ self.userPut(page, page.text, newtext, summary=commentUsed)
+
+ def skip_page(self, page):
+ """Skip if the user has not permission to edit."""
+ # FIXME: This check does not work :
+ # PreloadingGenerator cannot set correctly page.editRestriction
+ # (see bug T57322)
+ # if not page.has_permission():
+ # pywikibot.output(
+ # "{} is sysop-protected : this account can't edit "
+ # "it! Skipping...".format(pagename))
+ # continue
+ page.protection()
+ if not page.has_permission():
+ pywikibot.warning(
+ "{} is protected: this account can't edit it! Skipping..."
+ .format(page))
+ return True
+
+ return super().skip_page(page)
+
+ def remove_templates(self):
+ """Understand if the page is blocked has the right template."""
+
+ def understand_block():
+ """Understand if the page is blocked has the right template."""
+ results = 'sysop-total', 'autoconfirmed-total', 'unique'
+ for index, template in enumerate((TTP, TSP, TU)):
+ if not template:
+ continue
+
+ for catchRegex in template:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ParsedTemplate(results[index], catchRegex)
+
+ if TSMP and TTMP and TTP != TTMP and TSP != TSMP:
+ for catchRegex in TTMP:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ParsedTemplate('sysop-move', catchRegex)
+
+ for catchRegex in TSMP:
+ resultCatch = re.findall(catchRegex, text)
+ if resultCatch:
+ return ParsedTemplate('autoconfirmed-move', catchRegex)
+
+ # If editable means that we have no regex, won't change anything
+ # with this regex
+ return ParsedTemplate('editable', r'\A\n')
+
+ TSP = i18n.translate(self.site, templateSemiProtection)
+ TTP = i18n.translate(self.site, templateTotalProtection)
+ TSMP = i18n.translate(self.site, templateSemiMoveProtection)
+ TTMP = i18n.translate(self.site, templateTotalMoveProtection)
+ TNR = i18n.translate(self.site, templateNoRegex)
+ TU = i18n.translate(self.site, templateUnique)
+
+ while True:
+ text, restrictions = yield
+ if text is None:
+ continue
+
+ # Understand, according to the template in the page, what should
+ # be the protection and compare it with what there really is.
+ TemplateInThePage = understand_block()
+
+ # Only to see if the text is the same or not...
+ oldtext = text
+ # keep track of the changes for each step (edit then move)
+ changes = -1
+
+ editRestr = restrictions.get('edit')
+ if not editRestr:
+ # page is not edit-protected
+ # Deleting the template because the page doesn't need it.
+ if not (TTP or TSP):
+ raise Error(
+ 'This script is not localized to use it on {}.\n'
+ 'Missing "templateSemiProtection" or'
+ '"templateTotalProtection"'.format(self.site.sitename))
+
+ if TU:
+ replaceToPerform = '|'.join(TTP + TSP + TU)
+ else:
+ replaceToPerform = '|'.join(TTP + TSP)
+ text, changes = re.subn(
+ '<noinclude>({})</noinclude>'.format(replaceToPerform),
+ '', text)
+ if not changes:
+ text, changes = re.subn(
+ '({})'.format(replaceToPerform), '', text)
+ msg = 'The page is editable for all'
+ if not self.opt.move:
+ msg += ', deleting the template..'
+ pywikibot.output(msg + '.')
+
+ elif editRestr[0] == 'sysop':
+ # total edit protection
+ if TemplateInThePage.blocktype == 'sysop-total' and TTP \
+ or TemplateInThePage.blocktype == 'unique' and TU:
+ msg = 'The page is protected to the sysop'
+ if not self.opt.move:
+ msg += ', skipping...'
+ pywikibot.output(msg)
+ else:
+ if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
+ raise Error(
+ 'This script is not localized to use it on \n{}. '
+ 'Missing "templateNoRegex"'
+ .format(self.site.sitename))
+
+ pywikibot.output(
+ 'The page is protected to the sysop, but the template '
+ 'seems not correct. Fixing...')
+ if TU:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[4], text)
+ else:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[1], text)
+
+ elif TSP or TU:
+ # implicitly
+ # editRestr[0] = 'autoconfirmed', edit-Semi-protection
+ if TemplateInThePage.blocktype in ('autoconfirmed-total',
+ 'unique'):
+ msg = ('The page is editable only for the autoconfirmed '
+ 'users')
+ if not self.opt.move:
+ msg += ', skipping...'
+ pywikibot.output(msg)
+ else:
+ if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
+ raise Error(
+ 'This script is not localized to use it on \n'
+ '{}. Missing "templateNoRegex"'
+ .format(self.site.sitename))
+ pywikibot.output(
+ 'The page is editable only for the autoconfirmed '
+ 'users, but the template seems not correct. Fixing...')
+ if TU:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[4], text)
+ else:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[0], text)
+
+ if not changes:
+ # We tried to fix edit-protection templates, but it did
+ # not work.
+ pywikibot.warning('No edit-protection template could be found')
+
+ if self.opt.move and changes > -1:
+ # checking move protection now
+ moveRestr = restrictions.get('move')
+ changes = -1
+
+ if not moveRestr:
+ pywikibot.output('The page is movable for all, deleting '
+ 'the template...')
+ # Deleting the template because the page doesn't need it.
+ if TU:
+ replaceToPerform = '|'.join(TSMP + TTMP + TU)
+ else:
+ replaceToPerform = '|'.join(TSMP + TTMP)
+ text, changes = re.subn(
+ '<noinclude>({})</noinclude>'.format(replaceToPerform),
+ '', text)
+ if not changes:
+ text, changes = re.subn(
+ '({})'.format(replaceToPerform), '', text)
+ elif moveRestr[0] == 'sysop':
+ # move-total-protection
+ if TemplateInThePage.blocktype == 'sysop-move' and TTMP \
+ or TemplateInThePage.blocktype == 'unique' and TU:
+ pywikibot.output('The page is protected from moving '
+ 'to the sysop, skipping...')
+ if TU:
+ # no changes needed, better to revert the old text.
+ text = oldtext
+ else:
+ pywikibot.output(
+ 'The page is protected from moving to the sysop, '
+ 'but the template seems not correct. Fixing...')
+ if TU:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[4], text)
+ else:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[3], text)
+
+ elif TSMP or TU:
+ # implicitly
+ # moveRestr[0] = 'autoconfirmed', move-semi-protection
+ if TemplateInThePage.blocktype in ('autoconfirmed-move',
+ 'unique'):
+ pywikibot.output('The page is movable only for the '
+ 'autoconfirmed users, skipping...')
+ if TU:
+ # no changes needed, better to revert the old text.
+ text = oldtext
+ else:
+ pywikibot.output(
+ 'The page is movable only for the autoconfirmed '
+ 'users, but the template seems not correct. '
+ 'Fixing...')
+ if TU:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[4], text)
+ else:
+ text, changes = re.subn(
+ TemplateInThePage.regex, TNR[2], text)
+
+ if not changes:
+ # We tried to fix move-protection templates
+ # but it did not work
+ pywikibot.warning(
+ 'No move-protection template could be found')
+
+ yield text
def main(*args: str) -> None:
@@ -221,296 +444,52 @@
"""
# Loading the comments
global categoryToCheck, project_inserted
- # always, define a generator to understand if the user sets one,
- # defining what's genFactory
- always = False
- generator = False
- show = False
- moveBlockCheck = False
- protectedpages = False
- protectType = 'edit'
- namespace = 0
+
+ options = {}
+ generator = None
# Process global args and prepare generator args parser
local_args = pywikibot.handle_args(args)
- genFactory = pagegenerators.GeneratorFactory()
+ site = pywikibot.Site()
- # Process local args
- for arg in local_args:
- option, sep, value = arg.partition(':')
- if option == '-always':
- always = True
- elif option == '-move':
- moveBlockCheck = True
- elif option == '-show':
- show = True
- elif option in ('-protectedpages', '-moveprotected'):
- protectedpages = True
- if option == '-moveprotected':
- protectType = 'move'
- if value:
- namespace = int(value)
- else:
- genFactory.handle_arg(arg)
-
- if config.mylang not in project_inserted:
+ if site.code not in project_inserted:
pywikibot.output('Your project is not supported by this script.\n'
'You have to edit the script and add it!')
return
- site = pywikibot.Site()
+ # Process pagegenerators arguments
+ gen_factory = pagegenerators.GeneratorFactory(site)
+ local_args = gen_factory.handle_args(local_args)
- if protectedpages:
- generator = site.protectedpages(namespace=namespace, type=protectType)
- # Take the right templates to use, the category and the comment
- TSP = i18n.translate(site, templateSemiProtection)
- TTP = i18n.translate(site, templateTotalProtection)
- TSMP = i18n.translate(site, templateSemiMoveProtection)
- TTMP = i18n.translate(site, templateTotalMoveProtection)
- TNR = i18n.translate(site, templateNoRegex)
- TU = i18n.translate(site, templateUnique)
+ # Process local args
+ for arg in local_args:
+ arg, sep, value = arg.partition(':')
+ option = arg[1:]
+ if arg in ('-always', '-move', '-show'):
+ options[option] = True
+ elif arg in ('-protectedpages', '-moveprotected'):
+ protect_type = 'move' if option.startswith('move') else 'edit'
+ generator = site.protectedpages(namespace=int(value or 0),
+ type=protect_type)
- categories = i18n.translate(site, categoryToCheck)
- commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
if not generator:
- generator = genFactory.getCombinedGenerator()
+ generator = gen_factory.getCombinedGenerator()
+
if not generator:
- generator = []
- pywikibot.output('Loading categories...')
# Define the category if no other generator has been set
- for CAT in categories:
- cat = pywikibot.Category(site, CAT)
+ gen = []
+ categories = i18n.translate(site, categoryToCheck)
+ for category_name in categories:
+ cat = pywikibot.Category(site, category_name)
# Define the generator
- gen = pagegenerators.CategorizedPageGenerator(cat)
- for pageCat in gen:
- generator.append(pageCat)
- pywikibot.output('Categories loaded, start!')
- # Main Loop
- if not genFactory.nopreload:
- generator = pagegenerators.PreloadingGenerator(generator,
- groupsize=60)
- for page in generator:
- pagename = page.title(as_link=True)
- pywikibot.output('Loading {}...'.format(pagename))
- try:
- text = page.text
- except NoPageError:
- pywikibot.output("{} doesn't exist! Skipping...".format(pagename))
- continue
- except IsRedirectPageError:
- pywikibot.output('{} is a redirect! Skipping...'.format(pagename))
- if show:
- showQuest(page)
- continue
- # FIXME: This check does not work :
- # PreloadingGenerator cannot set correctly page.editRestriction
- # (see bug T57322)
- # if not page.has_permission():
- # pywikibot.output(
- # "{} is sysop-protected : this account can't edit "
- # "it! Skipping...".format(pagename))
- # continue
- restrictions = page.protection()
- try:
- editRestr = restrictions['edit']
- except KeyError:
- editRestr = None
- if not page.has_permission():
- pywikibot.output('{} is protected: '
- "this account can't edit it! Skipping..."
- .format(pagename))
- continue
+ gen.append(pagegenerators.CategorizedPageGenerator(cat))
+ generator = chain.from_iterable(gen)
- # Understand, according to the template in the page, what should be the
- # protection and compare it with what there really is.
- TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
- # Only to see if the text is the same or not...
- oldtext = text
- # keep track of the changes for each step (edit then move)
- changes = -1
+ if not gen_factory.nopreload:
+ generator = pagegenerators.PreloadingGenerator(generator, groupsize=60)
- if not editRestr:
- # page is not edit-protected
- # Deleting the template because the page doesn't need it.
- if not (TTP or TSP):
- raise Error(
- 'This script is not localized to use it on \n{}. '
- 'Missing "templateSemiProtection" or'
- '"templateTotalProtection"'.format(site.sitename))
-
- if TU:
- replaceToPerform = '|'.join(TTP + TSP + TU)
- else:
- replaceToPerform = '|'.join(TTP + TSP)
- text, changes = re.subn('<noinclude>({})</noinclude>'
- .format(replaceToPerform), '', text)
- if changes == 0:
- text, changes = re.subn('({})'.format(replaceToPerform), '', text)
- msg = 'The page is editable for all'
- if not moveBlockCheck:
- msg += ', deleting the template..'
- pywikibot.output(msg + '.')
-
- elif editRestr[0] == 'sysop':
- # total edit protection
- if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
- (TemplateInThePage[0] == 'unique' and TU):
- msg = 'The page is protected to the sysop'
- if not moveBlockCheck:
- msg += ', skipping...'
- pywikibot.output(msg)
- else:
- if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
- raise Error(
- 'This script is not localized to use it on \n{}. '
- 'Missing "templateNoRegex"'.format(
- site.sitename))
-
- pywikibot.output('The page is protected to the sysop, but the '
- 'template seems not correct. Fixing...')
- if TU:
- text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
- else:
- text, changes = re.subn(TemplateInThePage[1], TNR[1], text)
-
- elif TSP or TU:
- # implicitly editRestr[0] = 'autoconfirmed', edit-Semi-protection
- if TemplateInThePage[0] == 'autoconfirmed-total' or \
- TemplateInThePage[0] == 'unique':
- msg = 'The page is editable only for the autoconfirmed users'
- if not moveBlockCheck:
- msg += ', skipping...'
- pywikibot.output(msg)
- else:
- if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
- raise Error(
- 'This script is not localized to use it on \n{}. '
- 'Missing "templateNoRegex"'.format(
- site.sitename))
- pywikibot.output('The page is editable only for the '
- 'autoconfirmed users, but the template '
- 'seems not correct. Fixing...')
- if TU:
- text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
- else:
- text, changes = re.subn(TemplateInThePage[1], TNR[0], text)
-
- if changes == 0:
- # We tried to fix edit-protection templates, but it did not work.
- pywikibot.warning('No edit-protection template could be found')
-
- if moveBlockCheck and changes > -1:
- # checking move protection now
- try:
- moveRestr = restrictions['move']
- except KeyError:
- moveRestr = False
- changes = -1
-
- if not moveRestr:
- pywikibot.output('The page is movable for all, deleting the '
- 'template...')
- # Deleting the template because the page doesn't need it.
- if TU:
- replaceToPerform = '|'.join(TSMP + TTMP + TU)
- else:
- replaceToPerform = '|'.join(TSMP + TTMP)
- text, changes = re.subn('<noinclude>({})</noinclude>'
- .format(replaceToPerform), '', text)
- if changes == 0:
- text, changes = re.subn('({})'.format(replaceToPerform),
- '', text)
- elif moveRestr[0] == 'sysop':
- # move-total-protection
- if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
- (TemplateInThePage[0] == 'unique' and TU):
- pywikibot.output('The page is protected from moving to '
- 'the sysop, skipping...')
- if TU:
- # no changes needed, better to revert the old text.
- text = oldtext
- else:
- pywikibot.output('The page is protected from moving to '
- 'the sysop, but the template seems not '
- 'correct. Fixing...')
- if TU:
- text, changes = re.subn(TemplateInThePage[1], TNR[4],
- text)
- else:
- text, changes = re.subn(TemplateInThePage[1], TNR[3],
- text)
-
- elif TSMP or TU:
- # implicitly moveRestr[0] = 'autoconfirmed',
- # move-semi-protection
- if TemplateInThePage[0] == 'autoconfirmed-move' or \
- TemplateInThePage[0] == 'unique':
- pywikibot.output('The page is movable only for the '
- 'autoconfirmed users, skipping...')
- if TU:
- # no changes needed, better to revert the old text.
- text = oldtext
- else:
- pywikibot.output('The page is movable only for the '
- 'autoconfirmed users, but the template '
- 'seems not correct. Fixing...')
- if TU:
- text, changes = re.subn(TemplateInThePage[1], TNR[4],
- text)
- else:
- text, changes = re.subn(TemplateInThePage[1], TNR[2],
- text)
-
- if changes == 0:
- # We tried to fix move-protection templates but it did not work
- pywikibot.warning('No move-protection template could be found')
-
- if oldtext != text:
- # Ok, asking if the change has to be performed and do it if yes.
- pywikibot.output(color_format(
- '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
- pywikibot.showDiff(oldtext, text)
- if not always:
- choice = pywikibot.input_choice('Do you want to accept these '
- 'changes?',
- [('Yes', 'y'), ('No', 'n'),
- ('All', 'a')], 'n')
- if choice == 'a':
- always = True
- if always or choice == 'y':
- save_page(page, text, commentUsed)
-
-
-def save_page(page, text, comment):
- """Save a given page."""
- # To prevent Infinite loops
- error_count = 0
- while True:
- try:
- page.put(text, comment, force=True)
- except EditConflictError:
- pywikibot.output('Edit conflict! skip!')
- except ServerError:
- # Sometimes there is this error that's quite annoying
- # because can block the whole process for nothing.
- error_count += 1
- if error_count < 5:
- pywikibot.output('Server Error! Wait..')
- time.sleep(3)
- continue
- # Prevent Infinite Loops
- raise ServerError('Fifth Server Error!')
- except SpamblacklistError as e:
- pywikibot.output('Cannot change {} because of '
- 'blacklist entry {}'
- .format(page.title(), e.url))
- except LockedPageError:
- pywikibot.output('The page is still protected. '
- 'Skipping...')
- except PageSaveRelatedError as error:
- pywikibot.output('Error putting page: {}'
- .format(error.args))
- break
+ bot = CheckerBot(site=site, generator=generator, **options)
+ bot.run()
if __name__ == '__main__':
diff --git a/tox.ini b/tox.ini
index 53db0f8..9c66d78 100644
--- a/tox.ini
+++ b/tox.ini
@@ -163,7 +163,7 @@
pywikibot/xmlreader.py: N802, N803, N806
scripts/archive/*: N802, N803, N806, N815, N816
scripts/add_text.py: N803, N806
- scripts/blockpageschecker.py : E501, N802, N803, N806, N816
+ scripts/blockpageschecker.py : N806, N816
scripts/category.py: N802, N803, N806
scripts/category_redirect.py: N802, N803, N806
scripts/checkimages.py: N801, N802, N803, N806, N816
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/724000
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id23491f8e5d30685787a5e78cecab97b921e78d6
Gerrit-Change-Number: 724000
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki(a)aol.com>
Gerrit-Reviewer: Meno25 <meno25mail(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/724028 )
Change subject: [IMPR] Adjust groupsize with pagegenerators.PreloadingGenerator
......................................................................
[IMPR] Adjust groupsize with pagegenerators.PreloadingGenerator
- add a new APISite property "maxlimit" to find out how many pages can
be specified at a time for preloading.
- use "maxlimit" in APISite.load_pages_from_pageids(), APISite.preloadpages
and pagegenerators.PreloadingGenerator
Bug: T291770
Change-Id: Ib51b04786d25a86a64eb460ec624a68247740354
---
M pywikibot/pagegenerators.py
M pywikibot/site/_apisite.py
M pywikibot/site/_generators.py
3 files changed, 18 insertions(+), 16 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 0c28979..fb2494b 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -2287,6 +2287,8 @@
for page in generator:
site = page.site
sites.setdefault(site, []).append(page)
+
+ groupsize = min(groupsize, site.maxlimit)
if len(sites[site]) >= groupsize:
# if this site is at the groupsize, process it
group = sites.pop(site)
diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py
index 4a79b23..57503f0 100644
--- a/pywikibot/site/_apisite.py
+++ b/pywikibot/site/_apisite.py
@@ -438,6 +438,18 @@
api._invalidate_superior_cookies(self.family)
@property
+ def maxlimit(self):
+ """Get the maximum limit of pages to be retrieved.
+
+ .. versionadded:: 7.0
+ """
+ parameter = self._paraminfo.parameter('query+info', 'prop')
+ if self.logged_in() and self.has_right('apihighlimits'):
+ return int(parameter['highlimit'])
+
+ return int(parameter['limit']) # T78333, T161783
+
+ @property
def userinfo(self):
"""Retrieve userinfo from site and store in _userinfo attribute.
diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py
index 30f1245..0a5d745 100644
--- a/pywikibot/site/_generators.py
+++ b/pywikibot/site/_generators.py
@@ -66,14 +66,7 @@
# Validate pageids.
gen = (str(int(p)) for p in pageids if int(p) > 0)
- # Find out how many pages can be specified at a time.
- parameter = self._paraminfo.parameter('query+info', 'prop')
- if self.logged_in() and self.has_right('apihighlimits'):
- groupsize = int(parameter['highlimit'])
- else:
- groupsize = int(parameter['limit'])
-
- for sublist in itergroup(filter_unique(gen), groupsize):
+ for sublist in itergroup(filter_unique(gen), self.maxlimit):
# Store the order of the input data.
priority_dict = dict(zip(sublist, range(len(sublist))))
@@ -131,13 +124,7 @@
if pageprops:
props += '|pageprops'
- parameter = self._paraminfo.parameter('query+info', 'prop')
- if self.logged_in() and self.has_right('apihighlimits'):
- max_ids = int(parameter['highlimit'])
- else:
- max_ids = int(parameter['limit']) # T78333, T161783
-
- for sublist in itergroup(pagelist, min(groupsize, max_ids)):
+ for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)):
# Do not use p.pageid property as it will force page loading.
pageids = [str(p._pageid) for p in sublist
if hasattr(p, '_pageid') and p._pageid > 0]
@@ -155,7 +142,8 @@
rvgen = api.PropertyGenerator(props, site=self)
rvgen.set_maximum_items(-1) # suppress use of "rvlimit" parameter
- if len(pageids) == len(sublist) and len(set(pageids)) <= max_ids:
+ if len(pageids) == len(sublist) \
+ and len(set(pageids)) <= self.maxlimit:
# only use pageids if all pages have them
rvgen.request['pageids'] = set(pageids)
else:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/724028
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib51b04786d25a86a64eb460ec624a68247740354
Gerrit-Change-Number: 724028
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Framawiki <framawiki(a)tools.wmflabs.org>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged