jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/724028 )
Change subject: [IMPR] Adjust groupsize with pagegenerators.PreloadingGenerator
......................................................................
[IMPR] Adjust groupsize with pagegenerators.PreloadingGenerator
- add a new APISite property "maxlimit" to find out how many pages can be specified at a time for preloading.
- use "maxlimit" in APISite.load_pages_from_pageids(), APISite.preloadpages() and pagegenerators.PreloadingGenerator
Bug: T291770
Change-Id: Ib51b04786d25a86a64eb460ec624a68247740354
---
M pywikibot/pagegenerators.py
M pywikibot/site/_apisite.py
M pywikibot/site/_generators.py
3 files changed, 18 insertions(+), 16 deletions(-)
Approvals:
  Matěj Suchánek: Looks good to me, but someone else must approve
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 0c28979..fb2494b 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -2287,6 +2287,8 @@ for page in generator: site = page.site sites.setdefault(site, []).append(page) + + groupsize = min(groupsize, site.maxlimit) if len(sites[site]) >= groupsize: # if this site is at the groupsize, process it group = sites.pop(site) diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index 4a79b23..57503f0 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -438,6 +438,18 @@ api._invalidate_superior_cookies(self.family)
@property + def maxlimit(self): + """Get the maximum limit of pages to be retrieved. + + .. versionadded:: 7.0 + """ + parameter = self._paraminfo.parameter('query+info', 'prop') + if self.logged_in() and self.has_right('apihighlimits'): + return int(parameter['highlimit']) + + return int(parameter['limit']) # T78333, T161783 + + @property def userinfo(self): """Retrieve userinfo from site and store in _userinfo attribute.
diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py index 30f1245..0a5d745 100644 --- a/pywikibot/site/_generators.py +++ b/pywikibot/site/_generators.py @@ -66,14 +66,7 @@ # Validate pageids. gen = (str(int(p)) for p in pageids if int(p) > 0)
- # Find out how many pages can be specified at a time. - parameter = self._paraminfo.parameter('query+info', 'prop') - if self.logged_in() and self.has_right('apihighlimits'): - groupsize = int(parameter['highlimit']) - else: - groupsize = int(parameter['limit']) - - for sublist in itergroup(filter_unique(gen), groupsize): + for sublist in itergroup(filter_unique(gen), self.maxlimit): # Store the order of the input data. priority_dict = dict(zip(sublist, range(len(sublist))))
@@ -131,13 +124,7 @@ if pageprops: props += '|pageprops'
- parameter = self._paraminfo.parameter('query+info', 'prop') - if self.logged_in() and self.has_right('apihighlimits'): - max_ids = int(parameter['highlimit']) - else: - max_ids = int(parameter['limit']) # T78333, T161783 - - for sublist in itergroup(pagelist, min(groupsize, max_ids)): + for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)): # Do not use p.pageid property as it will force page loading. pageids = [str(p._pageid) for p in sublist if hasattr(p, '_pageid') and p._pageid > 0] @@ -155,7 +142,8 @@ rvgen = api.PropertyGenerator(props, site=self) rvgen.set_maximum_items(-1) # suppress use of "rvlimit" parameter
- if len(pageids) == len(sublist) and len(set(pageids)) <= max_ids: + if len(pageids) == len(sublist) \ + and len(set(pageids)) <= self.maxlimit: # only use pageids if all pages have them rvgen.request['pageids'] = set(pageids) else: