jenkins-bot submitted this change.

View Change


Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Improvements for APISite.preloadpages

- Don't show the "Retrieving <num> pages from site" message by default:
add a "quiet" parameter to Site.preloadpages which defaults to True and
suppresses the message
- add this parameter to PreloadingGenerator and DequePreloadingGenerator too,
but keep the message enabled by default there (quiet defaults to False)
- also enable the message within interwiki.py
- set default value of groupsize parameter to None;
in this case APISite.maxlimit is used as default value
- use this default value in archivebot.py
- update tests

Change-Id: Ie534a1360329463b9c8df27b41e61d78da816968
---
M scripts/interwiki.py
M tests/site_generators_tests.py
M pywikibot/site/_generators.py
M scripts/archivebot.py
M pywikibot/pagegenerators/__init__.py
5 files changed, 69 insertions(+), 29 deletions(-)

diff --git a/pywikibot/pagegenerators/__init__.py b/pywikibot/pagegenerators/__init__.py
index 4608b6c..661dacb 100644
--- a/pywikibot/pagegenerators/__init__.py
+++ b/pywikibot/pagegenerators/__init__.py
@@ -13,7 +13,7 @@
&params;
"""
#
-# (C) Pywikibot team, 2008-2022
+# (C) Pywikibot team, 2008-2023
#
# Distributed under the terms of the MIT license.
#
@@ -613,13 +613,15 @@


def PreloadingGenerator(generator: Iterable['pywikibot.page.Page'],
- groupsize: int = 50
+ groupsize: int = 50,
+ quiet: bool = False
) -> Iterator['pywikibot.page.Page']:
- """
- Yield preloaded pages taken from another generator.
+ """Yield preloaded pages taken from another generator.

:param generator: pages to iterate over
:param groupsize: how many pages to preload at once
+ :param quiet: If False (default), show the "Retrieving pages"
+ message
"""
# pages may be on more than one site, for example if an interwiki
# generator is used, so use a separate preloader for each site
@@ -633,17 +635,25 @@
if len(sites[site]) >= groupsize:
# if this site is at the groupsize, process it
group = sites.pop(site)
- yield from site.preloadpages(group, groupsize=groupsize)
+ yield from site.preloadpages(group, groupsize=groupsize,
+ quiet=quiet)

for site, pages in sites.items():
# process any leftover sites that never reached the groupsize
- yield from site.preloadpages(pages, groupsize=groupsize)
+ yield from site.preloadpages(pages, groupsize=groupsize, quiet=quiet)


def DequePreloadingGenerator(generator: Iterable['pywikibot.page.Page'],
- groupsize: int = 50
+ groupsize: int = 50,
+ quiet: bool = False
) -> Iterator['pywikibot.page.Page']:
- """Preload generator of type DequeGenerator."""
+ """Preload generator of type DequeGenerator.
+
+ :param generator: pages to iterate over
+ :param groupsize: how many pages to preload at once
+ :param quiet: If False (default), show the "Retrieving pages"
+ message
+ """
assert isinstance(generator, DequeGenerator), \
'generator must be a DequeGenerator object'

@@ -652,7 +662,7 @@
if not page_count:
return

- yield from PreloadingGenerator(generator, page_count)
+ yield from PreloadingGenerator(generator, page_count, quiet)


def PreloadingEntityGenerator(generator: Iterable['pywikibot.page.Page'],
diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py
index 966c76e..2cb414a 100644
--- a/pywikibot/site/_generators.py
+++ b/pywikibot/site/_generators.py
@@ -89,32 +89,43 @@
self,
pagelist,
*,
- groupsize: int = 50,
+ groupsize: Optional[int] = None,
templates: bool = False,
langlinks: bool = False,
pageprops: bool = False,
categories: bool = False,
- content: bool = True
+ content: bool = True,
+ quiet: bool = True
):
"""Return a generator to a list of preloaded pages.

- Pages are iterated in the same order than in the underlying pagelist.
- In case of duplicates in a groupsize batch, return the first entry.
+ Pages are iterated in the same order than in the underlying
+ pagelist. In case of duplicates in a groupsize batch, return the
+ first entry.

.. versionchanged:: 7.6
*content* parameter was added.
.. versionchanged:: 7.7
*categories* parameter was added.
+ .. versionchanged:: 8.1
+ *groupsize* is maxlimit by default. *quiet* parameter was
+ added. No longer show the "Retrieving pages from site"
+ message by default.

:param pagelist: an iterable that returns Page objects
- :param groupsize: how many Pages to query at a time
- :param templates: preload pages (typically templates) transcluded in
- the provided pages
- :param langlinks: preload all language links from the provided pages
- to other languages
- :param pageprops: preload various properties defined in page content
+ :param groupsize: how many Pages to query at a time. If None
+ (default), :attr:`maxlimit
+ <pywikibot.site._apisite.APISite.maxlimit>` is used.
+ :param templates: preload pages (typically templates)
+ transcluded in the provided pages
+ :param langlinks: preload all language links from the provided
+ pages to other languages
+ :param pageprops: preload various properties defined in page
+ content
:param categories: preload page categories
:param content: preload page content
+ :param quiet: If True (default), do not show the "Retrieving
+ pages" message
"""
props = 'revisions|info|categoryinfo'
if templates:
@@ -126,7 +137,8 @@
if categories:
props += '|categories'

- for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)):
+ groupsize = min(groupsize or self.maxlimit, self.maxlimit)
+ for sublist in itergroup(pagelist, groupsize):
# Do not use p.pageid property as it will force page loading.
pageids = [str(p._pageid) for p in sublist
if hasattr(p, '_pageid') and p._pageid > 0]
@@ -151,8 +163,8 @@
else:
rvgen.request['titles'] = list(cache.keys())
rvgen.request['rvprop'] = self._rvprops(content=content)
- pywikibot.info('Retrieving {} pages from {}.'
- .format(len(cache), self))
+ if not quiet:
+ pywikibot.info(f'Retrieving {len(cache)} pages from {self}.')

for pagedata in rvgen:
pywikibot.debug(f'Preloading {pagedata}')
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index 05a36ef..07f4771 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -717,9 +717,7 @@
for c in range(counter):
params = self.get_params(thread.timestamp, c + 1)
self.get_archive_page(pattern % params, params)
- list(self.site.preloadpages(
- self.archives.values(),
- groupsize=self.site.maxlimit))
+ list(self.site.preloadpages(self.archives.values()))

while not counter_found and counter > 1 \
and not archive.exists():
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index d503d69..a8cb116 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -328,7 +328,7 @@

"""
#
-# (C) Pywikibot team, 2003-2022
+# (C) Pywikibot team, 2003-2023
#
# Distributed under the terms of the MIT license.
#
@@ -1992,7 +1992,7 @@

# Get the content of the assembled list in one blow
gen = site.preloadpages(pageGroup, templates=True, langlinks=True,
- pageprops=True)
+ pageprops=True, quiet=False)
for _ in gen:
# we don't want to do anything with them now. The
# page contents will be read via the Subject class.
diff --git a/tests/site_generators_tests.py b/tests/site_generators_tests.py
index 86fcb78..2f5a68a 100755
--- a/tests/site_generators_tests.py
+++ b/tests/site_generators_tests.py
@@ -2203,8 +2203,8 @@
links = list(mysite.pagelinks(mainpage, total=20))

with suppress_warnings(WARN_SITE_CODE, category=UserWarning):
- gen = mysite.preloadpages(links, groupsize=5, langlinks=True)
- pages = list(gen)
+ pages = list(mysite.preloadpages(links, groupsize=5,
+ langlinks=True, quiet=False))

self.assertLength(links, pages)
for page in pages:

To view, visit change 904576. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie534a1360329463b9c8df27b41e61d78da816968
Gerrit-Change-Number: 904576
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged