jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/561697 )
Change subject: [FEAT] Add ability to preload categories
[FEAT] Add ability to preload categories
Bug: T241689 Change-Id: If855e98008e5f13e0a087af66de5d4d09511198e --- M pywikibot/data/api/_generators.py M pywikibot/page/_pages.py M pywikibot/site/_generators.py M tests/site_tests.py 4 files changed, 55 insertions(+), 14 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/data/api/_generators.py b/pywikibot/data/api/_generators.py index f4802f0..21b39c4 100644 --- a/pywikibot/data/api/_generators.py +++ b/pywikibot/data/api/_generators.py @@ -914,21 +914,30 @@
def _update_templates(page, templates) -> None: """Update page templates.""" - templ_pages = [pywikibot.Page(page.site, tl['title']) for tl in templates] + templ_pages = {pywikibot.Page(page.site, tl['title']) for tl in templates} if hasattr(page, '_templates'): - page._templates.extend(templ_pages) + page._templates |= templ_pages else: page._templates = templ_pages
+def _update_categories(page, categories): + """Update page categories.""" + cat_pages = {pywikibot.Page(page.site, ct['title']) for ct in categories} + if hasattr(page, '_categories'): + page._categories |= cat_pages + else: + page._categories = cat_pages + + def _update_langlinks(page, langlinks) -> None: """Update page langlinks.""" - links = [pywikibot.Link.langlinkUnsafe(link['lang'], link['*'], + links = {pywikibot.Link.langlinkUnsafe(link['lang'], link['*'], source=page.site) - for link in langlinks] + for link in langlinks}
if hasattr(page, '_langlinks'): - page._langlinks.extend(links) + page._langlinks |= links else: page._langlinks = links
@@ -996,12 +1005,17 @@ if 'templates' in pagedict: _update_templates(page, pagedict['templates']) elif 'templates' in props: - page._templates = [] + page._templates = set() + + if 'categories' in pagedict: + _update_categories(page, pagedict['categories']) + elif 'categories' in props: + page._categories = set()
if 'langlinks' in pagedict: _update_langlinks(page, pagedict['langlinks']) elif 'langlinks' in props: - page._langlinks = [] + page._langlinks = set()
if 'coordinates' in pagedict: _update_coordinates(page, pagedict['coordinates']) diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py index 1ead289..1df86a1 100644 --- a/pywikibot/page/_pages.py +++ b/pywikibot/page/_pages.py @@ -31,7 +31,7 @@
import pywikibot from pywikibot import Timestamp, config, date, i18n, textlib -from pywikibot.backports import Generator, Iterable, List +from pywikibot.backports import Generator, Iterable, Iterator, List from pywikibot.cosmetic_changes import CANCEL, CosmeticChangesToolkit from pywikibot.exceptions import ( Error, @@ -1492,10 +1492,10 @@ # this list if the method was called with include_obsolete=False # (which is the default) if not hasattr(self, '_langlinks'): - self._langlinks = list(self.iterlanglinks(include_obsolete=True)) + self._langlinks = set(self.iterlanglinks(include_obsolete=True))
if include_obsolete: - return self._langlinks + return list(self._langlinks) return [i for i in self._langlinks if not i.site.obsolete]
def iterlanglinks(self, @@ -1526,7 +1526,7 @@ """ return pywikibot.ItemPage.fromPage(self)
- def templates(self, content: bool = False): + def templates(self, content: bool = False) -> List['pywikibot.Page']: """ Return a list of Page objects for templates used on this Page.
@@ -1546,9 +1546,9 @@ del self._templates
if not hasattr(self, '_templates'): - self._templates = list(self.itertemplates(content=content)) + self._templates = set(self.itertemplates(content=content))
- return self._templates + return list(self._templates)
def itertemplates(self, total: Optional[int] = None, @@ -1584,7 +1584,7 @@ def categories(self, with_sort_key: bool = False, total: Optional[int] = None, - content: bool = False): + content: bool = False) -> Iterator['pywikibot.Page']: """ Iterate categories that the article is in.
@@ -1599,6 +1599,15 @@ if with_sort_key: raise NotImplementedError('with_sort_key is not implemented')
+ # Data might have been preloaded + # Delete cache if content is needed and elements have no content + if hasattr(self, '_categories'): + if (content + and not all(c.has_content() for c in self._categories)): + del self._categories + else: + return itertools.islice(self._categories, total) + return self.site.pagecategories(self, total=total, content=content)
def extlinks(self, total: Optional[int] = None): diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py index aad1b86..41e9f2c 100644 --- a/pywikibot/site/_generators.py +++ b/pywikibot/site/_generators.py @@ -94,6 +94,7 @@ templates: bool = False, langlinks: bool = False, pageprops: bool = False, + categories: bool = False, content: bool = True ): """Return a generator to a list of preloaded pages. @@ -108,6 +109,7 @@ :param langlinks: preload all language links from the provided pages to other languages :param pageprops: preload various properties defined in page content + :param categories: preload page categories :param content: preload page content """ props = 'revisions|info|categoryinfo' @@ -117,6 +119,8 @@ props += '|langlinks' if pageprops: props += '|pageprops' + if categories: + props += '|categories'
for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)): # Do not use p.pageid property as it will force page loading. diff --git a/tests/site_tests.py b/tests/site_tests.py index 32ae187..b22c0df 100755 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -3183,6 +3183,20 @@ if count >= 5: break
+ def test_preload_categories(self): + """Test preloading categories works.""" + mysite = self.get_site() + cats = mysite.randompages(total=10, namespaces=14) + gen = mysite.preloadpages(cats, categories=True) + for count, page in enumerate(gen): + with self.subTest(page=page.title()): + self.assertTrue(hasattr(page, '_categories')) + # content=True will bypass cache + self.assertEqual(page._categories, + set(page.categories(content=True))) + if count >= 5: + break + def test_preload_content(self): """Test preloading templates and langlinks works.""" mysite = self.get_site()