jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[FEAT] Add ability to preload categories

Bug: T241689
Change-Id: If855e98008e5f13e0a087af66de5d4d09511198e
---
M pywikibot/data/api/_generators.py
M pywikibot/page/_pages.py
M pywikibot/site/_generators.py
M tests/site_tests.py
4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/pywikibot/data/api/_generators.py b/pywikibot/data/api/_generators.py
index f4802f0..21b39c4 100644
--- a/pywikibot/data/api/_generators.py
+++ b/pywikibot/data/api/_generators.py
@@ -914,21 +914,30 @@

def _update_templates(page, templates) -> None:
"""Update page templates."""
- templ_pages = [pywikibot.Page(page.site, tl['title']) for tl in templates]
+ templ_pages = {pywikibot.Page(page.site, tl['title']) for tl in templates}
if hasattr(page, '_templates'):
- page._templates.extend(templ_pages)
+ page._templates |= templ_pages
else:
page._templates = templ_pages


+def _update_categories(page, categories):
+ """Update page categories."""
+ cat_pages = {pywikibot.Page(page.site, ct['title']) for ct in categories}
+ if hasattr(page, '_categories'):
+ page._categories |= cat_pages
+ else:
+ page._categories = cat_pages
+
+
def _update_langlinks(page, langlinks) -> None:
"""Update page langlinks."""
- links = [pywikibot.Link.langlinkUnsafe(link['lang'], link['*'],
+ links = {pywikibot.Link.langlinkUnsafe(link['lang'], link['*'],
source=page.site)
- for link in langlinks]
+ for link in langlinks}

if hasattr(page, '_langlinks'):
- page._langlinks.extend(links)
+ page._langlinks |= links
else:
page._langlinks = links

@@ -996,12 +1005,17 @@
if 'templates' in pagedict:
_update_templates(page, pagedict['templates'])
elif 'templates' in props:
- page._templates = []
+ page._templates = set()
+
+ if 'categories' in pagedict:
+ _update_categories(page, pagedict['categories'])
+ elif 'categories' in props:
+ page._categories = set()

if 'langlinks' in pagedict:
_update_langlinks(page, pagedict['langlinks'])
elif 'langlinks' in props:
- page._langlinks = []
+ page._langlinks = set()

if 'coordinates' in pagedict:
_update_coordinates(page, pagedict['coordinates'])
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 1ead289..1df86a1 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -31,7 +31,7 @@

import pywikibot
from pywikibot import Timestamp, config, date, i18n, textlib
-from pywikibot.backports import Generator, Iterable, List
+from pywikibot.backports import Generator, Iterable, Iterator, List
from pywikibot.cosmetic_changes import CANCEL, CosmeticChangesToolkit
from pywikibot.exceptions import (
Error,
@@ -1492,10 +1492,10 @@
# this list if the method was called with include_obsolete=False
# (which is the default)
if not hasattr(self, '_langlinks'):
- self._langlinks = list(self.iterlanglinks(include_obsolete=True))
+ self._langlinks = set(self.iterlanglinks(include_obsolete=True))

if include_obsolete:
- return self._langlinks
+ return list(self._langlinks)
return [i for i in self._langlinks if not i.site.obsolete]

def iterlanglinks(self,
@@ -1526,7 +1526,7 @@
"""
return pywikibot.ItemPage.fromPage(self)

- def templates(self, content: bool = False):
+ def templates(self, content: bool = False) -> List['pywikibot.Page']:
"""
Return a list of Page objects for templates used on this Page.

@@ -1546,9 +1546,9 @@
del self._templates

if not hasattr(self, '_templates'):
- self._templates = list(self.itertemplates(content=content))
+ self._templates = set(self.itertemplates(content=content))

- return self._templates
+ return list(self._templates)

def itertemplates(self,
total: Optional[int] = None,
@@ -1584,7 +1584,7 @@
def categories(self,
with_sort_key: bool = False,
total: Optional[int] = None,
- content: bool = False):
+ content: bool = False) -> Iterator['pywikibot.Page']:
"""
Iterate categories that the article is in.

@@ -1599,6 +1599,15 @@
if with_sort_key:
raise NotImplementedError('with_sort_key is not implemented')

+ # Data might have been preloaded
+ # Delete cache if content is needed and elements have no content
+ if hasattr(self, '_categories'):
+ if (content
+ and not all(c.has_content() for c in self._categories)):
+ del self._categories
+ else:
+ return itertools.islice(self._categories, total)
+
return self.site.pagecategories(self, total=total, content=content)

def extlinks(self, total: Optional[int] = None):
diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py
index aad1b86..41e9f2c 100644
--- a/pywikibot/site/_generators.py
+++ b/pywikibot/site/_generators.py
@@ -94,6 +94,7 @@
templates: bool = False,
langlinks: bool = False,
pageprops: bool = False,
+ categories: bool = False,
content: bool = True
):
"""Return a generator to a list of preloaded pages.
@@ -108,6 +109,7 @@
:param langlinks: preload all language links from the provided pages
to other languages
:param pageprops: preload various properties defined in page content
+ :param categories: preload page categories
:param content: preload page content
"""
props = 'revisions|info|categoryinfo'
@@ -117,6 +119,8 @@
props += '|langlinks'
if pageprops:
props += '|pageprops'
+ if categories:
+ props += '|categories'

for sublist in itergroup(pagelist, min(groupsize, self.maxlimit)):
# Do not use p.pageid property as it will force page loading.
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 32ae187..b22c0df 100755
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -3183,6 +3183,20 @@
if count >= 5:
break

+ def test_preload_categories(self):
+ """Test preloading categories works."""
+ mysite = self.get_site()
+ cats = mysite.randompages(total=10, namespaces=14)
+ gen = mysite.preloadpages(cats, categories=True)
+ for count, page in enumerate(gen):
+ with self.subTest(page=page.title()):
+ self.assertTrue(hasattr(page, '_categories'))
+ # content=True will bypass cache
+ self.assertEqual(page._categories,
+ set(page.categories(content=True)))
+ if count >= 5:
+ break
+
def test_preload_content(self):
"""Test preloading templates and langlinks works."""
mysite = self.get_site()

To view, visit change 561697. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: If855e98008e5f13e0a087af66de5d4d09511198e
Gerrit-Change-Number: 561697
Gerrit-PatchSet: 8
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Erutuon <erutuon.wmflabs@gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw@arctus.nl>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Mpaa <mpaa.wiki@gmail.com>
Gerrit-MessageType: merged