jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
[bugfix]: fix partial caching in Category.subcategories()

Partial caching in category.subcategories() leads to incomplete data
in subsequent calls.

Changes:
- save api calls for categories with no subcategories, based on
cat.categoryinfo['subcats'] info.

At the expense of an extra api call for the base category, for a
cleaner design.
Subcategories have categoryinfo already cached (no extra api calls
needed).

- consider cache valid only if all subcategories have been fetched.
this is guaranteed only if total=none.

- also invalidate cache if called with content=true, after chache is
created with content=false in previous calls.

Bug: T88217
Change-Id: Ifdd4ae5eb1e78764b8d1aaaa286004bcb7705760
---
M pywikibot/page/_pages.py
M tests/category_tests.py
2 files changed, 70 insertions(+), 6 deletions(-)

diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 9537cd8..b5ebb4a 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -2390,15 +2390,23 @@
:param content: if True, retrieve the content of the current version
of each category description page (default False)
"""
+
+ def is_cache_valid(cache: dict, content: bool) -> bool:
+ return cache['content'] or not content
+
+ if not self.categoryinfo['subcats']:
+ return
+
if not isinstance(recurse, bool) and recurse:
recurse = recurse - 1

- if not hasattr(self, '_subcats'):
- self._subcats = []
- for member in self.site.categorymembers(
+ if (not hasattr(self, '_subcats')
+ or not is_cache_valid(self._subcats, content)):
+ cache = {'data': [], 'content': content}
+
+ for subcat in self.site.categorymembers(
self, member_type='subcat', total=total, content=content):
- subcat = Category(member)
- self._subcats.append(subcat)
+ cache['data'].append(subcat)
yield subcat
if total is not None:
total -= 1
@@ -2415,8 +2423,11 @@
total -= 1
if total == 0:
return
+ else:
+ # cache is valid only if all subcategories are fetched (T88217)
+ self._subcats = cache
else:
- for subcat in self._subcats:
+ for subcat in self._subcats['data']:
yield subcat
if total is not None:
total -= 1
diff --git a/tests/category_tests.py b/tests/category_tests.py
index 2e947c2..3bd8f1f 100755
--- a/tests/category_tests.py
+++ b/tests/category_tests.py
@@ -103,9 +103,62 @@
self.assertIn(c1, subcategories)
self.assertNotIn(c2, subcategories)

+ cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
subcategories_total = list(cat.subcategories(total=2))
self.assertLength(subcategories_total, 2)

+ def test_subcategories_cache_length(self):
+ """Test the subcategories cache length."""
+ site = self.get_site()
+
+ # test cache is valid only if all members of cat are iterated.
+ cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
+ subcategories = list(cat.subcategories(total=2))
+ self.assertLength(subcategories, 2)
+ self.assertFalse(hasattr(cat, '_subcats'))
+
+ subcategories = list(cat.subcategories())
+ self.assertGreater(len(subcategories), 2)
+ self.assertTrue(hasattr(cat, '_subcats'))
+
+ # new cat, no cached data.
+ cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
+
+ # cache not available yet due to partial iteration.
+ gen = cat.subcategories()
+ _ = next(gen)
+ self.assertFalse(hasattr(cat, '_subcats'))
+
+ # cache available.
+ _ = list(gen)
+ self.assertTrue(hasattr(cat, '_subcats'))
+
+ def test_subcategories_cache_content(self):
+ """Test the subcategories cache content."""
+ site = self.get_site()
+ cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
+
+ subcategories = list(cat.subcategories(content=False))
+ cache_id_1 = id(cat._subcats)
+ cache_len_1 = len(subcategories)
+ subcat = subcategories[0]
+ self.assertFalse(subcat.has_content())
+
+ # toggle content.
+ subcategories = list(cat.subcategories(content=True))
+ cache_len_2 = len(subcategories)
+ cache_id_2 = id(cat._subcats)
+ subcat = subcategories[0]
+ self.assertTrue(subcat.has_content())
+ # cache reloaded.
+ self.assertNotEqual(cache_id_1, cache_id_2)
+ self.assertTrue(cache_len_1, cache_len_2)
+
+ # cache not reloaded.
+ _ = list(cat.subcategories(content=True))
+ cache_id_3 = id(cat._subcats)
+ self.assertEqual(cache_id_2, cache_id_3)
+
def test_subcategories_recurse(self):
"""Test the subcategories method with recurse=True."""
site = self.get_site()

To view, visit change 818135. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ifdd4ae5eb1e78764b8d1aaaa286004bcb7705760
Gerrit-Change-Number: 818135
Gerrit-PatchSet: 6
Gerrit-Owner: Mpaa <mpaa.wiki@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged