jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/818135 )
Change subject: [bugfix]: fix partial caching in Category.subcategories() ......................................................................
[bugfix]: fix partial caching in Category.subcategories()
Partial caching in category.subcategories() leads to incomplete data in subsequent calls.
Changes: - save api calls for categories with no subcategories, based on cat.categoryinfo['subcats'] info.
At the expense of an extra api call for the base category, for a cleaner design. Subcategories have categoryinfo already cached (no extra api calls needed).
- consider cache valid only if all subcategories have been fetched. this is guaranteed only if total=none.
- also invalidate cache if called with content=true, after chache is created with content=false in previous calls.
Bug: T88217 Change-Id: Ifdd4ae5eb1e78764b8d1aaaa286004bcb7705760 --- M pywikibot/page/_pages.py M tests/category_tests.py 2 files changed, 70 insertions(+), 6 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py index 9537cd8..b5ebb4a 100644 --- a/pywikibot/page/_pages.py +++ b/pywikibot/page/_pages.py @@ -2390,15 +2390,23 @@ :param content: if True, retrieve the content of the current version of each category description page (default False) """ + + def is_cache_valid(cache: dict, content: bool) -> bool: + return cache['content'] or not content + + if not self.categoryinfo['subcats']: + return + if not isinstance(recurse, bool) and recurse: recurse = recurse - 1
- if not hasattr(self, '_subcats'): - self._subcats = [] - for member in self.site.categorymembers( + if (not hasattr(self, '_subcats') + or not is_cache_valid(self._subcats, content)): + cache = {'data': [], 'content': content} + + for subcat in self.site.categorymembers( self, member_type='subcat', total=total, content=content): - subcat = Category(member) - self._subcats.append(subcat) + cache['data'].append(subcat) yield subcat if total is not None: total -= 1 @@ -2415,8 +2423,11 @@ total -= 1 if total == 0: return + else: + # cache is valid only if all subcategories are fetched (T88217) + self._subcats = cache else: - for subcat in self._subcats: + for subcat in self._subcats['data']: yield subcat if total is not None: total -= 1 diff --git a/tests/category_tests.py b/tests/category_tests.py index 2e947c2..3bd8f1f 100755 --- a/tests/category_tests.py +++ b/tests/category_tests.py @@ -103,9 +103,62 @@ self.assertIn(c1, subcategories) self.assertNotIn(c2, subcategories)
+ cat = pywikibot.Category(site, 'Category:Wikipedians by gender') subcategories_total = list(cat.subcategories(total=2)) self.assertLength(subcategories_total, 2)
+ def test_subcategories_cache_length(self): + """Test the subcategories cache length.""" + site = self.get_site() + + # test cache is valid only if all members of cat are iterated. + cat = pywikibot.Category(site, 'Category:Wikipedians by gender') + subcategories = list(cat.subcategories(total=2)) + self.assertLength(subcategories, 2) + self.assertFalse(hasattr(cat, '_subcats')) + + subcategories = list(cat.subcategories()) + self.assertGreater(len(subcategories), 2) + self.assertTrue(hasattr(cat, '_subcats')) + + # new cat, no cached data. + cat = pywikibot.Category(site, 'Category:Wikipedians by gender') + + # cache not available yet due to partial iteration. + gen = cat.subcategories() + _ = next(gen) + self.assertFalse(hasattr(cat, '_subcats')) + + # cache available. + _ = list(gen) + self.assertTrue(hasattr(cat, '_subcats')) + + def test_subcategories_cache_content(self): + """Test the subcategories cache content.""" + site = self.get_site() + cat = pywikibot.Category(site, 'Category:Wikipedians by gender') + + subcategories = list(cat.subcategories(content=False)) + cache_id_1 = id(cat._subcats) + cache_len_1 = len(subcategories) + subcat = subcategories[0] + self.assertFalse(subcat.has_content()) + + # toggle content. + subcategories = list(cat.subcategories(content=True)) + cache_len_2 = len(subcategories) + cache_id_2 = id(cat._subcats) + subcat = subcategories[0] + self.assertTrue(subcat.has_content()) + # cache reloaded. + self.assertNotEqual(cache_id_1, cache_id_2) + self.assertTrue(cache_len_1, cache_len_2) + + # cache not reloaded. + _ = list(cat.subcategories(content=True)) + cache_id_3 = id(cat._subcats) + self.assertEqual(cache_id_2, cache_id_3) + def test_subcategories_recurse(self): """Test the subcategories method with recurse=True.""" site = self.get_site()
pywikibot-commits@lists.wikimedia.org