jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/858688 )
Change subject: [bugfix] Only check for duplicates in Category.articles() ......................................................................
[bugfix] Only check for duplicates in Category.articles()
Also add test for infinite subcategories recursion.
Bug: T323413 Change-Id: I9cc7c1ba1589454207e06d72fc326c1537905538 --- M tests/category_tests.py M pywikibot/page/_category.py 2 files changed, 122 insertions(+), 34 deletions(-)
Approvals: Matěj Suchánek: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/page/_category.py b/pywikibot/page/_category.py index d6ea85f..56d5635 100644 --- a/pywikibot/page/_category.py +++ b/pywikibot/page/_category.py @@ -72,11 +72,43 @@ >>> len(list(cat.subcategories(recurse=2, total=50))) 50
- .. seealso:: :attr:`categoryinfo` + Subcategories of the same level of each subtree are yielded + first before the next subcategories level is yielded. For example + having this category tree:
+ .. code-block:: text + + A + +-- B + | +-- E + | | +-- H + | +-- F + | +-- G + +-- C + | +-- I + | | +-- E + | | +-- H + | +-- J + | +-- K + | +-- L + | +-- G + +-- D + + Subcategories are yielded in the following order: + *B, C, D, E, F, G, H, I, J, E, H, K, L, G* + + .. seealso:: :attr:`categoryinfo` + .. warning:: Categories may have infinite recursions of + subcategories. If ``recurse`` option is given as ``True`` or + an ``int`` value and this value is less than + `sys.getrecursionlimit()`, a ``RecursionError`` may be + raised. Be careful if passing this generator to a collection + in such a case. .. versionchanged:: 8.0 all parameters are keyword arguments only. Additional - parameters are supported. + parameters are supported. The order in which subcategories are + yielded was changed. The old order was + *B, E, H, F, G, C, I, E, H, J, K, L, G, D*
:param recurse: if not False or 0, also iterate articles in subcategories. If an int, limit recursion to this number of @@ -102,12 +134,14 @@
def articles(self, *, recurse: Union[int, bool] = False, + total: Optional[int] = None, **kwargs: Any) -> Iterable[Page]: """ Yield all articles in the current category.
- By default, yields all pages in the category that are not - subcategories. + Yields all pages in the category that are not subcategories. + Duplicates are filtered. To enable duplicates use :meth:`members` + with ``member_type=['page', 'file']`` instead.
**Usage:**
@@ -122,6 +156,12 @@ 3 4
+ .. warning:: Categories may have infinite recursions of + subcategories. If ``recurse`` option is given as ``True`` or + an ``int`` value and this value is less than + `sys.getrecursionlimit()`, a ``RecursionError`` may be + raised. Be careful if passing this generator to a collection + in such a case. .. versionchanged:: 8.0 all parameters are keyword arguments only.
@@ -129,6 +169,8 @@ subcategories. If an int, limit recursion to this number of levels. (Example: ``recurse=1`` will iterate articles in first-level subcats, but no deeper.) + :param total: iterate no more than this number of pages in + total (at all levels) :param kwargs: Additional parameters. Refer to :meth:`APISite.categorymembers() <pywikibot.site._generators.GeneratorsMixin.categorymembers>` @@ -138,8 +180,24 @@ raise TypeError( "articles() got an unexpected keyword argument 'member_type'")
- return self.members( - member_type=['page', 'file'], recurse=recurse, **kwargs) + member_type = ['page', 'file'] + if not recurse: + yield from self.members( + member_type=member_type, total=total, **kwargs) + return + + seen = set() + for member in self.members( + member_type=member_type, recurse=recurse, **kwargs): + if member.pageid in seen: + continue + + seen.add(member.pageid) + yield member + if total is not None: + total -= 1 + if total == 0: + return
def members(self, *, recurse: bool = False, @@ -159,11 +217,16 @@ Calling this method with ``member_type='subcat'`` is equal to calling :meth:`subcategories`. Calling this method with ``member_type=['page', 'file']`` is equal to calling - :meth:`articles`. + :meth:`articles` except that the latter will filter duplicates.
.. seealso:: :meth:`APISite.categorymembers() <pywikibot.site._generators.GeneratorsMixin.categorymembers>` - + .. warning:: Categories may have infinite recursions of + subcategories. If ``recurse`` option is given as ``True`` or + an ``int`` value and this value is less than + `sys.getrecursionlimit()`, a ``RecursionError`` may be + raised. Be careful if passing this generator to a collection + in such a case. .. versionchanged:: 8.0 all parameters are keyword arguments only. Additional parameters are supported. @@ -179,10 +242,7 @@ <pywikibot.site._generators.GeneratorsMixin.categorymembers>` for complete list. """ - seen = set() for member in self.site.categorymembers(self, total=total, **kwargs): - if recurse: - seen.add(hash(member)) yield member if total is not None: total -= 1 @@ -190,24 +250,17 @@ return
if recurse: - if not isinstance(recurse, bool) and recurse: + if not isinstance(recurse, bool): recurse -= 1
for subcat in self.subcategories(): for member in subcat.members( recurse=recurse, total=total, **kwargs): - hash_value = hash(member) - if hash_value in seen: - continue - - seen.add(hash_value) yield member - if total is None: - continue - - total -= 1 - if total == 0: - return + if total is not None: + total -= 1 + if total == 0: + return
def isEmptyCategory(self) -> bool: # noqa: N802 """Return True if category has no members (including subcategories).""" diff --git a/tests/category_tests.py b/tests/category_tests.py index 862ed62..f2ea9cf 100755 --- a/tests/category_tests.py +++ b/tests/category_tests.py @@ -20,20 +20,28 @@ NOCATEGORYNAMESPACE_RE = "'(.*?)' is not in the category namespace!" NOREDIRECTPAGE_RE = r'Page [[(.*?)]] is not a redirect page.'
- family = 'wikipedia' - code = 'en' + sites = { + 'enwp': { + 'family': 'wikipedia', + 'code': 'en', + }, + 'test2': { + 'family': 'wikipedia', + 'code': 'test2', + }, + }
cached = True
def test_init(self): """Test the category's __init__ for one condition that can't be dry.""" - site = self.get_site() + site = self.get_site('enwp') with self.assertRaisesRegex(ValueError, self.NOCATEGORYNAMESPACE_RE): pywikibot.Category(site, 'Wikipedia:Test')
def test_is_empty(self): """Test if category is empty or not.""" - site = self.get_site() + site = self.get_site('enwp') cat_empty = pywikibot.Category(site, 'Category:foooooo') cat_not_empty = pywikibot.Category(site, 'Category:Wikipedia categories') @@ -42,7 +50,7 @@
def test_is_hidden(self): """Test isHiddenCategory.""" - site = self.get_site() + site = self.get_site('enwp') cat_hidden = pywikibot.Category(site, 'Category:Hidden categories') cat_not_hidden = pywikibot.Category(site, 'Category:Wikipedia') self.assertTrue(cat_hidden.isHiddenCategory()) @@ -50,7 +58,7 @@
def test_categoryinfo(self): """Test the categoryinfo property.""" - site = self.get_site() + site = self.get_site('enwp') cat = pywikibot.Category(site, 'Category:Female Wikipedians') categoryinfo = cat.categoryinfo self.assertGreaterEqual(categoryinfo['files'], 0) @@ -68,7 +76,7 @@
def test_members(self): """Test the members method.""" - site = self.get_site() + site = self.get_site('enwp') cat = pywikibot.Category(site, 'Category:Wikipedia legal policies') p1 = pywikibot.Page(site, 'Category:Wikipedia disclaimers') p2 = pywikibot.Page(site, 'Wikipedia:Privacy policy') @@ -94,7 +102,7 @@
def test_subcategories(self): """Test the subcategories method.""" - site = self.get_site() + site = self.get_site('enwp') cat = pywikibot.Category(site, 'Category:Wikipedians by gender') c1 = pywikibot.Category(site, 'Category:Female Wikipedians') c2 = pywikibot.Category(site, 'Category:Lesbian Wikipedians') @@ -109,7 +117,7 @@
def test_subcategories_recurse(self): """Test the subcategories method with recurse=True.""" - site = self.get_site() + site = self.get_site('enwp') cat = pywikibot.Category(site, 'Category:Wikipedians by gender') c1 = pywikibot.Category(site, 'Category:Female Wikipedians') c2 = pywikibot.Category(site, 'Category:Lesbian Wikipedians') @@ -118,9 +126,24 @@ self.assertIn(c1, subcategories_recurse) self.assertIn(c2, subcategories_recurse)
+ def test_subcategories_infinite_recurse(self): + """Test infinite subcategories method with recurse.""" + site = self.get_site('test2') + cat = pywikibot.Category(site, 'Categories') + big = pywikibot.Category(site, 'Really big category') + result = list(cat.subcategories(recurse=3)) + self.assertEqual(result.count(cat), 2) + self.assertEqual(result.count(big), 4) + # check that the result is balanced + self.assertEqual(result[:4].count(cat), 1) + self.assertEqual(result[:4].count(big), 2) + + for member in set(result): + self.assertIsInstance(member, pywikibot.Category) + def test_articles(self): """Test the articles method.""" - site = self.get_site() + site = self.get_site('enwp') cat = pywikibot.Category(site, 'Category:Wikipedia legal policies') p1 = pywikibot.Page(site, 'Wikipedia:Privacy policy') p2 = pywikibot.Page(site, 'Wikipedia:Risk disclaimer') @@ -142,7 +165,7 @@
def test_redirects(self): """Test the redirects method.""" - site = self.get_site() + site = self.get_site('enwp') cat1 = pywikibot.Category(site, 'Category:Fonts') cat2 = pywikibot.Category(site, 'Category:Typefaces')