jenkins-bot submitted this change.

View Change


Approvals: Matěj Suchánek: Looks good to me, approved jenkins-bot: Verified
[bugfix] Only check for duplicates in Category.articles()

Also add test for infinite subcategories recursion.

Bug: T323413
Change-Id: I9cc7c1ba1589454207e06d72fc326c1537905538
---
M tests/category_tests.py
M pywikibot/page/_category.py
2 files changed, 122 insertions(+), 34 deletions(-)

diff --git a/pywikibot/page/_category.py b/pywikibot/page/_category.py
index d6ea85f..56d5635 100644
--- a/pywikibot/page/_category.py
+++ b/pywikibot/page/_category.py
@@ -72,11 +72,43 @@
>>> len(list(cat.subcategories(recurse=2, total=50)))
50

- .. seealso:: :attr:`categoryinfo`
+ Subcategories of the same level of each subtree are yielded
+ first, before the subcategories of the next level are yielded. For
+ example, given this category tree:

+ .. code-block:: text
+
+ A
+ +-- B
+ | +-- E
+ | | +-- H
+ | +-- F
+ | +-- G
+ +-- C
+ | +-- I
+ | | +-- E
+ | | +-- H
+ | +-- J
+ | +-- K
+ | +-- L
+ | +-- G
+ +-- D
+
+ Subcategories are yielded in the following order:
+ *B, C, D, E, F, G, H, I, J, E, H, K, L, G*
+
+ .. seealso:: :attr:`categoryinfo`
+ .. warning:: Categories may have infinite recursions of
+ subcategories. If ``recurse`` option is given as ``True`` or
+ an ``int`` value and this value is less than
+ `sys.getrecursionlimit()`, a ``RecursionError`` may be
+ raised. Be careful if passing this generator to a collection
+ in such case.
.. versionchanged:: 8.0
all parameters are keyword arguments only. Additional
- parameters are supported.
+ parameters are supported. The order in which subcategories
+ are yielded was changed. The old order was
+ *B, E, H, F, G, C, I, E, H, J, K, L, G, D*

:param recurse: if not False or 0, also iterate articles in
subcategories. If an int, limit recursion to this number of
@@ -102,12 +134,14 @@

def articles(self, *,
recurse: Union[int, bool] = False,
+ total: Optional[int] = None,
**kwargs: Any) -> Iterable[Page]:
"""
Yield all articles in the current category.

- By default, yields all pages in the category that are not
- subcategories.
+ Yields all pages in the category that are not subcategories.
+ Duplicates are filtered. To enable duplicates use :meth:`members`
+ with ``member_type=['page', 'file']`` instead.

**Usage:**

@@ -122,6 +156,12 @@
3
4

+ .. warning:: Categories may have infinite recursions of
+ subcategories. If ``recurse`` option is given as ``True`` or
+ an ``int`` value and this value is less than
+ `sys.getrecursionlimit()`, a ``RecursionError`` may be
+ raised. Be careful if passing this generator to a collection
+ in such case.
.. versionchanged:: 8.0
all parameters are keyword arguments only.

@@ -129,6 +169,8 @@
subcategories. If an int, limit recursion to this number of
levels. (Example: ``recurse=1`` will iterate articles in
first-level subcats, but no deeper.)
+ :param total: iterate no more than this number of pages in
+ total (at all levels)
:param kwargs: Additional parameters. Refer to
:meth:`APISite.categorymembers()
<pywikibot.site._generators.GeneratorsMixin.categorymembers>`
@@ -138,8 +180,24 @@
raise TypeError(
"articles() got an unexpected keyword argument 'member_type'")

- return self.members(
- member_type=['page', 'file'], recurse=recurse, **kwargs)
+ member_type = ['page', 'file']
+ if not recurse:
+ yield from self.members(
+ member_type=member_type, total=total, **kwargs)
+ return
+
+ seen = set()
+ for member in self.members(
+ member_type=member_type, recurse=recurse, **kwargs):
+ if member.pageid in seen:
+ continue
+
+ seen.add(member.pageid)
+ yield member
+ if total is not None:
+ total -= 1
+ if total == 0:
+ return

def members(self, *,
recurse: bool = False,
@@ -159,11 +217,16 @@
Calling this method with ``member_type='subcat'`` is equal to
calling :meth:`subcategories`. Calling this method with
``member_type=['page', 'file']`` is equal to calling
- :meth:`articles`.
+ :meth:`articles` except that the latter will filter duplicates.

.. seealso:: :meth:`APISite.categorymembers()
<pywikibot.site._generators.GeneratorsMixin.categorymembers>`
-
+ .. warning:: Categories may have infinite recursions of
+ subcategories. If ``recurse`` option is given as ``True`` or
+ an ``int`` value and this value is less than
+ `sys.getrecursionlimit()`, a ``RecursionError`` may be
+ raised. Be careful if passing this generator to a collection
+ in such case.
.. versionchanged:: 8.0
all parameters are keyword arguments only. Additional
parameters are supported.
@@ -179,10 +242,7 @@
<pywikibot.site._generators.GeneratorsMixin.categorymembers>`
for complete list.
"""
- seen = set()
for member in self.site.categorymembers(self, total=total, **kwargs):
- if recurse:
- seen.add(hash(member))
yield member
if total is not None:
total -= 1
@@ -190,24 +250,17 @@
return

if recurse:
- if not isinstance(recurse, bool) and recurse:
+ if not isinstance(recurse, bool):
recurse -= 1

for subcat in self.subcategories():
for member in subcat.members(
recurse=recurse, total=total, **kwargs):
- hash_value = hash(member)
- if hash_value in seen:
- continue
-
- seen.add(hash_value)
yield member
- if total is None:
- continue
-
- total -= 1
- if total == 0:
- return
+ if total is not None:
+ total -= 1
+ if total == 0:
+ return

def isEmptyCategory(self) -> bool: # noqa: N802
"""Return True if category has no members (including subcategories)."""
diff --git a/tests/category_tests.py b/tests/category_tests.py
index 862ed62..f2ea9cf 100755
--- a/tests/category_tests.py
+++ b/tests/category_tests.py
@@ -20,20 +20,28 @@
NOCATEGORYNAMESPACE_RE = "'(.*?)' is not in the category namespace!"
NOREDIRECTPAGE_RE = r'Page \[\[(.*?)\]\] is not a redirect page.'

- family = 'wikipedia'
- code = 'en'
+ sites = {
+ 'enwp': {
+ 'family': 'wikipedia',
+ 'code': 'en',
+ },
+ 'test2': {
+ 'family': 'wikipedia',
+ 'code': 'test2',
+ },
+ }

cached = True

def test_init(self):
"""Test the category's __init__ for one condition that can't be dry."""
- site = self.get_site()
+ site = self.get_site('enwp')
with self.assertRaisesRegex(ValueError, self.NOCATEGORYNAMESPACE_RE):
pywikibot.Category(site, 'Wikipedia:Test')

def test_is_empty(self):
"""Test if category is empty or not."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat_empty = pywikibot.Category(site, 'Category:foooooo')
cat_not_empty = pywikibot.Category(site,
'Category:Wikipedia categories')
@@ -42,7 +50,7 @@

def test_is_hidden(self):
"""Test isHiddenCategory."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat_hidden = pywikibot.Category(site, 'Category:Hidden categories')
cat_not_hidden = pywikibot.Category(site, 'Category:Wikipedia')
self.assertTrue(cat_hidden.isHiddenCategory())
@@ -50,7 +58,7 @@

def test_categoryinfo(self):
"""Test the categoryinfo property."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat = pywikibot.Category(site, 'Category:Female Wikipedians')
categoryinfo = cat.categoryinfo
self.assertGreaterEqual(categoryinfo['files'], 0)
@@ -68,7 +76,7 @@

def test_members(self):
"""Test the members method."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat = pywikibot.Category(site, 'Category:Wikipedia legal policies')
p1 = pywikibot.Page(site, 'Category:Wikipedia disclaimers')
p2 = pywikibot.Page(site, 'Wikipedia:Privacy policy')
@@ -94,7 +102,7 @@

def test_subcategories(self):
"""Test the subcategories method."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
c1 = pywikibot.Category(site, 'Category:Female Wikipedians')
c2 = pywikibot.Category(site, 'Category:Lesbian Wikipedians')
@@ -109,7 +117,7 @@

def test_subcategories_recurse(self):
"""Test the subcategories method with recurse=True."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat = pywikibot.Category(site, 'Category:Wikipedians by gender')
c1 = pywikibot.Category(site, 'Category:Female Wikipedians')
c2 = pywikibot.Category(site, 'Category:Lesbian Wikipedians')
@@ -118,9 +126,24 @@
self.assertIn(c1, subcategories_recurse)
self.assertIn(c2, subcategories_recurse)

+ def test_subcategories_infinite_recurse(self):
+ """Test infinite subcategories method with recurse."""
+ site = self.get_site('test2')
+ cat = pywikibot.Category(site, 'Categories')
+ big = pywikibot.Category(site, 'Really big category')
+ result = list(cat.subcategories(recurse=3))
+ self.assertEqual(result.count(cat), 2)
+ self.assertEqual(result.count(big), 4)
+ # check that the result is balanced
+ self.assertEqual(result[:4].count(cat), 1)
+ self.assertEqual(result[:4].count(big), 2)
+
+ for member in set(result):
+ self.assertIsInstance(member, pywikibot.Category)
+
def test_articles(self):
"""Test the articles method."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat = pywikibot.Category(site, 'Category:Wikipedia legal policies')
p1 = pywikibot.Page(site, 'Wikipedia:Privacy policy')
p2 = pywikibot.Page(site, 'Wikipedia:Risk disclaimer')
@@ -142,7 +165,7 @@

def test_redirects(self):
"""Test the redirects method."""
- site = self.get_site()
+ site = self.get_site('enwp')
cat1 = pywikibot.Category(site, 'Category:Fonts')
cat2 = pywikibot.Category(site, 'Category:Typefaces')


To view, visit change 858688. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9cc7c1ba1589454207e06d72fc326c1537905538
Gerrit-Change-Number: 858688
Gerrit-PatchSet: 13
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Mpaa <mpaa.wiki@gmail.com>
Gerrit-MessageType: merged