jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/700445 )
Change subject: [IMPR] use *iterables instead of genlist in intersect_generators
......................................................................
[IMPR] use *iterables instead of genlist in intersect_generators
- as common in itertools, enable multiple arguments for *iterables
instead of a single genlist list
- deprecate the old behaviour
- force allow_duplicates to be a keyword argument and deprecate
the unwanted usage as positional argument
- early return if there are fewer than 2 iterables
- don't import Counter again
- update documentation and add a sample
- update tests accordingly
Change-Id: Iec0fc7f9c220883b8f3b5e76786539b9320bb3f2
---
M pywikibot/pagegenerators.py
M pywikibot/tools/__init__.py
M tests/thread_tests.py
3 files changed, 44 insertions(+), 17 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 5be66b9..1545ca2 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -593,7 +593,7 @@
'"-intersect" ignored as only one generator is specified.')
elif self.intersect:
# By definition no duplicates are possible.
- dupfiltergen = intersect_generators(self.gens)
+ dupfiltergen = intersect_generators(*self.gens)
else:
dupfiltergen = _filter_unique_pages(itertools.chain(*self.gens))
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index 52b40ff..996efd8 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -926,11 +926,10 @@
.format(thd, thd.queue.qsize()), self._logger)
-def intersect_generators(genlist, allow_duplicates=False):
- """
- Intersect generators listed in genlist.
+def intersect_generators(*iterables, allow_duplicates: bool = False):
+ """Intersect generators listed in iterables.
- Yield items only if they are yielded by all generators in genlist.
+ Yield items only if they are yielded by all generators of iterables.
Threads (via ThreadedGenerator) are used in order to run generators
in parallel, so that items can be yielded before generators are
exhausted.
@@ -939,13 +938,42 @@
Quitting before all generators are finished is attempted if
there is no more chance of finding an item in all queues.
- :param genlist: list of page generators
- :type genlist: list
- :param allow_duplicates: allow duplicates if present in all generators
- :type allow_duplicates: bool
+ Sample:
+ >>> iterables = 'mississippi', 'missouri'
+ >>> list(intersect_generators(*iterables))
+ ['m', 'i', 's']
+ >>> list(intersect_generators(*iterables, allow_duplicates=True))
+ ['m', 'i', 's', 's', 'i']
+
+ :param iterables: page generators
+ :param allow_duplicates: optional keyword argument to allow duplicates
+ if present in all generators
"""
+ # 'allow_duplicates' must be given as keyword argument
+ if iterables and iterables[-1] in (True, False):
+ allow_duplicates = iterables[-1]
+ iterables = iterables[:-1]
+ issue_deprecation_warning("'allow_duplicates' as positional argument",
+ 'keyword argument "allow_duplicates={}"'
+ .format(allow_duplicates),
+ since='6.4.0')
+
+ # iterables must not be given as tuple or list
+ if len(iterables) == 1 and isinstance(iterables[0], (list, tuple)):
+ iterables = iterables[0]
+ issue_deprecation_warning("'iterables' as list type",
+ "consecutive iterables or use '*' to unpack",
+ since='6.4.0')
+
+ if not iterables:
+ return
+
+ if len(iterables) == 1:
+ yield from iterables[0]
+ return
+
# If any generator is empty, no pages are going to be returned
- for source in genlist:
+ for source in iterables:
if not source:
debug('At least one generator ({!r}) is empty and execution was '
'skipped immediately.'.format(source), 'intersect')
@@ -953,20 +981,19 @@
# Item is cached to check that it is found n_gen
# times before being yielded.
- from collections import Counter
- cache = collections.defaultdict(Counter)
- n_gen = len(genlist)
+ cache = collections.defaultdict(collections.Counter)
+ n_gen = len(iterables)
# Class to keep track of alive threads.
# Start new threads and remove completed threads.
thrlist = ThreadList()
- for source in genlist:
+ for source in iterables:
threaded_gen = ThreadedGenerator(name=repr(source), target=source)
threaded_gen.daemon = True
thrlist.append(threaded_gen)
- ones = Counter(thrlist)
+ ones = collections.Counter(thrlist)
seen = {}
while True:
diff --git a/tests/thread_tests.py b/tests/thread_tests.py
index dfc7c07..44ec655 100644
--- a/tests/thread_tests.py
+++ b/tests/thread_tests.py
@@ -49,7 +49,7 @@
# first otherwise the generator is empty the second time.
datasets = [list(gen) for gen in gens]
set_result = set(datasets[0]).intersection(*datasets[1:])
- result = list(intersect_generators(datasets))
+ result = list(intersect_generators(*datasets))
self.assertCountEqual(set(result), result)
self.assertCountEqual(result, set_result)
@@ -63,7 +63,7 @@
for dataset in datasets[1:]:
counter_result = counter_result & Counter(dataset)
counter_result = list(counter_result.elements())
- result = list(intersect_generators(datasets, allow_duplicates=True))
+ result = list(intersect_generators(*datasets, allow_duplicates=True))
self.assertCountEqual(counter_result, result)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/700445
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Iec0fc7f9c220883b8f3b5e76786539b9320bb3f2
Gerrit-Change-Number: 700445
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/700439 )
Change subject: [IMPR] use a sentinel variable to determine the end of an iterable
......................................................................
[IMPR] use a sentinel variable to determine the end of an iterable
This enables None to be part of the iterables chain
Change-Id: I76e770735fc1e92fa5cd448751d3da645123760a
---
M pywikibot/tools/__init__.py
1 file changed, 4 insertions(+), 2 deletions(-)
Approvals:
Xqt: Looks good to me, approved
Matěj Suchánek: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py
index f81ee0b..52b40ff 100644
--- a/pywikibot/tools/__init__.py
+++ b/pywikibot/tools/__init__.py
@@ -1029,9 +1029,11 @@
:return: the combined generator of iterables
:rtype: generator
"""
+ sentinel = object()
return (item
- for item in itertools.chain.from_iterable(zip_longest(*iterables))
- if item is not None)
+ for item in itertools.chain.from_iterable(
+ zip_longest(*iterables, fillvalue=sentinel))
+ if item is not sentinel)
def filter_unique(iterable, container=None, key=None, add=None):
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/700439
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I76e770735fc1e92fa5cd448751d3da645123760a
Gerrit-Change-Number: 700439
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/700163 )
Change subject: [deps] update setuptools requirements
......................................................................
[deps] update setuptools requirements
After backport of vendor package version.Version due to T284299
setuptools 49.4.0 is no longer necessary. Fall back to the very
minimal version, which is 20.8.1 due to requirement.marker.
A newer version of setuptools is recommended.
- use 48.0.0 with python_version >= 3.10 because setuptools adopts
distutils from the Python 3.9 standard library which is deprecated
since Python 3.10 and will be dropped with Python 3.12
- use 38.5.2 with python_version >= 3.7 due to the fix of RuntimeError
in pkg_resources.parse_requirements on Python 3.7
See also: https://setuptools.readthedocs.io/en/latest/history.html
Bug: T284297
Change-Id: I35008f29735037dfe70392803e94551738572d66
---
M requirements.txt
M setup.py
2 files changed, 8 insertions(+), 4 deletions(-)
Approvals:
JJMC89: Looks good to me, approved
jenkins-bot: Verified
diff --git a/requirements.txt b/requirements.txt
index d45bdae..60156bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,8 +20,10 @@
# mandatory dependencies, others are optional
requests>=2.20.1, < 2.26.0; python_version < '3.6'
requests>=2.20.1 ; python_version >= '3.6'
-setuptools>=49.4.0, !=50.0.0, <50.2.0 ; python_version < '3.6'
-setuptools>=49.4.0 ; python_version >= '3.6'
+setuptools>=48.0.0 ; python_version >= '3.10'
+setuptools>=38.5.2 ; python_version >= '3.7'
+setuptools>=20.8.1 ; python_version >= '3.6'
+setuptools>=20.8.1, !=50.0.0, <50.2.0 ; python_version < '3.6'
# MediaWiki markup parser
# mwparserfromhell is default, wikitextparser can be used instead
diff --git a/setup.py b/setup.py
index 2d5438f..17e3310 100644
--- a/setup.py
+++ b/setup.py
@@ -110,8 +110,10 @@
'requests>=2.20.1,<2.26.0;python_version<"3.6"',
'requests>=2.20.1;python_version>="3.6"',
# PEP 440
- 'setuptools>=49.4.0, !=50.0.0, <50.2.0 ; python_version < "3.6"',
- 'setuptools>=49.4.0 ; python_version >= "3.6"',
+ 'setuptools>=48.0.0 ; python_version >= "3.10"',
+ 'setuptools>=38.5.2 ; python_version >= "3.7"',
+ 'setuptools>=20.8.1 ; python_version >= "3.6"',
+ 'setuptools>=20.8.1, !=50.0.0, <50.2.0 ; python_version < "3.6"',
]
# in addition either mwparserfromhell or wikitextparser is required
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/700163
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I35008f29735037dfe70392803e94551738572d66
Gerrit-Change-Number: 700163
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Dvorapa <dvorapa(a)seznam.cz>
Gerrit-Reviewer: JJMC89 <JJMC89.Wikimedia(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged