jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/927188 )
Change subject: [IMPR] backport itertools.batched() from Python 3.12 ......................................................................
[IMPR] backport itertools.batched() from Python 3.12
- backport itertools.batched() from Python 3.12 - deprecate tools.itertools.itergroup() - replace every itergroup() call with batched()
Change-Id: I1b51d60e27171eb0f00087f68a194b4f396aef37 --- M pywikibot/tools/__init__.py M pywikibot/pagegenerators/_generators.py M scripts/template.py M pywikibot/site/_datasite.py M pywikibot/site/_generators.py M pywikibot/tools/itertools.py M pywikibot/backports.py M scripts/replace.py M scripts/maintenance/colors.py M scripts/claimit.py M pywikibot/data/api/_paraminfo.py 11 files changed, 105 insertions(+), 58 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/backports.py b/pywikibot/backports.py index d68e49c..40a798f 100644 --- a/pywikibot/backports.py +++ b/pywikibot/backports.py @@ -1,6 +1,6 @@ """This module contains backports to support older Python versions.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2023 # # Distributed under the terms of the MIT license. # @@ -164,3 +164,43 @@ return zip(a, b) else: from itertools import pairwise + + +# gh-98363 +if PYTHON_VERSION < (3, 12) or SPHINX_RUNNING: + def batched(iterable, n: int) -> Generator[Any, None, None]: + """Batch data from the *iterable* into tuples of length *n*. + + .. note:: The last batch may be shorter than *n*. + + Example: + + >>> i = batched(range(25), 10) + >>> print(next(i)) + (0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + >>> print(next(i)) + (10, 11, 12, 13, 14, 15, 16, 17, 18, 19) + >>> print(next(i)) + (20, 21, 22, 23, 24) + >>> print(next(i)) + Traceback (most recent call last): + ... + StopIteration + + .. seealso:: :python:`itertools.batched + <library/itertools.html#itertools.batched>`, + backported from Python 3.12. + .. versionadded:: 8.2 + + :param n: How many items of the iterable to get in one chunk + """ + group = [] + for item in iterable: + group.append(item) + if len(group) == n: + yield tuple(group) + group.clear() + if group: + yield tuple(group) +else: + from itertools import batched diff --git a/pywikibot/data/api/_paraminfo.py b/pywikibot/data/api/_paraminfo.py index f3accae..d0a71f1 100644 --- a/pywikibot/data/api/_paraminfo.py +++ b/pywikibot/data/api/_paraminfo.py @@ -1,6 +1,6 @@ """Object representing API parameter information.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2023 # # Distributed under the terms of the MIT license. # @@ -9,8 +9,7 @@
import pywikibot from pywikibot import config -from pywikibot.backports import Dict, removeprefix -from pywikibot.tools.itertools import itergroup +from pywikibot.backports import Dict, batched, removeprefix
__all__ = ['ParamInfo'] @@ -196,12 +195,11 @@ """ def module_generator(): """A generator yielding batches of modules.""" - i = itergroup(sorted(modules), self._limit) - for batch in i: + for batch in batched(sorted(modules), self._limit): for failed_module in failed_modules: yield [failed_module] - del failed_modules[:] - yield batch + failed_modules.clear() + yield list(batch)
modules -= set(self._paraminfo) if not modules: diff --git a/pywikibot/pagegenerators/_generators.py b/pywikibot/pagegenerators/_generators.py index 3d781a9..f6f2003 100644 --- a/pywikibot/pagegenerators/_generators.py +++ b/pywikibot/pagegenerators/_generators.py @@ -1,6 +1,6 @@ """Page filter generators provided by the pagegenerators module.""" # -# (C) Pywikibot team, 2008-2022 +# (C) Pywikibot team, 2008-2023 # # Distributed under the terms of the MIT license. # @@ -27,12 +27,13 @@ List, Sequence, Tuple, + batched, ) from pywikibot.comms import http from pywikibot.exceptions import APIError, ServerError from pywikibot.tools import deprecated from pywikibot.tools.collections import GeneratorWrapper -from pywikibot.tools.itertools import filter_unique, itergroup +from pywikibot.tools.itertools import filter_unique
OPT_SITE_TYPE = Optional['pywikibot.site.BaseSite'] @@ -1023,8 +1024,8 @@ :param site: Site for generator results. """ repo = site.data_repository() - for sublist in itergroup(gen, 50): - req = {'ids': [item.id for item in sublist], + for batch in batched(gen, 50): + req = {'ids': [item.id for item in batch], 'sitefilter': site.dbName(), 'action': 'wbgetentities', 'props': 'sitelinks'} diff --git a/pywikibot/site/_datasite.py b/pywikibot/site/_datasite.py index a5601ba..119d1eb 100644 --- a/pywikibot/site/_datasite.py +++ b/pywikibot/site/_datasite.py @@ -12,6 +12,7 @@ from warnings import warn
import pywikibot +from pywikibot.backports import batched from pywikibot.data import api from pywikibot.exceptions import ( APIError, @@ -23,7 +24,6 @@ from pywikibot.site._apisite import APISite from pywikibot.site._decorators import need_extension, need_right, need_version from pywikibot.tools import merge_unique_dicts, remove_last_args -from pywikibot.tools.itertools import itergroup
__all__ = ('DataSite', ) @@ -204,21 +204,20 @@ return data['entities']
def preload_entities(self, pagelist, groupsize: int = 50): - """ - Yield subclasses of WikibaseEntity's with content prefilled. + """Yield subclasses of WikibaseEntity's with content prefilled.
- Note that pages will be iterated in a different order - than in the underlying pagelist. + .. note:: Pages will be iterated in a different order than in + the underlying pagelist.
- :param pagelist: an iterable that yields either WikibaseEntity objects, - or Page objects linked to an ItemPage. + :param pagelist: an iterable that yields either WikibaseEntity + objects, or Page objects linked to an ItemPage. :param groupsize: how many pages to query at a time """ if not hasattr(self, '_entity_namespaces'): self._cache_entity_namespaces() - for sublist in itergroup(pagelist, groupsize): + for batch in batched(pagelist, groupsize): req = {'ids': [], 'titles': [], 'sites': []} - for p in sublist: + for p in batch: if isinstance(p, pywikibot.page.WikibaseEntity): ident = p._defined_by() for key in ident: diff --git a/pywikibot/site/_generators.py b/pywikibot/site/_generators.py index 2cb414a..04ded77 100644 --- a/pywikibot/site/_generators.py +++ b/pywikibot/site/_generators.py @@ -12,7 +12,7 @@ from typing import Any, Optional, Union
import pywikibot -from pywikibot.backports import Dict, Generator, Iterable, List # skipcq +from pywikibot.backports import Dict, Generator, Iterable, List, batched from pywikibot.data import api from pywikibot.exceptions import ( APIError, @@ -25,7 +25,7 @@ from pywikibot.site._decorators import need_right from pywikibot.site._namespace import NamespaceArgType from pywikibot.tools import is_ip_address, issue_deprecation_warning -from pywikibot.tools.itertools import filter_unique, itergroup +from pywikibot.tools.itertools import filter_unique
class GeneratorsMixin: @@ -55,13 +55,13 @@ # Validate pageids. gen = (str(int(p)) for p in pageids if int(p) > 0)
- for sublist in itergroup(filter_unique(gen), self.maxlimit): + for batch in batched(filter_unique(gen), self.maxlimit): # Store the order of the input data. - priority_dict = dict(zip(sublist, range(len(sublist)))) + priority_dict = dict(zip(batch, range(len(batch))))
prio_queue = [] next_prio = 0 - params = {'pageids': sublist, } + params = {'pageids': batch} rvgen = api.PropertyGenerator('info', site=self, parameters=params)
for pagedata in rvgen: @@ -138,13 +138,13 @@ props += '|categories'
groupsize = min(groupsize or self.maxlimit, self.maxlimit) - for sublist in itergroup(pagelist, groupsize): + for batch in batched(pagelist, groupsize): # Do not use p.pageid property as it will force page loading. - pageids = [str(p._pageid) for p in sublist + pageids = [str(p._pageid) for p in batch if hasattr(p, '_pageid') and p._pageid > 0] cache = {} # In case of duplicates, return the first entry. - for priority, page in enumerate(sublist): + for priority, page in enumerate(batch): try: cache.setdefault(page.title(with_section=False), (priority, page)) @@ -156,7 +156,7 @@ rvgen = api.PropertyGenerator(props, site=self) rvgen.set_maximum_items(-1) # suppress use of "rvlimit" parameter
- if len(pageids) == len(sublist) \ + if len(pageids) == len(batch) \ and len(set(pageids)) <= self.maxlimit: # only use pageids if all pages have them rvgen.request['pageids'] = set(pageids) diff --git a/pywikibot/tools/__init__.py b/pywikibot/tools/__init__.py index ae9e715..1c0cac5 100644 --- a/pywikibot/tools/__init__.py +++ b/pywikibot/tools/__init__.py @@ -838,7 +838,8 @@ # Deprecate objects which has to be imported from tools.itertools instead wrapper.add_deprecated_attr( 'itergroup', - replacement_name='pywikibot.tools.itertools.itergroup', + # new replacement in 8.2 + replacement_name='pywikibot.backports.batched', since='7.6.0') wrapper.add_deprecated_attr( 'islice_with_ellipsis', diff --git a/pywikibot/tools/itertools.py b/pywikibot/tools/itertools.py index 56e09ad..4166aaf 100644 --- a/pywikibot/tools/itertools.py +++ b/pywikibot/tools/itertools.py @@ -14,9 +14,9 @@ from itertools import chain, zip_longest from typing import Any
-from pywikibot.backports import Generator +from pywikibot.backports import batched, Generator from pywikibot.logging import debug -from pywikibot.tools import issue_deprecation_warning +from pywikibot.tools import deprecated, issue_deprecation_warning
__all__ = ( @@ -28,6 +28,7 @@ )
+@deprecated('backports.batched()', since='8.2.0') def itergroup(iterable, size: int, strict: bool = False) -> Generator[Any, None, None]: @@ -47,21 +48,20 @@ ... StopIteration
+ .. versionadded:: 7.6 + The *strict* parameter. + .. deprecated:: 8.2 + Use :func:`backports.batched` instead. + :param size: How many items of the iterable to get in one chunk :param strict: If True, raise a ValueError if length of iterable is not divisible by `size`. :raises ValueError: iterable is not divisible by size """ - group = [] - for item in iterable: - group.append(item) - if len(group) == size: - yield group - group = [] - if group: - if strict: + for group in batched(iterable, size): + if strict and len(group) < size: raise ValueError('iterable is not divisible by size.') - yield group + yield list(group)
def islice_with_ellipsis(iterable, *args, marker: str = '…'): diff --git a/scripts/claimit.py b/scripts/claimit.py index 71ee8ab..620b86c 100755 --- a/scripts/claimit.py +++ b/scripts/claimit.py @@ -46,14 +46,13 @@
""" # -# (C) Pywikibot team, 2013-2022 +# (C) Pywikibot team, 2013-2023 # # Distributed under the terms of the MIT license. # import pywikibot from pywikibot import WikidataBot, pagegenerators -from pywikibot.backports import removeprefix -from pywikibot.tools.itertools import itergroup +from pywikibot.backports import batched, removeprefix
# This is required for the text that is shown when you run this script @@ -127,7 +126,7 @@
claims = [] repo = pywikibot.Site().data_repository() - for property_id, target_str in itergroup(commandline_claims, 2): + for property_id, target_str in batched(commandline_claims, 2): claim = pywikibot.Claim(repo, property_id) if claim.type == 'wikibase-item': target = pywikibot.ItemPage(repo, target_str) diff --git a/scripts/maintenance/colors.py b/scripts/maintenance/colors.py index 4c7910b..fada5d7 100755 --- a/scripts/maintenance/colors.py +++ b/scripts/maintenance/colors.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 """Utility to show pywikibot colors.""" # -# (C) Pywikibot team, 2016-2022 +# (C) Pywikibot team, 2016-2023 # # Distributed under the terms of the MIT license. # import pywikibot -from pywikibot.tools.itertools import itergroup +from pywikibot.backports import batched from pywikibot.userinterfaces.terminal_interface_base import colors
@@ -26,7 +26,7 @@
for bg_col in bg_colors: # Three lines per each backgoung color. - for fg_col_group in itergroup(fg_colors, n_fg_colors / 4 + 1): + for fg_col_group in batched(fg_colors, n_fg_colors / 4 + 1): line = '' for fg_col in fg_col_group: line += ' ' diff --git a/scripts/replace.py b/scripts/replace.py index 4cce7ab..9d9cd94 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -154,11 +154,10 @@
import pywikibot from pywikibot import editor, fixes, i18n, pagegenerators, textlib -from pywikibot.backports import Dict, Generator, List, Pattern, Tuple +from pywikibot.backports import Dict, Generator, List, Pattern, Tuple, batched from pywikibot.bot import ExistingPageBot, SingleSiteBot from pywikibot.exceptions import InvalidPageError, NoPageError from pywikibot.tools import chars -from pywikibot.tools.itertools import itergroup
# This is required for the text that is shown when you run this script @@ -977,7 +976,7 @@ # The summary stored here won't be actually used but is only an example site = pywikibot.Site() single_summary = None - for old, new in itergroup(commandline_replacements, 2): + for old, new in batched(commandline_replacements, 2): replacement = Replacement(old, new) if not single_summary: single_summary = i18n.twtranslate( diff --git a/scripts/template.py b/scripts/template.py index 118c3f4..d44250b 100755 --- a/scripts/template.py +++ b/scripts/template.py @@ -109,13 +109,10 @@
import pywikibot from pywikibot import i18n, pagegenerators, textlib +from pywikibot.backports import batched from pywikibot.bot import SingleSiteBot from pywikibot.pagegenerators import XMLDumpPageGenerator -from pywikibot.tools.itertools import ( - filter_unique, - itergroup, - roundrobin_generators, -) +from pywikibot.tools.itertools import filter_unique, roundrobin_generators from scripts.replace import ReplaceRobot as ReplaceBot
@@ -268,7 +265,7 @@ templates = dict.fromkeys(template_names) else: try: - templates = dict(itergroup(template_names, 2, strict=True)) + templates = dict(batched(template_names, 2)) except ValueError: pywikibot.info('Unless using solely -subst or -remove, you must ' 'give an even number of template names.')