jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1007387?usp=email )
Change subject: [bugfix] remove content parameter of ItemPage.page_gen method
......................................................................
[bugfix] remove content parameter of ItemPage.page_gen method
The filter needs page.text, so the text is already loaded page by page
anyway. Now always use the site.preloadpages method to bulk-load the content.
Bug: T358635
Change-Id: I62dca5845492a636cf556761d95bde5f5210d9f8
---
M pywikibot/proofreadpage.py
1 file changed, 22 insertions(+), 10 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index 2df5eb1..e5dc94d 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -47,7 +47,7 @@
from pywikibot.data.api import ListGenerator, Request
from pywikibot.exceptions import Error, InvalidTitleError, OtherPageSaveError
from pywikibot.page import PageSourceType
-from pywikibot.tools import MediaWikiVersion, cached
+from pywikibot.tools import MediaWikiVersion, cached, remove_last_args
try:
@@ -1305,12 +1305,13 @@
"""
return len(self._page_from_numbers)
- def page_gen(self, start: int = 1,
- end: int | None = None,
- filter_ql: Sequence[int] | None = None,
- only_existing: bool = False,
- content: bool = True
- ) -> Iterable[pywikibot.page.Page]:
+ @remove_last_args(['content']) # since 9.0.0
+ def page_gen(
+ self, start: int = 1,
+ end: int | None = None,
+ filter_ql: Sequence[int] | None = None,
+ only_existing: bool = False
+ ) -> Iterable[pywikibot.page.Page]:
"""Return a page generator which yields pages contained in Index page.
Range is [start ... end], extremes included.
@@ -1324,7 +1325,6 @@
:param filter_ql: filters quality levels
if None: all but 'Without Text'.
:param only_existing: yields only existing pages.
- :param content: preload content.
"""
if end is None:
end = self.num_pages
@@ -1346,8 +1346,7 @@
gen = [(self.get_number(p), p) for p in gen]
gen = [p for n, p in sorted(gen)]
- if content:
- gen = self.site.preloadpages(gen)
+ gen = self.site.preloadpages(gen)
# Filter by QL.
gen = (p for p in gen if p.ql in filter_ql)
# Yield only existing.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1007387?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I62dca5845492a636cf556761d95bde5f5210d9f8
Gerrit-Change-Number: 1007387
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005523?usp=email )
Change subject: [bugfix] Use site.code instead of site.lang in interwiki.py
......................................................................
[bugfix] Use site.code instead of site.lang in interwiki.py
The site.code specifies the site. This may be different from the language
specifier 'lang'. Therefore use site.code instead of site.lang.
Also add a new method get_alternative() to ask for an alternative page
hint to prevent code duplication.
Change-Id: I15d66b29d13bbf60c31c5a1cb0003d53601110ae
---
M scripts/interwiki.py
1 file changed, 42 insertions(+), 21 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index ead72d0..33102ef 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -821,6 +821,21 @@
or self.namespaceMismatch(page, target, counter) \
or self.wiktionaryMismatch(target)
+ def get_alternative(
+ self,
+ site: pywikibot.site.BaseSite
+ ) -> pywikibot.Page | None:
+ """Ask for an alternative Page for a given site.
+
+ :param site: a BaseSite
+ """
+ title = pywikibot.input(f'Give the alternative page for code '
+ f'{site.code!r} (without site code)')
+ if title:
+ return pywikibot.Page(site, title)
+
+ return None
+
def namespaceMismatch(self, linkingPage, linkedPage, counter) -> bool:
"""
Check whether or not the given page has a different namespace.
@@ -836,9 +851,9 @@
# Allow for a mapping between different namespaces
crossFrom = self.origin.site.family.crossnamespace.get(
self.origin.namespace(), {})
- crossTo = crossFrom.get(self.origin.site.lang,
+ crossTo = crossFrom.get(self.origin.site.code,
crossFrom.get('_default', {}))
- nsmatch = crossTo.get(linkedPage.site.lang,
+ nsmatch = crossTo.get(linkedPage.site.code,
crossTo.get('_default', []))
if linkedPage.namespace() in nsmatch:
return False
@@ -879,15 +894,10 @@
if choice == 'g':
self.makeForcedStop(counter)
elif choice == 'a':
- newHint = pywikibot.input(
- 'Give the alternative for language {}, not '
- 'using a language code:'
- .format(linkedPage.site.lang))
- if newHint:
- alternativePage = pywikibot.Page(
- linkedPage.site, newHint)
+ alternative_page = self.get_alternative(linkedPage.site)
+ if alternative_page:
# add the page that was entered by the user
- self.addIfNew(alternativePage, counter, None)
+ self.addIfNew(alternative_page, counter, None)
else:
pywikibot.info(
f'NOTE: ignoring {linkedPage} and its interwiki links')
@@ -986,11 +996,8 @@
return (True, None)
if choice == 'a':
- newHint = pywikibot.input(
- f'Give the alternative for language {page.site.lang}, '
- f'not using a language code:')
- alternativePage = pywikibot.Page(page.site, newHint)
- return (True, alternativePage)
+ alternative_page = self.get_alternative(page.site)
+ return (True, alternative_page)
if choice == 'g':
self.makeForcedStop(counter)
@@ -1001,7 +1008,7 @@
def isIgnored(self, page) -> bool:
"""Return True if pages is to be ignored."""
- if page.site.lang in self.conf.neverlink:
+ if page.site.code in self.conf.neverlink:
pywikibot.info(f'Skipping link {page} to an ignored language')
return True
@@ -1183,7 +1190,7 @@
# Ignore the interwiki links.
iw = ()
if self.conf.lacklanguage \
- and self.conf.lacklanguage in (link.site.lang for link in iw):
+ and self.conf.lacklanguage in (link.site.code for link in iw):
iw = ()
self.workonme = False
if len(iw) < self.conf.minlinks:
@@ -1269,10 +1276,9 @@
if dictName is not None:
if self.origin:
pywikibot.warning(
- '{}:{} relates to {}:{}, which is an '
- 'auto entry {}({})'
- .format(self.origin.site.lang, self.origin,
- page.site.lang, page, dictName, year))
+ f'{self.origin.site.code}:{self.origin} relates '
+ f'to {page.site.code}:{page}, which is an auto '
+ f'entry {dictName}({year})')
# Abort processing if the bot is running in autonomous mode
if self.conf.autonomous:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005523?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I15d66b29d13bbf60c31c5a1cb0003d53601110ae
Gerrit-Change-Number: 1005523
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr(a)wikimedia.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005504?usp=email )
Change subject: [cleanup] use self.origin attribute instead of property
......................................................................
[cleanup] use self.origin attribute instead of property
There is no reason to have properties here instead of an attribute.
Change-Id: I8ed6bed0dd7bc28a5855893b2b0c1d0d49f4712a
---
M pywikibot/interwiki_graph.py
1 file changed, 12 insertions(+), 10 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/interwiki_graph.py b/pywikibot/interwiki_graph.py
index 66eb8be..40d67c4 100644
--- a/pywikibot/interwiki_graph.py
+++ b/pywikibot/interwiki_graph.py
@@ -62,7 +62,7 @@
:param origin: the page on the 'origin' wiki
"""
# Remember the "origin page"
- self._origin = origin
+ self.origin = origin
# found_in is a dictionary where pages are keys and lists of
# pages are values. It stores where we found each page.
@@ -72,15 +72,6 @@
if origin:
self.found_in = {origin: []}
- @property
- def origin(self) -> pywikibot.page.Page | None:
- """Page on the origin wiki."""
- return self._origin
-
- @origin.setter
- def origin(self, value: pywikibot.page.Page | None) -> None:
- self._origin = value
-
class GraphDrawer:
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1005504?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I8ed6bed0dd7bc28a5855893b2b0c1d0d49f4712a
Gerrit-Change-Number: 1005504
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1006184?usp=email )
Change subject: Populate MediaInfo._content with expected attributes when loaded
......................................................................
Populate MediaInfo._content with expected attributes when loaded
Added pageid, ns, title, lastrevid, modified, id values to _content
attribute when it is loaded using mediainfo.get() so the format
is identical with values returned by wbgetentities.
Bug: T357608
Change-Id: I9178e3fe5a3a1ccba439864891bb1834fb28b050
---
M pywikibot/page/_wikibase.py
M tests/file_tests.py
2 files changed, 56 insertions(+), 27 deletions(-)
Approvals:
jenkins-bot: Verified
Xqt: Looks good to me, approved
diff --git a/pywikibot/page/_wikibase.py b/pywikibot/page/_wikibase.py
index b5de1b3..143e323 100644
--- a/pywikibot/page/_wikibase.py
+++ b/pywikibot/page/_wikibase.py
@@ -451,11 +451,6 @@
def get(self, force: bool = False) -> dict:
"""Fetch all MediaInfo entity data and cache it.
- .. note:: This method may raise exception even if the associated file
- exists because the mediainfo may not have been initialized yet.
- :attr:`labels` and :attr:`statements` can still be accessed and
- modified. :meth:`exists` suppresses the exception.
-
.. note:: dicts returned by this method are references to content
of this entity and their modifying may indirectly cause
unwanted change to the live content
@@ -464,23 +459,51 @@
:raise NoWikibaseEntityError: if this entity doesn't exist
:return: actual data which entity holds
"""
- if self.id == '-1':
- if not force:
- try:
- data = self.file.latest_revision.slots['mediainfo']['*']
- except NoPageError as exc:
+ if force or not hasattr(self, '_content'):
+ if force:
+ self.file.clear_cache()
+
+ # accessing latest_revision loads the file data
+ try:
+ latest_revision = self.file.latest_revision
+ except NoPageError as exc:
+ raise NoWikibaseEntityError(self) from exc
+ except Error as exc:
+ error_message = str(exc)
+ if 'is not a file' in error_message:
raise NoWikibaseEntityError(self) from exc
- except KeyError:
- # reuse the reserved ID for better message
- self.id = 'M' + str(self.file.pageid)
- raise NoWikibaseEntityError(self) from None
+ else:
+ raise Error(self) from exc
- self._content = jsonlib.loads(data)
- self.id = self._content['id']
+ # Create _content. Format is same as with wbgetentities
+ # https://commons.wikimedia.org/w/api.php?action=wbgetentities&ids=M20985340
+ data = {
+ 'title': self.file.title,
+ 'lastrevid': latest_revision['revid'],
+ 'modified': str(latest_revision['timestamp']),
+ 'type': 'mediainfo',
+ 'pageid': self.file.pageid,
+ 'ns': self.file.namespace,
+ 'id': 'M' + str(self.file.pageid),
+ 'labels': {},
+ 'statements': {}
+ }
- self._assert_has_id()
+ # Update 'id', 'labels' and 'statements' if mediainfo is available.
+ # MediaInfo is returned only when it has values.
+ if 'mediainfo' in latest_revision.slots:
+ mediainfo_json = latest_revision.slots['mediainfo']['*']
+ mediainfo_data = jsonlib.loads(mediainfo_json)
+ data.update(mediainfo_data)
- return super().get(force=force)
+ self._content = data
+ self.id = self._content['id']
+
+ self._assert_has_id()
+
+ # Do not pass the force parameter to the upper level because
+ # reloading files without MediaInfo will fail.
+ return super().get()
def getID(self, numeric: bool = False):
"""
@@ -526,10 +549,6 @@
'The provided Claim instance is already used in an entity')
self._assert_has_id()
- if not hasattr(self, '_revid'):
- # workaround for uninitialized mediainfo's
- self._revid = self.file.latest_revision_id
-
self.repo.addClaim(self, claim, bot=bot, **kwargs)
claim.on_item = self
diff --git a/tests/file_tests.py b/tests/file_tests.py
index 40cc008..744d45c 100755
--- a/tests/file_tests.py
+++ b/tests/file_tests.py
@@ -430,11 +430,7 @@
item = page.data_item()
self.assertIsInstance(item, pywikibot.MediaInfo)
- # Get fails as there is no mediainfo.
- with self.assertRaises(NoWikibaseEntityError):
- item.get()
-
- self.assertFalse(item.exists())
+ self.assertTrue(item.exists())
self.assertEqual(f'M{page.pageid}', item.id)
self.assertIsInstance(
item.labels, pywikibot.page._collections.LanguageDict)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1006184?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I9178e3fe5a3a1ccba439864891bb1834fb28b050
Gerrit-Change-Number: 1006184
Gerrit-PatchSet: 3
Gerrit-Owner: Zache-tool <kimmo.virtanen(a)gmail.com>
Gerrit-Reviewer: Ipr1 <ilkka.prusi(a)gmail.com>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1007565?usp=email )
Change subject: [IMPR] backport itertools.batched from Python 3.13
......................................................................
[IMPR] backport itertools.batched from Python 3.13
Python 3.13 batched has a strict option like our own itertools.itergroup
function.
Change-Id: Id3a334ac60ad20128ff14224fc1fc0eb2f5ac52b
---
M pywikibot/backports.py
M pywikibot/tools/itertools.py
2 files changed, 41 insertions(+), 14 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/backports.py b/pywikibot/backports.py
index b7a2dab..e49e815 100644
--- a/pywikibot/backports.py
+++ b/pywikibot/backports.py
@@ -5,7 +5,7 @@
instead. The *SimpleQueue* queue; use ``queue.SimpleQueue`` instead.
"""
#
-# (C) Pywikibot team, 2014-2023
+# (C) Pywikibot team, 2014-2024
#
# Distributed under the terms of the MIT license.
#
@@ -135,11 +135,13 @@
# gh-98363
-if PYTHON_VERSION < (3, 12) or SPHINX_RUNNING:
- def batched(iterable, n: int) -> Generator[tuple, None, None]:
+if PYTHON_VERSION < (3, 13) or SPHINX_RUNNING:
+ def batched(iterable, n: int, *,
+ strict: bool = False) -> Generator[tuple, None, None]:
"""Batch data from the *iterable* into tuples of length *n*.
- .. note:: The last batch may be shorter than *n*.
+ .. note:: The last batch may be shorter than *n* if *strict* is
+ False; if *strict* is True a ValueError is raised instead.
Example:
@@ -159,17 +161,32 @@
<library/itertools.html#itertools.batched>`,
backported from Python 3.12.
.. versionadded:: 8.2
+ .. versionchanged:: 9.0
+ Added *strict* option, backported from Python 3.13
:param n: How many items of the iterable to get in one chunk
+ :param strict: raise a ValueError if the final batch is shorter
+ than *n*.
+ :raise ValueError: the final batch is shorter than *n*.
"""
- group = []
- for item in iterable:
- group.append(item)
- if len(group) == n:
+ msg = f'The final batch is shorter than n={n}'
+ if PYTHON_VERSION < (3, 12):
+ group = []
+ for item in iterable:
+ group.append(item)
+ if len(group) == n:
+ yield tuple(group)
+ group.clear()
+ if group:
+ if strict:
+ raise ValueError(msg)
yield tuple(group)
- group.clear()
- if group:
- yield tuple(group)
+ else: # PYTHON_VERSION == (3, 12)
+ from itertools import batched as _batched
+ for group in _batched(iterable, n):
+ if strict and len(group) < n:
+ raise ValueError(msg)
+ yield group
else:
from itertools import batched # type: ignore[no-redef]
diff --git a/pywikibot/tools/itertools.py b/pywikibot/tools/itertools.py
index 4a17c43..41fc5ca 100644
--- a/pywikibot/tools/itertools.py
+++ b/pywikibot/tools/itertools.py
@@ -60,9 +60,7 @@
not divisible by `size`.
:raises ValueError: iterable is not divisible by size
"""
- for group in batched(iterable, size):
- if strict and len(group) < size:
- raise ValueError('iterable is not divisible by size.')
+ for group in batched(iterable, size, strict=strict):
yield list(group)
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1007565?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id3a334ac60ad20128ff14224fc1fc0eb2f5ac52b
Gerrit-Change-Number: 1007565
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1004618?usp=email )
Change subject: [IMPR] Raise APIError if the same error comes twice within submit loop
......................................................................
[IMPR] Raise APIError if the same error comes twice within submit loop
api.Request.submit handles API errors within a loop, but in some
circumstances the loop will never be left if the same error comes again
and again. Therefore raise APIError with the previous API error if the
same error occurred twice in the same submit loop.
Bug: T357870
Change-Id: Id9f140c9d8815ef622d47cd90a29518f23665a4a
---
M pywikibot/data/api/_requests.py
1 file changed, 42 insertions(+), 1 deletion(-)
Approvals:
JJMC89: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api/_requests.py b/pywikibot/data/api/_requests.py
index bc40eec..8facad1 100644
--- a/pywikibot/data/api/_requests.py
+++ b/pywikibot/data/api/_requests.py
@@ -1,6 +1,6 @@
"""Objects representing API requests."""
#
-# (C) Pywikibot team, 2007-2023
+# (C) Pywikibot team, 2007-2024
#
# Distributed under the terms of the MIT license.
#
@@ -945,18 +945,36 @@
self._params['token'] = tokens
return True
+ def wait(self, delay: int | None = None) -> None:
+ """Determine how long to wait after a failed request.
+
+ Also reset last API error with wait cycles.
+
+ .. versionadded:: 9.0
+
+ :param delay: Minimum time in seconds to wait. Overwrites
+ ``retry_wait`` variable if given. The delay doubles each
+ retry until ``retry_max`` seconds is reached.
+ """
+ self.last_error = dict.fromkeys(['code', 'info'])
+ super().wait(delay)
+
def submit(self) -> dict:
"""Submit a query and parse the response.
.. versionchanged:: 8.0.4
in addition to *readapidenied* also try to login when API
response is *notloggedin*.
+ .. versionchanged:: 9.0
+ Raise :exc:`pywikibot.exceptions.APIError` if the same error
+ comes twice in a row within the loop.
:return: a dict containing data retrieved from api.php
"""
self._add_defaults()
use_get = self._use_get()
retries = 0
+ self.last_error = dict.fromkeys(['code', 'info'])
while True:
paramstring = self._http_param_string()
@@ -1003,6 +1021,11 @@
code = error.setdefault('code', 'Unknown')
info = error.setdefault('info', None)
+ if (code == self.last_error['code']
+ and info == self.last_error['info']):
+ raise pywikibot.exceptions.APIError(**self.last_error)
+ self.last_error = error
+
if not self._logged_in(code):
continue
@@ -1019,6 +1042,8 @@
lag = float(lag['lag']) if lag else 0.0
self.site.throttle.lag(lag * retries)
+ # reset last error
+ self.last_error = dict.fromkeys(['code', 'info'])
continue
if code == 'help' and self.action == 'help':
@@ -1060,6 +1085,7 @@
pywikibot.error(f'Retrying failed {msg}')
continue
raise NoUsernameError(f'Failed {msg}')
+
if code == 'cirrussearch-too-busy-error': # T170647
self.wait()
continue
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1004618?usp=email
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Id9f140c9d8815ef622d47cd90a29518f23665a4a
Gerrit-Change-Number: 1004618
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: JJMC89 <JJMC89.Wikimedia(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged