jenkins-bot submitted this change.

View Change

Approvals: Meno25: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Ignore SectionError in fixing_redirects.py script

- Add a keyword-only ignore_section parameter to APISite.getredirtarget
and to the getRedirectTarget methods of BasePage and ItemPage. If it is
False, SectionError is raised when the redirect's section is not found
on the target page. The ignore_section parameter defaults to True.
- Use ignore_section=False within FixingRedirectBot.get_target() and
catch the resulting SectionError, reporting it as an error

Bug: T370295
Change-Id: I1d3f0a9bd8c14ddd087d2199e334be37f8988316
---
M pywikibot/page/_basepage.py
M pywikibot/page/_wikibase.py
M pywikibot/site/_apisite.py
M scripts/fixing_redirects.py
4 files changed, 75 insertions(+), 40 deletions(-)

diff --git a/pywikibot/page/_basepage.py b/pywikibot/page/_basepage.py
index 596bacc..21181ba 100644
--- a/pywikibot/page/_basepage.py
+++ b/pywikibot/page/_basepage.py
@@ -1798,16 +1798,28 @@

return self._pageimage

- def getRedirectTarget(self):
- """
- Return a Page object for the target this Page redirects to.
+ def getRedirectTarget(self, *,
+ ignore_section: bool = True) -> pywikibot.Page:
+ """Return a Page object for the target this Page redirects to.

- If this page is not a redirect page, will raise an
- IsNotRedirectPageError. This method also can raise a NoPageError.
+ .. versionadded:: 9.3
+ *ignore_section* parameter

- :rtype: pywikibot.Page
+ .. seealso:: :meth:`Site.getredirtarget()
+ <pywikibot.site._apisite.APISite.getredirtarget>`
+
+ :param ignore_section: do not include section to the target even
+ the link has one
+
+ :raises CircularRedirectError: page is a circular redirect
+ :raises InterwikiRedirectPageError: the redirect target is on
+ another site
+ :raises IsNotRedirectPageError: page is not a redirect
+ :raises RuntimeError: no redirects found
+ :raises SectionError: the section is not found on target page
+ and *ignore_section* is not set
"""
- return self.site.getredirtarget(self)
+ return self.site.getredirtarget(self, ignore_section=ignore_section)

def moved_target(self):
"""
diff --git a/pywikibot/page/_wikibase.py b/pywikibot/page/_wikibase.py
index b00e5ce..951f89a 100644
--- a/pywikibot/page/_wikibase.py
+++ b/pywikibot/page/_wikibase.py
@@ -1170,14 +1170,31 @@

return data

- def getRedirectTarget(self):
- """Return the redirect target for this page."""
- target = super().getRedirectTarget()
+ def getRedirectTarget(self, *, ignore_section: bool = True):
+ """Return the redirect target for this page.
+
+ .. versionadded:: 9.3
+ *ignore_section* parameter
+
+ .. seealso:: :meth:`page.BasePage.getRedirectTarget`
+
+ :param ignore_section: do not include section to the target even
+ the link has one
+
+ :raises CircularRedirectError: page is a circular redirect
+ :raises InterwikiRedirectPageError: the redirect target is on
+ another site
+ :raises Error: target page has wrong content model
+ :raises IsNotRedirectPageError: page is not a redirect
+ :raises RuntimeError: no redirects found
+ :raises SectionError: the section is not found on target page
+ and *ignore_section* is not set
+ """
+ target = super().getRedirectTarget(ignore_section=ignore_section)
cmodel = target.content_model
if cmodel != 'wikibase-item':
- raise Error('{} has redirect target {} with content model {} '
- 'instead of wikibase-item'
- .format(self, target, cmodel))
+ raise Error(f'{self} has redirect target {target} with content '
+ f'model {cmodel} instead of wikibase-item')
return self.__class__(target.site, target.title(), target.namespace())

def iterlinks(self, family=None):
diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py
index abca600..8f07e39 100644
--- a/pywikibot/site/_apisite.py
+++ b/pywikibot/site/_apisite.py
@@ -1594,20 +1594,28 @@
def getredirtarget(
self,
page: BasePage,
+ *,
+ ignore_section: bool = True
) -> pywikibot.page.Page:
- """
- Return page object for the redirect target of page.
+ """Return page object for the redirect target of page.
+
+ .. versionadded:: 9.3
+ *ignore_section* parameter
+
+ .. seealso:: :meth:`page.BasePage.getRedirectTarget`

:param page: page to search redirects for
+ :param ignore_section: do not include section to the target even
+ the link has one
:return: redirect target of page

- :raises pywikibot.exceptions.IsNotRedirectPageError: page is not a
- redirect
+ :raises CircularRedirectError: page is a circular redirect
+ :raises InterwikiRedirectPageError: the redirect target is on
+ another site
+ :raises IsNotRedirectPageError: page is not a redirect
:raises RuntimeError: no redirects found
- :raises pywikibot.exceptions.CircularRedirectError: page is a circular
- redirect
- :raises pywikibot.exceptions.InterwikiRedirectPageError: the redirect
- target is on another site
+ :raises SectionError: the section is not found on target page
+ and *ignore_section* is not set
"""
if not self.page_isredirect(page):
raise IsNotRedirectPageError(page)
@@ -1626,13 +1634,14 @@
raise RuntimeError(
f"getredirtarget: No 'redirects' found for page {title}.")

- redirmap = {item['from']: {'title': item['to'],
- 'section': '#'
- + item['tofragment']
- if 'tofragment' in item
- and item['tofragment']
- else ''}
- for item in result['query']['redirects']}
+ redirmap = {
+ item['from']: {
+ 'title': item['to'],
+ 'section': '#' + item['tofragment']
+ if 'tofragment' in item and item['tofragment'] else ''
+ }
+ for item in result['query']['redirects']
+ }

# Normalize title
for item in result['query'].get('normalized', []):
@@ -1687,6 +1696,11 @@
target = pywikibot.FilePage(target)
elif ns == Namespace.CATEGORY:
target = pywikibot.Category(target)
+
+ if not ignore_section:
+ # get the content; this raises SectionError if section is not found
+ target.text
+
page._redirtarget = target
return page._redirtarget

diff --git a/scripts/fixing_redirects.py b/scripts/fixing_redirects.py
index 4774b1a..55fc3f8 100755
--- a/scripts/fixing_redirects.py
+++ b/scripts/fixing_redirects.py
@@ -41,8 +41,9 @@
InvalidPageError,
InvalidTitleError,
NoMoveTargetError,
+ SectionError,
)
-from pywikibot.textlib import does_text_contain_section, isDisabled
+from pywikibot.textlib import isDisabled
from pywikibot.tools import first_lower
from pywikibot.tools import first_upper as firstcap

@@ -169,22 +170,13 @@
target = page.moved_target()
elif page.isRedirectPage():
try:
- target = page.getRedirectTarget()
+ target = page.getRedirectTarget(ignore_section=False)
except (CircularRedirectError,
InvalidTitleError,
InterwikiRedirectPageError):
pass
- except RuntimeError as e:
+ except (RuntimeError, SectionError) as e:
pywikibot.error(e)
- else:
- section = target.section()
- if section and not does_text_contain_section(target.text,
- section):
- pywikibot.warning(
- f'Section #{section} not found on page '
- f'{target.title(as_link=True, with_section=False)}'
- )
- target = None

if target is not None \
and target.namespace() in [2, 3] and page.namespace() not in [2, 3]:

To view, visit change 1055439. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-MessageType: merged
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I1d3f0a9bd8c14ddd087d2199e334be37f8988316
Gerrit-Change-Number: 1055439
Gerrit-PatchSet: 5
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr@wikimedia.org>
Gerrit-Reviewer: Meno25 <meno25mail@gmail.com>
Gerrit-Reviewer: jenkins-bot