jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[fix] raise SectionError if a section does not exist on a page

Also add some tests and update documentation.

Bug: T107141
Change-Id: Ib515e94657e7100695b69d77934d448ce424e232
---
M pywikibot/page/_basepage.py
M tests/page_tests.py
2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/pywikibot/page/_basepage.py b/pywikibot/page/_basepage.py
index 8eaeeb0..1c51f20 100644
--- a/pywikibot/page/_basepage.py
+++ b/pywikibot/page/_basepage.py
@@ -292,11 +292,10 @@
return title

def section(self) -> str | None:
- """
- Return the name of the section this Page refers to.
+ """Return the name of the section this Page refers to.

- The section is the part of the title following a '#' character, if
- any. If no section is present, return None.
+ The section is the part of the title following a ``#`` character,
+ if any. If no section is present, return None.
"""
try:
section = self._link.section
@@ -373,12 +372,14 @@
...
pywikibot.exceptions.IsRedirectPageError: ... is a redirect page.

+ .. versionchanged:: 9.2
+ :exc:`exceptions.SectionError` is raised if the
+ :meth:`section` does not exist
.. seealso:: :attr:`text` property

:param force: reload all page attributes, including errors.
:param get_redirect: return the redirect text, do not follow the
redirect, do not raise an exception.
-
:raises NoPageError: The page does not exist.
:raises IsRedirectPageError: The page is a redirect.
:raises SectionError: The section does not exist on a page with
@@ -394,7 +395,18 @@
if not get_redirect:
raise

- return self.latest_revision.text # type: ignore[attr-defined]
+ text = self.latest_revision.text
+
+ # check for valid section in title
+ page_section = self.section()
+ if page_section:
+ content = textlib.extract_sections(text, self.site)
+ headings = {section.heading for section in content.sections}
+ if page_section not in headings:
+ raise SectionError(f'{page_section!r} is not a valid section '
+ f'of {self.title(with_section=False)}')
+
+ return text

def has_content(self) -> bool:
"""
diff --git a/tests/page_tests.py b/tests/page_tests.py
index b56cf4b..ea4dfd8 100755
--- a/tests/page_tests.py
+++ b/tests/page_tests.py
@@ -22,6 +22,7 @@
IsNotRedirectPageError,
IsRedirectPageError,
NoPageError,
+ SectionError,
TimeoutError,
UnknownExtensionError,
)
@@ -974,6 +975,7 @@

def testPageGet(self):
"""Test ``Page.get()`` on different types of pages."""
+ fail_msg = '{page!r}.get() raised {error!r} unexpectedly!'
site = self.get_site('en')
p1 = pywikibot.Page(site, 'User:Legoktm/R2')
p2 = pywikibot.Page(site, 'User:Legoktm/R1')
@@ -986,9 +988,27 @@
r'{} is a redirect page\.'
.format(re.escape(str(p2)))):
p2.get()
+
+ try:
+ p2.get(get_redirect=True)
+ except (IsRedirectPageError, NoPageError, SectionError) as e:
+ self.fail(fail_msg.format(page=p2, error=e))
+
with self.assertRaisesRegex(NoPageError, NO_PAGE_RE):
p3.get()

+ page = pywikibot.Page(site, 'User:Legoktm/R2#Section')
+ with self.assertRaisesRegex(SectionError,
+ "'Section' is not a valid section"):
+ page.get()
+
+ site = pywikibot.Site('mediawiki')
+ page = pywikibot.Page(site, 'Manual:Pywikibot/2.0 #See_also')
+ try:
+ page.get()
+ except (IsRedirectPageError, NoPageError, SectionError) as e:
+ self.fail(fail_msg.format(page=page, error=e))
+
def test_set_redirect_target(self):
"""Test set_redirect_target method."""
# R1 redirects to R2 and R3 doesn't exist.

To view, visit change 1030049. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib515e94657e7100695b69d77934d448ce424e232
Gerrit-Change-Number: 1030049
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged