jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/516527 )
Change subject: [IMPR] use namedtuple as result of textlib.extract_sections ......................................................................
[IMPR] use namedtuple as result of textlib.extract_sections
Modify tests accordingly
Change-Id: Ie3e3e3f1891365178ad0b93fe1dfb2f8b1a0f0f4 --- M pywikibot/textlib.py M tests/textlib_tests.py 2 files changed, 69 insertions(+), 60 deletions(-)
Approvals: Hazard-SJ: Looks good to me, but someone else must approve Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 972c290..e2a177c 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -18,7 +18,7 @@ from collections import OrderedDict, namedtuple from contextlib import suppress from html.parser import HTMLParser -from typing import Optional, Union +from typing import List, NamedTuple, Optional, Tuple, Union
import pywikibot from pywikibot.exceptions import InvalidTitle, SiteDefinitionError @@ -834,6 +834,7 @@ # ------------------------------- _Heading = namedtuple('_Heading', ('text', 'start', 'end')) _Section = namedtuple('_Section', ('title', 'content')) +_Content = namedtuple('_Content', ('header', 'sections', 'footer'))
def _extract_headings(text: str, site) -> list: @@ -864,16 +865,21 @@ return []
-def extract_sections(text: str, site=None) -> tuple: +def extract_sections( + text: str, site=None +) -> NamedTuple('_Content', [('header', str), # noqa: F821 + ('body', List[Tuple[str, str]]), # noqa: F821 + ('footer', str)]): # noqa: F821 """ Return section headings and contents found in text.
- @return: The returned tuple contains the text parsed into three - parts: The first part is a string containing header part above - the first heading. The last part is also a string containing - footer part after the last section. The middle part is a list - of tuples, each tuple containing a string with section heading - and a string with section content. Example article:: + @return: The returned namedtuple contains the text parsed into + header, sections and footer parts: The header part is a string + containing the text above the first heading. The footer part + is also a string containing the text after the last section. + The sections part is a list of tuples, each tuple containing a + string with section heading and a string with section content. + Example article::
'''A''' is a thing.
@@ -885,15 +891,14 @@
[[Category:Things starting with A]]
- ...is parsed into the following tuple:: + ...is parsed into the following namedtuple::
- (header, body, footer) - header = "'''A''' is a thing." - body = [('== History of A ==', 'Some history...'), - ('== Usage of A ==', 'Some usage...')] - footer = '[[Category:Things starting with A]]' + result = extract_sections(text, site) + result.header = "'''A''' is a thing." + result.sections = [('== History of A ==', 'Some history...'), + ('== Usage of A ==', 'Some usage...')] + result.footer = '[[Category:Things starting with A]]'
- @rtype: tuple of (str, list of tuples, str) """ headings = _extract_headings(text, site) sections = _extract_sections(text, headings) @@ -912,7 +917,7 @@ sections[-1].title, last_section_content[:-len(footer)]) else: header = header[:-len(footer)] - return header, sections, footer + return _Content(header, sections, footer)
# ----------------------------------------------- diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index b647ab6..d6db787 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -1628,71 +1628,77 @@
"""Test the extract_sections function."""
+ def _extract_sections_tests(self, result, header, sections, footer): + """Test extract_sections function.""" + self.assertIsInstance(result, tuple) + self.assertIsInstance(result.sections, list) + self.assertEqual(result, (header, sections, footer)) + self.assertEqual(result.header, header) + self.assertEqual(result.sections, sections) + self.assertEqual(result.footer, footer) + if result.sections: + for section in sections: + self.assertIsInstance(section, tuple) + self.assertLength(section, 2) + def test_no_sections_no_footer(self): """Test for text having no sections or footer.""" - self.assertEqual( - extract_sections('text', self.site), - ('text', [], '') - ) + text = 'text' + result = extract_sections(text, self.site) + self._extract_sections_tests(result, text, [], '')
def test_no_sections_with_footer(self): """Test for text having footer but no section.""" - self.assertEqual( - extract_sections('text\n\n[[Category:A]]', self.site), - ('text\n\n', [], '[[Category:A]]') - ) + text = 'text\n\n[[Category:A]]' + result = extract_sections(text, self.site) + self._extract_sections_tests(result, 'text\n\n', [], '[[Category:A]]')
def test_with_section_no_footer(self): """Test for text having sections but no footer.""" - self.assertEqual( - extract_sections( - 'text\n\n' + text = ('text\n\n' '==title==\n' - 'content', - self.site), - ('text\n\n', [('==title==', '\ncontent')], '') - ) + 'content') + result = extract_sections(text, self.site) + self._extract_sections_tests( + result, 'text\n\n', [('==title==', '\ncontent')], '')
def test_with_section_with_footer(self): """Test for text having sections and footer.""" - self.assertEqual( - extract_sections( - 'text\n\n' + text = ('text\n\n' '==title==\n' 'content\n' - '[[Category:A]]\n', - self.site), - ('text\n\n', [('==title==', '\ncontent\n')], '[[Category:A]]\n') - ) + '[[Category:A]]\n') + result = extract_sections(text, self.site) + self._extract_sections_tests( + result, + 'text\n\n', [('==title==', '\ncontent\n')], '[[Category:A]]\n')
def test_with_h1_and_h2_sections(self): """Test for text having h1 and h2 sections.""" - self.assertEqual( - extract_sections( - 'text\n\n' + text = ('text\n\n' '=first level=\n' 'foo\n' '==title==\n' - 'bar', - self.site), - ('text\n\n', - [('=first level=', '\nfoo\n'), ('==title==', '\nbar')], - '') - ) + 'bar') + result = extract_sections(text, self.site) + self._extract_sections_tests( + result, + 'text\n\n', + [('=first level=', '\nfoo\n'), ('==title==', '\nbar')], + '')
def test_with_h4_and_h2_sections(self): """Test for text having h4 and h2 sections.""" - self.assertEqual( - extract_sections( - 'text\n\n' + text = ('text\n\n' '====title====\n' '==title 2==\n' - 'content', - self.site), - ('text\n\n', - [('====title====', '\n'), ('==title 2==', '\ncontent')], - '') - ) + 'content') + result = extract_sections(text, self.site) + self._extract_sections_tests( + result, + 'text\n\n', + [('====title====', '\n'), ('==title 2==', '\ncontent')], + '')
def test_long_comment(self): r"""Test for text having a long expanse of white space. @@ -1705,10 +1711,8 @@ https://www.regular-expressions.info/catastrophic.html """ text = '<!-- -->' - self.assertEqual( - extract_sections(text, self.site), - (text, [], '') - ) + result = extract_sections(text, self.site) + self._extract_sections_tests(result, text, [], '')
if __name__ == '__main__': # pragma: no cover