jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/529616 )
Change subject: proofreadpage.py: fix footer detection
......................................................................
proofreadpage.py: fix footer detection
Fix footer detection when footer contains a </div> tag at the end.
Bug: T230301
Change-Id: Ia2be695b5e8ea06f9760c86eebd3e8e836305a77
---
M pywikibot/proofreadpage.py
M tests/proofreadpage_tests.py
2 files changed, 24 insertions(+), 6 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py
index e323d76..5ca3dfd 100644
--- a/pywikibot/proofreadpage.py
+++ b/pywikibot/proofreadpage.py
@@ -126,6 +126,7 @@
close_tag = '</noinclude>'
p_open = re.compile(r'<noinclude>')
p_close = re.compile(r'(</div>|\n\n\n)?</noinclude>')
+ p_close_no_div = re.compile('</noinclude>') # V2 page format.
# phetools ocr utility
_HOCR_CMD = ('https://tools.wmflabs.org/phetools/hocr_cgi.py?'
@@ -461,24 +462,32 @@
@raise Error: the page is not formatted according to ProofreadPage
extension.
"""
+ def _assert_len(len_oq, len_cq, title):
+ if (len_oq != len_cq) or (len_oq < 2 or len_cq < 2):
+ raise pywikibot.Error('ProofreadPage %s: invalid format'
+ % title)
+
# Property force page text loading.
if not (hasattr(self, '_text') or self.text):
self._create_empty_page()
return
+ _title = self.title(as_link=True)
+
open_queue = list(self.p_open.finditer(self._text))
close_queue = list(self.p_close.finditer(self._text))
-
- len_oq = len(open_queue)
- len_cq = len(close_queue)
- if (len_oq != len_cq) or (len_oq < 2 or len_cq < 2):
- raise pywikibot.Error('ProofreadPage %s: invalid format'
- % self.title(as_link=True))
+ _assert_len(len(open_queue), len(close_queue), _title)
f_open, f_close = open_queue[0], close_queue[0]
self._full_header = FullHeader(
self._text[f_open.end():f_close.start()])
+ # Check the version of the page format and, if needed, recompute the
+ # last match so that it does not include </div>.
+ if not self._full_header._has_div:
+ close_queue = list(self.p_close_no_div.finditer(self._text))
+ _assert_len(len(open_queue), len(close_queue), _title)
+
l_open, l_close = open_queue[-1], close_queue[-1]
self._footer = self._text[l_open.end():l_close.start()]
diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py
index 8a55cce..99722c4 100644
--- a/tests/proofreadpage_tests.py
+++ b/tests/proofreadpage_tests.py
@@ -172,6 +172,10 @@
'title1': 'User:Popular Science Monthly Volume 1.djvu/12'
}
+ div_in_footer = {
+ 'title': 'Page:Pywikibot unlinked test page',
+ }
+
class_pagetext_fmt = {
True: ('<div class="pagetext">\n\n\n',
'</div>'),
False: ('', ''),
@@ -232,6 +236,11 @@
self.assertEqual(page.header, self.valid['header'])
self.assertEqual(page.footer, self.valid['footer'])
+ def test_div_in_footer(self):
+ """Test ProofreadPage parsing of a footer ending with </div>."""
+ page = ProofreadPage(self.site, self.div_in_footer['title'])
+ self.assertTrue(page.footer.endswith('</div>'))
+
def test_decompose_recompose_text(self):
"""Test ProofreadPage page decomposing/composing
text."""
page = ProofreadPage(self.site, self.valid['title'])
--
To view, visit
https://gerrit.wikimedia.org/r/529616
To unsubscribe, or for help writing mail filters, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia2be695b5e8ea06f9760c86eebd3e8e836305a77
Gerrit-Change-Number: 529616
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot (75)