jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/529616 )
Change subject: proofreadpage.py: fix footer detection ......................................................................
proofreadpage.py: fix footer detection
Fix footer detection when the footer ends with a </div> tag.
Bug: T230301
Change-Id: Ia2be695b5e8ea06f9760c86eebd3e8e836305a77
---
M pywikibot/proofreadpage.py
M tests/proofreadpage_tests.py
2 files changed, 24 insertions(+), 6 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/proofreadpage.py b/pywikibot/proofreadpage.py index e323d76..5ca3dfd 100644 --- a/pywikibot/proofreadpage.py +++ b/pywikibot/proofreadpage.py @@ -126,6 +126,7 @@ close_tag = '</noinclude>' p_open = re.compile(r'<noinclude>') p_close = re.compile(r'(</div>|\n\n\n)?</noinclude>') + p_close_no_div = re.compile('</noinclude>') # V2 page format.
# phetools ocr utility _HOCR_CMD = ('https://tools.wmflabs.org/phetools/hocr_cgi.py?' @@ -461,24 +462,32 @@ @raise Error: the page is not formatted according to ProofreadPage extension. """ + def _assert_len(len_oq, len_cq, title): + if (len_oq != len_cq) or (len_oq < 2 or len_cq < 2): + raise pywikibot.Error('ProofreadPage %s: invalid format' + % title) + # Property force page text loading. if not (hasattr(self, '_text') or self.text): self._create_empty_page() return
+ _title = self.title(as_link=True) + open_queue = list(self.p_open.finditer(self._text)) close_queue = list(self.p_close.finditer(self._text)) - - len_oq = len(open_queue) - len_cq = len(close_queue) - if (len_oq != len_cq) or (len_oq < 2 or len_cq < 2): - raise pywikibot.Error('ProofreadPage %s: invalid format' - % self.title(as_link=True)) + _assert_len(len(open_queue), len(close_queue), _title)
f_open, f_close = open_queue[0], close_queue[0] self._full_header = FullHeader( self._text[f_open.end():f_close.start()])
+ # check version of page format and in case recompute last match, + # in order not to include </div>. + if not self._full_header._has_div: + close_queue = list(self.p_close_no_div.finditer(self._text)) + _assert_len(len(open_queue), len(close_queue), _title) + l_open, l_close = open_queue[-1], close_queue[-1] self._footer = self._text[l_open.end():l_close.start()]
diff --git a/tests/proofreadpage_tests.py b/tests/proofreadpage_tests.py index 8a55cce..99722c4 100644 --- a/tests/proofreadpage_tests.py +++ b/tests/proofreadpage_tests.py @@ -172,6 +172,10 @@ 'title1': 'User:Popular Science Monthly Volume 1.djvu/12' }
+ div_in_footer = { + 'title': 'Page:Pywikibot unlinked test page', + } + class_pagetext_fmt = { True: ('<div class="pagetext">\n\n\n', '</div>'), False: ('', ''), @@ -232,6 +236,11 @@ self.assertEqual(page.header, self.valid['header']) self.assertEqual(page.footer, self.valid['footer'])
+ def test_div_in_footer(self): + """Test ProofreadPage page parsing functions.""" + page = ProofreadPage(self.site, self.div_in_footer['title']) + self.assertTrue(page.footer.endswith('</div>')) + def test_decompose_recompose_text(self): """Test ProofreadPage page decomposing/composing text.""" page = ProofreadPage(self.site, self.valid['title'])
pywikibot-commits@lists.wikimedia.org