jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/505365 )
Change subject: Fix regression: archivebot should preserve h1 headers
......................................................................
Fix regression: archivebot should preserve h1 headers
Follows-up: I644f17dc6b1c775de134c5e795c456423cc40147
Bug: T221445
Change-Id: Ibcad74be6faf9c8137f081847374283acc6e2f2f
---
M scripts/archivebot.py
M tests/archivebot_tests.py
2 files changed, 32 insertions(+), 2 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py index a44398e..b6f9187 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -441,10 +441,11 @@ self.archives = {} self.archived_threads = 0
- # Exclude non-thread headings + # Exclude unsupported headings (h1, h3, etc): + # adding the marker will make them ignored by extract_sections() text = self.get() marker = findmarker(text) - text = re.sub(r'^===', marker + r'===', text, flags=re.M) + text = re.sub(r'^((=|={3,})[^=])', marker + r'\1', text, flags=re.M)
# Find threads, avoid archiving categories or interwiki header, threads, footer = extract_sections(text, self.site) diff --git a/tests/archivebot_tests.py b/tests/archivebot_tests.py index 66243e6..6896711 100644 --- a/tests/archivebot_tests.py +++ b/tests/archivebot_tests.py @@ -271,6 +271,35 @@ page.load_page() self.assertEqual([x.title for x in page.threads], ['A', 'B'])
+ def testThreadsWithSubsections(self): + """Test recognizing threads with subsections. + + Talk:For-pywikibot-archivebot/subsections must have:: + + {{User:MiszaBot/config + |archive = Talk:Main_Page/archive + |algo = old(30d) + }} + = Front matter = + placeholder + == A == + foo bar + === A1 === + foo bar bar + ==== A11 ==== + foo + == B == + foo bar bar bar + """ + site = self.get_site() + page = pywikibot.Page(site, 'Talk:For-pywikibot-archivebot/testcase2') + tmpl = pywikibot.Page(site, 'User:MiszaBot/config') + archiver = archivebot.PageArchiver( + page=page, template=tmpl, salt='', force=False) + page = archivebot.DiscussionPage(page, archiver) + page.load_page() + self.assertEqual([x.title for x in page.threads], ['A', 'B']) +
if __name__ == '__main__': # pragma: no cover try:
pywikibot-commits@lists.wikimedia.org