jenkins-bot merged this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
Fix regression: archivebot should preserve h1 headers

Follows-up: I644f17dc6b1c775de134c5e795c456423cc40147

Bug: T221445
Change-Id: Ibcad74be6faf9c8137f081847374283acc6e2f2f
---
M scripts/archivebot.py
M tests/archivebot_tests.py
2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index a44398e..b6f9187 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -441,10 +441,11 @@
self.archives = {}
self.archived_threads = 0

- # Exclude non-thread headings
+ # Exclude unsupported headings (h1, h3, etc):
+ # adding the marker will make them ignored by extract_sections()
text = self.get()
marker = findmarker(text)
- text = re.sub(r'^===', marker + r'===', text, flags=re.M)
+ text = re.sub(r'^((=|={3,})[^=])', marker + r'\1', text, flags=re.M)

# Find threads, avoid archiving categories or interwiki
header, threads, footer = extract_sections(text, self.site)
diff --git a/tests/archivebot_tests.py b/tests/archivebot_tests.py
index 66243e6..6896711 100644
--- a/tests/archivebot_tests.py
+++ b/tests/archivebot_tests.py
@@ -271,6 +271,35 @@
page.load_page()
self.assertEqual([x.title for x in page.threads], ['A', 'B'])

+ def testThreadsWithSubsections(self):
+ """Test recognizing threads with subsections.
+
+ Talk:For-pywikibot-archivebot/testcase2 must have::
+
+ {{User:MiszaBot/config
+ |archive = Talk:Main_Page/archive
+ |algo = old(30d)
+ }}
+ = Front matter =
+ placeholder
+ == A ==
+ foo bar
+ === A1 ===
+ foo bar bar
+ ==== A11 ====
+ foo
+ == B ==
+ foo bar bar bar
+ """
+ site = self.get_site()
+ page = pywikibot.Page(site, 'Talk:For-pywikibot-archivebot/testcase2')
+ tmpl = pywikibot.Page(site, 'User:MiszaBot/config')
+ archiver = archivebot.PageArchiver(
+ page=page, template=tmpl, salt='', force=False)
+ page = archivebot.DiscussionPage(page, archiver)
+ page.load_page()
+ self.assertEqual([x.title for x in page.threads], ['A', 'B'])
+

if __name__ == '__main__': # pragma: no cover
try:

To view, visit change 505365. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibcad74be6faf9c8137f081847374283acc6e2f2f
Gerrit-Change-Number: 505365
Gerrit-PatchSet: 5
Gerrit-Owner: Whym <whym@whym.org>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Dvorapa <dvorapa@seznam.cz>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: Whym <whym@whym.org>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot (75)