Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820469 )
Change subject: [IMPR] Preserve thread order in archive even if threads are archived later
......................................................................
[IMPR] Preserve thread order in archive even if threads are archived later
Bug: T314560
Bug: T312773
Change-Id: I8190482d43271e594fa0c4f7a916182ee1117bed
---
M scripts/archivebot.py
1 file changed, 41 insertions(+), 6 deletions(-)
Approvals:
Xqt: Verified; Looks good to me, approved
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index d981637..305e7fa 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -101,9 +101,12 @@
-namespace:NS only archive pages from a given namespace
-page:PAGE archive a single PAGE, default ns is a user talk page
-salt:SALT specify salt
+ -keep Preserve thread order in archive even if threads are
+ archived later
.. versionchanged:: 7.6
- Localized variables for "archive" template parameter are supported
+ Localized variables for "archive" template parameter are supported.
+ `-keep` option was added.
"""
#
# (C) Pywikibot team, 2006-2022
@@ -314,7 +317,7 @@
Feed threads to it and run an update() afterwards.
"""
- def __init__(self, source, archiver, params=None) -> None:
+ def __init__(self, source, archiver, params=None, keep=False) -> None:
"""Initializer."""
super().__init__(source)
self.threads = []
@@ -330,6 +333,7 @@
else:
self.timestripper = self.archiver.timestripper
self.params = params
+ self.keep = keep
try:
self.load_page()
except NoPageError:
@@ -340,8 +344,28 @@
if self.params:
self.header = self.header % self.params
+ @staticmethod
+ def max(
+ ts1: Optional[pywikibot.Timestamp],
+ ts2: Optional[pywikibot.Timestamp]
+ ) -> Optional[pywikibot.Timestamp]:
+ """Calculate the maximum of two timestamps but allow None as value.
+
+ .. versionadded:: 7.6
+ """
+ if ts1 is None:
+ return ts2
+ if ts2 is None:
+ return ts1
+ return max(ts1, ts2)
+
def load_page(self) -> None:
- """Load the page to be archived and break it up into threads."""
+ """Load the page to be archived and break it up into threads.
+
+ .. versionchanged:: 7.6
+ If `-keep` option is given run through all threads and set
+ the current timestamp to the previous if the current is lower.
+ """
self.header = ''
self.threads = []
self.archives = {}
@@ -369,6 +393,13 @@
cur_thread.feed_line(line)
self.threads.append(cur_thread)
+ if self.keep and len(self.threads) > 1:
+ # set the timestamp to the previous if the current is lower
+ prev = self.threads[0].timestamp
+ for thread in self.threads:
+ thread.timestamp = self.max(prev, thread.timestamp)
+ prev = thread.timestamp
+
# This extra info is not desirable when run under the unittest
# framework, which may be run either directly or via setup.py
if pywikibot.calledModuleName() not in ['archivebot_tests',
'setup']:
@@ -429,7 +460,8 @@
algo = 'none'
- def __init__(self, page, template, salt: str, force: bool = False) -> None:
+ def __init__(self, page, template, salt: str, force: bool = False,
+ keep=False) -> None:
"""Initializer.
:param page: a page object to be archived
@@ -458,7 +490,7 @@
except KeyError: # mw < 1.28
self.maxsize = 2096128 # 2 MB - 1 KB gap
- self.page = DiscussionPage(page, self)
+ self.page = DiscussionPage(page, self, keep=keep)
self.comment_params = {
'from': self.page.title(),
}
@@ -769,6 +801,7 @@
salt = ''
force = False
calc = None
+ keep = False
templates = []
local_args = pywikibot.handle_args(args)
@@ -798,6 +831,8 @@
pagename = value
elif option == 'namespace':
namespace = value
+ elif option == 'keep':
+ keep = True
site = pywikibot.Site()
@@ -843,7 +878,7 @@
# Catching exceptions, so that errors in one page do not bail out
# the entire process
try:
- archiver = PageArchiver(pg, tmpl, salt, force)
+ archiver = PageArchiver(pg, tmpl, salt, force, keep)
archiver.run()
except ArchiveBotSiteConfigError as e:
# no stack trace for errors originated by pages on-site
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/820469
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I8190482d43271e594fa0c4f7a916182ee1117bed
Gerrit-Change-Number: 820469
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Whym <whym(a)whym.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged