jenkins-bot submitted this change.
[IMPR] process pages in parallel tasks with -async option
This implementation is still experimental. Processing is about
7 times faster, depending on the number of CPU cores.
Note: The script cannot be interrupted once it has been started with this option.
Change-Id: I530640292c38890595197441f480f17b82fa4254
---
M scripts/archivebot.py
1 file changed, 18 insertions(+), 5 deletions(-)
diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index c0e2f37..13f250e 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -105,13 +105,15 @@
-keep Preserve thread order in archive even if threads are
archived later
-sort Sort archive by timestamp; should not be used with -keep
+ -async Run the bot in parallel tasks. This is experimental
+ and the bot cannot be stopped with KeyboardInterrupt
.. versionchanged:: 7.6
Localized variables for "archive" template parameter are supported.
`User:MiszaBot/config` is the default template. `-keep` option was
added.
.. versionchanged:: 7.7
- `-sort` option was added.
+ `-sort` and `-async` options were added.
"""
#
# (C) Pywikibot team, 2006-2022
@@ -124,6 +126,7 @@
import re
import time
from collections import OrderedDict, defaultdict
+from concurrent.futures import ThreadPoolExecutor
from hashlib import md5
from math import ceil
from textwrap import fill
@@ -132,7 +135,7 @@
import pywikibot
from pywikibot import i18n
-from pywikibot.backports import List, Set, Tuple, pairwise
+from pywikibot.backports import List, Set, Tuple, nullcontext, pairwise
from pywikibot.exceptions import Error, NoPageError
from pywikibot.textlib import (
TimeStripper,
@@ -850,6 +853,7 @@
calc = None
keep = False
sort = False
+ asyncronous = False
templates = []
local_args = pywikibot.handle_args(args)
@@ -883,6 +887,8 @@
keep = True
elif option == 'sort':
sort = True
+ elif option == 'async':
+ asyncronous = True
site = pywikibot.Site()
@@ -913,15 +919,22 @@
elif pagename:
gen = [pywikibot.Page(site, pagename, ns=3)]
else:
+
ns = [str(namespace)] if namespace is not None else []
pywikibot.output('Fetching template transclusions...')
gen = tmpl.getReferences(only_template_inclusion=True,
follow_redirects=False,
namespaces=ns,
content=True)
- for pg in gen:
- if not process_page(pg, tmpl, salt, force, keep, sort):
- return
+
+ botargs = tmpl, salt, force, keep, sort
+ context = ThreadPoolExecutor if asyncronous else nullcontext
+ with context() as executor:
+ for pg in gen:
+ if asyncronous:
+ executor.submit(process_page, pg, *botargs)
+ elif not process_page(pg, *botargs):
+ return
if __name__ == '__main__':
To view, visit change 817190. To unsubscribe, or for help writing mail filters, visit settings.