jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/817190 )
Change subject: [IMPR] process pages in parallel tasks with -async option ......................................................................
[IMPR] process pages in parallel tasks with -async option
This implementation is still experimental. Processing is about 7 times faster, depending on the number of CPU cores.
Note: The script cannot be interrupted once it has been started with this option.
Change-Id: I530640292c38890595197441f480f17b82fa4254 --- M scripts/archivebot.py 1 file changed, 18 insertions(+), 5 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py index c0e2f37..13f250e 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -105,13 +105,15 @@ -keep Preserve thread order in archive even if threads are archived later -sort Sort archive by timestamp; should not be used with -keep + -async Run the bot in parallel tasks. This is experimental + and the bot cannot be stopped with KeyboardInterrupt
.. versionchanged:: 7.6 Localized variables for "archive" template parameter are supported. `User:MiszaBot/config` is the default template. `-keep` option was added. .. versionchanged:: 7.7 - `-sort` option was added. + `-sort` and `-async` options were added. """ # # (C) Pywikibot team, 2006-2022 @@ -124,6 +126,7 @@ import re import time from collections import OrderedDict, defaultdict +from concurrent.futures import ThreadPoolExecutor from hashlib import md5 from math import ceil from textwrap import fill @@ -132,7 +135,7 @@
import pywikibot from pywikibot import i18n -from pywikibot.backports import List, Set, Tuple, pairwise +from pywikibot.backports import List, Set, Tuple, nullcontext, pairwise from pywikibot.exceptions import Error, NoPageError from pywikibot.textlib import ( TimeStripper, @@ -850,6 +853,7 @@ calc = None keep = False sort = False + asyncronous = False templates = []
local_args = pywikibot.handle_args(args) @@ -883,6 +887,8 @@ keep = True elif option == 'sort': sort = True + elif option == 'async': + asyncronous = True
site = pywikibot.Site()
@@ -913,15 +919,22 @@ elif pagename: gen = [pywikibot.Page(site, pagename, ns=3)] else: + ns = [str(namespace)] if namespace is not None else [] pywikibot.output('Fetching template transclusions...') gen = tmpl.getReferences(only_template_inclusion=True, follow_redirects=False, namespaces=ns, content=True) - for pg in gen: - if not process_page(pg, tmpl, salt, force, keep, sort): - return + + botargs = tmpl, salt, force, keep, sort + context = ThreadPoolExecutor if asyncronous else nullcontext + with context() as executor: + for pg in gen: + if asyncronous: + executor.submit(process_page, pg, *botargs) + elif not process_page(pg, *botargs): + return
if __name__ == '__main__':