jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] process pages in parallel tasks with -async option

This implementation is still experimental. Processing is about
7 times faster, depending on the number of CPU cores.

Note: The script cannot be interrupted once it has been started with this option.

Change-Id: I530640292c38890595197441f480f17b82fa4254
---
M scripts/archivebot.py
1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/scripts/archivebot.py b/scripts/archivebot.py
index c0e2f37..13f250e 100755
--- a/scripts/archivebot.py
+++ b/scripts/archivebot.py
@@ -105,13 +105,15 @@
-keep Preserve thread order in archive even if threads are
archived later
-sort Sort archive by timestamp; should not be used with -keep
+ -async Run the bot in parallel tasks. This is experimental
+ and the bot cannot be stopped with KeyboardInterrupt

.. versionchanged:: 7.6
Localized variables for "archive" template parameter are supported.
`User:MiszaBot/config` is the default template. `-keep` option was
added.
.. versionchanged:: 7.7
- `-sort` option was added.
+ `-sort` and `-async` options were added.
"""
#
# (C) Pywikibot team, 2006-2022
@@ -124,6 +126,7 @@
import re
import time
from collections import OrderedDict, defaultdict
+from concurrent.futures import ThreadPoolExecutor
from hashlib import md5
from math import ceil
from textwrap import fill
@@ -132,7 +135,7 @@

import pywikibot
from pywikibot import i18n
-from pywikibot.backports import List, Set, Tuple, pairwise
+from pywikibot.backports import List, Set, Tuple, nullcontext, pairwise
from pywikibot.exceptions import Error, NoPageError
from pywikibot.textlib import (
TimeStripper,
@@ -850,6 +853,7 @@
calc = None
keep = False
sort = False
+ asyncronous = False
templates = []

local_args = pywikibot.handle_args(args)
@@ -883,6 +887,8 @@
keep = True
elif option == 'sort':
sort = True
+ elif option == 'async':
+ asyncronous = True

site = pywikibot.Site()

@@ -913,15 +919,22 @@
elif pagename:
gen = [pywikibot.Page(site, pagename, ns=3)]
else:
+
ns = [str(namespace)] if namespace is not None else []
pywikibot.output('Fetching template transclusions...')
gen = tmpl.getReferences(only_template_inclusion=True,
follow_redirects=False,
namespaces=ns,
content=True)
- for pg in gen:
- if not process_page(pg, tmpl, salt, force, keep, sort):
- return
+
+ botargs = tmpl, salt, force, keep, sort
+ context = ThreadPoolExecutor if asyncronous else nullcontext
+ with context() as executor:
+ for pg in gen:
+ if asyncronous:
+ executor.submit(process_page, pg, *botargs)
+ elif not process_page(pg, *botargs):
+ return


if __name__ == '__main__':

To view, visit change 817190. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I530640292c38890595197441f480f17b82fa4254
Gerrit-Change-Number: 817190
Gerrit-PatchSet: 11
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: PotsdamLamb
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged