jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/695200 )
Change subject: [IMPR] Use different logfiles for multiple processes (Step 2) ......................................................................
[IMPR] Use different logfiles for multiple processes (Step 2)
- Add a hash of the script name to throttle log. - Add a new method "get_pid" to Throttle which returns the pid (process identifier) if a script is running multiple times; returns 0 otherwise. - Deprecate multiplydelay parameter - Parameters except site parameter must be keyworded
Bug: T56685 Change-Id: I97e2b26f6a68ce9d5c59a2dfd4f53000d5c5d3bf --- M pywikibot/bot.py M pywikibot/site/_basesite.py M pywikibot/throttle.py 3 files changed, 69 insertions(+), 19 deletions(-)
Approvals: JJMC89: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/bot.py b/pywikibot/bot.py index 86dfbbf..3c79a38 100644 --- a/pywikibot/bot.py +++ b/pywikibot/bot.py @@ -346,11 +346,19 @@
# if user has enabled file logging, configure file handler if module_name in config.log or '*' in config.log: + # get PID + throttle = pywikibot.Site().throttle # initialize a Throttle object + pid = throttle.get_pid(module_name) # get the global PID if needed + pid = str(pid) + '-' if pid > 1 else '' + if config.logfilename: + # keep config.logfilename unchanged logfile = config.datafilepath('logs', config.logfilename) else: - logfile = config.datafilepath('logs', '{}-bot.log' - .format(module_name)) + # add PID to logfle name + logfile = config.datafilepath('logs', '{}-{}bot.log' + .format(module_name, pid)) + file_handler = RotatingFileHandler(filename=logfile, maxBytes=1024 * config.logfilesize, backupCount=config.logfilecount, diff --git a/pywikibot/site/_basesite.py b/pywikibot/site/_basesite.py index eecc0d9..8900c83 100644 --- a/pywikibot/site/_basesite.py +++ b/pywikibot/site/_basesite.py @@ -102,7 +102,7 @@ def throttle(self): """Return this Site's throttle. Initialize a new one if needed.""" if not hasattr(self, '_throttle'): - self._throttle = Throttle(self, multiplydelay=True) + self._throttle = Throttle(self) return self._throttle
@property diff --git a/pywikibot/throttle.py b/pywikibot/throttle.py index edb6625..87cbb8c 100644 --- a/pywikibot/throttle.py +++ b/pywikibot/throttle.py @@ -9,7 +9,7 @@ import threading import time
-from collections import namedtuple +from collections import namedtuple, Counter from contextlib import suppress from typing import Optional, Union
@@ -17,11 +17,19 @@
from pywikibot import config
+from pywikibot.tools import deprecated, deprecated_args, PYTHON_VERSION + +if PYTHON_VERSION < (3, 6): + from hashlib import md5 + blake2b = None +else: + from hashlib import blake2b +
_logger = 'wiki.throttle'
-FORMAT_LINE = '{pid} {time} {site}\n' -ProcEntry = namedtuple('ProcEntry', ['pid', 'time', 'site']) +FORMAT_LINE = '{module_id} {pid} {time} {site}\n' +ProcEntry = namedtuple('ProcEntry', ['module_id', 'pid', 'time', 'site'])
# global process identifier # @@ -43,11 +51,11 @@
"""
- def __init__(self, site, + @deprecated_args(multiplydelay=True) + def __init__(self, site, *, mindelay: Optional[int] = None, maxdelay: Optional[int] = None, - writedelay: Union[int, float, None] = None, - multiplydelay: bool = True): + writedelay: Union[int, float, None] = None): """Initializer.""" self.lock = threading.RLock() self.lock_write = threading.RLock() @@ -73,11 +81,34 @@ self.retry_after = 0 # set by http.request self.delay = 0 self.checktime = 0 - self.multiplydelay = multiplydelay - if self.multiplydelay: - self.checkMultiplicity() + self.modules = Counter() + + self.checkMultiplicity() self.setDelays()
+ @property + @deprecated(since='6.2', future_warning=True) + def multiplydelay(self): + """DEPRECATED attribute.""" + return True + + @multiplydelay.setter + @deprecated(since='6.2', future_warning=True) + def multiplydelay(self): + """DEPRECATED attribute setter.""" + + @staticmethod + def _module_hash(module=None) -> str: + """Convert called module name to a hash.""" + if module is None: + module = pywikibot.calledModuleName() + module = module.encode() + if blake2b: + hashobj = blake2b(module, digest_size=2) + else: + hashobj = md5(module) + return hashobj.hexdigest()[:4] # slice for Python 3.5 + def _read_file(self, raise_exc=False): """Yield process entries from file.""" try: @@ -91,8 +122,13 @@ for line in lines: # parse line; format is "pid timestamp site" try: - _pid, _time, _site = line.split(' ') + items = line.split(' ') + if len(items) == 3: # read legacy format + _id, _pid, _time, _site = self._module_hash(), *items + else: + _id, _pid, _time, _site = items proc_entry = ProcEntry( + module_id=_id, pid=int(_pid), time=int(float(_time)), site=_site.rstrip() @@ -142,9 +178,11 @@
self.checktime = time.time() processes.append( - ProcEntry(pid=pid, time=self.checktime, site=mysite)) + ProcEntry(module_id=self._module_hash(), pid=pid, + time=self.checktime, site=mysite)) + self.modules = Counter(p.module_id for p in processes)
- self._write_file(sorted(processes, key=lambda x: x.pid)) + self._write_file(sorted(processes, key=lambda p: p.pid))
self.process_multiplicity = count pywikibot.log('Found {} {} processes running, including this one.' @@ -175,14 +213,13 @@ thisdelay = self.writedelay else: thisdelay = self.delay - if not self.multiplydelay: - return thisdelay
# We're checking for multiple processes if time.time() > self.checktime + self.checkdelay: self.checkMultiplicity() - if thisdelay < (self.mindelay * self.next_multiplicity): - thisdelay = self.mindelay * self.next_multiplicity + multiplied_delay = self.mindelay * self.next_multiplicity + if thisdelay < multiplied_delay: + thisdelay = multiplied_delay elif thisdelay > self.maxdelay: thisdelay = self.maxdelay thisdelay *= self.process_multiplicity @@ -287,3 +324,8 @@ # account for any time we waited while acquiring the lock wait = delay - (time.time() - started) self.wait(wait) + + def get_pid(self, module: str) -> int: + """Get the global pid if the module is running multiple times.""" + global pid + return pid if self.modules[self._module_hash(module)] > 1 else 0
pywikibot-commits@lists.wikimedia.org