jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/511326 )
Change subject: [IMPR] Use ThreadList with weblinkchecker.py
......................................................................
[IMPR] Use ThreadList with weblinkchecker.py
Also simplify ignore url checking
Change-Id: I714bebfb1221ec73dc2893de5c6592257ec27e78
---
M scripts/weblinkchecker.py
1 file changed, 9 insertions(+), 17 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  Matěj Suchánek: Looks good to me, but someone else must approve
  jenkins-bot: Verified
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 8d4fd46..62f2413 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -133,7 +133,7 @@
 from pywikibot.pagegenerators import (
     XMLDumpPageGenerator as _XMLDumpPageGenerator,
 )
-from pywikibot.tools import deprecated
+from pywikibot.tools import deprecated, ThreadList
 from pywikibot.tools.formatter import color_format
 
 try:
@@ -857,32 +857,24 @@
         self.HTTPignore = HTTPignore
         self.day = day
 
+        # Limit the number of threads started at the same time
+        self.threads = ThreadList(limit=config.max_external_links,
+                                  wait_time=config.retry_wait)
+
     def treat_page(self):
         """Process one page."""
         page = self.current_page
-        text = page.get()
-        for url in weblinksIn(text):
+        for url in weblinksIn(page.text):
             for ignoreR in ignorelist:
                 if ignoreR.match(url):
                     break
-            else:  # not ignore url
-                # Limit the number of threads started at the same time. Each
-                # thread will check one page, then die.
-                while threading.activeCount() >= config.max_external_links:
-                    pywikibot.sleep(config.retry_wait)
+            else:
+                # Each thread will check one page, then die.
                 thread = LinkCheckThread(page, url, self.history,
                                          self.HTTPignore, self.day)
                 # thread dies when program terminates
                 thread.setDaemon(True)
-                try:
-                    thread.start()
-                except threading.ThreadError:
-                    pywikibot.warning(
-                        "Can't start a new thread.\nPlease decrease "
-                        'max_external_links in your user-config.py or use\n'
-                        "'-max_external_links:' option with a smaller value. "
-                        'Default is 50.')
-                    raise
+                self.threads.append(thread)
 
 
 def RepeatPageGenerator():
pywikibot-commits@lists.wikimedia.org