jenkins-bot has submitted this change and it was merged.
Change subject: Convert weblinkchecker to SingleSiteBot
......................................................................
Convert weblinkchecker to SingleSiteBot
History object loads data for only a single site.
Change-Id: I01d26ba0c8e8b6ad8b3beac0ec9744d749185bc9
---
M scripts/weblinkchecker.py
1 file changed, 20 insertions(+), 18 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py
index 6d00cce..5452fdc 100755
--- a/scripts/weblinkchecker.py
+++ b/scripts/weblinkchecker.py
@@ -116,7 +116,9 @@
memento_client = e
import pywikibot
+
from pywikibot import i18n, config, pagegenerators, textlib, xmlreader, weblib
+from pywikibot.bot import ExistingPageBot, SingleSiteBot
# TODO: Convert to httlib2
if sys.version_info[0] > 2:
@@ -244,7 +246,7 @@
yield m.group('urlb')
-class XmlDumpPageGenerator:
+class XmlDumpPageGenerator(object):
"""Xml generator that yiels pages containing a web
link."""
@@ -597,7 +599,7 @@
self.history.setLinkDead(self.url, message, self.page, self.day)
-class History:
+class History(object):
"""
Store previously found dead links.
@@ -621,9 +623,12 @@
"""
- def __init__(self, reportThread):
+ def __init__(self, reportThread, site=None):
self.reportThread = reportThread
- self.site = pywikibot.Site()
+ if not site:
+ self.site = pywikibot.Site()
+ else:
+ self.site = site
self.semaphore = threading.Semaphore()
self.datfilename = pywikibot.config.datafilepath(
'deadlinks', 'deadlinks-%s-%s.dat' % (self.site.family.name,
self.site.code))
@@ -820,7 +825,7 @@
self.semaphore.release()
-class WeblinkCheckerRobot:
+class WeblinkCheckerRobot(SingleSiteBot, ExistingPageBot):
"""
Bot which will search for dead weblinks.
@@ -828,8 +833,11 @@
It uses several LinkCheckThreads at once to process pages from generator.
"""
- def __init__(self, generator, HTTPignore=None, day=7):
- self.generator = generator
+ def __init__(self, generator, HTTPignore=None, day=7, site=True):
+ """Constructor."""
+ super(WeblinkCheckerRobot, self).__init__(
+ generator=generator, site=site)
+
if config.report_dead_links_on_talk:
pywikibot.log("Starting talk page thread")
reportThread = DeadLinkReportThread()
@@ -838,23 +846,17 @@
reportThread.start()
else:
reportThread = None
- self.history = History(reportThread)
+ self.history = History(reportThread, site=self.site)
if HTTPignore is None:
self.HTTPignore = []
else:
self.HTTPignore = HTTPignore
self.day = day
- def run(self):
- for page in self.generator:
- self.checkLinksIn(page)
-
- def checkLinksIn(self, page):
- try:
- text = page.get()
- except pywikibot.NoPage:
- pywikibot.output(u'%s does not exist.' % page.title())
- return
+ def treat_page(self):
+ """Process one page."""
+ page = self.current_page
+ text = page.get()
for url in weblinksIn(text):
ignoreUrl = False
for ignoreR in ignorelist:
--
To view, visit
https://gerrit.wikimedia.org/r/239555
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I01d26ba0c8e8b6ad8b3beac0ec9744d749185bc9
Gerrit-PatchSet: 3
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>