jenkins-bot has submitted this change and it was merged.
Change subject: [FEAT] Process a single page with redirect.py
......................................................................
[FEAT] Process a single page with redirect.py
- Additional -page option makes it possible to work on a single page to solve
its problems.
- Re-enable XML file for broken redirect
- Remove old screen scraping code for broken redirect special page
Change-Id: I6e7da9ba91c7eb820b10b07a77076a093a2c2b2a
---
M scripts/redirect.py
1 file changed, 25 insertions(+), 32 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/redirect.py b/scripts/redirect.py
index c39a931..0fde4ab 100755
--- a/scripts/redirect.py
+++ b/scripts/redirect.py
@@ -36,6 +36,8 @@
If neither of -xml -fullscan -moves is given, info will be
loaded from a special page of the live wiki.
+-page:title Work on a single page
+
-namespace:n Namespace to process. Can be given multiple times, for several
namespaces. If omitted, only the main (article) namespace is
treated.
@@ -82,7 +84,7 @@
def __init__(self, xmlFilename=None, namespaces=[], offset=-1,
use_move_log=False, use_api=False, start=None, until=None,
- number=None, step=None):
+ number=None, step=None, page_title=None):
self.site = pywikibot.Site()
self.xmlFilename = xmlFilename
self.namespaces = namespaces
@@ -95,6 +97,7 @@
self.api_until = until
self.api_number = number
self.api_step = step
+ self.page_title = page_title
def get_redirects_from_dump(self, alsoGetPageTitles=False):
"""
@@ -267,38 +270,22 @@
count += 1
if count >= self.api_number:
break
- elif not self.xmlFilename:
+ elif self.xmlFilename:
+ # retrieve information from XML dump
+ pywikibot.output(
+ u'Getting a list of all redirects and of all page titles...')
+ redirs, pageTitles = self.get_redirects_from_dump(
+ alsoGetPageTitles=True)
+ for (key, value) in redirs.items():
+ if value not in pageTitles:
+ yield key
+ elif self.page_title:
+ yield self.page_title
+ else:
# retrieve information from broken redirect special page
pywikibot.output(u'Retrieving special page...')
for redir_name in self.site.broken_redirects():
yield redir_name.title()
-
-# TODO: add XML dump support
-## elif self.xmlFilename == None:
-## # retrieve information from the live wiki's maintenance page
-## # broken redirect maintenance page's URL
-## path = self.site.broken_redirects_address(default_limit=False)
-## pywikibot.output(u'Retrieving special page...')
-## maintenance_txt = self.site.getUrl(path)
-##
-## # regular expression which finds redirects which point to a
-## # non-existing page inside the HTML
-## Rredir = re.compile('\<li\>\<a href=".+?"
title="(.*?)"')
-##
-## redir_names = Rredir.findall(maintenance_txt)
-## pywikibot.output(u'Retrieved %d redirects from special page.\n'
-## % len(redir_names))
-## for redir_name in redir_names:
-## yield redir_name
-## else:
-## # retrieve information from XML dump
-## pywikibot.output(
-## u'Getting a list of all redirects and of all page titles...')
-## redirs, pageTitles = self.get_redirects_from_dump(
-## alsoGetPageTitles=True)
-## for (key, value) in redirs.items():
-## if value not in pageTitles:
-## yield key
def retrieve_double_redirects(self):
if self.use_move_log:
@@ -326,6 +313,8 @@
yield key
pywikibot.output(u'\nChecking redirect %i of %i...'
% (num + 1, len(redict)))
+ elif self.page_title:
+ yield self.page_title
else:
# retrieve information from double redirect special page
pywikibot.output(u'Retrieving special page...')
@@ -516,8 +505,8 @@
u"Won't delete anything."
% targetPage.title(asLink=True))
else:
- #we successfully get the target page, meaning that
- #it exists and is not a redirect: no reason to touch it.
+ # we successfully get the target page, meaning that
+ # it exists and is not a redirect: no reason to touch it.
pywikibot.output(
u'Redirect target %s does exist! Won\'t delete
anything.'
% targetPage.title(asLink=True))
@@ -753,6 +742,8 @@
until = ''
number = None
step = None
+ pagename = None
+
for arg in pywikibot.handle_args(args):
if arg == 'double' or arg == 'do':
action = 'double'
@@ -796,6 +787,8 @@
number = int(arg[7:])
elif arg.startswith('-step:'):
step = int(arg[6:])
+ elif arg.startswith('-page:'):
+ pagename = arg[6:]
elif arg == '-always':
options['always'] = True
elif arg == '-delete':
@@ -812,7 +805,7 @@
else:
pywikibot.Site().login()
gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
- fullscan, start, until, number, step)
+ fullscan, start, until, number, step, pagename)
bot = RedirectRobot(action, gen, number=number, **options)
bot.run()
--
To view, visit
https://gerrit.wikimedia.org/r/173665
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6e7da9ba91c7eb820b10b07a77076a093a2c2b2a
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>