jenkins-bot has submitted this change and it was merged.
Change subject: improvements to unlink.py
......................................................................
improvements to unlink.py
- UnlinkBot extends pywikibot.Bot
  with built-in 'always' option
- BREAKING: require the page name to be wrapped in quotes
- use core-like page.save
- remove obsolete script name from showHelp
- make localized strings depend on the actual working site
Change-Id: I24757c086d1eda88d71148a547aebaa8aeca56ec
---
M scripts/unlink.py
1 file changed, 37 insertions(+), 32 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
Ladsgroup: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/unlink.py b/scripts/unlink.py
index f481f24..b6b5d09 100755
--- a/scripts/unlink.py
+++ b/scripts/unlink.py
@@ -1,23 +1,22 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
-
"""
This bot unlinks a page on every page that links to it.
This script understands this command-line argument:
- -namespace:n - Number of namespace to process. The parameter can be used
+ -namespace:n Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g.
want to iterate over all user pages starting at User:M, use
-start:User:M.
-All other parameters will be regarded as part of the title of the page that
-should be unlinked.
+Any other parameter will be regarded as the title of the page
+that should be unlinked.
Example:
-python unlink.py Foo bar -namespace:0 -namespace:6
+python unlink.py "Foo bar" -namespace:0 -namespace:6
Removes links to the page [[Foo bar]] in articles and image descriptions.
"""
@@ -31,20 +30,27 @@
import re
import pywikibot
-from pywikibot import pagegenerators
from pywikibot.editor import TextEditor
-from pywikibot import i18n
+from pywikibot import pagegenerators, i18n, Bot
-class UnlinkBot:
+class UnlinkBot(Bot):
- def __init__(self, pageToUnlink, namespaces, always):
+ def __init__(self, pageToUnlink, **kwargs):
+ self.availableOptions.update({
+ 'namespaces': [],
+ # Which namespaces should be processed?
+ # default to [] which means all namespaces will be processed
+ })
+
+ super(UnlinkBot, self).__init__(**kwargs)
self.pageToUnlink = pageToUnlink
+ linktrail = self.pageToUnlink.site.linktrail()
+
gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
- if namespaces != []:
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+ if self.getOption('namespaces') != []:
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, self.getOption('namespaces'))
self.generator = pagegenerators.PreloadingGenerator(gen)
- linktrail = pywikibot.getSite().linktrail()
# The regular expression which finds links. Results consist of four
# groups:
#
@@ -62,9 +68,8 @@
# note that the definition of 'letter' varies from language to language.
self.linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
% linktrail)
- self.always = always
self.done = False
- self.comment = i18n.twtranslate(pywikibot.getSite(), 'unlink-unlinking',
+ self.comment = i18n.twtranslate(self.pageToUnlink.site, 'unlink-unlinking',
self.pageToUnlink.title())
def handleNextLink(self, text, match, context=100):
@@ -91,7 +96,7 @@
else:
# at the beginning of the link, start red color.
# at the end of the link, reset the color to default
- if self.always:
+ if self.getOption('always'):
choice = 'a'
else:
pywikibot.output(
@@ -121,7 +126,7 @@
return self.handleNextLink(text, match,
context=context + 100)
elif choice == 'a':
- self.always = True
+ self.options['always'] = True
elif choice == 'q':
self.done = True
return text, False
@@ -152,7 +157,8 @@
pywikibot.output(u'No changes necessary.')
else:
pywikibot.showDiff(oldText, text)
- page.put(text, self.comment)
+ page.text = text
+ page.save(self.comment)
except pywikibot.NoPage:
pywikibot.output(u"Page %s does not exist?!"
% page.title(asLink=True))
@@ -170,31 +176,30 @@
def main():
- # This temporary array is used to read the page title if one single
- # page that should be unlinked.
- pageTitle = []
- # Which namespaces should be processed?
- # default to [] which means all namespaces will be processed
- namespaces = []
- always = False
+ # This temporary string is used to read the title
+ # of the page that should be unlinked.
+ page_title = None
+ options = {}
for arg in pywikibot.handleArgs():
if arg.startswith('-namespace:'):
+ if 'namespaces' not in options:
+ options['namespaces'] = []
try:
- namespaces.append(int(arg[11:]))
+ options['namespaces'].append(int(arg[11:]))
except ValueError:
- namespaces.append(arg[11:])
+ options['namespaces'].append(arg[11:])
elif arg == '-always':
- always = True
+ options['always'] = True
else:
- pageTitle.append(arg)
+ page_title = arg
- if pageTitle:
- page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
- bot = UnlinkBot(page, namespaces, always)
+ if page_title:
+ page = pywikibot.Page(pywikibot.Site(), page_title)
+ bot = UnlinkBot(page, **options)
bot.run()
else:
- pywikibot.showHelp('unlink')
+ pywikibot.showHelp()
if __name__ == "__main__":
main()
--
To view, visit
https://gerrit.wikimedia.org/r/137346
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I24757c086d1eda88d71148a547aebaa8aeca56ec
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>