jenkins-bot has submitted this change and it was merged.
Change subject: Ported unlink.py for Core branch, not using mwparserfromhell anymore
Change-Id: Ic533bc9b9e1921b8ff0bcd07235492160b08a6b2
......................................................................
Ported unlink.py for Core branch,
not using mwparserfromhell anymore
Change-Id: Ic533bc9b9e1921b8ff0bcd07235492160b08a6b2
---
A scripts/unlink.py
1 file changed, 201 insertions(+), 0 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/unlink.py b/scripts/unlink.py
new file mode 100755
index 0000000..43f6723
--- /dev/null
+++ b/scripts/unlink.py
@@ -0,0 +1,201 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+This bot unlinks a page on every page that links to it.
+
+These command-line arguments are understood:
+
+-namespace:n - Number of namespace to process. The parameter can be used
+               multiple times. It works in combination with all other
+               parameters, except for the -start parameter. If you e.g.
+               want to iterate over all user pages starting at User:M, use
+               -start:User:M.
+
+-always      - Don't prompt for each link; unlink every occurrence.
+
+All other parameters will be regarded as part of the title of the page that
+should be unlinked.
+
+Example:
+
+python unlink.py Foo bar -namespace:0 -namespace:6
+
+ Removes links to the page [[Foo bar]] in articles and image descriptions.
+"""
+#
+# (C) Pywikibot team, 2007-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import re
+import pywikibot
+from pywikibot import pagegenerators
+from pywikibot.editor import TextEditor
+from pywikibot import i18n
+from pywikibot import config
+
+
+class UnlinkBot(object):
+
+    """Bot that removes links to one page from the pages referring to it."""
+
+    def __init__(self, pageToUnlink, namespaces, always):
+        """
+        Prepare the page generator, the link regex and the edit summary.
+
+        pageToUnlink is the page whose incoming links are removed.
+        namespaces is handed to NamespaceFilterPageGenerator; an empty
+        value means the referring pages are not filtered.
+        always, when true, unlinks every occurrence without prompting.
+        """
+        self.pageToUnlink = pageToUnlink
+        gen = pagegenerators.ReferringPageGenerator(pageToUnlink)
+        if namespaces:
+            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
+        self.generator = pagegenerators.PreloadingGenerator(gen)
+        # Use the site of the target page (as handleNextLink does) rather
+        # than the default site, so both methods agree on the wiki.
+        site = pageToUnlink.site
+        linktrail = site.linktrail()
+        # The regular expression which finds links. Results consist of four
+        # groups:
+        #
+        # group title is the target page title, that is, everything
+        # before | or ].
+        #
+        # group section is the page section.
+        # It'll include the # to make life easier for us.
+        #
+        # group label is the alternative link title, that's everything
+        # between | and ].
+        #
+        # group linktrail is the link trail, that's letters after ]] which
+        # are part of the word.
+        # Note that the definition of 'letter' varies from language to
+        # language.
+        self.linkR = re.compile(
+            r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
+            r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
+        self.always = always
+        # Set to True once the user chooses 'quit'.
+        self.done = False
+        self.comment = i18n.twtranslate(site, 'unlink-unlinking',
+                                        self.pageToUnlink.title())
+
+    def handleNextLink(self, text, match, context=100):
+        """
+        Process one link match and return a tuple (text, jumpToBeginning).
+
+        text is the unicode string after the current link has been processed.
+        jumpToBeginning is a boolean which specifies if the cursor position
+        should be reset to 0. This is required after the user has edited the
+        article.
+
+        match is a match object produced by self.linkR on text.
+        context is the number of characters shown on each side of the link
+        when the user is prompted; choosing 'more context' re-prompts with
+        100 more characters.
+        """
+        site = self.pageToUnlink.site
+        title = match.group('title')
+        # Ignore links without a title (links to sections of the same page),
+        # interwiki links and links that point to a section.
+        if not title or site.isInterwikiLink(title) or match.group('section'):
+            return text, False
+        linkedPage = pywikibot.Page(site, title)
+        # Only links that point to the page being unlinked are of interest.
+        if linkedPage != self.pageToUnlink:
+            return text, False
+
+        if not self.always:
+            start, end = match.start(), match.end()
+            # At the beginning of the link, start red color.
+            # At the end of the link, reset the color to default.
+            pywikibot.output(
+                text[max(0, start - context):start]
+                + '\03{lightred}' + text[start:end]
+                + '\03{default}' + text[end:end + context])
+            choice = pywikibot.inputChoice(
+                u'\nWhat shall be done with this link?\n',
+                ['unlink', 'skip', 'edit', 'more context',
+                 'unlink all', 'quit'],
+                ['U', 's', 'e', 'm', 'a', 'q'], 'u')
+            pywikibot.output(u'')
+
+            if choice == 's':
+                # skip this link
+                return text, False
+            elif choice == 'e':
+                editor = TextEditor()
+                newText = editor.edit(text, jumpIndex=match.start())
+                # A false result means the user pressed Cancel; keep the
+                # text, but still rescan from the beginning.
+                return newText or text, True
+            elif choice == 'm':
+                # show more context by recursive self-call
+                return self.handleNextLink(text, match,
+                                           context=context + 100)
+            elif choice == 'a':
+                # Unlink this one and stop asking from now on.
+                self.always = True
+            elif choice == 'q':
+                self.done = True
+                return text, False
+
+        # Replace the link by its label (or title) plus the link trail.
+        new = match.group('label') or title
+        new += match.group('linktrail')
+        return text[:match.start()] + new + text[match.end():], False
+
+    def treat(self, page):
+        """
+        Remove the links to self.pageToUnlink from one referring page.
+
+        The page is saved with self.comment if its text changed. Missing,
+        redirect and locked pages are reported and skipped.
+        """
+        # Show the title of the page we're working on.
+        # Highlight the title in purple.
+        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+                         % page.title())
+        try:
+            oldText = page.get()
+            text = oldText
+            curpos = 0
+            while curpos < len(text):
+                match = self.linkR.search(text, pos=curpos)
+                if not match:
+                    break
+                # Make sure that next time around we will not find this same
+                # hit.
+                curpos = match.start() + 1
+                text, jumpToBeginning = self.handleNextLink(text, match)
+                if self.done:
+                    # The user chose 'quit': do not prompt for the
+                    # remaining links on this page.
+                    break
+                if jumpToBeginning:
+                    curpos = 0
+            if oldText == text:
+                pywikibot.output(u'No changes necessary.')
+            else:
+                pywikibot.showDiff(oldText, text)
+                page.put(text, self.comment)
+        except pywikibot.NoPage:
+            pywikibot.output(u"Page %s does not exist?!"
+                             % page.title(asLink=True))
+        except pywikibot.IsRedirectPage:
+            pywikibot.output(u"Page %s is a redirect; skipping."
+                             % page.title(asLink=True))
+        except pywikibot.LockedPage:
+            pywikibot.output(u"Page %s is locked?!" % page.title(asLink=True))
+
+    def run(self):
+        """Treat every generated page until the user chooses to quit."""
+        for page in self.generator:
+            if self.done:
+                break
+            self.treat(page)
+
+
+def main():
+    """
+    Parse the command line and run the bot.
+
+    -namespace:n arguments are collected (as int where possible), -always
+    disables prompting, and all remaining arguments are joined with spaces
+    to form the title of the page to unlink. Without a title the help
+    text is shown instead.
+    """
+    # Collects the words of the title of the page that should be unlinked.
+    pageTitle = []
+    # Which namespaces should be processed?
+    # default to [] which means all namespaces will be processed
+    namespaces = []
+    always = False
+    prefix = '-namespace:'
+
+    for arg in pywikibot.handleArgs():
+        if arg.startswith(prefix):
+            value = arg[len(prefix):]
+            try:
+                namespaces.append(int(value))
+            except ValueError:
+                # Not a number: keep the value as a string.
+                namespaces.append(value)
+        elif arg == '-always':
+            always = True
+        else:
+            pageTitle.append(arg)
+
+    if pageTitle:
+        page = pywikibot.Page(pywikibot.Site(), ' '.join(pageTitle))
+        bot = UnlinkBot(page, namespaces, always)
+        bot.run()
+    else:
+        pywikibot.showHelp('unlink')
+
+# Run the bot only when the file is executed as a script.
+if __name__ == "__main__":
+    main()
--
To view, visit
https://gerrit.wikimedia.org/r/132910
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ic533bc9b9e1921b8ff0bcd07235492160b08a6b2
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Maximilianklein <isalix(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Maximilianklein <isalix(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>