jenkins-bot has submitted this change and it was merged.
Change subject: Port of selflink.py from compat to core.
......................................................................
Port of selflink.py from compat to core.
Bug: 64879
Change-Id: I893d5e4275a2d9fdc695925930dae7589c07d372
---
A scripts/selflink.py
1 file changed, 216 insertions(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/selflink.py b/scripts/selflink.py
new file mode 100644
index 0000000..87ee18b
--- /dev/null
+++ b/scripts/selflink.py
@@ -0,0 +1,216 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+This bot goes over multiple pages of the site, searches for selflinks, and
+allows removing them.
+
+These command line parameters can be used to specify which pages to work on:
+
+&params;
+
+-always Unlink always but don't prompt you for each replacement.
+ ATTENTION: Use this with care!
+"""
+#
+# (C) Pywikibot team, 2006-2014
+#
+# Distributed under the terms of the MIT license.
+#
+
+import re
+import pywikibot
+from pywikibot import i18n, Bot
+from pywikibot.editor import TextEditor
+from pywikibot.pagegenerators import GeneratorFactory, PreloadingGenerator, \
+ parameterHelp
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '&params;': parameterHelp,
+}
+
+
+class SelflinkBot(Bot):
+
+ def __init__(self, generator, **kwargs):
+ super(SelflinkBot, self).__init__(**kwargs)
+ self.generator = generator
+ linktrail = pywikibot.Site().linktrail()
+ # The regular expression which finds links. Results consist of four
+ # groups:
+ # group title is the target page title, everything before | or ].
+ # group section is the page section. It'll include the # to make life
+ # easier for us.
+ # group label is the alternative link title, that's everything between
+ # | and ].
+ # group linktrail is the link trail, that's letters after ]] which are
+ # part of the word.
+ # note that the definition of 'letter' varies from language to
+ # language.
+ self.linkR = re.compile(
+ r'\[\[(?P<title>[^\]\|#]*)'
+ r'(?P<section>#[^\]\|]*)?'
+ '(\|(?P<label>[^\]]*))?\]\]'
+ r'(?P<linktrail>' + linktrail + ')')
+ self.done = False
+
+ def handleNextLink(self, page, match, context=100):
+ """Process the next link on a page, offering the user choices.
+
+ @param page: The page being edited
+ @type page: pywikibot.Page
+ @param match: The match object for the current link.
+ @type match: re.MatchObject
+ @param context: The amount of context around the link shown to the user
+ @type context: int
+ @return: jumpToBeginning, a boolean, which specifies if the cursor
+ position should be reset to 0. This is required after the user has
+ edited the article.
+ """
+ # ignore interwiki links and links to sections of the same page as well
+ # as section links
+ if not match.group('title') \
+ or page.site.isInterwikiLink(match.group('title')) \
+ or match.group('section'):
+ return False
+ try:
+ linkedPage = pywikibot.Page(page.site, title=match.group('title'))
+ except pywikibot.InvalidTitle, err:
+ pywikibot.warning(u'%s' % err)
+ return False
+
+ # Check whether the link found is to the current page itself.
+ if linkedPage != page:
+ # not a self-link, nothing to do
+ return False
+
+ # at the beginning of the link, start red color.
+ # at the end of the link, reset the color to default
+ if self.getOption('always'):
+ choice = 'a'
+ else:
+ pre = page.text[max(0, match.start() - context):match.start()]
+ post = page.text[match.end():match.end() + context]
+ matchText = match.group(0)
+ pywikibot.output(
+ pre + '\03{lightred}' + matchText + '\03{default}' + post)
+ choice = pywikibot.inputChoice(
+ u'\nWhat shall be done with this selflink?\n',
+ ['unlink', 'make bold', 'skip', 'edit', 'more context',
+ 'unlink all', 'quit'],
+ ['U', 'b', 's', 'e', 'm', 'a', 'q'], 'u')
+ pywikibot.output(u'')
+
+ if choice == 's':
+ # skip this link
+ return False
+ elif choice == 'e':
+ editor = TextEditor()
+ newText = editor.edit(page.text, jumpIndex=match.start())
+ # if user didn't press Cancel
+ if newText:
+ page.text = newText
+ return True
+ else:
+ return True
+ elif choice == 'm':
+ # show more context by recursive self-call
+ return self.handleNextLink(page, match, context=context + 100)
+ elif choice == 'a':
+ self.always = True
+ elif choice == 'q':
+ self.done = True
+ return False
+
+ # choice was 'U', 'b', or 'a'
+ new = match.group('label') or match.group('title')
+ new += match.group('linktrail')
+ preMatch = page.text[:match.start()]
+ postMatch = page.text[match.end():]
+ if choice == 'b':
+ # make bold
+ page.text = preMatch + "'''" + new + "'''" + postMatch
+ return False
+ else:
+ page.text = preMatch + new + postMatch
+ return False
+
+ def treat(self, page):
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
+ try:
+ oldText = page.text
+ # Inside image maps, don't touch selflinks, as they're used
+ # to create tooltip labels. See for example:
+ # http://de.wikipedia.org/w/index.php?diff=next&oldid=35721641
+ if '<imagemap>' in page.text:
+ pywikibot.output(
+ u'Skipping page %s because it contains an image map.'
+ % page.title(asLink=True))
+ return
+ curpos = 0
+ while curpos < len(page.text) or self.done:
+ match = self.linkR.search(page.text, pos=curpos)
+ if not match:
+ break
+ # Make sure that next time around we will not find this same
+ # hit.
+ curpos = match.start() + 1
+ jumpToBeginning = self.handleNextLink(page, match)
+ if jumpToBeginning:
+ curpos = 0
+
+ if oldText == page.text:
+ pywikibot.output(u'No changes necessary.')
+ else:
+ pywikibot.showDiff(oldText, page.text)
+ comment = i18n.twtranslate(page.site, "selflink-remove")
+ page.save(async=True, comment=comment)
+ except pywikibot.NoPage:
+ pywikibot.output(u"Page %s does not exist."
+ % page.title(asLink=True))
+ except pywikibot.IsRedirectPage:
+ pywikibot.output(u"Page %s is a redirect; skipping."
+ % page.title(asLink=True))
+ except pywikibot.LockedPage:
+ pywikibot.output(u"Page %s is locked." % page.title(asLink=True))
+
+ def run(self):
+ for page in self.generator:
+ if self.done:
+ break
+ self.treat(page)
+
+
+def main():
+ # Page generator
+ gen = None
+ # Process global args and prepare generator args parser
+ local_args = pywikibot.handleArgs()
+ genFactory = GeneratorFactory()
+ botArgs = {}
+
+ for arg in local_args:
+ if arg == '-always':
+ botArgs['always'] = True
+ else:
+ genFactory.handleArg(arg)
+
+ gen = genFactory.getCombinedGenerator()
+ if not gen:
+ pywikibot.showHelp()
+ return
+
+ preloadingGen = PreloadingGenerator(gen)
+ bot = SelflinkBot(preloadingGen, **botArgs)
+ bot.run()
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit https://gerrit.wikimedia.org/r/138661
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I893d5e4275a2d9fdc695925930dae7589c07d372
Gerrit-PatchSet: 8
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Audiodude <audiodude(a)gmail.com>
Gerrit-Reviewer: Audiodude <audiodude(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Ignore expired cookies
......................................................................
Ignore expired cookies
Bug: 63605
Change-Id: Ie04b1ecf43146eaaaa0253deecdb3347397b24a2
---
M login.py
M wikipedia.py
2 files changed, 12 insertions(+), 2 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/login.py b/login.py
index ebeccc7..ac0056c 100644
--- a/login.py
+++ b/login.py
@@ -59,6 +59,7 @@
import query
import wikipedia as pywikibot
import config
+from datetime import datetime, timedelta
# On some wikis you are only allowed to run a bot if there is a link to
# the bot's user page in a specific list.
@@ -233,7 +234,7 @@
pywikibot.output(u"%s/%s\n%s" % (response.code, response.msg,
fakeresponsemsg))
- Reat = re.compile(': (.*?)=(.*?);')
+ Reat = re.compile(': (.*?)=(.*?); (expires=(.*?);)?')
L = {}
if hasattr(response, 'sheaders'):
@@ -243,6 +244,10 @@
for eat in ck:
m = Reat.search(eat)
if m:
+ exps = m.group(4)
+ if exps:
+ if (datetime.strptime(exps, '%a, %d-%b-%Y %H:%M:%S %Z') - datetime.utcnow()) < timedelta(seconds=1):
+ continue
L[m.group(1)] = m.group(2)
got_token = got_user = False
diff --git a/wikipedia.py b/wikipedia.py
index 13ee2ab..3d74d7b 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -7116,11 +7116,16 @@
else:
ck = f.info().getallmatchingheaders('set-cookie')
if ck:
- Reat = re.compile(': (.*?)=(.*?);')
+ Reat = re.compile(': (.*?)=(.*?); (expires=(.*?);)?')
tmpc = {}
for d in ck:
m = Reat.search(d)
if m:
+ exps = m.group(4)
+ if exps:
+ if (datetime.datetime.strptime(exps, '%a, %d-%b-%Y %H:%M:%S %Z')
+ - datetime.datetime.utcnow()) < datetime.timedelta(seconds=1):
+ continue
tmpc[m.group(1)] = m.group(2)
if self.cookies(sysop):
self.updateCookies(tmpc, sysop)
--
To view, visit https://gerrit.wikimedia.org/r/124285
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie04b1ecf43146eaaaa0253deecdb3347397b24a2
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: devunt <devunt(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: devunt <devunt(a)gmail.com>
Gerrit-Reviewer: jenkins-bot <>