jenkins-bot has submitted this change and it was merged.
Change subject: Port watchlist.py to core
......................................................................
Port watchlist.py to core
Major changes from compat:
- Added Wikidata support (see lines 99-102)
- Removed _refreshOld() method for fetching data without using API
bug: 57995
Change-Id: Ie8f35ed77a22ad950fd5429ca4e823fa2a0aed75
---
A scripts/watchlist.py
1 file changed, 166 insertions(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/watchlist.py b/scripts/watchlist.py
new file mode 100755
index 0000000..2509088
--- /dev/null
+++ b/scripts/watchlist.py
@@ -0,0 +1,166 @@
+# -*- coding: utf-8 -*-
+"""
+Allows access to the bot account's watchlist.
+
+The function refresh() downloads the current watchlist and saves it to disk. It
+is run automatically when a bot first tries to save a page retrieved. The
+watchlist can be updated manually by running this script. The list will also
+be reloaded automatically once a month.
+
+Syntax: python watchlist [-all]
+
+Command line options:
+ -all - Reloads watchlists for all wikis where a watchlist is already
+ present
+  -new           - Load watchlists for all wikis that have an account
+                   configured in user-config.py
+"""
+#
+# (C) Daniel Herding, 2005
+# (C) Pywikibot team, 2005-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import pywikibot
+from pywikibot import config
+import re
+import sys
+import pickle
+import os.path
+import time
+
+cache = {}
+
+
+def get(site=None):
+ if site is None:
+ site = pywikibot.Site()
+ if site in cache:
+ # Use cached copy if it exists.
+ watchlist = cache[site]
+ else:
+ fn = config.datafilepath('watchlists',
+ 'watchlist-%s-%s.dat'
+ % (site.family.name, site.lang))
+ try:
+ # find out how old our saved dump is (in seconds)
+ file_age = time.time() - os.path.getmtime(fn)
+ # if it's older than 1 month, reload it
+ if file_age > 30 * 24 * 60 * 60:
+ pywikibot.output(
+ u'Copy of watchlist is one month old, reloading')
+ refresh(site)
+ except OSError:
+ # no saved watchlist exists yet, retrieve one
+ refresh(site)
+ f = open(fn, 'r')
+ watchlist = pickle.load(f)
+ f.close()
+ # create cached copy
+ cache[site] = watchlist
+ return watchlist
+
+
+def isWatched(pageName, site=None):
+ watchlist = get(site)
+ return pageName in watchlist
+
+
+def refresh(site, sysop=False):
+ # get watchlist special page's URL
+ if not site.logged_in(sysop=sysop):
+ site.forceLogin(sysop=sysop)
+
+ params = {
+ 'action': 'query',
+ 'list': 'watchlistraw',
+ 'site': site,
+ 'wrlimit': config.special_page_limit,
+ }
+
+ pywikibot.output(u'Retrieving watchlist for %s via API.' % str(site))
+ # pywikibot.put_throttle() # It actually is a get, but a heavy one.
+ watchlist = []
+ while True:
+ req = pywikibot.data.api.Request(**params)
+ data = req.submit()
+ if 'error' in data:
+ raise RuntimeError('ERROR: %s' % data)
+ watchlist.extend([w['title'] for w in data['watchlistraw']])
+
+ if 'query-continue' in data:
+ params.update(data['query-continue']['watchlistraw'])
+ else:
+ break
+
+ if site.family.name == 'wikidata':
+ lang = 'wikidata'
+ else:
+ lang = site.lang
+
+ # Save the watchlist to disk
+ # The file is stored in the watchlists subdir. Create if necessary.
+ f = open(config.datafilepath('watchlists',
+ 'watchlist-%s-%s%s.dat'
+ % (site.family.name, lang, '-sysop' if sysop else '')),
+ 'w')
+ pickle.dump(watchlist, f)
+ f.close()
+
+
+def refresh_all(new=False, sysop=False):
+ if new:
+ pywikibot.output(
+ 'Downloading All watchlists for your accounts in user-config.py')
+ for family in config.usernames:
+ for lang in config.usernames[family]:
+ refresh(pywikibot.Site(lang, family), sysop=sysop)
+ for family in config.sysopnames:
+ for lang in config.sysopnames[family]:
+ refresh(pywikibot.Site(lang, family), sysop=sysop)
+
+ else:
+ import dircache
+ filenames = dircache.listdir(
+ config.datafilepath('watchlists'))
+ watchlist_filenameR = re.compile('watchlist-([a-z\-:]+).dat')
+ for filename in filenames:
+ match = watchlist_filenameR.match(filename)
+ if match:
+ arr = match.group(1).split('-')
+ family = arr[0]
+ lang = '-'.join(arr[1:])
+ refresh(pywikibot.Site(lang, family))
+
+
+def main():
+ all = False
+ new = False
+ sysop = False
+ for arg in pywikibot.handleArgs():
+ if arg in ('-all', '-update'):
+ all = True
+ elif arg == '-new':
+ new = True
+ elif arg == '-sysop':
+ sysop = True
+ if all:
+ refresh_all(sysop=sysop)
+ elif new:
+ refresh_all(new, sysop=sysop)
+ else:
+ refresh(pywikibot.Site(), sysop=sysop)
+
+ watchlist = get(pywikibot.Site())
+ pywikibot.output(u'%i pages in the watchlist.' % len(watchlist))
+ for pageName in watchlist:
+ pywikibot.output(pageName, toStdout=True)
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ pywikibot.stopme()
--
To view, visit https://gerrit.wikimedia.org/r/100363
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie8f35ed77a22ad950fd5429ca4e823fa2a0aed75
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Guoguo12 <Guoguo12(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: fix textlib.getCategoryLinks() for adjacent categories
......................................................................
fix textlib.getCategoryLinks() for adjacent categories
the regex has been fixed (take #3):
- characters in the sortKey are now matched in non-greedy mode
- spaces at the end of the sortKey are not ignored anymore
test_adjoining_links() has been added to TestCategoryRearrangement
to make sure that the regular expression always works well.
Change-Id: Ia7dbe5007c34af8a457e8952ef727928e02f8d4d
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 15 insertions(+), 5 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index f9a06b0..585ba77 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -663,7 +663,7 @@
text = removeDisabledParts(text)
catNamespace = '|'.join(site.category_namespaces())
R = re.compile(r'\[\[\s*(?P<namespace>%s)\s*:\s*(?P<catName>.+?)'
- r'(?:\|(?P<sortKey>.*))?\s*\]\]'
+ r'(?:\|(?P<sortKey>.*?))?\]\]'
% catNamespace, re.I)
for match in R.finditer(text):
cat = pywikibot.Category(pywikibot.Link(
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 72958c7..edcead9 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -134,7 +134,8 @@
"""
Tests to ensure that sorting keys are not being lost when
- using .getCategoryLinks() and .replaceCategoryLinks().
+ using .getCategoryLinks() and .replaceCategoryLinks(),
+ with both a newline and an empty string as separators.
"""
@classmethod
@@ -143,12 +144,21 @@
cls.old = ('[[Category:Cat1]]%(LS)s[[Category:Cat2|]]%(LS)s'
'[[Category:Cat1| ]]%(LS)s[[Category:Cat2|key]]'
% {'LS': config.LS})
+ cls.cats = textlib.getCategoryLinks(cls.old, site=cls.site)
- def test_replace_category_links(self):
- cats = textlib.getCategoryLinks(self.old, site=self.site)
- new = textlib.replaceCategoryLinks(self.old, cats, site=self.site)
+ def test_standard_links(self):
+ new = textlib.replaceCategoryLinks(self.old, self.cats, site=self.site)
self.assertEqual(self.old, new)
+ def test_adjoining_links(self):
+ old = self.old.replace(config.LS, '')
+ cats = textlib.getCategoryLinks(old, site=self.site)
+ self.assertEqual(self.cats, cats)
+ sep = config.LS
+ config.line_separator = '' # use an empty separator temporarily
+ new = textlib.replaceCategoryLinks(old, cats, site=self.site)
+ self.assertEqual(old, new)
+ config.line_separator = sep # restore the default separator
if __name__ == '__main__':
try:
--
To view, visit https://gerrit.wikimedia.org/r/148553
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia7dbe5007c34af8a457e8952ef727928e02f8d4d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Russell Blau <russblau(a)imapmail.org>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Add whitespaces per PEP 8
......................................................................
Add whitespaces per PEP 8
Change-Id: Ia1a80643cbb83827f9587dc4bdac4ddc34dbc35d
---
M scripts/interwiki.py
1 file changed, 14 insertions(+), 14 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 38b1c43..d05a7d4 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -1429,7 +1429,7 @@
# FIXME: What errors are we catching here?
# except: should be avoided!!
except:
- #raise
+ # raise
pywikibot.output(u'File autonomous_problems.dat open or corrupted! Try again with -restore.')
sys.exit()
iw = ()
@@ -1505,7 +1505,7 @@
if page.exists() and not page.isRedirectPage() and not page.isCategoryRedirect():
site = page.site
if site.family.interwiki_forward:
- #TODO: allow these cases to be propagated!
+ # TODO: allow these cases to be propagated!
continue # inhibit the forwarding families pages to be updated.
if site == self.originPage.site:
if page != self.originPage:
@@ -1552,7 +1552,7 @@
% (i, page2))
self.whereReport(page2, indent=8)
while True:
- #TODO: allow answer to repeat previous or go back after a mistake
+ # TODO: allow answer to repeat previous or go back after a mistake
answer = pywikibot.input(u"Which variant should be used? (<number>, [n]one, [g]ive up) ").lower()
if answer:
if answer == 'g':
@@ -1583,7 +1583,7 @@
if acceptall:
answer = 'a'
else:
- #TODO: allow answer to repeat previous or go back after a mistake
+ # TODO: allow answer to repeat previous or go back after a mistake
answer = pywikibot.inputChoice(u'What should be done?', ['accept', 'reject', 'give up', 'accept all'], ['a', 'r', 'g', 'l'], 'a')
if answer == 'l': # accept all
acceptall = True
@@ -1639,11 +1639,11 @@
# TODO: should be move to assemble()
# replaceLinks will skip the site it's working on.
if self.originPage.site not in new:
- #TODO: make this possible as well.
+ # TODO: make this possible as well.
if not self.originPage.site.family.interwiki_forward:
new[self.originPage.site] = self.originPage
- #self.replaceLinks(self.originPage, new, True)
+ # self.replaceLinks(self.originPage, new, True)
updatedSites = []
notUpdatedSites = []
@@ -1727,7 +1727,7 @@
continue
mods, mcomment, adding, removing, modifying \
= compareLanguages(old, new, insite=site)
- #cannot create pywikibot.User with IP
+ # cannot create pywikibot.User with IP
smallWikiAllowed = (
page.isIpEdit() or
len(removing) > 0 or
@@ -1771,7 +1771,7 @@
break
# disabled graph drawing for minor problems: it just takes too long
- #if notUpdatedSites != [] and config.interwiki_graph:
+ # if notUpdatedSites != [] and config.interwiki_graph:
# # at least one site was not updated, save a conflict graph
# self.createGraph()
@@ -1890,7 +1890,7 @@
if rmsite == page.site:
continue
rmPage = old[rmsite]
- #put it to new means don't delete it
+ # put it to new means don't delete it
if (
not globalvar.cleanup or
unicode(rmPage) not in globalvar.remove or
@@ -2164,8 +2164,8 @@
if page.isTalkPage():
pywikibot.output(u'Skipping: %s is a talk page' % page)
continue
- #doesn't work: page must be preloaded for this test
- #if page.isEmpty():
+ # doesn't work: page must be preloaded for this test
+ # if page.isEmpty():
# pywikibot.output(u'Skipping: %s is a empty page' % page.title())
# continue
if page.namespace() == 10:
@@ -2368,8 +2368,8 @@
# Version info marks bots without unicode error
# This also prevents abuse filter blocking on de-wiki
- #if not pywikibot.unicode_error:
- # mcomment += u'r%s) (' % sys.version.split()[0]
+ # if not pywikibot.unicode_error:
+ # mcomment += u'r%s) (' % sys.version.split()[0]
mcomment += globalvar.summary
@@ -2595,7 +2595,7 @@
except WindowsError:
pass
-#===========
+# ===========
globalvar = Global()
if __name__ == "__main__":
--
To view, visit https://gerrit.wikimedia.org/r/139576
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ia1a80643cbb83827f9587dc4bdac4ddc34dbc35d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Whym <whym(a)whym.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>