jenkins-bot has submitted this change and it was merged.
Change subject: Port watchlist.py to core
......................................................................
Port watchlist.py to core
Major changes from compat:
- Added Wikidata support (see lines 99-102)
- Removed _refreshOld() method for fetching data without using API
bug: 57995
Change-Id: Ie8f35ed77a22ad950fd5429ca4e823fa2a0aed75
---
A scripts/watchlist.py
1 file changed, 166 insertions(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/watchlist.py b/scripts/watchlist.py
new file mode 100755
index 0000000..2509088
--- /dev/null
+++ b/scripts/watchlist.py
@@ -0,0 +1,166 @@
+# -*- coding: utf-8 -*-
+"""
+Allows access to the bot account's watchlist.
+
+The function refresh() downloads the current watchlist and saves it to disk. It
+is run automatically when a bot first tries to save a page retrieved. The
+watchlist can be updated manually by running this script. The list will also
+be reloaded automatically once a month.
+
+Syntax: python watchlist.py [-all | -new]
+
+Command line options:
+ -all - Reloads watchlists for all wikis where a watchlist is already
+ present
+ -new - Load watchlists for all wikis where accounts are set in
+ user-config.py
+"""
+#
+# (C) Daniel Herding, 2005
+# (C) Pywikibot team, 2005-2014
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
+import pywikibot
+from pywikibot import config
+import re
+import sys
+import pickle
+import os.path
+import time
+
+cache = {}  # watchlists already loaded by get() in this process, keyed by site
+
+
+def get(site=None):
+ if site is None:
+ site = pywikibot.Site()
+ if site in cache:
+ # Use cached copy if it exists.
+ watchlist = cache[site]
+ else:
+ fn = config.datafilepath('watchlists',
+ 'watchlist-%s-%s.dat'
+ % (site.family.name, site.lang))
+ try:
+ # find out how old our saved dump is (in seconds)
+ file_age = time.time() - os.path.getmtime(fn)
+ # if it's older than 1 month, reload it
+ if file_age > 30 * 24 * 60 * 60:
+ pywikibot.output(
+ u'Copy of watchlist is one month old, reloading')
+ refresh(site)
+ except OSError:
+ # no saved watchlist exists yet, retrieve one
+ refresh(site)
+ f = open(fn, 'r')
+ watchlist = pickle.load(f)
+ f.close()
+ # create cached copy
+ cache[site] = watchlist
+ return watchlist
+
+
+def isWatched(pageName, site=None):
+ watchlist = get(site)
+ return pageName in watchlist
+
+
+def refresh(site, sysop=False):
+ # get watchlist special page's URL
+ if not site.logged_in(sysop=sysop):
+ site.forceLogin(sysop=sysop)
+
+ params = {
+ 'action': 'query',
+ 'list': 'watchlistraw',
+ 'site': site,
+ 'wrlimit': config.special_page_limit,
+ }
+
+ pywikibot.output(u'Retrieving watchlist for %s via API.' % str(site))
+ # pywikibot.put_throttle() # It actually is a get, but a heavy one.
+ watchlist = []
+ while True:
+ req = pywikibot.data.api.Request(**params)
+ data = req.submit()
+ if 'error' in data:
+ raise RuntimeError('ERROR: %s' % data)
+ watchlist.extend([w['title'] for w in data['watchlistraw']])
+
+ if 'query-continue' in data:
+ params.update(data['query-continue']['watchlistraw'])
+ else:
+ break
+
+ if site.family.name == 'wikidata':
+ lang = 'wikidata'
+ else:
+ lang = site.lang
+
+ # Save the watchlist to disk
+ # The file is stored in the watchlists subdir. Create if necessary.
+ f = open(config.datafilepath('watchlists',
+ 'watchlist-%s-%s%s.dat'
+ % (site.family.name, lang, '-sysop' if sysop
else '')),
+ 'w')
+ pickle.dump(watchlist, f)
+ f.close()
+
+
+def refresh_all(new=False, sysop=False):
+ if new:
+ pywikibot.output(
+ 'Downloading All watchlists for your accounts in user-config.py')
+ for family in config.usernames:
+ for lang in config.usernames[family]:
+ refresh(pywikibot.Site(lang, family), sysop=sysop)
+ for family in config.sysopnames:
+ for lang in config.sysopnames[family]:
+ refresh(pywikibot.Site(lang, family), sysop=sysop)
+
+ else:
+ import dircache
+ filenames = dircache.listdir(
+ config.datafilepath('watchlists'))
+ watchlist_filenameR = re.compile('watchlist-([a-z\-:]+).dat')
+ for filename in filenames:
+ match = watchlist_filenameR.match(filename)
+ if match:
+ arr = match.group(1).split('-')
+ family = arr[0]
+ lang = '-'.join(arr[1:])
+ refresh(pywikibot.Site(lang, family))
+
+
+def main():
+ all = False
+ new = False
+ sysop = False
+ for arg in pywikibot.handleArgs():
+ if arg in ('-all', '-update'):
+ all = True
+ elif arg == '-new':
+ new = True
+ elif arg == '-sysop':
+ sysop = True
+ if all:
+ refresh_all(sysop=sysop)
+ elif new:
+ refresh_all(new, sysop=sysop)
+ else:
+ refresh(pywikibot.Site(), sysop=sysop)
+
+ watchlist = get(pywikibot.Site())
+ pywikibot.output(u'%i pages in the watchlist.' % len(watchlist))
+ for pageName in watchlist:
+ pywikibot.output(pageName, toStdout=True)
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+ try:
+ main()
+ finally:  # pywikibot.stopme() is called whether or not main() raised
+ pywikibot.stopme()
--
To view, visit
https://gerrit.wikimedia.org/r/100363
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ie8f35ed77a22ad950fd5429ca4e823fa2a0aed75
Gerrit-PatchSet: 5
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Gerrit Patch Uploader <gerritpatchuploader(a)gmail.com>
Gerrit-Reviewer: Guoguo12 <Guoguo12(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>