Revision: 6480
Author: russblau
Date: 2009-03-03 16:50:18 +0000 (Tue, 03 Mar 2009)
Log Message:
-----------
Ported to new framework
Modified Paths:
--------------
branches/rewrite/pywikibot/scripts/category_redirect.py
Modified: branches/rewrite/pywikibot/scripts/category_redirect.py
===================================================================
--- branches/rewrite/pywikibot/scripts/category_redirect.py 2009-03-03 16:49:18 UTC (rev 6479)
+++ branches/rewrite/pywikibot/scripts/category_redirect.py 2009-03-03 16:50:18 UTC (rev 6480)
@@ -14,8 +14,8 @@
"""
__version__ = '$Id$'
-import wikipedia, catlib
-import pagegenerators
+import pywikibot
+from pywikibot import pagegenerators
import simplejson
import cPickle
import math
@@ -25,27 +25,16 @@
from datetime import datetime, timedelta
-class APIError(Exception):
- """The wiki API returned an error message."""
-
- def __init__(self, errordict):
- """Save error dict returned by MW API."""
- self.errors = errordict
-
- def __str__(self):
- return "%(code)s: %(info)s" % self.errors
-
-
class CategoryRedirectBot(object):
def __init__(self):
self.cooldown = 7 # days
- self.site = wikipedia.getSite()
+ self.site = pywikibot.getSite()
self.catprefix = self.site.namespace(14)+":"
self.log_text = []
self.edit_requests = []
- self.log_page = wikipedia.Page(self.site,
+ self.log_page = pywikibot.Page(self.site,
u"User:%(user)s/category redirect log" %
- {'user': self.site.loggedInAs()})
+ {'user': self.site.user()})
# Localization:
@@ -147,7 +136,7 @@
'no': u"Bot for vedlikehold av kategoriomdirigeringer",
}
- self.edit_request_text = wikipedia.translate(self.site.lang,
+ self.edit_request_text = pywikibot.translate(self.site.lang,
{'en': u"""\
The following protected pages have been detected as requiring updates to \
category links:
@@ -156,7 +145,7 @@
""",
})
- self.edit_request_item = wikipedia.translate(self.site.lang,
+ self.edit_request_item = pywikibot.translate(self.site.lang,
{'en': u"* %s is in %s, which is a redirect to %s",
})
@@ -166,89 +155,94 @@
Moves subcategories of oldCat as well. oldCat and newCat should be
Category objects. If newCat is None, the category will be removed.
- This is a copy of portions of catlib.change_category(), with some
- changes.
+ This is a copy of portions of [old] catlib.change_category(), with
+ some changes.
"""
oldtext = article.get(get_redirect=True, force=True)
- newtext = wikipedia.replaceCategoryInPlace(oldtext, oldCat, newCat)
+ newtext = pywikibot.replaceCategoryInPlace(oldtext, oldCat, newCat)
try:
# even if no changes, still save the page, in case it needs
# an update due to changes in a transcluded template
article.put(newtext, comment)
if newtext == oldtext:
- wikipedia.output(
- u'No changes in made in page %s.' % article.aslink())
+ pywikibot.output(
+ u'No changes in made in page %s.'
+ % article.title(asLink=True)
+ )
return False
return True
- except wikipedia.EditConflict:
- wikipedia.output(
- u'Skipping %s because of edit conflict' % article.aslink())
- except wikipedia.LockedPage:
- wikipedia.output(u'Skipping locked page %s' % article.aslink())
- self.edit_requests.append((article.aslink(),
- oldCat.aslink(textlink=True),
- newCat.aslink(textlink=True)))
- except wikipedia.SpamfilterError, error:
- wikipedia.output(
+ except pywikibot.EditConflict:
+ pywikibot.output(
+ u'Skipping %s because of edit conflict'
+ % article.title(asLink=True)
+ )
+ except pywikibot.LockedPage:
+ pywikibot.output(u'Skipping locked page %s'
+ % article.title(asLink=True)
+ )
+ self.edit_requests.append(
+ (article.title(asLink=True, textlink=True),
+ oldCat.title(asLink=True, textlink=True),
+ newCat.title(asLink=True, textlink=True)
+ ))
+ except pywikibot.SpamfilterError, error:
+ pywikibot.output(
u'Changing page %s blocked by spam filter (URL=%s)'
- % (article.aslink(), error.url))
- except wikipedia.NoUsername:
- wikipedia.output(
+ % (article.title(asLink=True), error.url))
+ except pywikibot.NoUsername:
+ pywikibot.output(
u"Page %s not saved; sysop privileges required."
- % article.aslink())
- self.edit_requests.append((article.aslink(textlink=True),
- oldCat.aslink(textlink=True),
- newCat.aslink(textlink=True)))
- except wikipedia.PageNotSaved, error:
- wikipedia.output(u"Saving page %s failed: %s"
- % (article.aslink(), error.message))
+ % article.title(asLink=True))
+ self.edit_requests.append(
+ (article.title(asLink=True, textlink=True),
+ oldCat.title(asLink=True, textlink=True),
+ newCat.title(asLink=True, textlink=True)
+ ))
+ except pywikibot.PageNotSaved, error:
+ pywikibot.output(u"Saving page %s failed: %s"
+ % (article.title(asLink=True), error.message))
return False
def move_contents(self, oldCatTitle, newCatTitle, editSummary):
"""The worker function that moves pages out of oldCat into
newCat"""
while True:
try:
- oldCat = catlib.Category(self.site,
- self.catprefix + oldCatTitle)
- newCat = catlib.Category(self.site,
- self.catprefix + newCatTitle)
+ oldCat = pywikibot.Category(self.site,
+ self.catprefix + oldCatTitle)
+ newCat = pywikibot.Category(self.site,
+ self.catprefix + newCatTitle)
# Move articles
found, moved = 0, 0
- for result in self.query_results(list="categorymembers",
- cmtitle=oldCat.title(),
- cmprop="title|sortkey",
- cmlimit="max"):
- found += len(result['categorymembers'])
- for item in result['categorymembers']:
- article = wikipedia.Page(self.site, item['title'])
- changed = self.change_category(article, oldCat, newCat,
- comment=editSummary)
- if changed: moved += 1
+ for article in oldCat.members():
+ found += 1
+ changed = self.change_category(article, oldCat, newCat,
+ comment=editSummary)
+ if changed: moved += 1
# pass 2: look for template doc pages
- for result in self.query_results(list="categorymembers",
- cmtitle=oldCat.title(),
- cmprop="title|sortkey",
- cmnamespace="10",
- cmlimit="max"):
- for item in result['categorymembers']:
- doc = wikipedia.Page(self.site, item['title']+"/doc")
- try:
- old_text = doc.get()
- except wikipedia.Error:
- continue
- changed = self.change_category(doc, oldCat, newCat,
- comment=editSummary)
- if changed: moved += 1
+ for item in pywikibot.data.api.ListGenerator(
+ "categorymembers", cmtitle=oldCat.title(),
+ cmprop="title|sortkey", cmnamespace="10",
+ cmlimit="max"):
+ doc = pywikibot.Page(
+ pywikibot.Link(item['title']+"/doc", self.site)
+ )
+ try:
+ old_text = doc.get()
+ except pywikibot.Error:
+ continue
+ changed = self.change_category(doc, oldCat, newCat,
+ comment=editSummary)
+ if changed: moved += 1
if found:
- wikipedia.output(u"%s: %s found, %s moved"
+ pywikibot.output(u"%s: %s found, %s moved"
% (oldCat.title(), found, moved))
return (found, moved)
- except wikipedia.ServerError:
- wikipedia.output(u"Server error: retrying in 5 seconds...")
+ except pywikibot.ServerError:
+ pywikibot.output(u"Server error: retrying in 5 seconds...")
time.sleep(5)
continue
except KeyboardInterrupt:
@@ -265,83 +259,12 @@
raise RuntimeError
return (deadline.strftime(dateformat) > cat.editTime())
- def query_results(self, **data):
- """Iterate results from API action=query, using data as parameters."""
- addr = self.site.apipath()
- querydata = {'action': 'query',
- 'format': 'json',
- 'maxlag': str(wikipedia.config.maxlag)}
- querydata.update(data)
- if not querydata.has_key("action")\
- or not querydata['action'] == 'query':
- raise ValueError(
- "query_results: 'action' set to value other than 'query'"
- )
- waited = 0
- while True:
- response, data = self.site.postForm(addr, querydata)
- if response.status != 200:
- # WARNING: if the server is down, this could
- # cause an infinite loop
- wikipedia.output(u"HTTP error %i received; retrying..."
- % response.status)
- time.sleep(5)
- continue
- if data.startswith(u"unknown_action"):
- e = {'code': data[:14], 'info': data[16:]}
- raise APIError(e)
- try:
- result = simplejson.loads(data)
- except ValueError:
- # if the result isn't valid JSON, there must be a server
- # problem. Wait a few seconds and try again
- # WARNING: if the server is down, this could
- # cause an infinite loop
- wikipedia.output(u"Invalid API response received; retrying...")
- time.sleep(5)
- continue
- if type(result) is dict and result.has_key("error"):
- if result['error']['code'] == "maxlag":
- print "Pausing due to server lag.\r",
- time.sleep(5)
- waited += 5
- if waited % 30 == 0:
- wikipedia.output(
- u"(Waited %i seconds due to server lag.)"
- % waited)
- continue
- else:
- # raise error
- raise APIError(result['error'])
- waited = 0
- if type(result) is list:
- # query returned no results
- return
- assert type(result) is dict, \
- "Unexpected result of type '%s' received." % type(result)
- if "query" not in result:
- # query returned no results
- return
- yield result['query']
- if result.has_key("query-continue"):
- assert len(result['query-continue'].keys()) == 1, \
- "More than one query-continue key returned: %s" \
- % result['query-continue'].keys()
- query_type = result['query-continue'].keys()[0]
- assert (query_type in querydata.keys()
- or query_type in querydata.values()), \
- "Site returned unknown query-continue type '%s'"\
- % query_type
- querydata.update(result['query-continue'][query_type])
- else:
- return
-
def get_log_text(self):
"""Rotate log text and return the most recent
text."""
LOG_SIZE = 7 # Number of items to keep in active log
try:
log_text = self.log_page.get()
- except wikipedia.NoPage:
+ except pywikibot.NoPage:
log_text = u""
log_items = {}
header = None
@@ -367,13 +290,15 @@
% (self.site.protocol(),
self.site.hostname(),
self.site.scriptpath(),
- self.log_page.urlname(),
+ self.log_page.title(asUrl=True),
rotate_revid))
return log_text
def run(self):
"""Run the bot"""
- user = self.site.loggedInAs()
+ global destmap, catlist, catmap
+
+ user = self.site.user()
redirect_magicwords = ["redirect"]
other_words = self.site.redirect()
if other_words:
@@ -382,9 +307,9 @@
l = time.localtime()
today = "%04d-%02d-%02d" % l[:3]
- edit_request_page = wikipedia.Page(self.site,
+ edit_request_page = pywikibot.Page(self.site,
u"User:%(user)s/category edit requests" %
locals())
- datafile = wikipedia.config.datafilepath(
+ datafile = pywikibot.config.datafilepath(
"%s-catmovebot-data" % self.site.dbName())
try:
inp = open(datafile, "rb")
@@ -399,7 +324,7 @@
template_list = self.redir_templates[self.site.family.name
][self.site.lang]
except KeyError:
- wikipedia.output(u"No redirect templates defined for %s"
+ pywikibot.output(u"No redirect templates defined for %s"
% self.site.sitename())
return
# regex to match soft category redirects
@@ -419,96 +344,82 @@
# check for hard-redirected categories that are not already marked
# with an appropriate template
- comment = wikipedia.translate(self.site.lang, self.redir_comment)
- for result in self.query_results(list='allpages',
- apnamespace='14', # Category:
- apfrom='!',
- apfilterredir='redirects',
- aplimit='max'):
- gen = (wikipedia.Page(self.site, page_item['title'])
- for page_item in result['allpages'])
- # gen yields all hard redirect pages in namespace 14
- for page in pagegenerators.PreloadingGenerator(gen, 120):
- if page.isCategoryRedirect():
- # this is already a soft-redirect, so skip it (for now)
- continue
- target = page.getRedirectTarget()
- if target.namespace() == 14:
- # this is a hard-redirect to a category page
- newtext = (u"{{%(template)s|%(cat)s}}"
- % {'cat': target.titleWithoutNamespace(),
- 'template': template_list[0]})
- try:
- page.put(newtext, comment, minorEdit=True)
- self.log_text.append(u"* Added {{tl|%s}} to %s"
- % (template_list[0],
- page.aslink(textlink=True)))
- except wikipedia.Error, e:
- self.log_text.append(
- u"* Failed to add {{tl|%s}} to %s (%s)"
- % (template_list[0],
- page.aslink(textlink=True),
- e))
- else:
- problems.append(
- u"# %s is a hard redirect to %s"
- % (page.aslink(textlink=True),
- target.aslink(textlink=True)))
+ comment = pywikibot.translate(self.site.lang, self.redir_comment)
+ for page in pagegenerators.PreloadingGenerator(
+ self.site.allpages(namespace=14, filterredir=True)
+ ):
+ # generator yields all hard redirect pages in namespace 14
+ if page.isCategoryRedirect():
+ # this is already a soft-redirect, so skip it (for now)
+ continue
+ target = page.getRedirectTarget()
+ if target.namespace() == 14:
+ # this is a hard-redirect to a category page
+ newtext = (u"{{%(template)s|%(cat)s}}"
+ % {'cat': target.title(withNamespace=False),
+ 'template': template_list[0]})
+ try:
+ page.put(newtext, comment, minorEdit=True)
+ self.log_text.append(u"* Added {{tl|%s}} to %s"
+ % (template_list[0],
+ page.title(asLink=True, textlink=True)))
+ except pywikibot.Error, e:
+ self.log_text.append(
+ u"* Failed to add {{tl|%s}} to %s (%s)"
+ % (template_list[0],
+ page.title(asLink=True, textlink=True),
+ e))
+ else:
+ problems.append(
+ u"# %s is a hard redirect to %s"
+ % (page.title(asLink=True, textlink=True),
+ target.title(asLink=True, textlink=True)))
- wikipedia.output("Done checking hard-redirect category pages.")
+ pywikibot.output("Done checking hard-redirect category pages.")
- comment = wikipedia.translate(self.site.lang, self.move_comment)
- scan_data = {
- u'action': 'query',
- u'list': 'embeddedin',
- u'einamespace': '14', # Category:
- u'eilimit': 'max',
- u'format': 'json'
- }
+ comment = pywikibot.translate(self.site.lang, self.move_comment)
counts, destmap, catmap = {}, {}, {}
- catlist, catpages, nonemptypages = [], [], []
- target = self.cat_redirect_cat[self.site.family.name][self.site.lang]
+ catlist, nonemptypages = [], []
+ redircat = pywikibot.Category(
+ pywikibot.Link(
+ self.cat_redirect_cat[self.site.family.name]
+ [self.site.lang],
+ self.site)
+ )
# get a list of all members of the category-redirect category
- for result in self.query_results(generator=u'categorymembers',
- gcmtitle=target,
- gcmnamespace=u'14', # CATEGORY
- gcmlimit=u'max',
- prop='info|categoryinfo'):
- for catdata in result['pages'].values():
- thispage = wikipedia.Page(self.site, catdata['title'])
- catpages.append(thispage)
- if 'categoryinfo' in catdata \
- and catdata['categoryinfo']['size'] != "0":
- # save those categories that have contents
- nonemptypages.append(thispage)
+ catpages = list(redircat.subcategories())
# preload the category pages for redirected categories
- wikipedia.output(u"")
- wikipedia.output(u"Preloading %s category redirect pages"
+ pywikibot.output(u"")
+ pywikibot.output(u"Preloading %s category redirect pages"
% len(catpages))
- for cat in pagegenerators.PreloadingGenerator(catpages, 120):
- cat_title = cat.titleWithoutNamespace()
+ for cat in pagegenerators.PreloadingGenerator(catpages):
+ catdata = cat.categoryinfo
+ if "size" in catdata and int(catdata['size']):
+ # save those categories that have contents
+ nonemptypages.append(cat)
+ cat_title = cat.title(withNamespace=False)
if "category redirect" in cat_title:
self.log_text.append(u"* Ignoring %s"
- % cat.aslink(textlink=True))
+ % cat.title(asLink=True, textlink=True))
continue
try:
- text = cat.get(get_redirect=True)
- except wikipedia.Error:
+ if not cat.isCategoryRedirect():
+ self.log_text.append(u"* False positive: %s"
+ % cat.title(asLink=True,
+ textlink=True))
+ continue
+ except pywikibot.Error:
self.log_text.append(u"* Could not load %s; ignoring"
- % cat.aslink(textlink=True))
+ % cat.title(asLink=True, textlink=True))
continue
- if not cat.isCategoryRedirect():
- self.log_text.append(u"* False positive: %s"
- % cat.aslink(textlink=True))
- continue
if cat_title not in record:
# make sure every redirect has a record entry
record[cat_title] = {today: None}
catlist.append(cat)
target = cat.getCategoryRedirectTarget()
- destination = target.titleWithoutNamespace()
+ destination = target.title(withNamespace=False)
destmap.setdefault(target, []).append(cat)
catmap[cat] = destination
## if match.group(1):
@@ -519,26 +430,27 @@
## u"Robot: fixing category redirect parameter
format")
## self.log_text.append(
## u"* Removed category prefix from parameter in %s"
-## % cat.aslink(textlink=True))
-## except wikipedia.Error:
+## % cat.title(asLink=True, textlink=True))
+## except pywikibot.Error:
## self.log_text.append(
## u"* Unable to save changes to %s"
-## % cat.aslink(textlink=True))
+## % cat.title(asLink=True, textlink=True))
# delete record entries for non-existent categories
for cat_name in list(record.keys()):
- if catlib.Category(self.site,
- self.catprefix+cat_name) not in catmap:
+ if pywikibot.Category(
+ pywikibot.Link(self.catprefix+cat_name, self.site)
+ ) not in catmap:
del record[cat_name]
- wikipedia.output(u"")
- wikipedia.output(u"Checking %s destination categories" % len(destmap))
- for dest in pagegenerators.PreloadingGenerator(destmap.keys(), 120):
+ pywikibot.output(u"")
+ pywikibot.output(u"Checking %s destination categories" % len(destmap))
+ for dest in pagegenerators.PreloadingGenerator(destmap.keys()):
if not dest.exists():
for d in destmap[dest]:
problems.append("# %s redirects to %s"
- % (d.aslink(textlink=True),
- dest.aslink(textlink=True)))
+ % (d.title(asLink=True, textlink=True),
+ dest.title(asLink=True, textlink=True)))
catlist.remove(d)
# do a null edit on d to make it appear in the
# "needs repair" category (if this wiki has one)
@@ -549,53 +461,59 @@
if dest in catlist:
for d in destmap[dest]:
# is catmap[dest] also a redirect?
- newcat = catlib.Category(self.site,
- self.catprefix+catmap[dest])
+ newcat = pywikibot.Category(
+ pywikibot.Link(self.catprefix+catmap[dest],
+ self.site)
+ )
while newcat in catlist:
if newcat == d or newcat == dest:
self.log_text.append(u"* Redirect loop from %s"
- % newcat.aslink(textlink=True))
+ % newcat.title(asLink=True,
+ textlink=True))
break
- newcat = catlib.Category(self.site,
- self.catprefix+catmap[newcat])
+ newcat = pywikibot.Category(
+ pywikibot.Link(
+ self.catprefix+catmap[newcat],
+ self.site)
+ )
else:
self.log_text.append(
u"* Fixed double-redirect: %s -> %s -> %s"
- % (d.aslink(textlink=True),
- dest.aslink(textlink=True),
- newcat.aslink(textlink=True)))
+ % (d.title(asLink=True, textlink=True),
+ dest.title(asLink=True, textlink=True),
+ newcat.title(asLink=True, textlink=True)))
oldtext = d.get(get_redirect=True)
# remove the old redirect from the old text,
# leaving behind any non-redirect text
oldtext = template_regex.sub("", oldtext)
newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
% {'redirtemp': template_list[0],
- 'ncat': newcat.titleWithoutNamespace()})
+ 'ncat': newcat.title(withNamespace=False)})
newtext = newtext + oldtext.strip()
try:
d.put(newtext,
- wikipedia.translate(self.site.lang,
+ pywikibot.translate(self.site.lang,
self.dbl_redir_comment),
minorEdit=True)
- except wikipedia.Error, e:
+ except pywikibot.Error, e:
self.log_text.append("** Failed: %s" % str(e))
# only scan those pages that have contents (nonemptypages)
# and that haven't been removed from catlist as broken redirects
cats_to_empty = set(catlist) & set(nonemptypages)
- wikipedia.output(u"")
- wikipedia.output(u"Moving pages out of %s redirected categories."
+ pywikibot.output(u"")
+ pywikibot.output(u"Moving pages out of %s redirected categories."
% len(cats_to_empty))
# thread_limit = int(math.log(len(cats_to_empty), 8) + 1)
# threadpool = ThreadList(limit=1) # disabling multi-threads
for cat in cats_to_empty:
- cat_title = cat.titleWithoutNamespace()
+ cat_title = cat.title(withNamespace=False)
if not self.readyToEdit(cat):
counts[cat_title] = None
self.log_text.append(
u"* Skipping %s; in cooldown period."
- % cat.aslink(textlink=True))
+ % cat.title(asLink=True, textlink=True))
continue
found, moved = self.move_contents(cat_title, catmap[cat],
editSummary=comment)
@@ -612,7 +530,7 @@
cPickle.dump(record, open(datafile, "wb"))
- wikipedia.setAction(wikipedia.translate(self.site.lang,
+ pywikibot.setAction(pywikibot.translate(self.site.lang,
self.maint_comment))
self.log_text.sort()
self.log_page.put(u"\n==%i-%02i-%02iT%02i:%02i:%02iZ==\n"
@@ -629,7 +547,7 @@
def main(*args):
global bot
try:
- a = wikipedia.handleArgs(*args)
+ a = pywikibot.handleArgs(*args)
if len(a) == 1:
raise RuntimeError('Unrecognized argument "%s"' % a[0])
elif a:
@@ -638,7 +556,7 @@
bot = CategoryRedirectBot()
bot.run()
finally:
- wikipedia.stopme()
+ pywikibot.stopme()
if __name__ == "__main__":