Revision: 6480
Author: russblau
Date: 2009-03-03 16:50:18 +0000 (Tue, 03 Mar 2009)

Log Message:
-----------
Ported to new framework

Modified Paths:
--------------
branches/rewrite/pywikibot/scripts/category_redirect.py

Modified: branches/rewrite/pywikibot/scripts/category_redirect.py =================================================================== --- branches/rewrite/pywikibot/scripts/category_redirect.py 2009-03-03 16:49:18 UTC (rev 6479) +++ branches/rewrite/pywikibot/scripts/category_redirect.py 2009-03-03 16:50:18 UTC (rev 6480) @@ -14,8 +14,8 @@ """ __version__ = '$Id$'
-import wikipedia, catlib -import pagegenerators +import pywikibot +from pywikibot import pagegenerators import simplejson import cPickle import math @@ -25,27 +25,16 @@ from datetime import datetime, timedelta
-class APIError(Exception): - """The wiki API returned an error message.""" - - def __init__(self, errordict): - """Save error dict returned by MW API.""" - self.errors = errordict - - def __str__(self): - return "%(code)s: %(info)s" % self.errors - - class CategoryRedirectBot(object): def __init__(self): self.cooldown = 7 # days - self.site = wikipedia.getSite() + self.site = pywikibot.getSite() self.catprefix = self.site.namespace(14)+":" self.log_text = [] self.edit_requests = [] - self.log_page = wikipedia.Page(self.site, + self.log_page = pywikibot.Page(self.site, u"User:%(user)s/category redirect log" % - {'user': self.site.loggedInAs()}) + {'user': self.site.user()})
# Localization:
@@ -147,7 +136,7 @@ 'no': u"Bot for vedlikehold av kategoriomdirigeringer", }
- self.edit_request_text = wikipedia.translate(self.site.lang, + self.edit_request_text = pywikibot.translate(self.site.lang, {'en': u"""\ The following protected pages have been detected as requiring updates to \ category links: @@ -156,7 +145,7 @@ """, })
- self.edit_request_item = wikipedia.translate(self.site.lang, + self.edit_request_item = pywikibot.translate(self.site.lang, {'en': u"* %s is in %s, which is a redirect to %s", })
@@ -166,89 +155,94 @@ Moves subcategories of oldCat as well. oldCat and newCat should be Category objects. If newCat is None, the category will be removed.
- This is a copy of portions of catlib.change_category(), with some - changes. + This is a copy of portions of [old] catlib.change_category(), with + some changes.
""" oldtext = article.get(get_redirect=True, force=True) - newtext = wikipedia.replaceCategoryInPlace(oldtext, oldCat, newCat) + newtext = pywikibot.replaceCategoryInPlace(oldtext, oldCat, newCat) try: # even if no changes, still save the page, in case it needs # an update due to changes in a transcluded template article.put(newtext, comment) if newtext == oldtext: - wikipedia.output( - u'No changes in made in page %s.' % article.aslink()) + pywikibot.output( + u'No changes in made in page %s.' + % article.title(asLink=True) + ) return False return True - except wikipedia.EditConflict: - wikipedia.output( - u'Skipping %s because of edit conflict' % article.aslink()) - except wikipedia.LockedPage: - wikipedia.output(u'Skipping locked page %s' % article.aslink()) - self.edit_requests.append((article.aslink(), - oldCat.aslink(textlink=True), - newCat.aslink(textlink=True))) - except wikipedia.SpamfilterError, error: - wikipedia.output( + except pywikibot.EditConflict: + pywikibot.output( + u'Skipping %s because of edit conflict' + % article.title(asLink=True) + ) + except pywikibot.LockedPage: + pywikibot.output(u'Skipping locked page %s' + % article.title(asLink=True) + ) + self.edit_requests.append( + (article.title(asLink=True, textlink=True), + oldCat.title(asLink=True, textlink=True), + newCat.title(asLink=True, textlink=True) + )) + except pywikibot.SpamfilterError, error: + pywikibot.output( u'Changing page %s blocked by spam filter (URL=%s)' - % (article.aslink(), error.url)) - except wikipedia.NoUsername: - wikipedia.output( + % (article.title(asLink=True), error.url)) + except pywikibot.NoUsername: + pywikibot.output( u"Page %s not saved; sysop privileges required." 
- % article.aslink()) - self.edit_requests.append((article.aslink(textlink=True), - oldCat.aslink(textlink=True), - newCat.aslink(textlink=True))) - except wikipedia.PageNotSaved, error: - wikipedia.output(u"Saving page %s failed: %s" - % (article.aslink(), error.message)) + % article.title(asLink=True)) + self.edit_requests.append( + (article.title(asLink=True, textlink=True), + oldCat.title(asLink=True, textlink=True), + newCat.title(asLink=True, textlink=True) + )) + except pywikibot.PageNotSaved, error: + pywikibot.output(u"Saving page %s failed: %s" + % (article.title(asLink=True), error.message)) return False
def move_contents(self, oldCatTitle, newCatTitle, editSummary): """The worker function that moves pages out of oldCat into newCat""" while True: try: - oldCat = catlib.Category(self.site, - self.catprefix + oldCatTitle) - newCat = catlib.Category(self.site, - self.catprefix + newCatTitle) + oldCat = pywikibot.Category(self.site, + self.catprefix + oldCatTitle) + newCat = pywikibot.Category(self.site, + self.catprefix + newCatTitle)
# Move articles found, moved = 0, 0 - for result in self.query_results(list="categorymembers", - cmtitle=oldCat.title(), - cmprop="title|sortkey", - cmlimit="max"): - found += len(result['categorymembers']) - for item in result['categorymembers']: - article = wikipedia.Page(self.site, item['title']) - changed = self.change_category(article, oldCat, newCat, - comment=editSummary) - if changed: moved += 1 + for article in oldCat.members(): + found += 1 + changed = self.change_category(article, oldCat, newCat, + comment=editSummary) + if changed: moved += 1
# pass 2: look for template doc pages - for result in self.query_results(list="categorymembers", - cmtitle=oldCat.title(), - cmprop="title|sortkey", - cmnamespace="10", - cmlimit="max"): - for item in result['categorymembers']: - doc = wikipedia.Page(self.site, item['title']+"/doc") - try: - old_text = doc.get() - except wikipedia.Error: - continue - changed = self.change_category(doc, oldCat, newCat, - comment=editSummary) - if changed: moved += 1 + for item in pywikibot.data.api.ListGenerator( + "categorymembers", cmtitle=oldCat.title(), + cmprop="title|sortkey", cmnamespace="10", + cmlimit="max"): + doc = pywikibot.Page( + pywikibot.Link(item['title']+"/doc", self.site) + ) + try: + old_text = doc.get() + except pywikibot.Error: + continue + changed = self.change_category(doc, oldCat, newCat, + comment=editSummary) + if changed: moved += 1
if found: - wikipedia.output(u"%s: %s found, %s moved" + pywikibot.output(u"%s: %s found, %s moved" % (oldCat.title(), found, moved)) return (found, moved) - except wikipedia.ServerError: - wikipedia.output(u"Server error: retrying in 5 seconds...") + except pywikibot.ServerError: + pywikibot.output(u"Server error: retrying in 5 seconds...") time.sleep(5) continue except KeyboardInterrupt: @@ -265,83 +259,12 @@ raise RuntimeError return (deadline.strftime(dateformat) > cat.editTime())
- def query_results(self, **data): - """Iterate results from API action=query, using data as parameters.""" - addr = self.site.apipath() - querydata = {'action': 'query', - 'format': 'json', - 'maxlag': str(wikipedia.config.maxlag)} - querydata.update(data) - if not querydata.has_key("action")\ - or not querydata['action'] == 'query': - raise ValueError( - "query_results: 'action' set to value other than 'query'" - ) - waited = 0 - while True: - response, data = self.site.postForm(addr, querydata) - if response.status != 200: - # WARNING: if the server is down, this could - # cause an infinite loop - wikipedia.output(u"HTTP error %i received; retrying..." - % response.status) - time.sleep(5) - continue - if data.startswith(u"unknown_action"): - e = {'code': data[:14], 'info': data[16:]} - raise APIError(e) - try: - result = simplejson.loads(data) - except ValueError: - # if the result isn't valid JSON, there must be a server - # problem. Wait a few seconds and try again - # WARNING: if the server is down, this could - # cause an infinite loop - wikipedia.output(u"Invalid API response received; retrying...") - time.sleep(5) - continue - if type(result) is dict and result.has_key("error"): - if result['error']['code'] == "maxlag": - print "Pausing due to server lag.\r", - time.sleep(5) - waited += 5 - if waited % 30 == 0: - wikipedia.output( - u"(Waited %i seconds due to server lag.)" - % waited) - continue - else: - # raise error - raise APIError(result['error']) - waited = 0 - if type(result) is list: - # query returned no results - return - assert type(result) is dict, \ - "Unexpected result of type '%s' received." 
% type(result) - if "query" not in result: - # query returned no results - return - yield result['query'] - if result.has_key("query-continue"): - assert len(result['query-continue'].keys()) == 1, \ - "More than one query-continue key returned: %s" \ - % result['query-continue'].keys() - query_type = result['query-continue'].keys()[0] - assert (query_type in querydata.keys() - or query_type in querydata.values()), \ - "Site returned unknown query-continue type '%s'"\ - % query_type - querydata.update(result['query-continue'][query_type]) - else: - return - def get_log_text(self): """Rotate log text and return the most recent text.""" LOG_SIZE = 7 # Number of items to keep in active log try: log_text = self.log_page.get() - except wikipedia.NoPage: + except pywikibot.NoPage: log_text = u"" log_items = {} header = None @@ -367,13 +290,15 @@ % (self.site.protocol(), self.site.hostname(), self.site.scriptpath(), - self.log_page.urlname(), + self.log_page.title(asUrl=True), rotate_revid)) return log_text
def run(self): """Run the bot""" - user = self.site.loggedInAs() + global destmap, catlist, catmap + + user = self.site.user() redirect_magicwords = ["redirect"] other_words = self.site.redirect() if other_words: @@ -382,9 +307,9 @@
l = time.localtime() today = "%04d-%02d-%02d" % l[:3] - edit_request_page = wikipedia.Page(self.site, + edit_request_page = pywikibot.Page(self.site, u"User:%(user)s/category edit requests" % locals()) - datafile = wikipedia.config.datafilepath( + datafile = pywikibot.config.datafilepath( "%s-catmovebot-data" % self.site.dbName()) try: inp = open(datafile, "rb") @@ -399,7 +324,7 @@ template_list = self.redir_templates[self.site.family.name ][self.site.lang] except KeyError: - wikipedia.output(u"No redirect templates defined for %s" + pywikibot.output(u"No redirect templates defined for %s" % self.site.sitename()) return # regex to match soft category redirects @@ -419,96 +344,82 @@
# check for hard-redirected categories that are not already marked # with an appropriate template - comment = wikipedia.translate(self.site.lang, self.redir_comment) - for result in self.query_results(list='allpages', - apnamespace='14', # Category: - apfrom='!', - apfilterredir='redirects', - aplimit='max'): - gen = (wikipedia.Page(self.site, page_item['title']) - for page_item in result['allpages']) - # gen yields all hard redirect pages in namespace 14 - for page in pagegenerators.PreloadingGenerator(gen, 120): - if page.isCategoryRedirect(): - # this is already a soft-redirect, so skip it (for now) - continue - target = page.getRedirectTarget() - if target.namespace() == 14: - # this is a hard-redirect to a category page - newtext = (u"{{%(template)s|%(cat)s}}" - % {'cat': target.titleWithoutNamespace(), - 'template': template_list[0]}) - try: - page.put(newtext, comment, minorEdit=True) - self.log_text.append(u"* Added {{tl|%s}} to %s" - % (template_list[0], - page.aslink(textlink=True))) - except wikipedia.Error, e: - self.log_text.append( - u"* Failed to add {{tl|%s}} to %s (%s)" - % (template_list[0], - page.aslink(textlink=True), - e)) - else: - problems.append( - u"# %s is a hard redirect to %s" - % (page.aslink(textlink=True), - target.aslink(textlink=True))) + comment = pywikibot.translate(self.site.lang, self.redir_comment) + for page in pagegenerators.PreloadingGenerator( + self.site.allpages(namespace=14, filterredir=True) + ): + # generator yields all hard redirect pages in namespace 14 + if page.isCategoryRedirect(): + # this is already a soft-redirect, so skip it (for now) + continue + target = page.getRedirectTarget() + if target.namespace() == 14: + # this is a hard-redirect to a category page + newtext = (u"{{%(template)s|%(cat)s}}" + % {'cat': target.title(withNamespace=False), + 'template': template_list[0]}) + try: + page.put(newtext, comment, minorEdit=True) + self.log_text.append(u"* Added {{tl|%s}} to %s" + % (template_list[0], + 
page.title(asLink=True, textlink=True))) + except pywikibot.Error, e: + self.log_text.append( + u"* Failed to add {{tl|%s}} to %s (%s)" + % (template_list[0], + page.title(asLink=True, textlink=True), + e)) + else: + problems.append( + u"# %s is a hard redirect to %s" + % (page.title(asLink=True, textlink=True), + target.title(asLink=True, textlink=True)))
- wikipedia.output("Done checking hard-redirect category pages.") + pywikibot.output("Done checking hard-redirect category pages.")
- comment = wikipedia.translate(self.site.lang, self.move_comment) - scan_data = { - u'action': 'query', - u'list': 'embeddedin', - u'einamespace': '14', # Category: - u'eilimit': 'max', - u'format': 'json' - } + comment = pywikibot.translate(self.site.lang, self.move_comment) counts, destmap, catmap = {}, {}, {} - catlist, catpages, nonemptypages = [], [], [] - target = self.cat_redirect_cat[self.site.family.name][self.site.lang] + catlist, nonemptypages = [], [] + redircat = pywikibot.Category( + pywikibot.Link( + self.cat_redirect_cat[self.site.family.name] + [self.site.lang], + self.site) + )
# get a list of all members of the category-redirect category - for result in self.query_results(generator=u'categorymembers', - gcmtitle=target, - gcmnamespace=u'14', # CATEGORY - gcmlimit=u'max', - prop='info|categoryinfo'): - for catdata in result['pages'].values(): - thispage = wikipedia.Page(self.site, catdata['title']) - catpages.append(thispage) - if 'categoryinfo' in catdata \ - and catdata['categoryinfo']['size'] != "0": - # save those categories that have contents - nonemptypages.append(thispage) + catpages = list(redircat.subcategories())
# preload the category pages for redirected categories - wikipedia.output(u"") - wikipedia.output(u"Preloading %s category redirect pages" + pywikibot.output(u"") + pywikibot.output(u"Preloading %s category redirect pages" % len(catpages)) - for cat in pagegenerators.PreloadingGenerator(catpages, 120): - cat_title = cat.titleWithoutNamespace() + for cat in pagegenerators.PreloadingGenerator(catpages): + catdata = cat.categoryinfo + if "size" in catdata and int(catdata['size']): + # save those categories that have contents + nonemptypages.append(cat) + cat_title = cat.title(withNamespace=False) if "category redirect" in cat_title: self.log_text.append(u"* Ignoring %s" - % cat.aslink(textlink=True)) + % cat.title(asLink=True, textlink=True)) continue try: - text = cat.get(get_redirect=True) - except wikipedia.Error: + if not cat.isCategoryRedirect(): + self.log_text.append(u"* False positive: %s" + % cat.title(asLink=True, + textlink=True)) + continue + except pywikibot.Error: self.log_text.append(u"* Could not load %s; ignoring" - % cat.aslink(textlink=True)) + % cat.title(asLink=True, textlink=True)) continue - if not cat.isCategoryRedirect(): - self.log_text.append(u"* False positive: %s" - % cat.aslink(textlink=True)) - continue if cat_title not in record: # make sure every redirect has a record entry record[cat_title] = {today: None} catlist.append(cat) target = cat.getCategoryRedirectTarget() - destination = target.titleWithoutNamespace() + destination = target.title(withNamespace=False) destmap.setdefault(target, []).append(cat) catmap[cat] = destination ## if match.group(1): @@ -519,26 +430,27 @@ ## u"Robot: fixing category redirect parameter format") ## self.log_text.append( ## u"* Removed category prefix from parameter in %s" -## % cat.aslink(textlink=True)) -## except wikipedia.Error: +## % cat.title(asLink=True, textlink=True)) +## except pywikibot.Error: ## self.log_text.append( ## u"* Unable to save changes to %s" -## % cat.aslink(textlink=True)) +## % 
cat.title(asLink=True, textlink=True))
# delete record entries for non-existent categories for cat_name in list(record.keys()): - if catlib.Category(self.site, - self.catprefix+cat_name) not in catmap: + if pywikibot.Category( + pywikibot.Link(self.catprefix+cat_name, self.site) + ) not in catmap: del record[cat_name]
- wikipedia.output(u"") - wikipedia.output(u"Checking %s destination categories" % len(destmap)) - for dest in pagegenerators.PreloadingGenerator(destmap.keys(), 120): + pywikibot.output(u"") + pywikibot.output(u"Checking %s destination categories" % len(destmap)) + for dest in pagegenerators.PreloadingGenerator(destmap.keys()): if not dest.exists(): for d in destmap[dest]: problems.append("# %s redirects to %s" - % (d.aslink(textlink=True), - dest.aslink(textlink=True))) + % (d.title(asLink=True, textlink=True), + dest.title(asLink=True, textlink=True))) catlist.remove(d) # do a null edit on d to make it appear in the # "needs repair" category (if this wiki has one) @@ -549,53 +461,59 @@ if dest in catlist: for d in destmap[dest]: # is catmap[dest] also a redirect? - newcat = catlib.Category(self.site, - self.catprefix+catmap[dest]) + newcat = pywikibot.Category( + pywikibot.Link(self.catprefix+catmap[dest], + self.site) + ) while newcat in catlist: if newcat == d or newcat == dest: self.log_text.append(u"* Redirect loop from %s" - % newcat.aslink(textlink=True)) + % newcat.title(asLink=True, + textlink=True)) break - newcat = catlib.Category(self.site, - self.catprefix+catmap[newcat]) + newcat = pywikibot.Category( + pywikibot.Link( + self.catprefix+catmap[newcat], + self.site) + ) else: self.log_text.append( u"* Fixed double-redirect: %s -> %s -> %s" - % (d.aslink(textlink=True), - dest.aslink(textlink=True), - newcat.aslink(textlink=True))) + % (d.title(asLink=True, textlink=True), + dest.title(asLink=True, textlink=True), + newcat.title(asLink=True, textlink=True))) oldtext = d.get(get_redirect=True) # remove the old redirect from the old text, # leaving behind any non-redirect text oldtext = template_regex.sub("", oldtext) newtext = (u"{{%(redirtemp)s|%(ncat)s}}" % {'redirtemp': template_list[0], - 'ncat': newcat.titleWithoutNamespace()}) + 'ncat': newcat.title(withNamespace=False)}) newtext = newtext + oldtext.strip() try: d.put(newtext, - 
wikipedia.translate(self.site.lang, + pywikibot.translate(self.site.lang, self.dbl_redir_comment), minorEdit=True) - except wikipedia.Error, e: + except pywikibot.Error, e: self.log_text.append("** Failed: %s" % str(e))
# only scan those pages that have contents (nonemptypages) # and that haven't been removed from catlist as broken redirects cats_to_empty = set(catlist) & set(nonemptypages) - wikipedia.output(u"") - wikipedia.output(u"Moving pages out of %s redirected categories." + pywikibot.output(u"") + pywikibot.output(u"Moving pages out of %s redirected categories." % len(cats_to_empty)) # thread_limit = int(math.log(len(cats_to_empty), 8) + 1) # threadpool = ThreadList(limit=1) # disabling multi-threads
for cat in cats_to_empty: - cat_title = cat.titleWithoutNamespace() + cat_title = cat.title(withNamespace=False) if not self.readyToEdit(cat): counts[cat_title] = None self.log_text.append( u"* Skipping %s; in cooldown period." - % cat.aslink(textlink=True)) + % cat.title(asLink=True, textlink=True)) continue found, moved = self.move_contents(cat_title, catmap[cat], editSummary=comment) @@ -612,7 +530,7 @@
cPickle.dump(record, open(datafile, "wb"))
- wikipedia.setAction(wikipedia.translate(self.site.lang, + pywikibot.setAction(pywikibot.translate(self.site.lang, self.maint_comment)) self.log_text.sort() self.log_page.put(u"\n==%i-%02i-%02iT%02i:%02i:%02iZ==\n" @@ -629,7 +547,7 @@ def main(*args): global bot try: - a = wikipedia.handleArgs(*args) + a = pywikibot.handleArgs(*args) if len(a) == 1: raise RuntimeError('Unrecognized argument "%s"' % a[0]) elif a: @@ -638,7 +556,7 @@ bot = CategoryRedirectBot() bot.run() finally: - wikipedia.stopme() + pywikibot.stopme()
if __name__ == "__main__":