Revision: 7849 Author: russblau Date: 2009-12-31 17:56:33 +0000 (Thu, 31 Dec 2009)
Log Message: ----------- Port redirect.py to rewrite branch; to do this, some options had to be removed and some command-line arguments changed. Because this is a port rather than a newly-written script, it may contain some unused variables/methods and could benefit from refactoring.
Added Paths: ----------- branches/rewrite/scripts/redirect.py
Added: branches/rewrite/scripts/redirect.py =================================================================== --- branches/rewrite/scripts/redirect.py (rev 0) +++ branches/rewrite/scripts/redirect.py 2009-12-31 17:56:33 UTC (rev 7849) @@ -0,0 +1,792 @@ +# -*- coding: utf-8 -*- +""" +Script to resolve double redirects, and to delete broken redirects. Requires +access to MediaWiki's maintenance pages or to a XML dump file. Delete +function requires adminship. + +Syntax: + + python redirect.py action [-arguments ...] + +where action can be one of these: + +double Fix redirects which point to other redirects +broken Delete redirects where targets don't exist. Requires adminship. +both Both of the above. Permitted only with -api. Implies -api. + +and arguments can be: + +-moves Use the page move log to find double-redirect candidates. Only + works with action "double". + +-namespace:n Namespace to process. Can be given multiple times, for several + namespaces. If omitted, only the main (article) namespace is + treated. + +-offset:n With -moves, the number of hours ago to start scanning moved + pages. Otherwise, ignored. + +-start:title The starting page title in each namespace. Page need not exist. + +-until:title The possible last page title in each namespace. Page needs not + exist. + +-total:n The maximum count of redirects to work upon. If omitted, there + is no limit. + +-always Don't prompt you for each replacement. + +""" + +# XML not yet implemented: deleted help text follows +##-xml Retrieve information from a local XML dump +## (http://download.wikimedia.org). Argument can also be given as +## "-xml:filename.xml". Cannot be used with -api or -moves. +## If neither of -xml -api -moves is given, info will be loaded +## from a special page of the live wiki. + +# +# (C) Daniel Herding, 2004. +# Purodha Blissenbach, 2009. +# +# Distributed under the terms of the MIT license. +# +# +import pywikibot +from pywikibot import config +# import xmlreader +import re, sys + +__version__='$Id: redirect.py 7789 2009-12-17 19:20:12Z xqt $' + +# Summary message for fixing double redirects +msg_double={ + 'als':u'Bötli: Uflösig vun de doppleti Wyterleitig zue %s', + 'ar': u'روبوت: تصليح تحويلة مزدوجة → %s', + 'bat-smg': u'Robots: Taisuoms dvėgobs paradresavėms → %s', + 'be-x-old': u'Робат: выпраўленьне падвойнага перанакіраваньня → %s', + 'br': u'Kempennet adkas doubl gant robot → %s', + 'cs': u'Robot opravil dvojité přesměrování → %s', + 'de': u'Bot: Korrigiere doppelte Weiterleitung zu %s', + 'en': u'Robot: Fixing double redirect to %s', + 'es': u'Robot: Arreglando doble redirección → %s', + 'fa': u'ربات:اصلاح تغییر مسیر دوتایی → %s', + 'fi': u'Botti korjasi kaksinkertaisen ohjauksen → %s', + 'fr': u'Robot: répare double redirection à %s', + 'ga': u'Róbó: Ag socrú athsheolta dúbailte → %s', + 'he': u'בוט: מתקן הפניה כפולה → %s', + 'hr': u'Bot: Popravak dvostrukih preusmjeravanja → %s', + 'ia': u'Robot: reparation de duple redirection → %s', + 'is': u'Vélmenni: Lagfæri tvöfalda tilvísun → %s', + 'it': u'Bot: Sistemo i redirect doppi a %s', + 'ja': u'ロボットによる: 二重リダイレクト修正 → %s', + 'ka': u'რობოტი: ორმაგი გადამისამართების გასწორება → %s', + 'ko': u'로봇: 이중 넘겨주기 수정 → %s', + 'kk': u'Бот: Шынжырлы айдатуды түзетті → %s', + 'ksh':u'Bot: [[special:doubleredirects|Dubbel Ömlëijdong]] fottjemaat → %s', + 'lb': u'Bot: Duebel Viruleedung gefléckt → %s', + 'lt': u'robotas: Taisomas dvigubas peradresavimas → %s', + 'mk': u'Бот: Исправка на двојни пренасочувања → %s', + 'nds':u'Bot: Dubbelte Wiederleiden rutmakt → %s', + 'nl': u'Bot: dubbele doorverwijzing gecorrigeerd aan %s', + 'nn': u'robot: retta dobbel omdirigering → %s', + 'no': u'bot: Retter dobbel omdirigering → %s', + 'pl': u'Robot naprawia podwójne przekierowanie → %s', + 'pt': u'Bot: Corrigido duplo redirecionamento → %s', + 'ru': u'Робот: исправление двойного перенаправления → %s', + 'sr': u'Бот: Поправка дуплих преусмерења → %s', + 'sv': u'Robot: Rättar dubbel omdirigering → %s', + 'szl':u'Robot sprowjo tuplowane przekerowańa → %s', + 'th': u'โรบอต: แก้หน้าเปลี่ยนทางซ้ำซ้อน → %s', + 'tr': u'Bot değişikliği: Yönlendirmeye olan yönlendirme → %s', + 'uk': u'Робот: виправлення подвійного перенаправлення → %s', + 'war':u'Robot: Gin-ayad in nagduduha nga redirek → %s', + 'yi': u'באט: פארראכטן פארטאפלטע ווייטערפירונג → %s', + 'zh': u'機器人:修正雙重重定向 → %s', + 'zh-yue': u'機械人:拉直連串跳轉 → %s', + 'zh-classical': u'僕:復修渡口 → %s', +} + +# Reason for deleting broken redirects +reason_broken={ + 'ar': u'روبوت: هدف التحويلة غير موجود', + 'be-x-old': u'Робат: мэта перанакіраваньня не існуе', + 'cs': u'Přerušené přesměrování', + 'de': u'Bot: Weiterleitungsziel existiert nicht', + 'en': u'[[WP:CSD#G8|G8]]: [[Wikipedia:Redirect|Redirect]] to a deleted or non-existent page', + 'es': u'Robot: La página a la que redirige no existe', + 'fa': u'ربات:تغییرمسیر مقصد ندارد', + 'fi': u'Botti: Ohjauksen kohdesivua ei ole olemassa', + 'fr': u'Robot : Cible du redirect inexistante', + 'ga': u'Róbó : Targaid athsheoladh ar iarraidh', + 'he': u'בוט: יעד ההפניה אינו קיים', + 'it': u'Bot: Il redirect indirizza ad una pagina inesistente', + 'ja': u'ロボットによる:リダイレクトの目標は存在しませんでした', + 'ka': u'რობოტი: გადამისამართებული გვერდი არ არსებობს', + 'ko': u'로봇: 끊긴 넘겨주기', + 'kk': u'Бот: Айдату нысанасы жоқ болды', + 'ksh':u'Bot: Dė [[Special:BrokenRedirects|Ömlëijdong jingk ennet Liiere]]', + 'lt': u'robotas: Peradresavimas į niekur', + 'nds':u'Bot: Kaputte Wiederleiden rutmakt', + 'nl': u'Bot: doelpagina doorverwijzing bestaat niet', + 'nn': u'robot: målet for omdirigeringa eksisterer ikkje', + 'no': u'robot: målet for omdirigeringen eksisterer ikke', + 'pl': u'Robot: cel przekierowania nie istnieje', + 'pt': u'Bot: Redirecionamento não existe', + 'ru': u'Робот: перенаправление в никуда', + 'sr': u'Бот: Преусмерење не постоји', + 'th': u'โรบอต: หน้าเปลี่ยนทางเสีย', + 'tr': u'Bot değişikliği: Var olmayan sayfaya olan yönlendirme', + 'war':u'Robot: Waray dida an karadto-an han redirek', + 'yi': u'באט: ווײַטערפֿירן ציל עקזיסטירט נישט', + 'zh': u'機器人:該重定向的目標不存在', + 'zh-yue': u'機械人:跳轉目標唔存在', +} + +# Summary message for putting broken redirect to speedy delete +sd_tagging_sum = { + 'ar': u'روبوت: وسم للحذف السريع', + 'cs': u'Robot označil ke smazání', + 'en': u'Robot: Tagging for speedy deletion', + 'ga': u'Róbó: Ag maircáil le luas-scrios', + 'it': u'Bot: +Da cancellare subito', + 'ja': u'ロボットによる:迷子のリダイレクトを即時削除へ', + 'ksh':u'Bot: Di Ömlëijdong jeiht noh nörjendwoh.', + 'nds':u'Bot: Kaputte Wiederleiden ward nich brukt', + 'nl': u'Bot: gemarkeerd voor snelle verwijdering', + 'war':u'Robot: Nautod o nagbinalikbalik nga redirek', + 'zh': u'機器人: 將損壞的重定向提報快速刪除', +} + +# Insert deletion template into page with a broken redirect +sd_template = { + 'ar': u'{{شطب|تحويلة مكسورة}}', + 'cs': u'{{smazat|přerušené přesměrování}}', + 'en': u'{{db-r1}}', + 'ga': u'{{scrios|Athsheoladh briste}}', + 'it': u'{{Cancella subito|9}}', + 'ja': u'{{即時削除|壊れたリダイレクト}}', + 'ksh':u'{{Schmieß fott}}Di Ömlëijdong jeiht noh nörjendwoh hen.<br />--~~~~~\n\n', + 'nds':u'{{delete}}Kaputte Wiederleiden, wat nich brukt ward.<br />--~~~~\n\n', + 'war':u'{{delete}}Nautod o nagbinalikbalik nga redirek.--~~~~\n\n', + 'zh': u'{{delete|R1}}', +} + +class RedirectGenerator: + def __init__(self, xmlFilename=None, namespaces=[], offset=-1, + use_move_log=False, use_api=False, start=None, until=None, + number=None): + self.site = pywikibot.getSite() +## self.xmlFilename = xmlFilename + self.namespaces = namespaces + if use_api and self.namespaces == []: + self.namespaces = [ 0 ] + self.offset = offset + self.use_move_log = use_move_log + self.use_api = use_api + self.api_start = start + self.api_until = until + self.api_number = number + +## def get_redirects_from_dump(self, alsoGetPageTitles=False): +## ''' +## Load a local XML dump file, look at all pages which have the +## redirect flag set, and find out where they're pointing at. Return +## a dictionary where the redirect names are the keys and the redirect +## targets are the values. +## ''' +## xmlFilename = self.xmlFilename +## redict = {} +## # open xml dump and read page titles out of it +## dump = xmlreader.XmlDump(xmlFilename) +## redirR = self.site.redirectRegex() +## readPagesCount = 0 +## if alsoGetPageTitles: +## pageTitles = set() +## for entry in dump.parse(): +## readPagesCount += 1 +## # always print status message after 10000 pages +## if readPagesCount % 10000 == 0: +## pywikibot.output(u'%i pages read...' % readPagesCount) +## if len(self.namespaces) > 0: +## if pywikibot.Page(self.site, entry.title).namespace() \ +## not in self.namespaces: +## continue +## if alsoGetPageTitles: +## pageTitles.add(entry.title.replace(' ', '_')) +## +## m = redirR.match(entry.text) +## if m: +## target = m.group(1) +## # There might be redirects to another wiki. Ignore these. +## for code in self.site.family.langs.keys(): +## if target.startswith('%s:' % code) \ +## or target.startswith(':%s:' % code): +## if code == self.site.language(): +## # link to our wiki, but with the lang prefix +## target = target[(len(code)+1):] +## if target.startswith(':'): +## target = target[1:] +## else: +## pywikibot.output( +## u'NOTE: Ignoring %s which is a redirect to %s:' +## % (entry.title, code)) +## target = None +## break +## # if the redirect does not link to another wiki +## if target: +## source = entry.title.replace(' ', '_') +## target = target.replace(' ', '_') +## # remove leading and trailing whitespace +## target = target.strip('_') +## # capitalize the first letter +## if not pywikibot.getSite().nocapitalize: +## source = source[:1].upper() + source[1:] +## target = target[:1].upper() + target[1:] +## if '#' in target: +## target = target[:target.index('#')].rstrip("_") +## if '|' in target: +## pywikibot.output( +## u'HINT: %s is a redirect with a pipelink.' +## % entry.title) +## target = target[:target.index('|')].rstrip("_") +## if target: # in case preceding steps left nothing +## redict[source] = target +## if alsoGetPageTitles: +## return redict, pageTitles +## else: +## return redict +## + def get_redirect_pages_via_api(self): + """Return generator that yields Pages that are redirects.""" + for ns in self.namespaces: + done = False + gen = self.site.allpages(start=self.api_start, + namespace=ns, + filterredir=True) + if self.api_number: + gen.set_maximum_items(self.api_number) + for p in gen: + done = self.api_until \ + and p.title(withNamespace=False) >= self.api_until + if done: + return + yield p + + def _next_redirect_group(self): + """ + Return a generator that retrieves pageids from the API 500 at a time + and yields them as a list + """ + apiQ = [] + for page in self.get_redirect_pages_via_api(): + apiQ.append(str(page._pageid)) + if len(apiQ) >= 500: + yield apiQ + apiQ = [] + if apiQ: + yield apiQ + + def get_redirects_via_api(self, maxlen=8): + """ + Return a generator that yields tuples of data about redirect Pages: + 0 - page title of a redirect page + 1 - type of redirect: + 0 - broken redirect, target page title missing + 1 - normal redirect, target page exists and is not a + redirect + 2..maxlen - start of a redirect chain of that many redirects + (currently, the API seems not to return sufficient + data to make these return values possible, but + that may change) + maxlen+1 - start of an even longer chain, or a loop + (currently, the API seems not to return sufficient + data to allow this return values, but that may + change) + None - start of a redirect chain of unknown length, or loop + 2 - target page title of the redirect, or chain (may not exist) + 3 - target page of the redirect, or end of chain, or page title where + chain or loop detecton was halted, or None if unknown + """ + for apiQ in self._next_redirect_group(): + gen = pywikibot.data.api.Request(action="query", redirects="", + pageids=apiQ) + data = gen.submit() + if 'error' in data: + raise RuntimeError("API query error: %s" % data) + if data == [] or "query" not in data: + raise RuntimeError("No results given.") + redirects = {} + pages = {} + redirects = dict((x['from'], x['to']) + for x in data['query']['redirects']) + + for pagetitle in data['query']['pages'].values(): + if 'missing' in pagetitle and 'pageid' not in pagetitle: + pages[pagetitle['title']] = False + else: + pages[pagetitle['title']] = True + for redirect in redirects: + target = redirects[redirect] + result = 0 + final = None + try: + if pages[target]: + final = target + try: + while result <= maxlen: + result += 1 + final = redirects[final] + # result = None + except KeyError: + pass + except KeyError: + result = None + pass + yield (redirect, result, target, final) + + def retrieve_broken_redirects(self): + if self.use_api: + count = 0 + for (pagetitle, type, target, final) \ + in self.get_redirects_via_api(maxlen=2): + if type == 0: + yield pagetitle + if self.api_number: + count += 1 + if count >= self.api_number: + break + +## elif self.xmlFilename == None: +## # retrieve information from the live wiki's maintenance page +## # broken redirect maintenance page's URL +## path = self.site.broken_redirects_address(default_limit=False) +## pywikibot.output(u'Retrieving special page...') +## maintenance_txt = self.site.getUrl(path) +## +## # regular expression which finds redirects which point to a +## # non-existing page inside the HTML +## Rredir = re.compile('<li><a href=".+?" title="(.*?)"') +## +## redir_names = Rredir.findall(maintenance_txt) +## pywikibot.output(u'Retrieved %d redirects from special page.\n' +## % len(redir_names)) +## for redir_name in redir_names: +## yield redir_name +## else: +## # retrieve information from XML dump +## pywikibot.output( +## u'Getting a list of all redirects and of all page titles...') +## redirs, pageTitles = self.get_redirects_from_dump( +## alsoGetPageTitles=True) +## for (key, value) in redirs.iteritems(): +## if value not in pageTitles: +## yield key + + def retrieve_double_redirects(self): + if self.use_move_log: + for redir_page in self.get_moved_pages_redirects(): + yield redir_page.title() + return + else: + count = 0 + for (pagetitle, type, target, final) \ + in self.get_redirects_via_api(maxlen=2): + if type != 0 and type != 1: + yield pagetitle + if self.api_number: + count += 1 + if count >= self.api_number: + break + +## elif self.xmlFilename == None: +## # retrieve information from the live wiki's maintenance page +## # double redirect maintenance page's URL +### pywikibot.config.special_page_limit = 1000 +## path = self.site.double_redirects_address(default_limit = False) +## pywikibot.output(u'Retrieving special page...') +## maintenance_txt = self.site.getUrl(path) +## +## # regular expression which finds redirects which point to +## # another redirect inside the HTML +## Rredir = re.compile('<li><a href=".+?" title="(.*?)">') +## redir_names = Rredir.findall(maintenance_txt) +## pywikibot.output(u'Retrieved %i redirects from special page.\n' +## % len(redir_names)) +## for redir_name in redir_names: +## yield redir_name +## else: +## redict = self.get_redirects_from_dump() +## num = 0 +## for (key, value) in redict.iteritems(): +## num += 1 +## # check if the value - that is, the redirect target - is a +## # redirect as well +## if num > self.offset and value in redict: +## yield key +## pywikibot.output(u'\nChecking redirect %i of %i...' +## % (num + 1, len(redict))) + + def get_moved_pages_redirects(self): + '''generate redirects to recently-moved pages''' + # this will run forever, until user interrupts it + import datetime + + if self.offset <= 0: + self.offset = 1 + start = datetime.datetime.utcnow() \ + - datetime.timedelta(0, self.offset*3600) + # self.offset hours ago + offset_time = start.strftime("%Y%m%d%H%M%S") + + move_gen = self.site.logevents(logtype="move", start=offset_time) + if self.api_number: + move_gen.set_maximum_items(self.api_number) + for logentry in move_gen: + moved_page = logentry.title() + try: + if not moved_page.isRedirectPage(): + continue + except pywikibot.BadTitle: + continue + except pywikibot.ServerError: + continue + # moved_page is now a redirect, so any redirects pointing + # to it need to be changed + try: + for page in moved_page.getReferences(follow_redirects=True, + redirectsOnly=True): + yield page + except pywikibot.NoPage: + # original title must have been deleted after move + continue + + +class RedirectRobot: + def __init__(self, action, generator, always=False, number=None): + self.site = pywikibot.getSite() + self.action = action + self.generator = generator + self.always = always + self.number = number + self.exiting = False + + def prompt(self, question): + if not self.always: + choice = pywikibot.inputChoice(question, + ['Yes', 'No', 'All', 'Quit'], + ['y', 'N', 'a', 'q'], 'N') + if choice == 'n': + return False + elif choice == 'q': + self.exiting = True + return False + elif choice == 'a': + self.always = True + return True + + def delete_broken_redirects(self): + # get reason for deletion text + reason = pywikibot.translate(self.site, reason_broken) + for redir_name in self.generator.retrieve_broken_redirects(): + self.delete_1_broken_redirect( redir_name, reason) + if self.exiting: + break + + def delete_1_broken_redirect(self, redir_name, reason): + redir_page = pywikibot.Page(self.site, redir_name) + # Show the title of the page we're working on. + # Highlight the title in purple. + pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % redir_page.title()) + try: + targetPage = redir_page.getRedirectTarget() + except pywikibot.IsNotRedirectPage: + pywikibot.output(u'%s is not a redirect.' % redir_page.title()) + except pywikibot.NoPage: + pywikibot.output(u'%s doesn't exist.' % redir_page.title()) + else: + try: + targetPage.get() + except pywikibot.NoPage: + if self.prompt( + u'Redirect target %s does not exist. Do you want to delete %s?' + % (targetPage.title(asLink=True), + redir_page.title(asLink=True))): + try: + redir_page.delete(reason, prompt = False) + except pywikibot.NoUsername: + if targetPage.site().lang in sd_template \ + and targetPage.site().lang in sd_tagging_sum: + pywikibot.output( + u"No sysop in user-config.py, put page to speedy deletion.") + content = redir_page.get(get_redirect=True) + content = pywikibot.translate( + targetPage.site().lang, + sd_template)+"\n"+content + summary = pywikibot.translate( + targetPage.site().lang, + sd_tagging_sum) + redir_page.put(content, summary) + + except pywikibot.IsRedirectPage: + pywikibot.output( + u'Redirect target %s is also a redirect! Won't delete anything.' + % targetPage.title(asLink=True)) + else: + #we successfully get the target page, meaning that + #it exists and is not a redirect: no reason to touch it. + pywikibot.output( + u'Redirect target %s does exist! Won't delete anything.' + % targetPage.title(asLink=True)) + pywikibot.output(u'') + + def fix_double_redirects(self): + for redir_name in self.generator.retrieve_double_redirects(): + self.fix_1_double_redirect(redir_name) + if self.exiting: + break + + def fix_1_double_redirect(self, redir_name): + redir = pywikibot.Page(self.site, redir_name) + # Show the title of the page we're working on. + # Highlight the title in purple. + pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % redir.title()) + newRedir = redir + redirList = [] # bookkeeping to detect loops + while True: + redirList.append(u'%s:%s' % (newRedir.site().lang, + newRedir.title(withSection=False))) + try: + targetPage = newRedir.getRedirectTarget() + except pywikibot.IsNotRedirectPage: + if len(redirList) == 1: + pywikibot.output(u'Skipping: Page %s is not a redirect.' + % redir.title(asLink=True)) + break #do nothing + elif len(redirList) == 2: + pywikibot.output( + u'Skipping: Redirect target %s is not a redirect.' + % newRedir.title(asLink=True)) + break # do nothing + except pywikibot.SectionError: + pywikibot.output( + u'Warning: Redirect target section %s doesn't exist.' + % newRedir.title(asLink=True)) + except pywikibot.BadTitle, e: + # str(e) is in the format 'BadTitle: [[Foo]]' + pywikibot.output( + u'Warning: Redirect target %s is not a valid page title.' + % str(e)[10:]) + #sometimes this error occures. Invalid Title starting with a '#' + except pywikibot.InvalidTitle, err: + pywikibot.output(u'Warning: %s' % err) + break + except pywikibot.NoPage: + if len(redirList) == 1: + pywikibot.output(u'Skipping: Page %s does not exist.' + % redir.title(asLink=True)) + break + else: + if self.always: + pywikibot.output( + u"Skipping: Redirect target %s doesn't exist." + % newRedir.title(asLink=True)) + break # skip if automatic + else: + pywikibot.output( + u"Warning: Redirect target %s doesn't exist." + % newRedir.title(asLink=True)) + except pywikibot.ServerError: + pywikibot.output(u'Skipping: Server Error') + break + else: + pywikibot.output( + u' Links to: %s.' + % targetPage.title(asLink=True)) + if targetPage.site() != self.site: + pywikibot.output( + u'Warning: redirect target (%s) is on a different site.' + % (targetPage.title(asLink=True))) + if self.always: + break # skip if automatic + # watch out for redirect loops + if redirList.count(u'%s:%s' + % (targetPage.site().lang, + targetPage.title(withSection=False)) + ) > 0: + pywikibot.output( + u'Warning: Redirect target %s forms a redirect loop.' + % targetPage.title(asLink=True)) + break ###xqt doesn't work. edits twice! + try: + content = targetPage.get(get_redirect=True) + except pywikibot.SectionError: + content = pywikibot.Page( + targetPage.site(), + targetPage.title(withSection=False) + ).get(get_redirect=True) + if targetPage.site().lang in sd_template \ + and targetPage.site().lang in sd_tagging_sum: + pywikibot.output(u"Tagging redirect for deletion") + # Delete the two redirects + content = pywikibot.translate( + targetPage.site().lang, + sd_template)+"\n"+content + summ = pywikibot.translate(targetPage.site().lang, + sd_tagging_sum) + targetPage.put(content, summ) + redir.put(content, summ) + break # TODO Better implement loop redirect + else: + newRedir = targetPage + continue + try: + oldText = redir.get(get_redirect=True) + except pywikibot.BadTitle: + pywikibot.output(u"Bad Title Error") + break + text = self.site.redirectRegex().sub( + '#%s %s' % + (self.site.redirect( True ), + targetPage.title(asLink=True)), + oldText) + if text == oldText: + break + summary = pywikibot.translate(self.site, msg_double)\ + % targetPage.title(asLink=True) + pywikibot.showDiff(oldText, text) + if self.prompt(u'Do you want to accept the changes?'): + try: + redir.put(text, summary) + except pywikibot.LockedPage: + pywikibot.output(u'%s is locked.' % redir.title()) + except pywikibot.SpamfilterError, error: + pywikibot.output( + u"Saving page [[%s]] prevented by spam filter: %s" + % (redir.title(), error.url)) + except pywikibot.PageNotSaved, error: + pywikibot.output(u"Saving page [[%s]] failed: %s" + % (redir.title(), error)) + except pywikibot.NoUsername: + pywikibot.output( + u"Page [[%s]] not saved; sysop privileges required." + % redir.title()) + except pywikibot.Error, error: + pywikibot.output( + u"Unexpected error occurred trying to save [[%s]]: %s" + % (redir.title(), error)) + break + + def fix_double_or_delete_broken_redirects(self): + # TODO: part of this should be moved to generator, the rest merged into self.run() + # get reason for deletion text + delete_reason = pywikibot.translate(self.site, reason_broken) + count = 0 + for (redir_name, code, target, final)\ + in self.generator.get_redirects_via_api(maxlen=2): + if code == 1: + continue + elif code == 0: + self.delete_1_broken_redirect(redir_name, delete_reason) + count += 1 + else: + self.fix_1_double_redirect(redir_name) + count += 1 + if self.exiting or (self.number and count >= self.number): + break + + def run(self): + # TODO: make all generators return a redirect type indicator, + # thus make them usable with 'both' + if self.action == 'double': + self.fix_double_redirects() + elif self.action == 'broken': + self.delete_broken_redirects() + elif self.action == 'both': + self.fix_double_or_delete_broken_redirects() + +def main(*args): + # read command line parameters + # what the bot should do (either resolve double redirs, or delete broken + # redirs) + action = None + # where the bot should get his infos from (either None to load the + # maintenance special page from the live wiki, or the filename of a + # local XML dump file) + xmlFilename = None + # Which namespace should be processed when using a XML dump + # default to -1 which means all namespaces will be processed + namespaces = [] + # at which redirect shall we start searching double redirects again + # (only with dump); default to -1 which means all redirects are checked + offset = -1 + moved_pages = False + api = True # rewrite always uses api, probably should get rid of this + start = '' + until = '' + number = None + always = False + for arg in pywikibot.handleArgs(*args): + if arg == 'double' or arg == 'do': + action = 'double' + elif arg == 'broken' or arg == 'br': + action = 'broken' + elif arg == 'both': + action = 'both' + elif arg.startswith('-xml'): + if len(arg) == 4: + xmlFilename = pywikibot.input( + u'Please enter the XML dump's filename: ') + else: + xmlFilename = arg[5:] + elif arg.startswith('-moves'): + moved_pages = True + elif arg.startswith('-namespace:'): + ns = arg[11:] + if ns == '': + ## "-namespace:" does NOT yield -namespace:0 further down the road! + ns = pywikibot.input( + u'Please enter a namespace by its number: ') +# u'Please enter a namespace by its name or number: ') +# TODO! at least for some generators. + if ns == '': + ns = '0' + try: + ns = int(ns) + except ValueError: +#-namespace:all Process all namespaces. Works only with the API read interface. + pass + if not ns in namespaces: + namespaces.append(ns) + elif arg.startswith('-offset:'): + offset = int(arg[8:]) + elif arg.startswith('-start:'): + start = arg[7:] + elif arg.startswith('-until:'): + until = arg[7:] + elif arg.startswith('-total:'): + number = int(arg[8:]) + elif arg == '-always': + always = True + else: + pywikibot.output(u'Unknown argument: %s' % arg) + + if xmlFilename: + pywikibot.output("Sorry, xmlreader is not yet implemented in rewrite", + level=pywikibot.ERROR) + elif not action: # or (xmlFilename and moved_pages) + # or (api and xmlFilename): + pywikibot.showHelp('redirect') + else: + gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages, + api, start, until, number) + bot = RedirectRobot(action, gen, always, number) + bot.run() + +if __name__ == '__main__': + try: + main() + finally: + pywikibot.stopme()
pywikipedia-svn@lists.wikimedia.org