Revision: 4419
Author: leogregianin
Date: 2007-10-04 18:06:39 +0000 (Thu, 04 Oct 2007)
Log Message:
-----------
This script is intended to fix all redirect links on featured pages, or on only one page, of each wiki.
Added Paths:
-----------
    trunk/pywikipedia/fixing_redirects.py
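The script is run like the other bot scripts in the package; typical invocations (the page title below is just a placeholder) would be:

    python fixing_redirects.py -featured
    python fixing_redirects.py -page:Some_article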
Added: trunk/pywikipedia/fixing_redirects.py
===================================================================
--- trunk/pywikipedia/fixing_redirects.py	(rev 0)
+++ trunk/pywikipedia/fixing_redirects.py	2007-10-04 18:06:39 UTC (rev 4419)
@@ -0,0 +1,156 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+This script is intended to fix all redirect
+links on featured pages, or on only one page, of each wiki.
+
+Can be used with:
+-featured         Run over featured pages
+-page:XXX         Run over only one page
+
+"""
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id: disambredir.py 4407 2007-10-03 17:27:14Z leogregianin $'
+#
+import wikipedia
+import pagegenerators
+import re, sys
+
+msg = {
+    'en': u'Bot: Fixing redirects',
+    'pt': u'Bot: Arrumando redirects',
+    }
+
+featured_articles = {
+    'de': u'Wikipedia:Exzellente_Artikel',
+    'en': u'Wikipedia:Featured_articles',
+    'es': u'Wikipedia:Artículos_destacados',
+    'fr': u'Wikipédia:Articles_de_qualité',
+    'it': u'Wikipedia:Articoli_in_vetrina',
+    'nl': u'Wikipedia:Etalage',
+    'sv': u'Wikipedia:Utvalda_artiklar',
+    'pt': u'Wikipedia:Os_melhores_artigos',
+    }
+
+def firstcap(string):
+    return string[0].upper()+string[1:]
+
+def treat(text, linkedPage, targetPage):
+    """
+    Based on the method of the same name in solve_disambiguation.py
+    """
+    # match any wikilink of the form [[title#section|label]]trail
+    linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
+    curpos = 0
+    # This loop will run until we have finished the current page
+    while True:
+        m = linkR.search(text, pos = curpos)
+        if not m:
+            break
+        # Make sure that next time around we will not find this same hit.
+        curpos = m.start() + 1
+        # ignore interwiki links and links to sections of the same page
+        if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
+            continue
+        else:
+            actualLinkPage = wikipedia.Page(page.site(), m.group('title'))
+            # Check whether the link found is to linkedPage.
+            if actualLinkPage != linkedPage:
+                continue
+
+        # how many bytes should be displayed around the current link
+        context = 15
+        # at the beginning of the link, start red color.
+        # at the end of the link, reset the color to default
+        wikipedia.output(text[max(0, m.start() - context) : m.start()] + '\03{lightred}' + text[m.start() : m.end()] + '\03{default}' + text[m.end() : m.end() + context])
+        choice = 'y'
+
+        # The link looks like this:
+        # [[page_title|link_text]]trailing_chars
+        page_title = m.group('title')
+        link_text = m.group('label')
+
+        if not link_text:
+            # or like this: [[page_title]]trailing_chars
+            link_text = page_title
+        if m.group('section') == None:
+            section = ''
+        else:
+            section = m.group('section')
+        trailing_chars = m.group('linktrail')
+        if trailing_chars:
+            link_text += trailing_chars
+
+        if choice in "uU":
+            # unlink - we remove the section if there's any
+            text = text[:m.start()] + link_text + text[m.end():]
+            continue
+        replaceit = choice in "rR"
+
+        if link_text[0].isupper():
+            new_page_title = targetPage.title()
+        else:
+            new_page_title = targetPage.title()[0].lower() + targetPage.title()[1:]
+        if replaceit and trailing_chars:
+            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
+        elif replaceit or (new_page_title == link_text and not section):
+            newlink = "[[%s]]" % new_page_title
+        # check if we can create a link with trailing characters instead of a pipelink
+        elif len(new_page_title) <= len(link_text) and firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title) and re.sub(re.compile(linktrail), '', link_text[len(new_page_title):]) == '' and not section:
+            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)], link_text[len(new_page_title):])
+        else:
+            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
+        text = text[:m.start()] + newlink + text[m.end():]
+        continue
+    return text
+
+def workon(page):
+    try:
+        text = page.get()
+    except wikipedia.IsRedirectPage:
+        return
+    wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
+    links = page.linkedPages()
+    wikipedia.getall(mysite, links)
+    for page2 in links:
+        try:
+            target = page2.getRedirectTarget()
+        except (wikipedia.Error, wikipedia.SectionError):
+            continue
+        text = treat(text, page2, target)
+    if text != page.get():
+        comment = wikipedia.translate(mysite, msg)
+        page.put(text, comment)
+
+try:
+    gen = None
+    action = None
+    mysite = wikipedia.getSite()
+    linktrail = mysite.linktrail()
+
+    for arg in wikipedia.handleArgs():
+        if arg == '-featured':
+            action = True
+            featured = wikipedia.translate(mysite, featured_articles)
+            ref = wikipedia.Page(wikipedia.getSite(), featured)
+            gen = pagegenerators.ReferringPageGenerator(ref)
+            generator = pagegenerators.NamespaceFilterPageGenerator(gen, [0])
+            for page in generator:
+                workon(page)
+        elif arg.startswith('-page'):
+            action = True
+            if len(arg) == 5:
+                title = wikipedia.input(u'Which page should be processed?')
+            else:
+                title = arg[6:]
+            page = wikipedia.Page(wikipedia.getSite(), title)
+            workon(page)
+
+    if not action:
+        wikipedia.showHelp('fixing_redirects')
+        sys.exit()

+finally:
+    wikipedia.stopme()
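For anyone reviewing the approach: the heart of the script is the wikilink regex in treat(). The following standalone sketch (not part of the commit; the linktrail pattern and page titles are simplified stand-ins for what the script obtains from mysite.linktrail() and page2.getRedirectTarget()) shows how a link to a redirect gets repointed at the target while the visible link text is preserved:

    # -*- coding: utf-8 -*-
    # Minimal, self-contained sketch of the rewriting technique used in
    # treat() above (Python 2, like the script itself).  linktrail is a
    # simplified stand-in for mysite.linktrail(); the titles below are
    # hypothetical examples.
    import re

    linktrail = u'[a-z]*'
    # Same wikilink shape as in treat(): [[title#section|label]]trail
    linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                       r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')

    def fix_links(text, old_title, new_title):
        """Repoint every link to old_title at new_title, keeping the label."""
        curpos = 0
        while True:
            m = linkR.search(text, pos=curpos)
            if not m:
                break
            # advance past this hit so the loop always terminates
            curpos = m.start() + 1
            if m.group('title') != old_title:
                continue
            # keep the visible text: the label (or title) plus any linktrail
            label = (m.group('label') or m.group('title')) + m.group('linktrail')
            newlink = u"[[%s|%s]]" % (new_title, label)
            text = text[:m.start()] + newlink + text[m.end():]
        return text

    sample = u"The [[UN]]'s charter binds all [[UN|member state]]s."
    print fix_links(sample, u'UN', u'United Nations')
    # -> The [[United Nations|UN]]'s charter binds all
    #    [[United Nations|member states]].

The committed treat() adds two refinements on top of this: it preserves the section anchor of the original link, and it emits a bare [[...]]trail link instead of a pipelink whenever the target title plus the site's linktrail can reproduce the visible text.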