http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9478
Revision: 9478
Author:   xqt
Date:     2011-08-29 15:11:50 +0000 (Mon, 29 Aug 2011)

Log Message:
-----------
moved to archive
Added Paths:
-----------
    archive/trunk/CommonsPictureOfTheDay.py
    archive/trunk/WdTXMLParser.py
    archive/trunk/are-identical.py
    archive/trunk/brackethttp.py
    archive/trunk/check_extern.py
    archive/trunk/copy_table.py
    archive/trunk/extract_names.py
    archive/trunk/featuredcount.py
    archive/trunk/getimages.py
    archive/trunk/mediawiki_messages.py
    archive/trunk/refcheck.py
    archive/trunk/sqldump.py
    archive/trunk/test.py
    archive/trunk/translator.py
    archive/trunk/windows_chars.py
Copied: archive/trunk/CommonsPictureOfTheDay.py (from rev 9461, trunk/pywikipedia/archive/CommonsPictureOfTheDay.py) =================================================================== --- archive/trunk/CommonsPictureOfTheDay.py (rev 0) +++ archive/trunk/CommonsPictureOfTheDay.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,112 @@ +# -*- coding: utf-8 -*- +''' +Put "Picture of the day" in your desktop wallpaper from Wikimedia Commons. + +For Windows system, do you need: +* Python 2.5 +* Pywin32 for Python 2.5 +* PIL for Python 2.5 + +For Linux system, do you need: +* Python and PIL + +''' + +from wikipedia import Site, Page, ImagePage +from PIL import Image, ImageDraw, ImageFont +import httplib, time, sys, os + +if sys.platform == 'win32': + import ctypes, win32con + from _winreg import * +else: + import gconf + +def get_commons_image(image): + headers = {"Accept": "image/jpg", + "Accept": "image/gif", + "Accept": "image/png", + "Accept": "image/svg", + } + conn = httplib.HTTPConnection('upload.wikimedia.org') + conn.request("GET", image, None, headers) + r = conn.getresponse() + data = r.read() + if sys.platform == 'win32': + arq = open("Picture_of_the_day.bmp","wb") # convert image "on the fly" to Windows Bitmap + else: + arq = open("Picture_of_the_day.png","wb") + arq.write(data) + arq.close() + conn.close() + +def write_gray(filename, text, outfilename): + img = Image.open(filename).convert("RGB") + write = Image.new("RGB", (img.size[0], img.size[1])) + draw = ImageDraw.ImageDraw(img) + size = 0 + while True: + size +=1 + try: + FONT = "C:\WINDOWS\Fonts\Verdana.ttf" + except IndexError: + FONT = "/usr/share/fonts/truetype/ttf-bitstream-vera/Verdana.ttf" # ubuntu + except IndexError: + FONT = "/usr/share/fonts/bitstream-vera/Vera.ttf" # fedora + except IndexError: + print "Please, report this problem to leogregianin@gmail.com" + sys.exit() + nextfont = ImageFont.truetype(FONT, size) + nexttextwidth, nexttextheight = nextfont.getsize(text) + if nexttextwidth+nexttextheight/3 > write.size[0]: break + font = nextfont + textwidth, textheight = nexttextwidth, nexttextheight + draw.setfont(font) + draw.text(((write.size[0]-textwidth)/55, (write.size[0]-textheight)/55), text, fill=(120,120,120)) + img.save(outfilename) + +def set_wallpaper(): + if sys.platform == 'win32': + SPI_SETDESKWALLPAPER = 20 + ctypes.windll.user32.SystemParametersInfoA(SPI_SETDESKWALLPAPER, 0, "Picture_of_the_day.bmp", 0) + else: + gconf.client_get_default().get_string('/desktop/gnome/background/picture_options', 'scaled') + gconf.client_get_default().get_string('/desktop/gnome/background/picture_filename', 'Picture_of_the_day.png') + +if __name__ == '__main__': + commons = Site('commons', 'commons') + date_today = time.strftime('%Y-%m-%d', time.localtime()) + template = 'Template:Potd/%s' % date_today + templatePage = Page(commons, template) + image_today = templatePage.get() + image_name = 'Image:%s'% image_today + imageURL = ImagePage(commons, image_name) + featuredImage = imageURL.fileUrl() + image = featuredImage[27:] + + if sys.platform == 'win32': + if image.endswith('.svg'): + sys.exit() # Windows background don't accept svg files + + ### Install CommonsPictureOfTheDay in registry + Reg = ConnectRegistry(None, HKEY_LOCAL_MACHINE) + Key = OpenKey(Reg, r"SOFTWARE\Microsoft\Windows\CurrentVersion\Run", 0, KEY_WRITE) + # entry your correct pywikipediabot patch + SetValueEx(Key,"CommonsPictureOfTheDay", 0, REG_SZ, r"C:\pywikipediabot\pywikipedia\CommonsPictureOfTheDay.py") + CloseKey(Key) + CloseKey(Reg) + + 
get_commons_image(image) + + write_gray('Picture_of_the_day.bmp', + 'http://commons.wikimedia.org/wiki/Commons:Picture_of_the_day', + 'Picture_of_the_day.bmp') + + set_wallpaper() + + else: + get_commons_image(image) + write_gray('Picture_of_the_day.png', + 'http://commons.wikimedia.org/wiki/Commons:Picture_of_the_day', + 'Picture_of_the_day.png') + set_wallpaper()
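For reference, the __main__ block above builds the lookup key for the day's picture purely from the local date; the same logic in isolation (standard library only, nothing beyond what the script itself does):

    import time

    date_today = time.strftime('%Y-%m-%d', time.localtime())
    template = 'Template:Potd/%s' % date_today
    print template   # e.g. Template:Potd/2011-08-29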
Copied: archive/trunk/WdTXMLParser.py (from rev 9461, trunk/pywikipedia/archive/WdTXMLParser.py)
===================================================================
--- archive/trunk/WdTXMLParser.py       (rev 0)
+++ archive/trunk/WdTXMLParser.py       2011-08-29 15:11:50 UTC (rev 9478)
@@ -0,0 +1,74 @@
+# -*- coding: iso-8859-1 -*-
+"""
+(C) 2003 Thomas R. Koll, tomk32@tomk32.de
+    Distributed under the terms of the MIT license.
+"""
+
+__version__='$Id: WdTXMLParser.py,v 1.3 2005/12/21 17:51:26 wikipedian Exp $'
+
+DEBUG = 0
+import re
+from xml.sax.handler import ContentHandler
+
+class WdTXMLParser(ContentHandler):
+
+    def __init__(self):
+        self.rTitle = re.compile('(.*): (.*)')
+        self.rLink = re.compile('.*[\r\n]*(http://.*)')
+        self.rCount = re.compile('.*: (\d*)')
+        self.inItem = 0
+        self.inTitle = 0
+        self.inLink = 0
+        self.inDescription = 0
+        self.tmp = {}
+        self.result = {}
+
+    def startDocument(self):
+        self.result = {}
+        self.tmp = {}
+
+    def endDocument(self):
+        return self.result
+
+    def startElement(self, name, attrs):
+        if name == 'item':
+            self.inItem = 1
+        if self.inItem == 1:
+            if name == 'title':
+                self.inTitle = 1
+            if name == 'link':
+                self.inLink = 1
+            if name == 'description':
+                self.inDescription = 1
+
+    def characters(self, characters):
+        if self.inItem:
+            if self.inTitle:
+                self.tmp['title'] = self.rTitle.match(characters).group(2)
+            if self.inLink:
+                self.tmp['link'] = self.rLink.match(characters).group(1)
+            if self.inDescription:
+                self.tmp['count'] = self.rCount.match(characters).group(1)
+
+    def endElement(self, name):
+        if name == 'item':
+            self.inItem = 0
+            self.result[self.tmp['title']] = {
+                'link'  : self.tmp['link'],
+                'count' : self.tmp['count']
+                }
+            self.tmp = {}
+        if name == 'title':
+            self.inTitle = 0
+        if name == 'link':
+            self.inLink = 0
+        if name == 'description':
+            self.inDescription = 0
+
+"""
+if self.date and self.link and self.count:
+    self.results[self.title] = {
+        'date'  : self.date,
+        'link'  : self.link,
+        'count' : self.count
+        }
+"""
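For context, WdTXMLParser is a SAX ContentHandler and only does something once it is fed to a parser. A minimal usage sketch (not part of the original script; the item layout of the feed is an assumption inferred from the regular expressions in __init__):

    import xml.sax

    sample = ('<rss><channel><item>'
              '<title>WdT: Example</title>'
              '<link>http://de.wikipedia.org/wiki/Example</link>'
              '<description>hits: 42</description>'
              '</item></channel></rss>')

    handler = WdTXMLParser()
    xml.sax.parseString(sample, handler)
    # result is keyed by the part of the title after the colon
    print handler.result['Example']['count']   # prints: 42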
Copied: archive/trunk/are-identical.py (from rev 9461, trunk/pywikipedia/archive/are-identical.py) =================================================================== --- archive/trunk/are-identical.py (rev 0) +++ archive/trunk/are-identical.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,81 @@ +""" +Simple bot to check whether two pages with the same name on different language +'pedias have interwiki links to the same page on another language. + +Call the script with 3 arguments: + + python are-identical.py lang1 lang2 name + +The script will either print "Yes" and return exit code 0, + or print "No" and return exit code 1, + or print "Both links are already present" + and return exit code 2, + or print "One links already present" + and return exit code 0. + +It may raise exceptions on pages that disappeared or whatever. This is +a simple framework at least for the moment. +""" +# +# (C) Rob Hooft, 2005 +# +# Distributed under the terms of the MIT license. +# +__version__='$Id: are-identical.py,v 1.3 2005/12/21 17:51:26 wikipedian Exp $' +# +from __future__ import generators + +import sys, wikipedia + +class TwoPageGenerator: + def __init__(self, lang1, lang2, name): + self.lang1 = lang1 + self.lang2 = lang2 + self.name = name + + def __iter__(self): + yield wikipedia.Page(wikipedia.getSite(self.lang1), self.name) + yield wikipedia.Page(wikipedia.getSite(self.lang2), self.name) + + +class IdenticalRobot: + def __init__(self, generator): + self.generator = generator + + def run(self): + arr = [] + for x in self.generator: + arr.append(x) + pg1 = arr[0] + pg2 = arr[1] + iw1 = pg1.interwiki() + iw2 = pg2.interwiki() + if pg2 in iw1 and pg1 in iw2: + print "Both links are already present" + sys.exit(2) + if pg2 in iw1 or pg1 in iw2: + print "One link already present" + sys.exit(0) + for iw in iw1: + if iw in iw2: + print "Yes" + sys.exit(0) + print "No" + sys.exit(1) + +def main(): + args = [] + for arg in sys.argv[1:]: + arg = wikipedia.argHandler(arg, 'are-identical') + if arg: + args.append(arg) + g = TwoPageGenerator(*args) + r = IdenticalRobot(g) + r.run() + +try: + main() +finally: + wikipedia.stopme() + +
Copied: archive/trunk/brackethttp.py (from rev 9461, trunk/pywikipedia/archive/brackethttp.py)
===================================================================
--- archive/trunk/brackethttp.py        (rev 0)
+++ archive/trunk/brackethttp.py        2011-08-29 15:11:50 UTC (rev 9478)
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+'''
+Script to correct URLs like
+(http://www.example.org) to [http://www.example.org example.org]
+to have correct generation of links in Wikipedia
+'''
+
+__author__ = '(C) 2003 Thomas R. Koll, tomk32@tomk32.de'
+__license__ = 'Distributed under the terms of the MIT license.'
+__version__='$Id: brackethttp.py,v 1.13 2005/12/21 17:51:26 wikipedian Exp $'
+
+import re, sys
+import wikipedia
+
+myComment = {'ar':u'بوت: URL تم إصلاحها',
+             'en':u'Bot: URL fixed',
+             'fa':u'ربات: URL اصلاح شد',
+             'he':u'בוט: תוקנה כתובת URL',
+             'pt':u'Bot: URL corrigido',
+             'zh':u'機器人: 網址已修復',
+             }
+
+if __name__ == "__main__":
+    try:
+        for arg in sys.argv[1:]:
+            if wikipedia.argHandler(arg, 'brackethttp'):
+                pass
+            else:
+                pl = wikipedia.Page(wikipedia.getSite(), arg)
+                text = pl.get()
+
+                newText = re.sub("(http://([^ ]*[^] ])))", "[\1 \2])", text)
+
+                if newText != text:
+                    wikipedia.showDiff(text, newText)
+                    status, reason, data = pl.put(newText, wikipedia.translate(wikipedia.mylang, myComment))
+                    print status, reason
+                else:
+                    print "No bad link found"
+    except:
+        wikipedia.stopme()
+        raise
+wikipedia.stopme()
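The re.sub() line above has visibly lost its escaping in this listing. As a rough illustration of the transformation the docstring describes, here is a rewritten expression of my own (simplified: it reuses the full URL as the link label rather than deriving example.org from it), not the original one:

    import re

    text = 'See (http://www.example.org) for details.'
    fixed = re.sub(r'\((http://[^\s)]+)\)', r'[\1 \1]', text)
    print fixed   # See [http://www.example.org http://www.example.org] for details.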
Copied: archive/trunk/check_extern.py (from rev 9461, trunk/pywikipedia/archive/check_extern.py) =================================================================== --- archive/trunk/check_extern.py (rev 0) +++ archive/trunk/check_extern.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +""" +This bot is used for checking external links from Wikipedia. It checks +all external links in groups of 480 pages, gives the error code for each +that causes problems, and counts the number of links with and without +problems. + +It accepts all general Wikipediabot arguments as well as: +-start:xxx Check starting at 'xxx'. +-nolog Do not log to a file, only give output to a screen. + +Anything else is assumed to be a page that is to be checked. Spaces in +page titles have to be replaced by underscores, otherwise the bot assumes +the parts are separate pages. If no page has been specified and also no +-start argument has been provided, the bot acts as if -start:! had been +specified, starting at the beginning. + +The bot returns all links that have some problem, with the errorcode +provided by the server, or the artificial errorcode -1 if the server +could not be reached at all. Output is sent both to the screen and the +file check_extern.txt +""" + +# +# (C) Andre Engels, 2004 +# +# Distributed under the terms of the MIT license. +# + +__version__='$Id: check_extern.py,v 1.16 2005/12/21 17:51:26 wikipedian Exp $' + +import wikipedia, urllib, re, sys, httplib + +class URLerrorFinder(urllib.FancyURLopener): + version="RobHooftWikiRobot/1.0" + def open_http(self, url): + """Use HTTP protocol.""" + if isinstance(url, str): + host, selector = urllib.splithost(url) + if host: + user_passwd, host = urllib.splituser(host) + host = urllib.unquote(host) + realhost = host + else: + host, selector = url + urltype, rest = urllib.splittype(selector) + url = rest + user_passwd = None + if urltype.lower() != 'http': + realhost = None + else: + realhost, rest = splithost(rest) + if realhost: + user_passwd, realhost = splituser(realhost) + if user_passwd: + selector = "%s://%s%s" % (urltype, realhost, rest) + if proxy_bypass(realhost): + host = realhost + if not host: return -2 + h = httplib.HTTP(host) + h.putrequest('GET', selector) + if realhost: h.putheader('Host', realhost) + for args in self.addheaders: h.putheader(*args) + h.endheaders() + errcode, errmsg, headers = h.getreply() + return errcode + +# Which error codes do we not consider errors? 
+allowederrorcodes = [100,101,200,201,202,203,205,304] + +errname = { + -1:'No contact to server', + -2:'No host found', + 100:'Continue', + 101:'Switching Protocols', + 200:'OK', + 201:'Created', + 202:'Accepted', + 203:'Non-Authorative Information', + 204:'No Content', + 205:'Reset Content', + 206:'Partial Content', + 300:'Multiple Choices', + 301:'Moved Permanently', + 302:'Moved Temporarily', + 303:'See Other', + 304:'Not Modified', + 305:'Use Proxy', + 307:'Temporary Redirect', + 400:'Bad Request', + 401:'Unauthorized', + 402:'Payment Required', + 403:'Forbidden', + 404:'Not Found', + 405:'Method Not Allowed', + 406:'None Acceptable', + 407:'Proxy Authentication Required', + 408:'Request Timeout', + 409:'Conflict', + 410:'Gone', + 411:'Authorization Refused', + 412:'Precondition Failed', + 413:'Request Entity Too Large', + 414:'Request-URI Too Large', + 415:'Unsupported Media Type', + 416:'Requested Range not satisfiable', + 417:'Expectation Failed', + 500:'Internal Server Error', + 501:'Not Implemented', + 502:'Bad Gateway', + 503:'Service Unavailable', + 504:'Gateway Timeout', + 505:'HTTP Version not supported', + 8181:'Certificate Expired', + 12002:'Timeout', + 12007:'No such host', + 12029:'No connection', + 12031:'Connection Reset' + } + +def errorname(error): + # Given a numerical HTML error, give its actual identity + if error in errname: + return errname[error] + elif (error > 300) and (error < 400): + return 'Unknown Redirection Response' + else: + return 'Unknown Error' + +start = '!' +log = True +todo = [] +do_all = False + +for arg in sys.argv[1:]: + url=sys.argv[1] + arg = wikipedia.argHandler(arg, 'check_extern') + if arg: + if arg.startswith('-start:'): + start=arg[7:] + do_all=True + elif arg=='-nolog': + log = False + else: + mysite = wikipedia.getSite() + todo.append(wikipedia.Page(mysite,arg)) + +# Make sure we have the final site +mysite = wikipedia.getSite() + +if todo == []: + # No pages have been given; if also no start is given, we start at + # the beginning + do_all = True + +if log: + import logger + sys.stdout = logger.Logger(sys.stdout, filename = 'check_extern.log') + +cont = True +checked = 0 +working = 0 +nonworking = 0 +totalchecked = 0 + +try: + while cont: + print + i = 0 + if len(todo)<61 and do_all: + for pl in wikipedia.allpages(start = start): + todo.append(pl) + i += 1 + if i==480: + break + start = todo[len(todo)-1].title() + '_0' + # todo is a list of pages to do, donow are the pages we will be doing in this run. + if len(todo)>60: + # Take the first 60. + donow = todo[0:60] + todo = todo[60:] + else: + donow = todo + # If there was more to do, the 'if len(todo)<61' part would have extended + # todo beyond this size. + cont = False + try: + wikipedia.getall(mysite, donow) + except wikipedia.SaxError: + # Ignore this error, and get the pages the traditional way. 
+ pass + checked +=len(donow) + for pl in donow: + R = re.compile(r'http://%5B%5E%5Cs%7D<]]+[^\s.,:;)?!]}<]') + try: + for url in R.findall(pl.get()): + url = wikipedia.unicode2html(url,'ascii') + try: + error = URLerrorFinder().open(url) + except IOError: + error = -1 + if error in allowederrorcodes: + working += 1 + else: + nonworking += 1 + print + wikipedia.output(u'Page "%s" links to:'%pl.title()) + wikipedia.output(url) + wikipedia.output(u'Which gave error: %s %s'%(error,errorname(error))) + # If anything is wrong with the Wikipedia page, just ignore + except (wikipedia.NoPage,wikipedia.IsRedirectPage,wikipedia.LockedPage): + pass + if checked>499 or not cont: + totalchecked += 500 + checked -= 500 + print + print '======================================================================' + wikipedia.output(u'%s pages checked, last was [[%s]]'%(totalchecked+checked,donow[len(donow)-1])) + print 'In those pages there were %s correct and %s problematic external links.'%(working,nonworking) +except: + wikipedia.stopme() + raise +wikipedia.stopme()
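For reference, the working/problematic decision above is just a membership test against allowederrorcodes; a small illustration, assuming the definitions in this script are in scope:

    for code in (200, 304, 404, -1):
        if code in allowederrorcodes:
            print code, errorname(code), '-> counted as working'
        else:
            print code, errorname(code), '-> counted as problematic'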
Copied: archive/trunk/copy_table.py (from rev 9461, trunk/pywikipedia/archive/copy_table.py) =================================================================== --- archive/trunk/copy_table.py (rev 0) +++ archive/trunk/copy_table.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,230 @@ +# -*- coding: utf-8 -*- +""" +Script to copy a table from one Wikipedia to another one, translating it +on-the-fly. + +Syntax: + copy_table.py -type:abcd -from:xy Article_Name + +Command line options: + +-from:xy Copy the table from the Wikipedia article in language xy + Article must have interwiki link to xy + +-debug Show debug info, and don't send the results to the server + +-type:abcd Translates the table, using translations given below. + When the -type argument is not used, the bot will simply + copy the table as-is. + +-file:XYZ Reads article names from a file. XYZ is the name of the + file from which the list is taken. If XYZ is not given, the + user is asked for a filename. + Page titles should be saved one per line, without [[brackets]]. + The -pos parameter won't work if -file is used. + +-image Copy all images within the found table to the target Wikipedia. + Make sure the bot is logged in before trying to upload images. + +Article_Name: Name of the article where a table should be inserted + +""" +# +# (C) Daniel Herding, 2004 +# +# Distributed under the terms of the MIT license. +# +__version__='$Id: copy_table.py,v 1.31 2005/12/21 17:51:26 wikipedian Exp $' +# +import wikipedia, translator, lib_images +import re, sys, string + +# Summary message +msg={ + "ar":u"روبوت: نسخ الجدول من ", + "en":u"robot: copying table from ", + "de":u"Bot: Kopiere Tabelle von ", + "he":u"רובוט: מעתיק טבלה מתוך ", + "pt":u"Bot: Copiando tabela de ", + } + +# Prints text on the screen only if in -debug mode. +# Argument text should be raw unicode. +def print_debug(text): + if debug: + wikipedia.output(text) + + +# this is a modified version of wikipedia.imagelinks(), it only looks in text, not in the whole page. +def imagelinks(site, text): + image_ns = site.image_namespace() + # regular expression which matches e.g. "Image" as well as "image" (for en:) + im = '[' + image_ns[0].upper() + image_ns[0].lower() + ']' + image_ns[1:] + w1=r'('+im+':[^]|]*)' + w2=r'([^]]*)' + Rlink = re.compile(r'[['+w1+r'(|'+w2+r')?]]') + result = [] + for l in Rlink.findall(text): + result.append(l[0]) + return result + +# opens on a page, checks for an interwiki link, transfers and translates the first +# table, copies all images in that table. +def treat(to_pl, fromsite): + try: + to_text = to_pl.get() + interwikis = to_pl.interwiki() + except wikipedia.IsRedirectPage: + print "Can't work on redirect page." + return + except wikipedia.NoPage: + print "Page not found." + return + from_pl = None + for interwiki in interwikis: + if interwiki.site() == fromsite: + from_pl = interwiki + if from_pl is None: + print "Interwiki link to %s not found." % repr(fromsite) + return + from_text = from_pl.get() + wikipedia.setAction(wikipedia.translate(mysite.lang, msg) + from_pl.aslink()) + # search start of table + table = get_table(from_text) + if not table: + wikipedia.output(u"No table found in %s" % (from_pl.aslink())) + return + + print_debug(u"Copying images") + if copy_images: + # extract image links from original table + images=imagelinks(fromsite, table) + for image in images: + # Copy the image to the current wikipedia, copy the image description page as well. + # Prompt the user so that he can translate the filename. 
+ new_filename = lib_images.transfer_image(wikipedia.Page(fromsite, image), debug) + # if the upload succeeded + if new_filename: + old_image_tag = wikipedia.Page(fromsite, image).title() + new_image_tag = wikipedia.Page(mysite, mysite.image_namespace() + ":" + new_filename).title() + print_debug(u"Replacing " + old_image_tag + " with " + new_image_tag) + # We want to replace "Image:My pic.jpg" as well as "image:my_pic.jpg", so we need a regular expression. + old_image_tag = old_image_tag.replace(" ", "[ _]") + old_image_tag = "[" + old_image_tag[0].upper() + old_image_tag[0].lower() + "]" + old_image_tag[1:] + #todo: regex for first letter of filename, i.e. first letter after the colon + rOld_image_tag = re.compile(old_image_tag) + table = re.sub(old_image_tag, new_image_tag, table) + + + translated_table = translator.translate(table, type, fromsite.lang, debug, mysite.lang) + if not translated_table: + print "Could not translate table." + return + + print_debug(u"\n" + translated_table) + # add table to top of the article, seperated by a blank lines + to_text = translated_table + "\n\n" + to_text + if not debug: + # save changes on Wikipedia + to_pl.put(to_text, minorEdit='0') + + + + +# Regular expression that will match both <table and {| +startR = re.compile(r"<table|{|") +# Regular expression that will match both </table> and |} +endR = re.compile(r"</table>||}") + +# Finds the first table inside a text, including cascaded inner tables. +def get_table(text): + pos = 0 + # find first start tag + first_start_tag = re.search(startR, text) + if not first_start_tag: + return + else: + print_debug(u"First start tag found at " + str(first_start_tag.start())) + pos = first_start_tag.end() + # number of start tags minus numer of end tags + table_level = 1 + remaining_text = text + # until an end tag has been found for each start tag: + while table_level != 0: + # continue search after the last found tag + remaining_text = text[pos:] + next_start_tag = re.search(startR, remaining_text, pos) + next_end_tag = re.search(endR, remaining_text, pos) + if not next_end_tag: + print_debug(u"Error: missing end tag") + pass + # if another cascaded table is opened before the current one is closed + elif next_start_tag and next_start_tag.start() < next_end_tag.start(): + print_debug(u"Next start tag found at " + str(pos + next_start_tag.start())) + pos += next_start_tag.end() + table_level += 1 + print_debug(u"Table level is " + str(table_level)) + else: + print_debug(u"Next end tag found at " + str(pos + next_end_tag.start())) + pos += next_end_tag.end() + table_level -= 1 + print_debug(u"Table level is " + str(table_level)) + print_debug(u"Table starts at " + str(first_start_tag.start()) + " and ends at " + str(pos) +"\n") + print_debug(text[first_start_tag.start():pos]) + return text[first_start_tag.start():pos] + +if __name__=="__main__": + try: + # if the -file argument is used, page titles are dumped in this array. + # otherwise it will only contain one page. + page_list = [] + # if -file is not used, this temporary array is used to read the page title. 
+ page_title = [] + from_lang = "" + type = "" + debug = False + copy_images = False + + # read command line parameters + for arg in sys.argv[1:]: + arg = wikipedia.argHandler(arg, 'copy_table') + if arg: + if arg.startswith("-from"): + from_lang = arg[6:] + elif arg.startswith("-type:"): + type = arg[6:] + elif arg == "-debug": + debug = True + elif arg == "-image": + copy_images = True + elif arg.startswith('-file'): + if len(arg) == 5: + file = wikipedia.input(u'Please enter the list's filename: ') + else: + file = arg[6:] + # open file and read page titles out of it + f=open(file) + for line in f.readlines(): + if line != '\n': + page_list.append(line) + f.close() + else: + page_title.append(arg) + + # if the page name is given as a command line argument, + # connect the title's parts with spaces + if page_title != []: + page_title = ' '.join(page_title) + page_list.append(page_title) + + mysite = wikipedia.getSite() + fromsite = mysite.getSite(code=from_lang) + + for current_page_name in page_list: + thispl = wikipedia.Page(mysite, current_page_name) + treat(thispl, fromsite) + except: + wikipedia.stopme() + raise + wikipedia.stopme() +
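A side note on the two table-delimiter expressions defined next to get_table() above: their escaping appears lost in this listing, and as printed (r"<table|{|" and r"</table>||}") each contains an empty alternative that matches anywhere. A working equivalent for the HTML and wiki table delimiters would look like the following; this is my reconstruction, not necessarily the original wording:

    import re

    startR = re.compile(r'<table|\{\|')    # "<table" or the wiki-table opener "{|"
    endR = re.compile(r'</table>|\|\}')    # "</table>" or the wiki-table closer "|}"

    print bool(startR.search('{| class="wikitable"'))   # True
    print bool(endR.search('|}'))                       # True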
Copied: archive/trunk/extract_names.py (from rev 9461, trunk/pywikipedia/archive/extract_names.py)
===================================================================
--- archive/trunk/extract_names.py      (rev 0)
+++ archive/trunk/extract_names.py      2011-08-29 15:11:50 UTC (rev 9478)
@@ -0,0 +1,23 @@
+"""
+Script to extract all wiki page names a certain HTML file points to
+
+The output can be used as input to some robot that takes a list of pages as input.
+
+This script takes a single file name argument; the file should be an HTML file
+as captured from one of the wikipedia servers.
+"""
+#
+# (C) Rob W.W. Hooft, 2003
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id: extract_names.py,v 1.9 2005/12/21 17:51:26 wikipedian Exp $'
+#
+import sys, re
+
+R = re.compile('/wiki/(.*?)" *')
+fn = sys.argv[1]
+f = open(fn)
+text = f.read()
+f.close()
+for hit in R.findall(text):
+    print hit
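As a quick illustration of what that expression extracts from a saved wiki page (the HTML fragment below is a made-up example):

    import re

    R = re.compile('/wiki/(.*?)" *')
    sample = '<a href="/wiki/Python_(programming_language)" title="Python">Python</a>'
    print R.findall(sample)   # ['Python_(programming_language)']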
Copied: archive/trunk/featuredcount.py (from rev 9461, trunk/pywikipedia/archive/featuredcount.py)
===================================================================
--- archive/trunk/featuredcount.py      (rev 0)
+++ archive/trunk/featuredcount.py      2011-08-29 15:11:50 UTC (rev 9478)
@@ -0,0 +1,52 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+##################################################
+All functions of this script have been merged into featured.py. Please use:
+
+    featured.py -fromall -count
+
+shizhao 2009-04-18
+##################################################
+
+
+This script only counts how many featured articles all wikipedias have.
+
+usage: featuredcount.py
+
+"""
+__version__ = '$Id: featuredcount.py 6336 2009-02-08 04:14:37Z purodha $'
+
+#
+# Distributed under the terms of the MIT license.
+#
+
+import sys
+import wikipedia, catlib
+from featured import featured_name
+
+def featuredArticles(site):
+    method = featured_name[site.lang][0]
+    name = featured_name[site.lang][1]
+    args = featured_name[site.lang][2:]
+    raw = method(site, name, *args)
+    arts = []
+    for p in raw:
+        if p.namespace() == 0:
+            arts.append(p)
+        elif p.namespace() == 1:
+            arts.append(wikipedia.Page(p.site(), p.titleWithoutNamespace()))
+    wikipedia.output('\03{lightred}** wikipedia:%s has %i featured articles\03{default}' % (site.lang, len(arts)))
+
+if __name__ == "__main__":
+    mysite = wikipedia.getSite()
+    fromlang = featured_name.keys()
+    fromlang.sort()
+    try:
+        for ll in fromlang:
+            fromsite = wikipedia.getSite(ll)
+            if fromsite != mysite:
+                arts = featuredArticles(fromsite)
+        arts_mysite = featuredArticles(mysite)
+    finally:
+        wikipedia.stopme()
Copied: archive/trunk/getimages.py (from rev 9461, trunk/pywikipedia/archive/getimages.py) =================================================================== --- archive/trunk/getimages.py (rev 0) +++ archive/trunk/getimages.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,67 @@ +""" +Script to transfer many images from one wiki to another. Your +language (which can be changed with the -lang: argument) is the +language to upload to. The images should be in a file as interwiki +links (that is in the form [[en:Image:myimage.png]]); they do not +need to be all from the same Wiki. This file can be created with +extract_wikilinks.py. + +Arguments: + + -lang:xx Log in to the given wikipedia language to upload to + +The first other argument is taken to be the name of the file you get +the links from; other arguments are ignored. +""" + +# +# (C) Andre Engels 2004 +# +# Distributed under the terms of the MIT license. +# +# Modified by Gerrit Holl, 01-11-2004 +__version__='$Id: getimages.py,v 1.15 2005/12/21 17:51:26 wikipedian Exp $' + +import sys +import wikipedia, lib_images, pagegenerators + +def getfn(): + fns = [] + + for arg in sys.argv[1:]: + arg = wikipedia.argHandler(arg, 'getimages') + if arg: + fns.append(arg) + + if len(fns) == 0: + fns.append(raw_input("Please enter a filename: ")) + + return fns + +def main(): + for filename in getfn(): + print "Handling images from %s" % filename + gen = pagegenerators.TextfilePageGenerator(filename) + for image in gen: + if image.isImage(): + print "-" * 50 + print "Image: %s" % image.title() + try: + # show the image description page's contents + print image.get() + except wikipedia.NoPage: + print "Description empty." + except wikipedia.IsRedirectPage: + print "Description page is redirect?!" + answer=wikipedia.input(u"Copy this image (y/N)?") + if answer.lower().startswith('y'): + lib_images.transfer_image(image) + +if __name__ == "__main__": + try: + main() + except: + wikipedia.stopme() + raise + else: + wikipedia.stopme()
Copied: archive/trunk/mediawiki_messages.py (from rev 9461, trunk/pywikipedia/archive/mediawiki_messages.py) =================================================================== --- archive/trunk/mediawiki_messages.py (rev 0) +++ archive/trunk/mediawiki_messages.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- +""" +Allows access to the MediaWiki messages, that's the label texts of the MediaWiki +software in the current language. These can be used in other bots. + +The function refresh_messages() downloads all the current messages and saves +them to disk. It is run automatically when a bot first tries to access one of +the messages. It can be updated manually by running this script, e.g. when +somebody changed the current message at the wiki. The texts will also be +reloaded automatically once a month. + +Syntax: python mediawiki_messages [-all] + +Command line options: + -refresh - Reloads messages for the home wiki or for the one defined via + the -lang and -family parameters. + + -all - Reloads messages for all wikis where messages are already present + + If another parameter is given, it will be interpreted as a MediaWiki key. + The script will then output the respective value, without refreshing.. + +""" + +# (C) Daniel Herding, 2004 +# +# Distributed under the terms of the MIT license. + +##THIS MODULE IS DEPRECATED AND HAS BEEN REPLACED BY NEW FUNCTIONALITY IN +##WIKIPEDIA.PY. It is being retained solely for compatibility in case any +##custom-written bots rely upon it. Bot authors should replace any uses +##of this module as follows: +## +## OLD: mediawiki_messages.get(key, site) +## NEW: site.mediawiki_message(key) +## +## OLD: mediawiki_messages.has(key, site) +## NEW: site.has_mediawiki_message(key) +## +## OLD: mediawiki_messages.makepath(path) +## NEW: wikipedia.makepath(path) +## +########################################################################## + +import warnings +warnings.warn( +"""The mediawiki_messages module is deprecated and no longer +maintained; see the source code for new methods to replace +calls to this module.""", + DeprecationWarning, stacklevel=2) + + +import wikipedia +import re, sys, pickle +import os.path +import time +import codecs +import urllib +from BeautifulSoup import * + +__version__='$Id: mediawiki_messages.py 3731 2007-06-20 14:42:55Z russblau $' + +loaded = {} + +def get(key, site = None, allowreload = True): + site = site or wikipedia.getSite() + if site in loaded: + # Use cached copy if it exists. 
+ dictionary = loaded[site] + else: + fn = 'mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang) + try: + # find out how old our saved dump is (in seconds) + file_age = time.time() - os.path.getmtime(fn) + # if it's older than 1 month, reload it + if file_age > 30 * 24 * 60 * 60: + print 'Current MediaWiki message dump is one month old, reloading' + refresh_messages(site) + except OSError: + # no saved dumped exists yet + refresh_messages(site) + f = open(fn, 'r') + dictionary = pickle.load(f) + f.close() + loaded[site] = dictionary + key = key[0].lower() + key[1:] + if key in dictionary: + return dictionary[key] + elif allowreload: + refresh_messages(site = site) + return get(key, site = site, allowreload = False) + else: + raise KeyError('MediaWiki Key %s not found' % key) + +def has(key, site = None, allowreload = True): + try: + get(key, site, allowreload) + return True + except KeyError: + return False + +def makepath(path): + """ creates missing directories for the given path and + returns a normalized absolute version of the path. + + - if the given path already exists in the filesystem + the filesystem is not modified. + + - otherwise makepath creates directories along the given path + using the dirname() of the path. You may append + a '/' to the path if you want it to be a directory path. + + from holger@trillke.net 2002/03/18 + """ + from os import makedirs + from os.path import normpath,dirname,exists,abspath + + dpath = normpath(dirname(path)) + if not exists(dpath): makedirs(dpath) + return normpath(abspath(path)) + +def refresh_messages(site = None): + site = site or wikipedia.getSite() + # get 'all messages' special page's path + path = site.allmessages_address() + print 'Retrieving MediaWiki messages for %s' % repr(site) + wikipedia.put_throttle() # It actually is a get, but a heavy one. + allmessages = site.getUrl(path) + + print 'Parsing MediaWiki messages' + soup = BeautifulSoup(allmessages, + convertEntities=BeautifulSoup.HTML_ENTITIES) + # The MediaWiki namespace in URL-encoded format, as it can contain + # non-ASCII characters and spaces. + quotedMwNs = urllib.quote(site.namespace(8).replace(' ', '_').encode(site.encoding())) + mw_url = site.path() + "?title=" + quotedMwNs + ":" + altmw_url = site.path() + "/" + quotedMwNs + ":" + nicemw_url = site.nice_get_address(quotedMwNs + ":") + shortmw_url = "/" + quotedMwNs + ":" + ismediawiki = lambda url:url and (url.startswith(mw_url) + or url.startswith(altmw_url) + or url.startswith(nicemw_url) + or url.startswith(shortmw_url)) + # we will save the found key:value pairs here + dictionary = {} + + try: + for keytag in soup('a', href=ismediawiki): + # Key strings only contain ASCII characters, so we can save them as + # strs + key = str(keytag.find(text=True)) + keyrow = keytag.parent.parent + if keyrow['class'] == "orig": + valrow = keyrow.findNextSibling('tr') + assert valrow['class'] == "new" + value = unicode(valrow.td.string).strip() + elif keyrow['class'] == 'def': + value = unicode(keyrow('td')[1].string).strip() + else: + raise AssertionError("Unknown tr class value: %s" % keyrow['class']) + dictionary[key] = value + except Exception, e: + wikipedia.debugDump( 'MediaWiki_Msg', site, u'%s: %s while processing URL: %s' % (repr(e), str(e), unicode(path)), allmessages) + raise + + # Save the dictionary to disk + # The file is stored in the mediawiki_messages subdir. Create if necessary. 
+ if dictionary == {}: + wikipedia.debugDump( 'MediaWiki_Msg', site, u'Error URL: '+unicode(path), allmessages ) + sys.exit() + else: + f = open(makepath('mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)), 'w') + pickle.dump(dictionary, f) + f.close() + print "Loaded %i values from %s" % (len(dictionary.keys()), site) + #print dictionary['sitestatstext'] + +def refresh_all_messages(): + import dircache, time + filenames = dircache.listdir('mediawiki-messages') + message_filenameR = re.compile('mediawiki-messages-([a-z:]+)-([a-z:]+).dat') + for filename in filenames: + match = message_filenameR.match(filename) + if match: + family = match.group(1) + lang = match.group(2) + site = wikipedia.getSite(code = lang, fam = family) + refresh_messages(site) + +def main(): + refresh_all = False + refresh = False + key = None + for arg in wikipedia.handleArgs(): + if arg == '-all': + refresh_all = True + elif arg == '-refresh': + refresh = True + else: + key = arg + if key: + wikipedia.output(get(key), toStdout = True) + elif refresh_all: + refresh_all_messages() + elif refresh: + refresh_messages(wikipedia.getSite()) + else: + wikipedia.showHelp('mediawiki_messages') + +if __name__ == "__main__": + try: + main() + except: + wikipedia.stopme() + raise + else: + wikipedia.stopme() +
Copied: archive/trunk/refcheck.py (from rev 9461, trunk/pywikipedia/archive/refcheck.py) =================================================================== --- archive/trunk/refcheck.py (rev 0) +++ archive/trunk/refcheck.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,95 @@ +#!/usr/bin/python +""" +################################################## +This script with all its function has been merged +to templatecount.py. please use: + + templatecount.py -count + +xqt 2009-10-30 +################################################## +This script checks references to see if they are properly formatted. Right now +it just counts the total number of transclusions of any number of given templates. + +NOTE: This script is not capable of handling the <ref></ref> syntax. It just +handles the {{ref}} syntax, which is still used, but DEPRECATED on the English +Wikipedia. + +Syntax: python refcheck.py command [arguments] + +Command line options: + +-count Counts the number of times each template (passed in as an argument) + is transcluded. +-namespace: Filters the search to a given namespace. If this is specified + multiple times it will search all given namespaces + +Examples: + +Counts how many time {{ref}} and {{note}} are transcluded in articles. + + python refcheck.py -count ref note -namespace:0 + +""" +__version__ = '$Id$' + +import wikipedia, config +import replace, pagegenerators +import re, sys, string + +templates = ['ref', 'note', 'ref label', 'note label', 'reflist'] + +class ReferencesRobot: + #def __init__(self): + #Nothing + def countRefs(self, templates, namespaces): + mysite = wikipedia.getSite() + mytpl = mysite.template_namespace()+':' + finalText = [u'Number of transclusions per template',u'------------------------------------'] + for template in templates: + gen = pagegenerators.ReferringPageGenerator(wikipedia.Page(mysite, mytpl + template), onlyTemplateInclusion = True) + if namespaces: + gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) + count = 0 + for page in gen: + count += 1 + finalText.append(u'%s: %d' % (template, count)) + for line in finalText: + wikipedia.output(line) + +def main(): + doCount = False + argsList = [] + namespaces = [] + for arg in wikipedia.handleArgs(): + if arg == '-count': + doCount = True + elif arg.startswith('-namespace:'): + try: + namespaces.append(int(arg[len('-namespace:'):])) + except ValueError: + namespaces.append(arg[len('-namespace:'):]) + else: + argsList.append(arg) + + if doCount: + robot = ReferencesRobot() + if not argsList: + argsList = templates + choice = '' + if 'reflist' in argsList: + wikipedia.output(u'NOTE: it will take a long time to count "reflist".') + choice = wikipedia.inputChoice(u'Proceed anyway?', ['yes', 'no', 'skip'], ['y', 'n', 's'], 'y') + if choice == 's': + argsList.remove('reflist') + if choice <> 'n': + robot.countRefs(argsList, namespaces) + else: + wikipedia.showHelp('refcheck') + +if __name__ == "__main__": + try: + main() + finally: + wikipedia.stopme() +
Copied: archive/trunk/sqldump.py (from rev 9461, trunk/pywikipedia/archive/sqldump.py) =================================================================== --- archive/trunk/sqldump.py (rev 0) +++ archive/trunk/sqldump.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,289 @@ +# -*- coding: utf-8 -*- +""" +Reads a cur SQL dump and offers a generator over SQLentry objects which can be +used by other bots. Each SQLentry object represents a page. + +Can also be run directly from the command line to retrieve page lists from +an SQL dump. + +Syntax: + + python sqldump.py -sql:filename.sql action + +Where action can be one of these: + +* find - List pages which contain a certain text +* findr - List pages containing text matching a regular expression +* shortpages - List pages with short contents +* unmountedcats - List categories that don't have a supercategory +* percentnames - List pages that contain internal links where special + characters are encoded as hexadecimal codes, e.g. %F6 +* baddisambiguations - Created for de.wikipedia to fix primary topic + disambiguations (Begriffsklärung nach Modell 2). +""" +# +# (C) Daniel Herding, 2004 +# +# Distributed under the terms of the MIT license. +# + +__version__ = '$Id:' + +from __future__ import generators +import re, time +import wikipedia, config + + +class SQLentry(object): + ''' + Represents a wiki page, read from an SQL dump. + + An instance of this class will have the following attributes: + * self.id is the page ID (integer) + * self.namespace is the namespace ID (integer) + * self.title is the page title without namespace (unicode) + * self.text is the text on that page (unicode) + * self.comment is the last edit summary (unicode) + * self.userid is the last editor's ID (integer) + * self.username is the last editor's username (unicode) + * self.timestamp is the time of the last edit (time tuple) + * self.restrictions is True if the page is locked (boolean) + * self.counter is the # of page views, disabled on Wikimedia wikis (integer) + * self.redirect is True if the page is a redirect (boolean) + * self.minor is True if the last edit was marked as minor (boolean) + * self.new is True if the last edit was the first one (boolean) + * self.random is a random number used for the 'Random Page' function (float) + * self.touched is the date of the last cache update (time tuple) + + See http://meta.wikimedia.org/wiki/Cur_table for details. + ''' + + def __init__(self, id, namespace, title, text, comment, userid, username, timestamp, restrictions, counter, redirect, minor, new, random, inversetimestamp, touched): + ''' + Constructor. All parameters should be strings, as read from the SQL + dump. This function will convert them to formats which are more + appropriate for the data types. + ''' + self.id = int(id) + self.namespace = int(namespace) + self.title = title + self.text = text + self.comment = comment + self.userid = int(userid) + self.username = username + # convert to a 9-dimensional time tuple, see http://python.org/doc/2.3.4/lib/module-time.html + self.timestamp = time.strptime(timestamp, '%Y%m%d%H%M%S') + # convert to boolean + self.restrictions = (restrictions != '') + self.counter = int(counter) + self.redirect = (redirect == '1') + self.minor = (minor == '1') + self.new = (new == '1') + self.random = float(random) + # Inversetimestamp is obsolete, so we ignore it. + #self.inversetimestamp = inversetimestamp + + # Basically, I would want to convert touched to time tuple, as I did + # with timestamp. 
But I noticed a problem: in the nds: dump touched + # comes before inversetimestamp, and that would cause strptime to crash. + # So we simply leave touched as it is and hope that this is the only + # exception where entries are mixed up. If you find other such cases, + # please report. + #self.touched = time.strptime(touched, '%Y%m%d%H%M%S') + self.touched = touched + + # MediaWiki escapes apostrophes, backslashes and quotes with + # backslashes. We need to unescape them again. + # This regular expression matches a backslash followed by a group, where + # the group matches either an apostrophe, a backslashes or a quote. + escapedR = re.compile(r'\([\"'])') + # The group \1 is the character we really want, while the leading + # backslash is only escape information we don't need. + self.title = escapedR.sub(r"\1", self.title) + self.text = escapedR.sub(r"\1", self.text) + self.comment = escapedR.sub(r"\1", self.comment) + self.username = escapedR.sub(r"\1", self.username) + + # convert \n and \r to newlines and carriage returns. + self.text = self.text.replace('\r', '\r') + self.text = self.text.replace('\n', '\n') + # comments can also contain newline characters + self.comment = self.comment.replace('\r', '\r') + self.comment = self.comment.replace('\n', '\n') + # I hope that titles and usernames can't :-) + + def full_title(self, underline = True): + ''' + Returns the full page title in the form 'namespace:title', using the + localized namespace titles defined in your family file. + If underline is True, returns the page title with underlines instead of + spaces. + ''' + if not underline: + title = self.title.replace('_', ' ') + else: + title = self.title + namespace_title = wikipedia.getSite().namespace(self.namespace) + if namespace_title == None: + return self.title + else: + if underline: + namespace_title = namespace_title.replace(' ', '_') + return namespace_title + ':' + self.title + + def age(self): + ''' + Returns the time passed since the last edit, in relation to the current + system time, in seconds (floating point number). + ''' + return time.time() - time.mktime(self.timestamp) + +# Represents one parsed SQL dump file. Reads the local file at initialization, +# parses it with a regular expression, and offers access to the resulting +# SQLentry objects through the entries() generator. +class SQLdump(object): + def __init__(self, filename, encoding): + self.filename = filename + self.encoding = encoding + + def entries(self): + ''' + Generator which reads one line at a time from the SQL dump file, and + parses it to create SQLentry objects. Stops when the end of file is + reached. + ''' + # This regular expression will match one SQL database entry (i.e. a + # page), and each group represents an attribute of that entry. + # NOTE: We don't need re.DOTALL because newlines are escaped. 
+ pageR = re.compile("((\d+)," # cur_id (page ID number) + + "(\d+)," # cur_namespace (namespace number) + + "'(.*?)'," # cur_title (page title w/o namespace) + + "'(.*?)'," # cur_text (page contents) + + "'(.*?)'," # cur_comment (last edit's summary text) + + "(\d+)," # cur_user (user ID of last contributor) + + "'(.*?)'," # cur_user_text (user name) + + "'(\d{14})'," # cur_timestamp (time of last edit) + + "'(.*?)'," # cur_restrictions (protected pages have 'sysop' here) + + "(\d+)," # cur_counter (view counter, disabled on WP) + + "([01])," # cur_is_redirect + + "([01])," # cur_minor_edit + + "([01])," # cur_is_new + + "([\d.]+?)," # cur_random (for random page function) + + "'(\d{14})'," # inverse_timestamp (obsolete) + + "'(\d{14})')") # cur_touched (cache update timestamp) + print 'Reading SQL dump' + # Open the file, read it using the given encoding, and replace invalid + # characters with question marks. + import codecs + f=codecs.open(self.filename, 'r', encoding = self.encoding, errors='replace') + eof = False + while not eof: + # Read only one (very long) line because we would risk out of memory + # errors if we read the entire file at once + line = f.readline() + if line == '': + print 'End of file.' + eof = True + self.entries = [] + for id, namespace, title, text, comment, userid, username, timestamp, restrictions, counter, redirect, minor, new, random, inversetimestamp, touched in pageR.findall(line): + new_entry = SQLentry(id, namespace, title, text, comment, userid, username, timestamp, restrictions, counter, redirect, minor, new, random, inversetimestamp, touched) + yield new_entry + f.close() + + def query_percentnames(self): + ''' + yields pages that contain internal links where special characters are + encoded as hexadecimal codes, e.g. 
%F6 + ''' + Rpercentlink = re.compile('[[[^]]*?%[A-F0-9][A-F0-9][^]]*?]]') + for entry in self.entries(): + text = wikipedia.removeLanguageLinks(entry.text) + if Rpercentlink.search(text): + yield entry + + def query_shortpages(self, minsize): + ''' + yields articles that have less than minsize bytes of text + ''' + for entry in self.entries(): + if entry.namespace == 0 and not entry.redirect and len(entry.text) < minsize: + yield entry + + def query_find(self, keyword): + ''' + yields pages which contain keyword + ''' + for entry in self.entries(): + if entry.text.find(keyword) != -1: + yield entry + + def query_findr(self, regex, namespace = None): + ''' + yields pages which contain a string matching the given regular expression + ''' + r = re.compile(regex) + for entry in self.entries(): + if r.search(entry.text) and (namespace == None or entry.namespace == namespace): + yield entry + + def query_unmountedcats(self): + ''' + yields categories which don't have any supercategory + ''' + for entry in self.entries(): + if entry.namespace == 14: + has_supercategory = False + for ns in wikipedia.getSite().category_namespaces(): + if entry.text.find('[[%s:' % ns) != -1: + has_supercategory = True + break + if not has_supercategory: + yield entry + +def query(sqldump, action): + if action == 'percentnames': + for entry in sqldump.query_percentnames(): + yield entry + elif action == 'shortpages': + minsize = int(wikipedia.input(u'Minimum size:')) + for entry in sqldump.query_shortpages(minsize): + yield entry + elif action == 'find': + keyword = wikipedia.input(u'Search for:') + for entry in sqldump.query_find(keyword): + yield entry + elif action == 'findr': + keyword = wikipedia.input(u'Search for:') + for entry in sqldump.query_findr(keyword): + yield entry + elif action == 'unmountedcats': + for entry in sqldump.query_unmountedcats(): + yield entry + elif action == 'baddisambiguation': + for entry in sqldump.entries(): + if entry.namespace == 0 and entry.title.endswith(')') and entry.text.startswith("''") and not entry.text.startswith("'''"): + yield entry + +if __name__=="__main__": + wikipedia.stopme() # No need to have me on the stack, as I'm not contacting the wiki + import sys + action = None + filename = None + for arg in sys.argv[1:]: + arg = wikipedia.argHandler(arg, 'sqldump') + if arg: + if arg.startswith('-sql'): + if len(arg) == 4: + filename = wikipedia.input(u'Please enter the SQL dump's filename: ') + else: + filename = arg[5:] + else: + action = arg + if not filename or not action: + wikipedia.output(__doc__, 'utf-8') + else: + sqldump = SQLdump(filename, wikipedia.myencoding()) + + for entry in query(sqldump, action): + wikipedia.output(u'*[[%s]]' % entry.full_title()) +
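Beyond the command-line actions handled in __main__, the module docstring says the SQLdump class offers a generator over SQLentry objects for use by other bots; a minimal sketch of that calling pattern (assumptions: a local cur-table dump named dump.sql saved as UTF-8, and a configured pywikipedia environment so that full_title() can resolve namespace names):

    sqldump = SQLdump('dump.sql', 'utf-8')
    # list main-namespace articles shorter than 300 bytes, as the 'shortpages' action does
    for entry in sqldump.query_shortpages(300):
        print entry.full_title()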
Copied: archive/trunk/test.py (from rev 9461, trunk/pywikipedia/archive/test.py)
===================================================================
--- archive/trunk/test.py       (rev 0)
+++ archive/trunk/test.py       2011-08-29 15:11:50 UTC (rev 9478)
@@ -0,0 +1,61 @@
+#!/usr/bin/python
+"""
+##################################################
+This script with all its functions has been merged
+into login.py. Please use:
+
+    login.py -test
+
+xqt 2009-10-26
+##################################################
+
+Script to test whether you are logged in
+
+Parameters:
+
+   -all       Try to test on all sites where a username is defined in
+              user-config.py.
+
+   -sysop     Test your sysop account. (Works only with -all)
+"""
+#
+# (C) Rob W.W. Hooft, 2003
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id$'
+#
+import re, sys, wikipedia, config
+
+def show(mysite, sysop=False):
+    if mysite.loggedInAs(sysop=sysop):
+        wikipedia.output(u"You are logged in on %s as %s." % (repr(mysite), mysite.loggedInAs(sysop=sysop)))
+    else:
+        wikipedia.output(u"You are not logged in on %s." % repr(mysite))
+
+def main():
+    testall = False
+    sysop = False
+    for arg in wikipedia.handleArgs():
+        if arg == "-all":
+            testall = True
+        elif arg == "-sysop":
+            sysop = True
+        else:
+            wikipedia.showHelp()
+            return
+    if testall:
+        if sysop:
+            namedict = config.sysopnames
+        else:
+            namedict = config.usernames
+        for familyName in namedict.iterkeys():
+            for lang in namedict[familyName].iterkeys():
+                show(wikipedia.getSite(lang, familyName), sysop)
+    else:
+        show(wikipedia.getSite(), sysop)
+
+if __name__ == "__main__":
+    try:
+        main()
+    finally:
+        wikipedia.stopme()
Copied: archive/trunk/translator.py (from rev 9461, trunk/pywikipedia/archive/translator.py) =================================================================== --- archive/trunk/translator.py (rev 0) +++ archive/trunk/translator.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,465 @@ +# -*- coding: utf-8 -*- + +''' +This module translates a string from one language to another, using +translations given in a hard-coded dictionary. Various dictionaries exist for +different types of text; e.g. type 'geography' is for tables about places and +regions, and 'city' is for tables about cities and villages. + +For each table type, there can be three lists: +* translations - direct replacements. Work in either direction, e.g. if + the bot knows that he should replace 'Location' with 'Ligging' + when translating from English to Dutch, he can also translate + it from Dutch to English. +* regexes - regular expression replacements. These are more powerful than + direct replacements as they support wildcards etc., but only + work in one direction. +* includes - one type can include all items from another type, e.g. when + translating a text of the type 'city', the bot also tries to + apply the translations and regexes given for type 'geography' + because 'city' includes 'geography'. +''' + +# (C) Daniel Herding, 2004 +# +# Distributed under the terms of the MIT license. +# +# + +__version__='$Id: translator.py,v 1.21 2005/12/21 17:51:26 wikipedian Exp $' + +types = { + # translations for images (inside other tables) + "images": { + "translations": [ + { "en":"[[image:", "de":"[[bild:", "nl":"[[afbeelding:", "fr":"[[image:", "af":"[[beeld:" }, + { "en":"[[Image:", "de":"[[Bild:", "nl":"[[Afbeelding:", "fr":"[[Image:", "af":"[[Beeld:" }, + { "en":"larger image", "de":u"Bild vergrößern", "nl":"grotere versie", "fr":u"En détail", "af":"In detail" }, + { "en":"larger image", "de":u"Bild vergrößern", "nl":"groter", "fr":u"En détail", "af":"In detail" }, + # usually used as link description for articles about flags, coats of arms etc. + { "en":"Details", "de":u"Details", "nl":"details", "fr":u"Détails", "af":"Details" }, + ], + }, + + # translations for taxoboxes (for biology articles) + "taxo": { + "translations": [ + # Background colors for table headers, with or without quotation marks (taxoboxes on de: all have quotation marks) + { "en":"bgcolor=pink", "de":"bgcolor="#ffc0c0"", "nl":"bgcolor=#EEEEEE", "fr":"bgcolor=pink" }, + { "en":"bgcolor="pink"", "de":"bgcolor="#ffc0c0"", "nl":"bgcolor="#EEEEEE"", "fr":"bgcolor="pink"" }, + # second table header (below the image) + { "en":"[[Scientific classification]]", "de":"[[Systematik (Biologie)|Systematik]]", "nl":"[[Taxonomie|Wetenschappelijke classificatie]]", "fr":u"Classification [[systématique]]" }, + # main taxobox content + { "en":"[[Domain (biology)|Domain]]:", "de":u"''[[Domäne (Biologie)|Domäne]]:''", "nl":"[[Domain (biologie)|Domain]]:", "fr":"??? (domain)" }, + { "en":"Domain:", "de":u"''[[Domäne (Biologie)|Domäne]]:''", "nl":"[[Domain (biologie)|Domain]]:", "fr":"??? 
(domain)" }, + { "en":"[[Kingdom (biology)|Kingdom]]:", "de":"''[[Reich (Biologie)|Reich]]:''", "nl":"[[Rijk (biologie)|Rijk]]:", "fr":u"[[Règne (biologie)|Règne]]:", }, + { "en":"Kingdom:", "de":"''[[Reich (Biologie)|Reich]]:''", "nl":"[[Rijk (biologie)|Rijk]]:", "fr":u"[[Règne (biologie)|Règne]]:", }, + { "en":"[[Division (biology)|Division]]:", "de":"''[[Abteilung (Biologie)|Abteilung]]:''", }, + { "en":"Division:", "de":"''[[Abteilung (Biologie)|Abteilung]]:''", }, + { "en":"[[Phylum (biology)|Phylum]]:", "de":"''[[Stamm (Biologie)|Stamm]]:''", "nl":"[[Stam (biologie)|Stam]]:", "fr":"[[Embranchement]]:", }, + { "en":"Phylum:", "de":"''[[Stamm (Biologie)|Stamm]]:''", "nl":"[[Stam (biologie)|Stam]]:", "fr":"[[Embranchement]]:", }, + { "en":"[[Subphylum]]:", "de":"''[[Unterstamm]]:''", "nl":"[[Substam (biologie)|Substam]]:", "fr":"[[Sous-embranchement]]:", }, + { "en":"Phylum:", "de":"''[[Unterstamm]]:''", "nl":"[[Substam (biologie)|Substam]]:", "fr":"[[Sous-embranchement]]:", }, + { "en":"[[Superclass (biology)|Superclass]]:", "de":u"''[[Klasse (Biologie)|Überklasse]]:''", "nl":"[[Superklasse (biologie)|Superklasse]]:", "fr":"[[Super-classe (biologie)|Super-classe]]:", }, + { "en":"Superclass:", "de":u"''[[Klasse (Biologie)|Überklasse]]:''", "nl":"[[Superklasse (biologie)|Superklasse]]:", "fr":"[[Super-classe (biologie)|Super-classe]]:", }, + { "en":"[[Class (biology)|Class]]:", "de":"''[[Klasse (Biologie)|Klasse]]:''", "nl":"[[Klasse (biologie)|Klasse]]:", "fr":"[[Classe (biologie)|Classe]]:", }, + { "en":"Class:", "de":"''[[Klasse (Biologie)|Klasse]]:''", "nl":"[[Klasse (biologie)|Klasse]]:", "fr":"[[Classe (biologie)|Classe]]:", }, + { "en":"[[Subclass]]:", "de":"''[[Klasse (Biologie)|Unterklasse]]:''", "nl":"[[Onderklasse]]:", "fr":"[[Sous-classe (biologie)|Sous-classe]]:", }, + { "en":"Subclass:", "de":"''[[Klasse (Biologie)|Unterklasse]]:''", "nl":"[[Onderklasse]]:", "fr":"[[Sous-classe (biologie)|Sous-classe]]:", }, + { "en":"[[Order (biology)|Superorder]]:", "de":u"''[[Ordnung (Biologie)|Überordnung]]:''", "nl":"[[Superorde]]:", }, + { "en":"[[Order (biology)|Order]]:", "de":"''[[Ordnung (Biologie)|Ordnung]]:''", "nl":"[[Orde (biologie)|Orde]]:", "fr":"[[Ordre (biologie)|Ordre]]:" }, + { "en":"Order:", "de":"''[[Ordnung (Biologie)|Ordnung]]:''", "nl":"[[Orde (biologie)|Orde]]:", "fr":"[[Ordre (biologie)|Ordre]]:" }, + { "en":"[[Suborder]]:", "de":"''[[Ordnung (Biologie)|Unterordnung]]:''", "nl":"[[Infraorde (biologie)|Infraorde]]:", "fr":"[[Sous-ordre (biologie)|Sous-ordre]]:", }, + { "en":"Suborder:", "de":"''[[Ordnung (Biologie)|Unterordnung]]:''", "nl":"[[Infraorde (biologie)|Infraorde]]:", "fr":"[[Sous-ordre (biologie)|Sous-ordre]]:", }, + { "en":"[[Family (biology)|Family]]:", "de":"''[[Familie (Biologie)|Familie]]:''", "nl":"[[Familie (biologie)|Familie]]:", "fr":"[[Famille (biologie)|Famille]]:", }, + { "en":"Family:", "de":"''[[Familie (Biologie)|Familie]]:''", "nl":"[[Familie (biologie)|Familie]]:", "fr":"[[Famille (biologie)|Famille]]:", }, + { "en":"[[Subfamily (biology)|Subfamily]]:", "de":"''[[Familie (Biologie)|Unterfamilie]]:''", "nl":"[[Onderfamilie]]:", "fr":"[[Sous-famille (biologie)|Sous-famille]]:", }, + { "en":"Subfamily:", "de":"''[[Familie (Biologie)|Unterfamilie]]:''", "nl":"[[Onderfamilie]]:", "fr":"[[Sous-famille (biologie)|Sous-famille]]:", }, + { "en":"[[Tribe (biology)|Tribe]]:", "de":"''[[Tribus (Biologie)|Tribus]]:''", "nl":"[[Tak (biologie)|Tak]]:", "fr":"??? 
(Tribus)" }, + { "en":"Tribe:", "de":"''[[Tribus (Biologie)|Tribus]]:''", "nl":"[[Tak (biologie)|Tak]]:", "fr":"??? (Tribus)" }, + { "en":"[[Genus]]:", "de":"''[[Gattung (Biologie)|Gattung]]:''", "nl":"[[Geslacht (biologie)|Geslacht]]:", "fr":"[[Genre]]:" }, + { "en":"Genus:", "de":"''[[Gattung (Biologie)|Gattung]]:''", "nl":"[[Geslacht (biologie)|Geslacht]]:", "fr":"[[Genre]]:" }, + { "en":"[[Subgenus]]:", "de":"''[[Gattung (Biologie)|Untergattung]]:''", "nl":"[[Ondergeslacht]]:", "fr":"??? (Sous-genre)" }, + { "en":"Subgenus:", "de":"''[[Gattung (Biologie)|Untergattung]]:''", "nl":"[[Ondergeslacht]]:", "fr":"??? (Sous-genre)" }, + { "en":"[[Species]]:", "de":"''[[Art (Biologie)|Art]]:''", "nl":"[[Soort]]:", "fr":u"[[Espèce]]:" }, + { "en":"Species:", "de":"''[[Art (Biologie)|Art]]:''", "nl":"[[Soort]]:", "fr":u"[[Espèce]]:" }, + # table headers for subdivisions of the current group + { "en":"[[Class (biology)|Classes]]", "de":"[[Klasse (Biologie)|Klassen]]", "nl":"[[Klasse (biologie)|Klassen]]", }, + { "en":"[[Order (biology)|Orders]]", "de":"[[Ordnung (Biologie)|Ordnungen]]", "nl":"[[Orde (biologie)|Orden]]", "fr":"[[Ordre (biologie)|Ordres]]" }, + { "en":"[[Suborder]]s", "de":"[[Ordnung (Biologie)|Unterordnungen]]", "nl":"[[Infraorde (biologie)|Infraorden]]:", "fr":"[[Sous-ordre (biologie)|Sous-ordres]]", }, + { "en":"[[Family (biology)|Families]]", "de":"[[Familie (Biologie)|Familien]]", "nl":"[[Familie (biologie)|Families]]", "fr":"[[Famille (biologie)|Familles]]", }, + { "en":"[[Genus|Genera]]", "de":"[[Gattung (Biologie)|Gattungen]]", "nl":"[[Geslacht (biologie)|Geslachten]]", "fr":"[[Genre (biologie)|Genre]]" }, + { "en":"[[Species]]", "de":"[[Art (Biologie)|Arten]]", "nl":"[[Soort]]en", "fr":u"??? (Espèces)" }, + { "en":"[[Species]] (incomplete)", "de":"[[Art (Biologie)|Arten (Auswahl)]]", "nl":"[[Soort]]en (incompleet)", "fr":u"??? (Espèces (sélection))" }, + # table headers for nl: style taxoboxes (current group is listed in a special section at the bottom) + { "en":"[[Order (biology)|Order]]", "de":"[[Ordnung (Biologie)|Ordnung]]", "nl":"[[Orde (biologie)|Orde]]", "fr":"[[Ordre (biologie)|Ordre]]" }, + { "en":"[[Family (biology)|Family]]", "de":"[[Familie (Biologie)|Familie]]", "nl":"[[Familie (biologie)|Familie]]", "fr":"[[Famille (biologie)|Famille]]", }, + { "en":"[[Genus]]", "de":"[[Gattung (Biologie)|Gattung]]", "nl":"[[Geslacht (biologie)|Geslacht]]", "fr":"[[Genre]]" }, + { "en":"[[Species]]", "de":"[[Art (Biologie)|Art]]", "nl":"[[Soort]]", "fr":u"[[Espèce]]" }, + ], + "regexes": { + "en": { + # de: doesn't have conservation status infos + "{{msg:Status[^}]+}}": {"de":"", }, + }, + }, + "includes": ["images", "taxo_categories"], + }, + + # this should only include classes etc. which appear very often, not every species! 
+ "taxo_categories": { + "translations": [ + # kingdoms + { "en":"[[Animal]]ia", "de":"[[Tiere]] (Animalia)", "nl":"Dieren (''[[Animalia]]'')", }, + { "en":"[[Plant]]ae", "de":"[[Pflanzen]] (Plantae)", }, + # divisions + { "en":"[[flowering plant|Magnoliophyta]]", "de":u"[[Blütenpflanzen]] (Magnoliophyta)", }, + # phylums + { "en":"[[Anthropod]]a", "de":u"[[Gliederfüßler]] (Anthropoda)", }, + { "en":"[[Chordata]]", "de":"[[Chordatiere]] (Chordata)", "nl":"Chordadieren (''[[Chordata]]'')", }, + { "en":"[[Chordate|Chordata]]", "de":"[[Chordatiere]] (Chordata)", "nl":"Chordadieren (''[[Chordata]]'')", }, + # subphylums + { "en":"[[Vertebrata]]", "de":"[[Wirbeltiere]] (Vertebrata)", "nl":"Gewervelden (''[[Vertebrata]]'')", }, + # superclasses + # classes + { "en":"[[Aves]]", "de":u"[[Vögel]] (Aves)", "nl":"Vogels (''[[Aves]]'')", }, + { "en":"[[Insect]]a", "de":"[[Insekten]] (Insecta)", }, + { "en":"[[Mammal]]ia", "de":u"[[Säugetiere]] (Mammalia)", "nl":"Zoogdieren (''[[Mammalia]]'')", }, + { "en":"[[Mammalia]]", "de":u"[[Säugetiere]] (Mammalia)", "nl":"Zoogdieren (''[[Mammalia]]'')", }, + { "en":"[[dicotyledon|Magnoliopsida]]", "de":u"Zweikeimblättrige (Magnoliopsida)", }, + { "de":"Reptilien (Reptilia)", "nl":"Reptielen (''[[Reptilia]]'')", }, + ], + "regexes": { + "de": { + # change [[Hunde]] (Canidae) to Hunde (''[[Canidae]]'') for nl: + # and to [[Canidae]] for en: + "[[(?P<german>[^[]+)]] ((?P<latin>.+))": {"en":"[[\g<latin>]]", "nl":"\g<german> (''[[\g<latin>]]'')", }, + }, + "nl": { + # change Knaagdieren (''[[Rodentia]]'') to [[Knaagdieren]] (Rodentia) + "(?P<dutch>[a-zA-Z ]+) ([[''(?P<latin>[^[]+)'']])": {"de":"[[\g<dutch>]] (\g<latin>)", }, + "(?P<dutch>[a-zA-Z ]+) (''[[(?P<latin>[^[]+)]]'')": {"de":"[[\g<dutch>]] (\g<latin>)", }, + "(?P<dutch>[a-zA-Z ]+) ([[<i>(?P<latin>[^[]+)</i>]])": {"de":"[[\g<dutch>]] (\g<latin>)", }, + "(?P<dutch>[a-zA-Z ]+) (<i>[[(?P<latin>[^[]+)]]</i>)": {"de":"[[\g<dutch>]] (\g<latin>)", }, + }, + }, + + }, + + + # plants get the same table color as animals on de:, but on en: they are green instead of pink + "plant": { + "translations": [ + { "en":"bgcolor=lightgreen", "de":"bgcolor="#ffc0c0"", }, + { "en":"bgcolor="lightgreen"", "de":"bgcolor="#ffc0c0"", }, + ], + "includes": ["taxo"], + }, + + # regular expressions for number formats + "numbers": { + "translations": [ + # miljoen shouldn't be abbreviated on nl: + { "en":"mill.", "de":"Mio.", "nl":"miljoen", }, + { "en":"bill.", "de":"Mrd." 
}, + ], + "regexes": { + "fr": { + # fr uses or space to separate thousands, de uses dots + # note: this doesn't work for numbers > 1,000,000, don't know why + "(?P<pre>\d+) (?P<block>\d\d\d)": {"de":"\g<pre>.\g<block>", }, + "(?P<pre>\d+) (?P<block>\d\d\d)": {"de":"\g<pre>.\g<block>", }, + }, + "en": { + # de uses dots to separate thousands, en uses commas + # de uses commas to indicate floating point numbers, en uses dots + # switch both - temporary placeholder required + "(?P<pre>\d+),(?P<block>\d\d\d)": {"de":"\g<pre>TEMPORARY_DOT\g<block>", }, + "(?P<pre>\d+).(?P<block>\d+)": {"de":"\g<pre>,\g<block>", }, + "TEMPORARY_DOT": {"de":".", }, + }, + "de": { + # de uses dots to separate thousands, en uses commas + # de uses commas to indicate floating point numbers, en uses dots + # switch both - temporary placeholder required + "(?P<pre>\d+).(?P<block>\d\d\d)": {"en":"\g<pre>TEMPORARY_COMMA\g<block>", }, + "(?P<pre>\d+),(?P<block>\d+)": {"en":"\g<pre>.\g<block>", }, + "TEMPORARY_COMMA": {"en":",", }, + }, + }, + }, + + "months": { + "translations": [ + { "sl":"januar", "it":"gennaio", "en":"January", "de":"Januar", "fr":"janvier", "nl":"januari", "af":"Januarie"}, + { "sl":"februar", "it":"febbraio", "en":"February", "de":"Februar", "fr":u"février", "nl":"februari", "af":"Februarie"}, + { "sl":"marec", "it":"marzo", "en":"March", "de":u"März", "fr":"mars", "nl":"maart", "af":"Maart"}, + { "sl":"april", "it":"aprile", "en":"April", "de":"April", "fr":"avril", "nl":"april", "af":"April"}, + { "sl":"maj", "it":"maggio", "en":"May", "de":"Mai", "fr":"mai", "nl":"mei", "af":"Mei"}, + { "sl":"junij", "it":"giugno", "en":"June", "de":"Juni", "fr":"juin", "nl":"juni", "af":"Junie"}, + { "sl":"julij", "it":"luglio", "en":"July", "de":"Juli", "fr":"juillet", "nl":"juli", "af":"Julie"}, + { "sl":"avgust", "it":"agosto", "en":"August", "de":"August", "fr":u"août", "nl":"augustus", "af":"Augustus"}, + { "sl":"september", "it":"settembre", "en":"September", "de":"September", "fr":"septembre", "nl":"september", "af":"September"}, + { "sl":"oktober", "it":"ottobre", "en":"October", "de":"Oktober", "fr":"octobre", "nl":"oktober", "af":"Oktober"}, + { "sl":"november", "it":"novembre", "en":"November", "de":"November", "fr":"novembre", "nl":"november", "af":"November"}, + { "sl":"december", "it":"dicembre", "en":"December", "de":"Dezember", "fr":u"décembre", "nl":"december", "af":"Desember"}, + ] + }, + + # conversion between number formats + "dates": { + "regexes": { + "de": { + # dd.mm.yy and dd.mm.yyyy format + "(?P<day>\d\d).(?P<month>\d\d).(?P<year>(\d\d)+)": {"nl":"\g<day>-\g<month>-\g<year>", }, + }, + }, + }, + + + + # units of measurement etc. + # only for internal use + "units": { + "translations": [ + { "en":"[[Square kilometre|km²]]", "de":"[[Quadratkilometer|km²]]", "nl":"[[Vierkante kilometer|km²]]", }, + { "en":u"[[Square kilometre|km²]]", "de":u"[[Quadratkilometer|km²]]", "nl":u"[[Vierkante kilometer|km²]]", }, + { "en":"as of ", "de":"Stand: ", }, + { "en":"years", "de":"Jahre", "nl":"jaar"}, + ] + }, + + # general geographical terms etc. 
+ # only for internal use + "geography": { + "translations": [ + # header + { "en":"Base data", "de":"Basisdaten", "nl":"Basisgegevens", "fr":"Informations", }, + { "en":"[[Area]]:", "de":u"[[Fläche]]:", "nl":"Oppervlakte:", "fr":"[[Superficie]]:", "eo":"Areo:",}, + { "en":"[[Population]]:", "de":"[[Einwohner]]:", "nl":"Inwoneraantal:", "fr":u"[[Population]]:", "eo":u"Logantaro:", }, + { "en":"[[Population density]]:", "de":u"[[Bevölkerungsdichte]]:", "nl":"[[Bevolkingsdichtheid]]:", }, + { "en":"inh./km²", "de":"Einw./km²", "nl":"inw./km²", "fr":"hab/km²", }, + { "en":u"inh./km²", "de":u"Einw./km²", "nl":u"inw./km²", "fr":u"hab/km²", }, + { "en":"inhabitants/km²", "de":"Einwohner/km²", "nl":"inwoners / km²", }, + { "en":u"inhabitants/km²", "de":u"Einwohner/km²", "nl":u"inwoners / km²", }, + { "en":"inhabitants per km²", "de":"Einwohner pro km²", "nl":"inwoners per km²", }, + { "en":u"inhabitants per km²", "de":u"Einwohner pro km²", "nl":u"inwoners per km²", }, + { "en":"inh.", "de":"Einw.", "nl":"inw.", "fr":"hab.", }, + { "en":"above [[sea level]]", "de":u"ü. [[Normalnull|NN]]", "nl":"boven [[Normaal Amsterdams Peil|NAP]]", }, + { "en":"location", "de":"Geografische Lage", "nl":"Ligging", "fr":"Localisation", }, + # longitude, latitude + { "en":"' north", "de":u"' nördlicher Breite", "nl":"' NB" }, + { "en":"' north", "de":u"' nördl. Breite", "nl":"' NB" }, + { "en":"' north", "de":"' n. Br.", "nl":"' NB" }, + { "en":"' east", "de":u"' östlicher Länge", "nl":"' OL" }, + { "en":"' east", "de":u"' östl. Länge", "nl":"' OL" }, + { "en":"' east", "de":u"' ö. L.", "nl":"' OL" }, + { "en":"Map", "de":"Karte", "nl":"Kaart", }, + { "en":"Coat of Arms", "de":"Wappen", "nl":"Wapen", "fr":"Blason" }, + ], + "includes": ["units"], + }, + + "city": { + "translations": [ + { "en":"[[Location]]:", "de":"[[Geografische Lage]]:", "nl":"Ligging", }, + { "en":"[[Altitude]]:", "de":u"[[Höhe]]:", "nl":"Hoogte:", }, + { "en":"Highest point:", "de":u"Höchster Punkt:", "nl":"Hoogste punt:",}, + { "en":"Lowest point:", "de":"Niedrigster Punkt:", "nl":"Laagste punt:"}, + { "en":"[[Postal code]]:", "de":"[[Postleitzahl]]:", "nl":"[[Postcode]]:", }, + { "en":"[[Postal code]]s:", "de":"[[Postleitzahl]]en:", "nl":"[[Postcode]]s:", }, + { "en":"[[Area code]]:", "de":"[[Telefonvorwahl|Vorwahl]]:", "nl":"[[Netnummer]]:", }, + { "en":"[[Area code]]s:", "de":"[[Telefonvorwahl|Vorwahlen]]:", "nl":"[[Netnummer]]s:", }, + { "en":"[[License plate]]:", "de":"[[KFZ-Kennzeichen]]:", "nl":"[[Autonummerbord]]:", }, + { "en":"[[License plate]]:", "de":"[[Kfz-Kennzeichen]]:", "nl":"[[Autonummerbord]]:", }, + { "en":"City structure:", "de":"Gliederung des Stadtgebiets:", "nl":"Ondergemeentelijke indeling:", }, + # town hall snail mail address + { "en":"Municipality's address:", "de":"Adresse der Gemeindeverwaltung:", "nl":"Adres gemeentehuis:", }, + # city hall snail mail address + { "en":"Municipality's address:", "de":"Adresse der Stadtverwaltung:", "nl":"Adres stadhuis:", }, + { "en":"Website:", "de":"Webseite:", "nl":"Website:" }, + { "en":"Website:", "de":"Website:", "nl":"Website:" }, + { "en":"E-Mail adress:", "de":"[[E-Mail]]-Adresse:", "nl":"Email-adres:", }, + { "en":"E-Mail adress:", "de":"E-Mail-Adresse:", "nl":"Email-adres:", }, + # table header + { "en":"Politics", "de":"Politik", "nl":"Politiek", }, + # female mayor + { "en":"[[Mayor]]:", "de":u"[[Bürgermeister]]in:", "nl":"[[Burgemeester]]:", }, + { "en":"[[Mayor]]:", "de":u"[[Bürgermeisterin]]:", "nl":"[[Burgemeester]]:", }, + # male mayor + { "en":"[[Mayor]]:", 
"de":u"[[Bürgermeister]]:", "nl":"[[Burgemeester]]:", }, + { "en":"Governing [[Political party|party]]:", "de":"Regierende [[Politische Partei|Partei]]", "nl":"Regerende partij", }, + { "en":"Governing [[Political party|parties]]:", "de":"Regierende [[Politische Partei|Parteien]]", "nl":"Regerende partijen", }, + { "en":"Majority [[Political party|party]]:", "de":"[[Politische Partei|Mehrheitspartei]]", "nl":"Meerderheidspartij"}, + { "en":"Debts:", "de":"Schulden:", }, + { "en":"[[Unemployment]]:", "de":"[[Arbeitslosenquote]]:", "nl":"Werkloosheidspercentage:", }, + { "de":u"[[Ausländeranteil]]:", "nl":"Percentage buitenlanders", }, + { "en":"Age distribution:", "de":"Altersstruktur:", "nl":"Leeftijdsopbouw:", }, + { "de":"Stadtteile", "nl":"wijken"}, + { "de":"[[Stadtbezirk]]e", "nl":"deelgemeenten" }, + { "de":"Stadtbezirke", "nl":"deelgemeenten" }, + { "en":"Independent", "de":"Parteilos", "nl":"geen partij" }, + { "en":"Region", "de":"[[Region]]", "nl":"Landstreek" }, + ], + "includes": ["images", "geography", "numbers"], + }, + + # translations for cities in Germany + "city-de": { + "translations": [ + { "en":"[[Bundesland]]:", "de":"[[Bundesland]]:", "nl":"[[Deelstaat (Duitsland)|Deelstaat]]", }, + { "en":"[[Regierungsbezirk]]:", "de":"[[Regierungsbezirk]]:", "nl":"[[Regierungsbezirk]]:", }, + { "en":"[[District]]:", "de":"[[Landkreis|Kreis]]:", "nl":"[[District]]", }, + { "en":"[[District]]:", "de":"[[Landkreis]]:", "nl":"[[District]]", }, + { "en":"district-free town", "de":"[[kreisfreie Stadt]]", "nl":"[[stadsdistrict]]", }, + { "en":"District-free town", "de":"[[Kreisfreie Stadt]]", "nl":"[[Stadsdistrict]]", }, + { "en":"District-free town", "de":"[[Stadtkreis]]", "nl":"[[Stadsdistrict]]", }, + { "en":"[[Municipality key]]:", "de":"[[Amtliche Gemeindekennzahl]]:", }, + { "en":"[[Municipality key]]:", "de":u"[[Amtlicher Gemeindeschlüssel]]:", }, + { "en":"urban districts", "de":"[[Stadtbezirk]]e", "nl":"stadsdelen", }, + # female first mayor, no exact translation in en: + { "en":"[[Mayor]]:", "de":u"[[Oberbürgermeisterin]]:", "nl":"[[Burgemeester]]:"}, + { "en":"[[Mayor]]:", "de":u"[[Oberbürgermeister]]in:", "nl":"[[Burgemeester]]:"}, + # male first mayor, no exact translation in en: + { "en":"[[Mayor]]:", "de":u"[[Oberbürgermeister]]:", "nl":"[[Burgemeester]]:"}, + # "bis" is used between postal codes + { "en":" to ", "de":" bis ", "nl":"t/m"}, + # some cities have demographic info which is titled "Bevölkerung" (population). The spaces are important + # because "Bevölkerung" is also a substring of "Bevölkerungsdichte (population density). 
+ { "de":u" Bevölkerung ", "nl":" Demografie ", }, + + # parties + { "en":"[[Christian Democratic Union of Germany|CDU]]", "de":"[[CDU]]", "nl":"[[Christlich Demokratische Union|CDU]]"}, + { "en":"[[Social Democratic Party of Germany|SPD]]", "de":"[[SPD]]", "nl":"[[Sozialdemokratische Partei Deutschlands|SPD]]"}, + { "en":"[[Christian Social Union in Bavaria|CSU]]", "de":"[[CSU]]", "nl":"[[CSU]]"}, + { "en":"[[Free Democratic Party of Germany|FDP]]", "de":"[[FDP (Deutschland)|FDP]]", "nl":"[[FDP]]"}, + { "en":u"[[German Green Party|Bündnis 90/Die Grünen]]", "de":u"[[Bündnis 90/Die Grünen]]", "nl":u"[[Die Grünen]]"}, + { "en":"[[Party of Democratic Socialism|PDS]]", "de":"[[PDS]]", "nl":"[[PDS]]"}, + # Bundeslaender + { "en":"[[Bavaria]]", "de":"[[Bayern]]", "nl":"[[Beieren]]"}, + { "en":"[[Bremen (state)|Bremen]]", "de":"[[Bremen (Land)|Bremen]]", "nl":"[[Bremen]]"}, + { "en":"[[Hesse]]", "de":"[[Hessen]]", "nl":"[[Hessen]]"}, + { "en":"[[Mecklenburg-Western Pomerania]]", "de":"[[Mecklenburg-Vorpommern]]", "nl":"[[Mecklenburg-Voorpommeren]]"}, + { "en":"[[Lower Saxony]]", "de":"[[Niedersachsen]]", "nl":"[[Nedersaksen]]"}, + { "en":"[[North Rhine-Westphalia]]", "de":"[[Nordrhein-Westfalen]]", "nl":"[[Noordrijn-Westfalen]]"}, + { "en":"[[Rhineland-Palatinate]]", "de":"[[Rheinland-Pfalz]]", "nl":"[[Rijnland-Palts]]"}, + { "en":"[[Saxony]]", "de":"[[Sachsen (Bundesland)|Sachsen]]", "nl":"[[Saksen (deelstaat)|Saksen]]"}, + { "en":"[[Saxony-Anhalt]]", "de":"[[Sachsen-Anhalt]]", "nl":"[[Saksen-Anhalt]]"}, + { "en":"[[Schleswig-Holstein]]", "de":"[[Schleswig-Holstein]]", "nl":"[[Sleeswijk-Holstein]]"}, + { "en":"[[Thuringia]]", "de":u"[[Thüringen]]", "nl":u"[[Thüringen]]",}, + ], + "regexes": { + "de": { + # image alt text + "Deutschlandkarte, (?P<city>.+) markiert": {"en":"Map of Germany, \g<city> marked", "nl":"Kaart van Duitsland met de locatie van \g<city>", }, + "Karte Deutschlands, (?P<city>.+) markiert": {"en":"Map of Germany, \g<city> marked", "nl":"Kaart van Duitsland met de locatie van \g<city>", }, + "Karte (?P<city>.+) in Deutschland": {"en":"Map of Germany, \g<city> marked", "nl":"Kaart van Duitsland met de locatie van \g<city>", }, + # nl: doesn't want Municipality Number + u"|[-]+ bgcolor="#FFFFFF"[\r\n]+| *[[Amtliche( Gemeindekennzahl|r Gemeindeschlüssel)]]:[ |\r\n]+[\d -]+[\r\n]+": { "nl":"", }, + }, + }, + "includes": ["city", "dates"], + + }, + + # French départements + "dep": { + "translations": [ + # some entries on fr: lack colons, others have spaces before the colons. 
+ { "de":"[[Region (Frankreich)|Region]]:", "fr":u"[[Régions françaises|Région]] :", "eo":"[[Francaj regionoj|Regiono]]:", }, + { "de":"[[Region (Frankreich)|Region]]:", "fr":u"[[Régions françaises|Région]]:", "eo":"[[Francaj regionoj|Regiono]]:", }, + { "de":u"[[Präfektur (Frankreich)|Präfektur]]:", "fr":u"[[Préfecture]] :", "eo":"[[Prefektejo]]:" }, + { "de":u"[[Präfektur (Frankreich)|Präfektur]]:", "fr":u"[[Préfecture]]:", "eo":"[[Prefektejo]]:"}, + { "de":u"[[Unterpräfektur]]en:", "fr":u"[[Sous-préfecture]]s :", }, + { "de":u"[[Unterpräfektur]]en:", "fr":u"[[Sous-préfecture]]s:", }, + { "de":u"[[Unterpräfektur]]:", "fr":u"[[Sous-préfecture]] :", }, + { "de":u"[[Unterpräfektur]]:", "fr":u"[[Sous-préfecture]]:", }, + { "de":"insgesamt", "fr":"Totale", }, + # the next three items are already in the list "geography", but someone forgot the colons on fr: + { "de":u"[[Einwohner]]:", "fr":u"[[Population]]", "eo":u"Lo\u011dantaro:", }, + { "de":u"[[Bevölkerungsdichte|Dichte]]:", "fr":u"[[Densité de population|Densité]]", }, + { "de":u"[[Fläche]]:", "fr":"[[Superficie]]", "eo":"Areo:", }, + # another workaround for a forgotten colon + { "de":"''</small>:", "fr":"''</small>", }, + { "de":"[[Arrondissement]]s:", "fr":"[[Arrondissement]]s", }, + { "de":"[[Kanton (Frankreich)|Kantone]]:", "fr":u"[[Cantons français|Cantons]]", }, + { "de":"[[Kommune (Frankreich)|Kommunen]]:", "fr":"[[Communes de France|Communes]]", }, + { "de":u"Präsident des<br>[[Generalrat (Frankreich)|Generalrats]]:", + "fr":u"[[Président du Conseil général|Président du Conseil<br> général]]", }, + ], + "regexes": { + "fr": { + "[[[aA]rrondissements (des |du |de la |de l'|d'|de )": {"de":u"[[Arrondissements im Département ", }, + "[[[cC]ommunes (des |du |de la |de l'|d'|de )": {"de":u"[[Kommunen im Département ", }, + "[[[cC]antons (des |du |de la|de l'|d'|de )": {"de":u"[[Kantone im Département ", }, + "Blason (des |du |de la |de l'|d'|de )": {"de":"Wappen von ", }, + # image alt text + "Localisation (des |du |de la |de l'|d'|de )(?P<dep>.+?) en France": {"de":"Lage von \g<dep> in Frankreich", }, + }, + }, + "includes": ["numbers", "images", "geography"], + }, +} + +import wikipedia, string, re + +class Global(object): + debug = False + +# Prints text on the screen only if in debug mode. +# Argument text should be raw unicode. +def print_debug(text): + if Global.debug: + wikipedia.output(text) + +# Translate the string given as argument 'text' from language 'from_lang' to +# language 'to_lang', using translation list 'type' in above dictionary. +# if debug_mode=True, status messages are displayed. 
+def translate(text, type, from_lang, debug_mode=False, to_lang=None):
+    if to_lang is None:
+        to_lang = wikipedia.getSite().lang
+    if debug_mode:
+        Global.debug = True
+    if type == "":
+        return text
+    else:
+        print_debug("\n Translating type " + type)
+        # check if the translation database knows this type of table
+        if not type in types:
+            print "Unknown table type: " + type
+            return
+        if "translations" in types.get(type):
+            print_debug("\nDirect translations for type " + type + "\n")
+            for item in types.get(type).get("translations"):
+                # check if the translation database includes the source language
+                if not from_lang in item:
+                    print_debug(from_lang + " translation for item not found in translation table, skipping item")
+                    continue
+                # if it's necessary to replace a substring
+                if string.find(text, item.get(from_lang)) > -1:
+                    # check if the translation database includes the target language
+                    if not to_lang in item:
+                        print_debug("Can't translate \"" + item.get(from_lang) + "\". Please make sure that there is a translation in copy_table.py.")
+                    else:
+                        print_debug(item.get(from_lang) + " => " + item.get(to_lang))
+                        # translate a substring
+                        text = string.replace(text, item.get(from_lang), item.get(to_lang))
+        if 'regexes' in types.get(type):
+            # work on regular expressions
+            print_debug("\nWorking on regular expressions for type " + type + "\n")
+            regexes = types.get(type).get("regexes")
+            if from_lang in regexes:
+                for item in regexes.get(from_lang):
+                    # only work on regular expressions that have a replacement for the target language
+                    if to_lang in regexes.get(from_lang).get(item):
+                        replacement = regexes.get(from_lang).get(item).get(to_lang)
+                        regex = re.compile(item)
+                        # if the regular expression doesn't match anyway, we don't want it to print a debug message
+                        while re.search(regex, text):
+                            print_debug(item + " => " + replacement)
+                            text = re.sub(regex, replacement, text)
+        # recursively use translation lists which are included in the current list
+        if "includes" in types.get(type):
+            for inc in types.get(type).get("includes"):
+                text = translate(text, inc, from_lang, debug_mode, to_lang)
+        return text
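A minimal sketch of how this archived module can be driven, assuming a configured pywikipedia trunk checkout on Python 2 (so that wikipedia.py and translator.py are importable); the wikitext snippet and file name below are invented for the example:

    # -*- coding: utf-8 -*-
    # Illustrative only: run from inside an old pywikipedia trunk checkout.
    import wikipedia
    import translator

    snippet = u"[[Image:Colosseum.jpg]] larger image"
    # Direct ("translations") lookup: English table phrases become German ones.
    result = translator.translate(snippet, "images", "en", to_lang="de")
    wikipedia.output(result)   # expected: [[Bild:Colosseum.jpg]] Bild vergrößern

The "regexes" and "includes" branches follow the same pattern: re.sub rules are applied for the source language, and included types are handled by recursing into translate() with the same arguments.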
Copied: archive/trunk/windows_chars.py (from rev 9461, trunk/pywikipedia/archive/windows_chars.py) =================================================================== --- archive/trunk/windows_chars.py (rev 0) +++ archive/trunk/windows_chars.py 2011-08-29 15:11:50 UTC (rev 9478) @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +""" +Script to replace bad Windows-1252 (cp1252) characters with +HTML entities on ISO 8859-1 wikis. Don't run this script on a UTF-8 wiki. + +Syntax: python windows_chars.py [pageTitle] [file[:filename]] [sql[:filename]] + +Command line options: + + -file:XYZ reads a list of pages, which can for exampagee be gotten through + Looxix's robot. XYZ is the name of the file from which the + list is taken. If XYZ is not given, the user is asked for a + filename. + Page titles should be in [[double-square brackets]]. + + -sql:XYZ reads a local SQL cur dump, available at + http://download.wikimedia.org/. Searches for pages with + Windows-1252 characters, and tries to repair them on the live + wiki. Example: + python windows_chars.py -sql:20040711_cur_table.sql.sql -lang:es + +""" +# +# (C) Daniel Herding, 2004 +# +# Distributed under the terms of the MIT license. +# +__version__='$Id: windows_chars.py,v 1.27 2005/12/21 17:51:26 wikipedian Exp $' +# +import wikipedia, config +import replace, pagegenerators +import re, sys + +# Summary message +msg={ + 'en':u'robot: changing Windows-1252 characters to HTML entities', + 'fa':u'ربات: تغییر نویسههای Windows-1252 به نهادهای اچتیامال', + 'de':u'Bot: Wandle Windows-1252-Zeichen in HTML-Entitäten um', + 'fr':u'Bot: Modifie caracteres Windows-1252 vers entités HTML', + 'he':u'רובוט: משנה תווים בקידוד Windows-1252 ליישויות HTML', + 'ia':u'Robot: modification de characteres Windows-1252 a entitates HTML', + } + +# characters that are in Windows-1252), but not in ISO 8859-1 +replacements = [ + (u"\x80", u"€"), # euro sign + (u"\x82", u"‚"), # single low-9 quotation mark + (u"\x83", u"ƒ"), # latin small f with hook = function = florin + (u"\x84", u"„"), # double low-9 quotation mark + (u"\x85", u"…"), # horizontal ellipsis = three dot leader + (u"\x86", u"†"), # dagger + (u"\x87", u"‡"), # double dagger + (u"\x88", u"ˆ"), # modifier letter circumflex accent + (u"\x89", u"‰"), # per mille sign + (u"\x8A", u"Š"), # latin capital letter S with caron + (u"\x8B", u"‹"), # single left-pointing angle quotation mark + (u"\x8C", u"Œ"), # latin capital ligature OE + (u"\x8E", u"Ž"), # latin capital letter Z with caron + (u"\x91", u"‘"), # left single quotation mark + (u"\x92", u"’"), # right single quotation mark + (u"\x93", u"“"), # left double quotation mark + (u"\x94", u"”"), # right double quotation mark + (u"\x95", u"•"), # bullet = black small circle + (u"\x96", u"–"), # en dash + (u"\x97", u"—"), # em dash + (u"\x98", u"˜"), # small tilde + (u"\x99", u"™"), # trade mark sign + (u"\x9A", u"š"), # latin small letter s with caron + (u"\x9B", u"&8250;"), # single right-pointing angle quotation mark + (u"\x9C", u"œ"), # latin small ligature oe + (u"\x9E", u"ž"), # latin small letter z with caron + (u"\x9F", u"Ÿ") # latin capital letter Y with diaeresis +] + +class SqlWindows1252PageGenerator: + """ + opens a local SQL dump file, searches for pages with Windows-1252 + characters. 
+ """ + def __init__(self, filename): + self.filename = filename + + def __iter__(self): + # open SQL dump and read page titles out of it + import sqldump + sqldump = sqldump.SQLdump(self.filename, 'latin-1') + for entry in sqldump.entries(): + for char in replacements.keys(): + if entry.text.find(char) != -1: + page = wikipedia.Page(wikipedia.getSite(), entry.full_title()) + yield page + break + +class WindowsCharsBot: + def __init__(self, generator): + self.generator = generator + + def run(self): + replaceBot = replace.ReplaceRobot(self.generator, replacements) + replaceBot.run() + +def main(): + # this temporary array is used to read the page title. + pageTitle = [] + gen = None + + for arg in sys.argv[1:]: + arg = wikipedia.argHandler(arg, 'windows_chars') + if arg: + if arg.startswith('-file'): + if len(arg) == 5: + filename = wikipedia.input(u'please enter the list's filename: ') + else: + filename = arg[6:] + gen = pagegenerators.TextfilePageGenerator(filename) + elif arg.startswith('-sql'): + if len(arg) == 4: + sqlfilename = wikipedia.input(u'please enter the SQL dump's filename: ') + else: + sqlfilename = arg[5:] + gen = SqlWindows1252PageGenerator(sqlfilename) + else: + pageTitle.append(arg) + + # if a single page is given as a command line argument, + # reconnect the title's parts with spaces + if pageTitle != []: + page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle)) + gen = iter([page]) + + # get edit summary message + wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg)) + + if not gen: + wikipedia.showHelp('windows_chars') + elif wikipedia.getSite().encoding() == "utf-8": + print "There is no need to run this robot on UTF-8 wikis." + else: + preloadingGen = pagegenerators.PreloadingGenerator(gen) + bot = WindowsCharsBot(preloadingGen) + bot.run() + +if __name__ == "__main__": + try: + main() + finally: + wikipedia.stopme()