#!/usr/bin/python
"""Migrate users and pages from a WakkaWiki database into MediaWiki.

The script reads the ``wakka_users`` and ``wakka_pages`` tables, converts
the Wakka markup of the latest revision of every page to MediaWiki markup
and fills the MediaWiki ``user``, ``cur``, ``links`` and ``brokenlinks``
tables.
"""

# hashlib supersedes the ``md5`` module, which no longer exists in Python 3.
import hashlib
import re
import sys

try:
    import MySQLdb
except ImportError:
    # Keep the markup converter importable (and testable) without the
    # database driver; main() reports the missing driver explicitly.
    MySQLdb = None

# One alternation per Wakka construct.  The group numbers are relied upon by
# WakkaDoc.replace():
#   1-2   %%code%% block          3-4   ==header== (3 is the named "start")
#   5-6   list indent / marker    7-10  [[target display text]]
# The remaining alternatives (URLs, toggles, breaks, interwiki and CamelCase
# words, bare newline) are told apart by the matched text itself.
w2m_cre = re.compile(
    r"(%%)(.*?)%%|(?P<start>={2,6})(.+?)(?P=start)|"
    + r"\b[a-z]+://\S+|''|\"\"|\*\*|##|__|//|----|---|"
    + r"\n([ \t]+)(-|[0-9a-zA-Z]+\))|"
    + r"(\[\[)([^ \t\n\r\f\v\]]+)(\s+([^\n\r\f\v\]]+))?\]\]|"
    + r"\b[A-Z][A-Za-z]+?[:][A-Za-z0-9]*?\b|"
    + r"\b[A-Z][a-z]+?[A-Z0-9][A-Za-z0-9]*?\b|\n",
    re.MULTILINE | re.DOTALL,
)

# A bare CamelCase word, i.e. an implicit Wakka page link.
_CAMELCASE_CRE = re.compile(r"[A-Z][a-z]+[A-Z0-9][A-Za-z0-9]*")


def WIKIVERSION(maj, min, patch):
    """Pack a three-part version number into one comparable integer."""
    return maj * 256 * 256 + min * 256 + patch


__debug = True
__wikiversion = WIKIVERSION(1, 2, 6)

# Set by main(); used by migrate_pages() to resolve page authors.
usermap = None

# Options stored in user_options for every migrated account.
useropt = {
    "quickbar": 1,
    "underline": 1,
    "hover": 1,
    "cols": 80,
    "rows": 25,
    "searchlimit": 20,
    "contextlines": 5,
    "contextchars": 50,
    "skin": 0,
    "math": 1,
    "rcdays": 7,
    "rclimit": 50,
    "highlightbroken": 1,
    "stubthreshold": 0,
    "previewontop": 1,
    "editsection": 1,
    "editsectiononrightclick": 0,
    "showtoc": 1,
    "showtoolbar": 1,
    "date": 0,
    "searchNs-1": 0,
    "searchNs0": 1,
    "searchNs1": 0,
    "searchNs2": 0,
    "searchNs3": 0,
    "searchNs4": 0,
    "searchNs5": 0,
    "searchNs6": 0,
    "searchNs7": 0,
    "rememberpassword": 0,
}

# The same options serialised as newline-separated "key=value" pairs.
useropts = "\n".join("%s=%s" % (key, value) for key, value in useropt.items())


class UserMap(object):
    """Cache of user name -> MediaWiki user id lookups."""

    def __init__(self, db):
        """Create the map on top of a cursor obtained from *db*."""
        self.users = {}
        self.curs = db.cursor()

    def uid(self, uname):
        """Return the user id for *uname*, or the string "0" if unknown.

        Successful lookups are cached; misses are queried again each time.
        """
        if uname in self.users:
            return self.users[uname]
        # The parameters must be a sequence: "( uname )" is just a string.
        self.curs.execute(
            "SELECT user_id FROM user WHERE user_name = %s", (uname,)
        )
        res = self.curs.fetchone()
        if not res:
            return "0"
        self.users[uname] = res[0]
        return res[0]


class WakkaDoc(object):
    """Converter for one page of Wakka markup.

    After convert() has run, ``links`` holds the titles of the wiki pages
    the document links to, in order of first appearance.
    """

    # NOTE(review): the HTML literals in this class ("<b>", "<br>", "<pre>",
    # ...) were reconstructed.  The reviewed copy of the file had all
    # angle-bracket markup stripped, leaving empty strings and raw line
    # breaks inside the literals -- confirm against the original script.
    _TOGGLES = {
        "**": ("bold", "<b>", "</b>"),
        "//": ("italic", "<i>", "</i>"),
        "__": ("underline", "<u>", "</u>"),
        "##": ("monospace", "<tt>", "</tt>"),
    }

    def __init__(self, data):
        """Prepare the conversion of the Wakka source text *data*."""
        self.bold = False
        self.italic = False
        self.underline = False
        self.monospace = False
        self.escape = False
        self.prefix = ""
        self.level = 0
        self.br = True
        self.data = data
        self.links = []
        self._seen_links = set()

        # Find the highest section level: the longest run of "=" (2 to 6)
        # present in the text, or 0 when there is none.
        self.seclvl = 0
        for width in range(6, 1, -1):
            if "=" * width in self.data:
                self.seclvl = width
                break

    def _record_link(self, title):
        """Remember *title* as a link target, once."""
        if title not in self._seen_links:
            self._seen_links.add(title)
            self.links.append(title)

    def _newline(self, pos):
        """Translate the bare newline found at offset *pos* of the data."""
        if not self.br:
            # The previous construct (header, list item) already ended the
            # line; emit a plain newline and re-arm.
            self.br = True
            return "\n"
        self.level = 0
        self.prefix = ""
        # FIXME: Not sure if this is really the best way, but it seems
        # to work.
        # A newline at either end of the text has no neighbour on that side;
        # treat the missing neighbour as a newline rather than indexing out
        # of range (end) or wrapping to the last character (start).
        before = self.data[pos - 1] if pos > 0 else "\n"
        after = self.data[pos + 1] if pos + 1 < len(self.data) else "\n"
        if before != "\n" and after != "\n":
            return "<br>"
        return "\n"

    def replace(self, mobj):
        """Return the MediaWiki text for one match of ``w2m_cre``.

        Intended as the replacement callback of ``w2m_cre.sub``; it updates
        the formatting/list state kept on the instance as a side effect.
        """
        mstr = mobj.group(0)

        # New lines
        if mstr == "\n":
            return self._newline(mobj.start())

        # Bold, italic, underlined, typewriter
        if mstr in self._TOGGLES:
            attr, opening, closing = self._TOGGLES[mstr]
            active = not getattr(self, attr)
            setattr(self, attr, active)
            return opening if active else closing

        # Notes are converted to emphasis and thus stay the same
        if mstr == "''":
            return "''"

        # Escaped text is just unescaped
        if mstr == "\"\"":
            self.escape = not self.escape
            return ""

        # Breaks
        if mstr == "---":
            return "<br>"
        if mstr == "----":
            return "----"

        # Block of code.
        if mobj.group(1) == "%%":
            return "<pre>\n\n" + mobj.group(2) + "\n\n</pre>\n"

        # Headers: the document's highest level becomes "==", each lower
        # Wakka level one "=" more.
        opener = mobj.group(3)
        if opener is not None:
            n = self.seclvl - len(opener) + 2
            if n > 4:
                print("\twarning: document contains more than 3 levels of sections")
            hdr = "=" * n
            self.br = False
            return hdr + mobj.group(4) + hdr

        # Lists: "-" items become "*", enumerated items become "#"; the
        # nesting depth is the length of the leading whitespace.
        indent = mobj.group(5)
        if indent is not None:
            if mobj.group(6) == "-":
                bullet = "*"
            else:
                bullet = "#"
            level = len(indent)
            if level > self.level:
                self.prefix += bullet * (level - self.level)
            elif level < self.level:
                self.prefix = self.prefix[:level - self.level]
            self.level = level
            self.br = False
            return "\n" + self.prefix

        # Links
        # URLs are kept as-is, they are handled by mediawiki
        # Wiki links without a display text are not modified
        # but must be processed to populate the links table
        if mobj.group(7) == "[[":
            target = mobj.group(8)
            if not target:
                return ""
            self._record_link(target)
            if mobj.group(10):
                return "[[%s|%s]]" % (target, mobj.group(10))
            return "[[%s]]" % target

        if _CAMELCASE_CRE.match(mstr) and not self.escape:
            self._record_link(mstr)
            return "[[%s]]" % mstr

        # TODO: handle interwiki links
        # TODO: handle actions
        return mstr

    def convert(self):
        """Return the document converted to MediaWiki markup."""
        return w2m_cre.sub(self.replace, self.data)


def _latin1_to_utf8(text):
    """Return *text* as UTF-8 bytes; byte input is taken as ISO-8859-1."""
    if isinstance(text, bytes):
        text = text.decode("iso-8859-1")
    return text.encode("utf-8")


def _dump_debug(path, payload):
    """Write *payload* to *path* and close the file again."""
    if not isinstance(payload, bytes):
        payload = payload.encode("utf-8")
    with open(path, "wb") as handle:
        handle.write(payload)


def migrate_pages(wkcurs, mdcurs):
    """Copy the latest revision of every Wakka page into MediaWiki.

    *wkcurs* is a cursor on the Wakka database, *mdcurs* one on the
    MediaWiki database.  The module-level ``usermap`` must be set.
    """
    # Only export the latest version, and don't export pages created
    # by the installer (system pages)
    wkcurs.execute(
        "SELECT tag, time, user, body FROM wakka_pages "
        "WHERE latest = 'Y' AND user != 'WakkaInstaller'"
    )

    links = {}
    id_by_title = {}
    title_by_id = {}

    for n, row in enumerate(iter(wkcurs.fetchone, None), 1):
        title, stamp, uname, data = row

        # Map the username to userid
        uid = usermap.uid(uname)
        if uid == "0":
            uname = ""

        # Convert the document data
        print("Converting document '%s' (%u)" % (title, n))
        if __debug:
            _dump_debug("%03u.wakka" % n, data)

        if bytes is not str and isinstance(data, bytes):
            # Python 3 only: the str patterns cannot be applied to bytes.
            data = data.decode("iso-8859-1")
        doc = WakkaDoc(data)
        data = _latin1_to_utf8(doc.convert())
        if __debug:
            _dump_debug("%03u.media" % n, data)

        mdcurs.execute(
            "INSERT INTO cur "
            "(cur_title, cur_text, cur_user, cur_user_text, cur_timestamp, cur_touched)"
            " VALUES (%s, %s, %s, %s, %s, %s)",
            (title, data, uid, uname, stamp, stamp),
        )
        mdcurs.execute("SELECT cur_id FROM cur WHERE cur_title = %s", (title,))
        cid = mdcurs.fetchone()[0]

        links[cid] = doc.links
        id_by_title[title] = cid
        title_by_id[cid] = title

    # Populate the links and brokenlinks tables.  From version 1.3.0 on
    # l_from receives the page id, before that the page title.
    from_is_id = __wikiversion >= WIKIVERSION(1, 3, 0)
    for cid, targets in links.items():
        source = cid if from_is_id else title_by_id[cid]
        for title in targets:
            if title in id_by_title:
                mdcurs.execute(
                    "INSERT INTO links (l_from, l_to) VALUES (%s, %s)",
                    (source, id_by_title[title]),
                )
            else:
                mdcurs.execute(
                    "INSERT INTO brokenlinks (bl_from, bl_to) VALUES (%s, %s)",
                    (cid, title),
                )


def migrate_users(wkcurs, mdcurs):
    """Copy every Wakka user into the MediaWiki ``user`` table.

    *wkcurs* is a cursor on the Wakka database, *mdcurs* one on the
    MediaWiki database.
    """
    # NOTE(review): relies on wakka_users starting with the columns
    # name, password, email in that order -- confirm against the schema.
    wkcurs.execute("SELECT * FROM wakka_users")

    for row in iter(wkcurs.fetchone, None):
        name, pwd, email = row[0], row[1], row[2]

        mdcurs.execute(
            "INSERT INTO user "
            "(user_name, user_email, user_options) "
            "VALUES (%s, %s, %s)",
            (name, email, useropts),
        )
        mdcurs.execute("SELECT user_id FROM user WHERE user_name = %s", (name,))
        uid = mdcurs.fetchone()[0]

        # Salt the password with the user ID
        salted = "%s-%s" % (uid, pwd)
        if not isinstance(salted, bytes):
            salted = salted.encode("utf-8")
        pwd = hashlib.md5(salted).hexdigest()
        mdcurs.execute(
            "UPDATE user SET user_password = %s WHERE user_id = %s",
            (pwd, uid),
        )


def main():
    """Run the migration; return 0 on success and 1 on a setup failure."""
    global usermap

    if MySQLdb is None:
        print("Unable to import the MySQLdb module")
        return 1

    # Connect to the databases
    wkdb = None
    try:
        wkdb = MySQLdb.connect(
            host="wakka_host",
            db="wakka_db",
            user="wakka_user",
            passwd="wakka_passwd",
        )
        mddb = MySQLdb.connect(
            host="media_host",
            db="media_db",
            user="media_user",
            passwd="media_passwd",
        )
    except MySQLdb.Error:
        if wkdb is not None:
            wkdb.close()
        print("Unable to connect to the databases")
        return 1

    try:
        usermap = UserMap(mddb)
        wkcurs = wkdb.cursor()
        mdcurs = mddb.cursor()

        migrate_users(wkcurs, mdcurs)
        migrate_pages(wkcurs, mdcurs)

        # MySQLdb connections do not autocommit; without this the inserts
        # into transactional tables are discarded when the connection closes.
        mddb.commit()
    finally:
        wkdb.close()
        mddb.close()
    return 0


if __name__ == "__main__":
    sys.exit(main())