#!/usr/bin/python

import MySQLdb
import re
import md5

# Tokenizer for Wakka markup.  Every alternative below is one kind of token
# that WakkaDoc.replace() rewrites; the numbered groups are the ones that
# method reads.  Order matters: URLs come before the bare "//" marker and
# "----" before "---" so that the longer token wins.
w2m_cre = re.compile(
    # Groups 1-2: %%code block%% (marker, content).
    r"(%%)(.*?)%%|"
    # Groups 3-4: header, 2 to 6 "=" on both sides (marker, title).
    r"(?P<start>={2,6})(.+?)(?P=start)|"
    # Plain URL such as scheme://rest-up-to-whitespace.
    r"\b[a-z]+://\S+|"
    # Inline markers: note, escape, bold, monospace, underline, italic,
    # and the two break markers.
    r"''|\"\"|\*\*|##|__|//|----|---|"
    # Groups 5-6: list item (indentation, "-" bullet or "x)" label).
    r"\n([ \t]+)(-|[0-9a-zA-Z]+\))|"
    # Groups 7-10: [[target optional text]] (opener, target, -, text).
    r"(\[\[)([^ \t\n\r\f\v\]]+)(\s+([^\n\r\f\v\]]+))?\]\]|"
    # Capitalised Word:Word token (presumably an interwiki reference).
    r"\b[A-Z][A-Za-z]+?[:][A-Za-z0-9]*?\b|"
    # CamelCase word, treated as a wiki link.
    r"\b[A-Z][a-z]+?[A-Z0-9][A-Za-z0-9]*?\b|"
    # Any remaining newline.
    r"\n",
    re.DOTALL | re.MULTILINE,
)

def WIKIVERSION(maj, min, patch):
    """Pack a three-part version number into a single comparable integer.

    Each component is weighted by a factor of 256 relative to the next one,
    so WIKIVERSION(1, 3, 0) > WIKIVERSION(1, 2, 6).
    """
    return (maj * 256 + min) * 256 + patch

# Debug switch read by migrate_pages(): when true, every page is also written
# to "NNN.wakka" (text before conversion) and "NNN.media" (text after it).
__debug = True
# Wiki version packed with WIKIVERSION(); migrate_pages() compares it with
# WIKIVERSION(1, 3, 0) to choose between the page id and the page title as
# the l_from value of the links table.
__wikiversion = WIKIVERSION(1, 2, 6)

# Shared UserMap instance; stays None until main() installs the real one.
usermap = None

# Preference values stored with every migrated user.
useropt = {
    "quickbar": 1,
    "underline": 1,
    "hover": 1,
    "cols": 80,
    "rows": 25,
    "searchlimit": 20,
    "contextlines": 5,
    "contextchars": 50,
    "skin": 0,
    "math": 1,
    "rcdays": 7,
    "rclimit": 50,
    "highlightbroken": 1,
    "stubthreshold": 0,
    "previewontop": 1,
    "editsection": 1,
    "editsectiononrightclick": 0,
    "showtoc": 1,
    "showtoolbar": 1,
    "date": 0,
    "searchNs-1": 0,
    "searchNs0": 1,
    "searchNs1": 0,
    "searchNs2": 0,
    "searchNs3": 0,
    "searchNs4": 0,
    "searchNs5": 0,
    "searchNs6": 0,
    "searchNs7": 0,
    "rememberpassword": 0,
}

# The same preferences flattened to one "name=value" entry per line, which is
# the form migrate_users() writes into the user_options column.
useropts = "\n".join("%s=%s" % (key, useropt[key]) for key in useropt)

class UserMap:
    """Caching lookup from user name to the user_id stored in the `user` table.

    Successful lookups are remembered in ``self.users`` so that each name
    costs at most one query; unknown names are not cached and are looked up
    again on every call.
    """

    def __init__(self, db):
        """Create the map on top of *db*, a DB-API connection.

        A dedicated cursor is opened on the connection and kept for the
        lifetime of the object.
        """
        self.users = {}
        self.curs = db.cursor()

    def uid(self, uname):
        """Return the user id for *uname*, or the string "0" if it is unknown.

        The "0" marker is kept as a string because callers compare against
        exactly that value.
        """
        try:
            return self.users[uname]
        except KeyError:
            pass

        # Query parameters must be a sequence: "(uname,)" is a one-element
        # tuple, whereas "(uname)" is just the bare string.
        self.curs.execute("SELECT user_id FROM user WHERE user_name = %s",
                          (uname,))
        row = self.curs.fetchone()
        if not row:
            return "0"

        self.users[uname] = row[0]
        return row[0]


class WakkaDoc:
    """Convert the text of one Wakka page into MediaWiki markup.

    The instance carries the state ``replace`` needs while it is called once
    per markup token, in document order:

    * ``bold``, ``italic``, ``underline``, ``monospace`` -- whether the
      corresponding inline tag is currently open;
    * ``escape`` -- whether we are between a pair of ``""`` markers;
    * ``prefix`` / ``level`` -- bullet prefix and indentation width of the
      list item seen last;
    * ``br`` -- False right after a header or list item, so that the newline
      ending it is copied through instead of becoming ``<br/>``;
    * ``seclvl`` -- length of the longest run of ``=`` found in the text
      (2 to 6, longer runs count as 6), or 0 when there is none;
    * ``links`` -- link targets met so far: a dict while converting, a list
      of titles once ``convert`` has returned.

    An instance is meant for a single ``convert`` call.
    """

    # Inline markers that alternately open and close a tag:
    # marker -> (state attribute, opening tag, closing tag).
    _TOGGLES = {
        "**": ("bold", "<strong>", "</strong>"),
        "//": ("italic", "<em>", "</em>"),
        "__": ("underline", "<u>", "</u>"),
        "##": ("monospace", "<tt>", "</tt>"),
    }

    # Recognises a token that is a list item (newline plus indentation).
    _LIST_RE = re.compile(r"\n[ \t]+(-|[0-9,a-z,A-Z]+\))?")

    # Recognises a token that starts with a CamelCase word.
    _CAMEL_RE = re.compile(r"[A-Z][a-z]+[A-Z0-9][A-Za-z0-9]*")

    def __init__(self, data):
        """Prepare the conversion of *data*, the raw Wakka text of a page."""
        self.bold = False
        self.italic = False
        self.underline = False
        self.monospace = False
        self.escape = False
        self.prefix = ""
        self.level = 0
        self.br = True
        self.data = data
        self.links = {}

        # Find the highest section level, i.e. the longest "=" run in use.
        self.seclvl = 0
        for length in range(6, 1, -1):
            if ("=" * length) in self.data:
                self.seclvl = length
                break

    def _newline(self, pos):
        """Return the replacement for the plain newline found at offset *pos*."""
        if not self.br:
            # First newline after a header or list item: keep it as is.
            self.br = True
            return "\n"

        self.level = 0
        self.prefix = ""

        # A lone newline inside running text becomes a forced line break; a
        # newline next to another newline is kept.  Slices are used instead
        # of indexing so that the first and last character of the page have
        # an empty neighbour: indexing raised IndexError on a trailing
        # newline and wrapped around to the last character on a leading one.
        before = self.data[pos - 1:pos]
        after = self.data[pos + 1:pos + 2]
        if before in ("", "\n") or after in ("", "\n"):
            return "\n"
        return "<br/>"

    def replace(self, mobj):
        """Return the MediaWiki text for one token matched by ``w2m_cre``.

        *mobj* is the match object; this method is intended to be used as
        the replacement callback of ``w2m_cre.sub``.  Groups 1-2 belong to
        code blocks, 3-4 to headers, 5-6 to list items and 7-10 to bracketed
        links.  Tokens that need no rewriting are returned unchanged.
        """
        mstr = mobj.group(0)

        # New lines
        if mstr == "\n":
            return self._newline(mobj.start())

        # Bold, italic, underlined, typewriter
        toggle = self._TOGGLES.get(mstr)
        if toggle is not None:
            attr, opening, closing = toggle
            active = not getattr(self, attr)
            setattr(self, attr, active)
            if active:
                return opening
            return closing

        # Notes are converted to emphasis and thus stay the same
        if mstr == "''":
            return "''"
        # Escaped text is just unescaped: the markers themselves disappear
        if mstr == "\"\"":
            self.escape = not self.escape
            return ""

        # Breaks
        if mstr == "---":
            return "<br/>"
        if mstr == "----":
            return "----"

        # Block of code.
        if mobj.group(1) == "%%":
            return "<pre>\n<nowiki>\n" + mobj.group(2) + "\n</nowiki>\n</pre>\n"

        # Headers: the longest marker used in the page maps to "==", each
        # shorter marker to one more "=".
        marker = mobj.group(3)
        if marker is not None and marker[:2] == "==":
            n = self.seclvl - len(marker) + 2
            if n > 4:
                print("\twarning: document contains more than 3 levels of sections")
            hdr = "=" * n
            self.br = False
            return hdr + mobj.group(4) + hdr

        # Lists
        if self._LIST_RE.match(mstr):
            indent = mobj.group(5)
            bullet = mobj.group(6)
            if bullet == "":
                mark = " "
            elif bullet == "-":
                mark = "*"
            else:
                mark = "#"

            # Deeper indentation appends marks, shallower indentation drops
            # them; the same indentation keeps the current prefix.
            level = len(indent)
            if level > self.level:
                self.prefix += mark * (level - self.level)
            elif level < self.level:
                self.prefix = self.prefix[:level - self.level]

            self.level = level
            self.br = False
            return "\n" + self.prefix

        # Links
        # URLs are kept as-is, they are handled by mediawiki
        # Wiki links without a display text are not modified
        # but must be processed to populate the links table
        if mobj.group(7) == "[[":
            target = mobj.group(8)
            if not target:
                return ""

            self.links[target] = 1
            if mobj.group(10):
                return "[[%s|%s]]" % (target, mobj.group(10))
            return "[[%s]]" % target
        if self._CAMEL_RE.match(mstr) and not self.escape:
            self.links[mstr] = 1
            return "[[%s]]" % mstr
        # TODO: handle interwiki links

        # TODO: handle actions

        return mstr

    def convert(self):
        """Return the page converted to MediaWiki markup.

        As a side effect ``self.links`` is replaced by the list of link
        targets found in the page.
        """
        data = w2m_cre.sub(self.replace, self.data)
        self.links = list(self.links)
        return data


def migrate_pages(wkcurs, mdcurs):
    """Copy the latest revision of every Wakka page into the `cur` table.

    *wkcurs* is a cursor on the Wakka database and *mdcurs* a cursor on the
    target database.  Each page body is converted with WakkaDoc, re-encoded
    from ISO-8859-1 to UTF-8 and inserted; once all pages are stored, the
    `links` and `brokenlinks` tables are filled from the link targets found
    during conversion.

    Uses the module globals ``usermap`` (name to user id lookup, must be set
    before the call), ``__debug`` and ``__wikiversion``.

    NOTE(review): the Wakka `time` value is passed through unchanged as
    cur_timestamp / cur_touched -- confirm that the target columns accept
    that format.
    """
    # Only export the latest version, and don't export pages created
    # by the installer (system pages)
    wkcurs.execute("SELECT tag, time, user, body FROM wakka_pages "
                   "WHERE latest = 'Y' AND user != 'WakkaInstaller'")

    links = {}   # cur_id -> link targets found in that page
    pages = {}   # title -> cur_id and cur_id -> title
    rows = iter(wkcurs.fetchone, None)
    for n, (title, stamp, uname, data) in enumerate(rows, 1):
        # Map the username to userid; unknown users get an empty name
        uid = usermap.uid(uname)
        if uid == "0":
            uname = ""

        # Convert the document data
        print("Converting document '%s' (%u)" % (title, n))
        if __debug:
            with open("%03u.wakka" % n, "w") as dump:
                dump.write(data)

        doc = WakkaDoc(data)
        data = doc.convert().decode("iso-8859-1").encode("utf-8")

        if __debug:
            with open("%03u.media" % n, "w") as dump:
                dump.write(data)

        mdcurs.execute("INSERT INTO cur "
                       "(cur_title, cur_text, cur_user, cur_user_text, cur_timestamp, cur_touched)"
                       " VALUES (%s, %s, %s, %s, %s, %s)",
                       (title, data, uid, uname, stamp, stamp))

        # Read back the id given to the new row (parameters as a tuple).
        mdcurs.execute("SELECT cur_id FROM cur WHERE cur_title = %s", (title,))
        cid = mdcurs.fetchone()[0]
        links[cid] = doc.links
        pages[title] = cid
        pages[cid] = title

    # Populate the links and brokenlinks tables.  From version 1.3.0 on the
    # source of a link is the page id, before that it is the page title.
    from_is_id = __wikiversion >= WIKIVERSION(1, 3, 0)
    for cid, targets in links.items():
        if from_is_id:
            source = cid
        else:
            source = pages[cid]

        for target in targets:
            if target in pages:
                mdcurs.execute("INSERT INTO links (l_from, l_to) VALUES (%s, %s)",
                               (source, pages[target]))
            else:
                mdcurs.execute("INSERT INTO brokenlinks (bl_from, bl_to) VALUES (%s, %s)",
                               (cid, target))


def migrate_users(wkcurs, mdcurs):
    """Copy every Wakka user into the `user` table of the target database.

    *wkcurs* is a cursor on the Wakka database and *mdcurs* a cursor on the
    target database.  The first three columns of each `wakka_users` row are
    taken as name, password and e-mail address.  Every user gets the default
    options from the module-level ``useropts`` string, and the stored
    password is re-hashed as md5("<user_id>-<wakka password>").
    """
    # hashlib replaces the deprecated md5 module; imported locally so this
    # function does not depend on the file's import block.
    import hashlib

    wkcurs.execute("SELECT * FROM wakka_users")

    for row in iter(wkcurs.fetchone, None):
        name = row[0]
        pwd = row[1]
        email = row[2]

        mdcurs.execute("INSERT INTO user "
                       "(user_name, user_email, user_options) "
                       "VALUES (%s, %s, %s)",
                       (name, email, useropts))
        # Read back the id given to the new row (parameters as a tuple).
        mdcurs.execute("SELECT user_id FROM user WHERE user_name = %s", (name,))
        uid = mdcurs.fetchone()[0]

        # Salt the password with the user ID
        salted = str(uid) + "-" + pwd
        if not isinstance(salted, bytes):
            # Text (not byte) strings must be encoded before hashing.
            salted = salted.encode("utf-8")
        pwd = hashlib.md5(salted).hexdigest()
        mdcurs.execute("UPDATE user SET user_password = %s WHERE user_id = %s",
                       (pwd, uid))


def main():
    """Run the whole migration: users first, then pages.

    Connects to the Wakka and the target database, installs the shared
    ``usermap`` and calls migrate_users() and migrate_pages().  The target
    connection is committed once both steps succeeded; both connections are
    closed in every case.

    Returns 1 if a database connection cannot be opened, 0 on success.
    Errors raised during the migration itself propagate to the caller.
    """
    global usermap

    # Connect to the databases
    wkdb = None
    try:
        wkdb = MySQLdb.connect(host="wakka_host", db="wakka_db",
                               user="wakka_user", passwd="wakka_passwd")
        mddb = MySQLdb.connect(host="media_host", db="media_db",
                               user="media_user", passwd="media_passwd")
    except MySQLdb.Error:
        # Only database errors are handled here; anything else (including
        # Ctrl-C) is no longer swallowed.
        if wkdb is not None:
            wkdb.close()
        print("Unable to connect to the databases")
        return 1

    try:
        usermap = UserMap(mddb)

        wkcurs = wkdb.cursor()
        mdcurs = mddb.cursor()

        # Users must exist before pages so that page authors can be mapped
        # to their new user ids.
        migrate_users(wkcurs, mdcurs)
        migrate_pages(wkcurs, mdcurs)

        # DB-API connections do not autocommit; without this the inserted
        # rows are lost on transactional tables.
        mddb.commit()
    finally:
        wkdb.close()
        mddb.close()

    return 0


# Run the migration only when the file is executed as a script, and report
# main()'s result (1 on connection failure) as the process exit status.
if __name__ == "__main__":
    import sys
    sys.exit(main())

