[Pywikipedia-l] SVN: [6275] trunk/pywikipedia/userinterfaces

a_engels at svn.wikimedia.org a_engels at svn.wikimedia.org
Wed Jan 21 20:21:19 UTC 2009


Revision: 6275
Author:   a_engels
Date:     2009-01-21 20:21:19 +0000 (Wed, 21 Jan 2009)

Log Message:
-----------
Speed up transliteration by using a dictionary lookup instead of a long sequence of if statements

Modified Paths:
--------------
    trunk/pywikipedia/userinterfaces/terminal_interface.py
    trunk/pywikipedia/userinterfaces/transliteration.py

Modified: trunk/pywikipedia/userinterfaces/terminal_interface.py
===================================================================
--- trunk/pywikipedia/userinterfaces/terminal_interface.py	2009-01-21 19:19:12 UTC (rev 6274)
+++ trunk/pywikipedia/userinterfaces/terminal_interface.py	2009-01-21 20:21:19 UTC (rev 6275)
@@ -16,6 +16,8 @@
 except ImportError:
     ctypes_found = False
 
+transliterator = transliteration.transliterator()
+
 def getDefaultTextColorInWindows():
     """
     This method determines the default text color and saves its color
@@ -195,9 +197,9 @@
                 # original question marks.
                 if codecedText[i] == '?' and text[i] != u'?':
                     try:
-                        transliterated = transliteration.trans(text[i], default = '?', prev = prev, next = text[i+1])
+                        transliterated = transliterator.transliterate(text[i], default = '?', prev = prev, next = text[i+1])
                     except IndexError:
-                        transliterated = transliteration.trans(text[i], default = '?', prev = prev, next = ' ')
+                        transliterated = transliterator.transliterate(text[i], default = '?', prev = prev, next = ' ')
                     # transliteration was successful. The replacement
                     # could consist of multiple letters.
                     # mark the transliterated letters in yellow.

Modified: trunk/pywikipedia/userinterfaces/transliteration.py
===================================================================
--- trunk/pywikipedia/userinterfaces/transliteration.py	2009-01-21 19:19:12 UTC (rev 6274)
+++ trunk/pywikipedia/userinterfaces/transliteration.py	2009-01-21 20:21:19 UTC (rev 6275)
@@ -1,1902 +1,1357 @@
-# -*- coding: utf-8 -*-
-def trans(char, default = '?', prev = '-', next = '-'):
-    # Give a transliteration for char, or default if none is known
-    # Accented etc. Latin characters
-    if char in u"ÀÁÂẦẤẪẨẬÃĀĂẰẮẴẶẲȦǠẠḀȂĄǍẢ":
-        return u"A"
-    if char in u"ȀǞ":
-        return u"Ä"
-    if char == u"Ǻ":
-        return u"Å"
-    if char == u"Ä":
-        return u"Ae"
-    if char == u"Å":
-        return u"Aa"
-    if char in u"àáâầấẫẩậãāăằắẵặẳȧǡạḁȃąǎảẚ":
-        return u"a"
-    if char in u"ȁǟ":
-        return u"ä"
-    if char == u"ǻ":
-        return u"å"
-    if char == u"ä":
-        return u"ae"
-    if char == u"å":
-        return u"aa"
-    if char in u"ḂḄḆƁƂ":
-        return u"B"
-    if char in u"ḃḅḇƀɓƃ":
-        return u"b"
-    if char in u"ĆĈĊÇČƇ":
-        return u"C"
-    if char in u"ćĉċçčƈȼ":
-        return u"c"
-    if char == u"Ḉ":
-        return u"Ç"
-    if char == u"ḉ":
-        return u"ç"
-    if char == u"Ð":
-        return u"Dh"
-    if char == u"ð":
-        return u"dh"    
-    if char in u"ĎḊḌḎḐḒĐƉƊƋ":
-        return u"D"
-    if char in u"ďḋḍḏḑḓđɖɗƌ":
-        return u"d"
-    if char in u"ÈȄÉÊḚËĒḔḖĔĖẸE̩ȆȨḜĘĚẼḘẺ":
-        return u"E"
-    if char in u"ỀẾỄỆỂ":
-        return u"Ê"
-    if char in u"èȅéêḛëēḕḗĕėẹe̩ȇȩḝęěẽḙẻ":
-        return u"e"
-    if char in u"ềếễệể":
-        return u"ê"
-    if char in u"ḞƑ":
-        return u"F"
-    if char in u"ḟƒ":
-        return u"f"
-    if char in u"ǴḠĞĠĢǦǤƓ":
-        return u"G"
-    if char in u"ǵḡğġģǧǥɠ":
-        return u"g"
-    if char == u"Ĝ":
-        return u"Gx"
-    if char == u"ĝ":
-        return u"gx"
-    if char in u"ḢḤḦȞḨḪH̱ĦǶ":
-        return u"H"
-    if char in u"ḣḥḧȟḩḫ̱ẖħƕ":
-        return u"h"
-    if char in u"IÌȈÍÎĨḬÏḮĪĬȊĮǏİỊỈƗ":
-        return u"I"
-    if char in u"ıìȉíîĩḭïḯīĭȋįǐiịỉɨ":
-        return u"i"
-    if char in u"ĴJ":
-        return u"J"
-    if char in u"ɟĵ̌ǰ":
-        return u"j"
-    if char in u"ḰǨĶḲḴƘ":
-        return u"K"
-    if char in u"ḱǩķḳḵƙ":
-        return u"k"
-    if char in u"ĹĻĽḶḸḺḼȽŁ":
-        return u"L"
-    if char in u"ĺļľḷḹḻḽƚłɫ":
-        return u"l"
-    if char in u"ḾṀṂ":
-        return u"M"
-    if char in u"ḿṁṃɱ":
-        return u"m"
-    if char in u"ǸŃÑŅŇṄṆṈṊŊƝɲȠ":
-        return u"N"
-    if char in u"ǹńñņňṅṇṉṋŋɲƞ":
-        return u"n"
-    if char in u"ÒÓÔÕṌṎȬÖŌṐṒŎǑȮȰỌǪǬƠỜỚỠỢỞỎƟØǾ":
-        return u"O"
-    if char in u"òóôõṍṏȭöōṑṓŏǒȯȱọǫǭơờớỡợởỏɵøǿ":
-        return u"o"
-    if char in u"ȌŐȪ":
-        return u"Ö"
-    if char in u"ȍőȫ":
-        return u"ö"
-    if char in u"ỒỐỖỘỔȎ":
-        return u"Ô"
-    if char in u"ồốỗộổȏ":
-        return u"ô"
-    if char in u"ṔṖƤ":
-        return u"P"
-    if char in u"ṕṗƥ":
-        return u"p"
-    if char == u"ᵽ":
-        return u"q"
-    if char in u"ȐŔŖŘȒṘṚṜṞ":
-        return u"R"
-    if char in u"ȑŕŗřȓṙṛṝṟɽ":
-        return u"r"
-    if char in u"ŚṤŞȘŠṦṠṢṨ":
-        return u"S"
-    if char in u"śṥşșšṧṡṣṩȿ":
-        return u"s"
-    if char == u"Ŝ":
-        return u"Sx"
-    if char == u"ŝ":
-        return u"sx"
-    if char in u"ŢȚŤṪṬṮṰŦƬƮ":
-        return u"T"
-    if char in u"ţțťṫṭṯṱŧȾƭʈ":
-        return u"t"
-    if char in u"ÙÚŨṸṴÜṲŪṺŬỤŮŲǓṶỦƯỮỰỬ":
-        return u"U"
-    if char in u"ùúũṹṵüṳūṻŭụůųǔṷủưữựửʉ":
-        return u"u"
-    if char in u"ȔŰǛǗǕǙ":
-        return u"Ü"
-    if char in u"ȕűǜǘǖǚ":
-        return u"ü"
-    if char == u"Û":
-        return u"Ux"
-    if char == u"û":
-        return u"ux"
-    if char == u"Ȗ":
-        return u"Û"
-    if char == u"ȗ":
-        return u"û"
-    if char == u"Ừ":
-        return u"Ù"
-    if char == u"ừ":
-        return u"ù"
-    if char == u"Ứ":
-        return u"Ú"
-    if char == u"ứ":
-        return u"ú"
-    if char in u"ṼṾ":
-        return u"V"
-    if char in u"ṽṿ":
-        return u"v"
-    if char in u"ẀẂŴẄẆẈ":
-        return u"W"
-    if char in u"ẁẃŵẅẇẉ":
-        return u"w"
-    if char in u"ẊẌ":
-        return u"X"
-    if char in u"ẋẍ":
-        return u"x"
-    if char in u"ỲÝŶŸỸȲẎỴỶƳ":
-        return u"Y"
-    if char in u"ỳýŷÿỹȳẏỵỷƴ":
-        return u"y"
-    if char in u"ŹẐŻẒŽẔƵȤ":
-        return u"Z"
-    if char in u"źẑżẓžẕƶȥ":
-        return u"z"
-    if char == u"ɀ":
-        return u"zv"
-    
-    # Latin: extended Latin alphabet
-    if char == u"ɑ":
-        return u"a"
-    if char in u"ÆǼǢ":
-        return u"AE"
-    if char in u"æǽǣ":
-        return u"ae"
-    if char == u"Ð":
-        return u"Dh"
-    if char == u"ð":
-        return u"dh"
-    if char in u"ƎƏƐ":
-        return u"E"
-    if char in u"ǝəɛ":
-        return u"e"
-    if char in u"ƔƢ":
-        return u"G"
-    if char in u"ᵷɣƣᵹ":
-        return u"g"
-    if char == u"Ƅ":
-        return u"H"
-    if char == u"ƅ":
-        return u"h"
-    if char == u"Ƕ":
-        return u"Wh"
-    if char == u"ƕ":
-        return u"wh"
-    if char == u"Ɩ":
-        return u"I"
-    if char == u"ɩ":
-        return u"i"
-    if char == u"Ŋ":
-        return u"Ng"
-    if char == u"ŋ":
-        return u"ng"
-    if char == u"Œ":
-        return u"OE"
-    if char == u"œ":
-        return u"oe"
-    if char == u"Ɔ":
-        return u"O"
-    if char == u"ɔ":
-        return u"o"
-    if char == u"Ȣ":
-        return u"Ou"
-    if char == u"ȣ":
-        return u"ou"
-    if char == u"Ƽ":
-        return u"Q"
-    if char in u"ĸƽ":
-        return u"q"
-    if char == u"ȹ":
-        return u"qp"
-    if char == u"":
-        return u"r"
-    if char == u"ſ":
-        return u"s"
-    if char == u"ß":
-        return u"ss"
-    if char == u"Ʃ":
-        return u"Sh"
-    if char == u"ʃᶋ":
-        return u"sh"
-    if char == u"Ʉ":
-        return u"U"
-    if char == u"ʉ":
-        return u"u"
-    if char == u"Ʌ":
-        return u"V"
-    if char == u"ʌ":
-        return u"v"
-    if char in u"ƜǷ":
-        return u"W"
-    if char in u"ɯƿ":
-        return u"w"
-    if char == u"Ȝ":
-        return u"Y"
-    if char == u"ȝ":
-        return u"y"
-    if char == u"IJ":
-        return u"IJ"
-    if char == u"ij":
-        return u"ij"
-    if char == u"Ƨ":
-        return u"Z"
-    if char in u"ʮƨ":
-        return u"z"
-    if char == u"Ʒ":
-        return u"Zh"
-    if char == u"ʒ":
-        return u"zh"
-    if char == u"Ǯ":
-        return u"Dzh"
-    if char == u"ǯ":
-        return u"dzh"
-    if char in u"ƸƹʔˀɁɂ":
-        return u"'"
-    if char in u"Þ":
-        return u"Th"
-    if char in u"þ":
-        return u"th"
-    if char in u"Cʗǃ":
-        return u"!"
+# -*- coding: utf-8  -*-
+class transliterator(object):
+    def __init__(self):
+        self.trans = {}
+        for char in u"ÀÁÂẦẤẪẨẬÃĀĂẰẮẴẶẲȦǠẠḀȂĄǍẢ":
+            self.trans[char] = u"A"       
+        for char in u"ȀǞ":
+            self.trans[char] = u"Ä"
+        self.trans[u"Ǻ"] = u"Å"
+        self.trans[u"Ä"] = u"Ae"
+        self.trans[u"Å"] = u"Aa"
+        for char in u"àáâầấẫẩậãāăằắẵặẳȧǡạḁȃąǎảẚ":
+            self.trans[char] = u"a"
+        for char in u"ȁǟ":
+            self.trans[char] = u"ä"
+        self.trans[u"ǻ"] = u"å"
+        self.trans[u"ä"] = u"ae"
+        self.trans[u"å"] = u"aa"
+        for char in u"ḂḄḆƁƂ":
+            self.trans[char] = u"B"
+        for char in u"ḃḅḇƀɓƃ":
+            self.trans[char] = u"b"
+        for char in u"ĆĈĊÇČƇ":
+            self.trans[char] = u"C"
+        for char in u"ćĉċçčƈȼ":
+            self.trans[char] = u"c"
+        self.trans[u"Ḉ"] = u"Ç"
+        self.trans[u"ḉ"] = u"ç"
+        self.trans[u"Ð"] = u"Dh"
+        self.trans[u"ð"] = u"dh"    
+        for char in u"ĎḊḌḎḐḒĐƉƊƋ":
+            self.trans[char] = u"D"
+        for char in u"ďḋḍḏḑḓđɖɗƌ":
+            self.trans[char] = u"d"
+        for char in u"ÈȄÉÊḚËĒḔḖĔĖẸE̩ȆȨḜĘĚẼḘẺ":
+            self.trans[char] = u"E"
+        for char in u"ỀẾỄỆỂ":
+            self.trans[char] = u"Ê"
+        for char in u"èȅéêḛëēḕḗĕėẹe̩ȇȩḝęěẽḙẻ":
+            self.trans[char] = u"e"
+        for char in u"ềếễệể":
+            self.trans[char] = u"ê"
+        for char in u"ḞƑ":
+            self.trans[char] = u"F"
+        for char in u"ḟƒ":
+            self.trans[char] = u"f"
+        for char in u"ǴḠĞĠĢǦǤƓ":
+            self.trans[char] = u"G"
+        for char in u"ǵḡğġģǧǥɠ":
+            self.trans[char] = u"g"
+        self.trans[u"Ĝ"] = u"Gx"
+        self.trans[u"ĝ"] = u"gx"
+        for char in u"ḢḤḦȞḨḪH̱ĦǶ":
+            self.trans[char] = u"H"
+        for char in u"ḣḥḧȟḩḫ̱ẖħƕ":
+            self.trans[char] = u"h"
+        for char in u"IÌȈÍÎĨḬÏḮĪĬȊĮǏİỊỈƗ":
+            self.trans[char] = u"I"
+        for char in u"ıìȉíîĩḭïḯīĭȋįǐiịỉɨ":
+            self.trans[char] = u"i"
+        for char in u"ĴJ":
+            self.trans[char] = u"J"
+        for char in u"ɟĵ̌ǰ":
+            self.trans[char] = u"j"
+        for char in u"ḰǨĶḲḴƘ":
+            self.trans[char] = u"K"
+        for char in u"ḱǩķḳḵƙ":
+            self.trans[char] = u"k"
+        for char in u"ĹĻĽḶḸḺḼȽŁ":
+            self.trans[char] = u"L"
+        for char in u"ĺļľḷḹḻḽƚłɫ":
+            self.trans[char] = u"l"
+        for char in u"ḾṀṂ":
+            self.trans[char] = u"M"
+        for char in u"ḿṁṃɱ":
+            self.trans[char] = u"m"
+        for char in u"ǸŃÑŅŇṄṆṈṊŊƝɲȠ":
+            self.trans[char] = u"N"
+        for char in u"ǹńñņňṅṇṉṋŋɲƞ":
+            self.trans[char] = u"n"
+        for char in u"ÒÓÔÕṌṎȬÖŌṐṒŎǑȮȰỌǪǬƠỜỚỠỢỞỎƟØǾ":
+            self.trans[char] = u"O"
+        for char in u"òóôõṍṏȭöōṑṓŏǒȯȱọǫǭơờớỡợởỏɵøǿ":
+            self.trans[char] = u"o"
+        for char in u"ȌŐȪ":
+            self.trans[char] = u"Ö"
+        for char in u"ȍőȫ":
+            self.trans[char] = u"ö"
+        for char in u"ỒỐỖỘỔȎ":
+            self.trans[char] = u"Ô"
+        for char in u"ồốỗộổȏ":
+            self.trans[char] = u"ô"
+        for char in u"ṔṖƤ":
+            self.trans[char] = u"P"
+        for char in u"ṕṗƥ":
+            self.trans[char] = u"p"
+        self.trans[u"ᵽ"] = u"q"
+        for char in u"ȐŔŖŘȒṘṚṜṞ":
+            self.trans[char] = u"R"
+        for char in u"ȑŕŗřȓṙṛṝṟɽ":
+            self.trans[char] = u"r"
+        for char in u"ŚṤŞȘŠṦṠṢṨ":
+            self.trans[char] = u"S"
+        for char in u"śṥşșšṧṡṣṩȿ":
+            self.trans[char] = u"s"
+        self.trans[u"Ŝ"] = u"Sx"
+        self.trans[u"ŝ"] = u"sx"
+        for char in u"ŢȚŤṪṬṮṰŦƬƮ":
+            self.trans[char] = u"T"
+        for char in u"ţțťṫṭṯṱŧȾƭʈ":
+            self.trans[char] = u"t"
+        for char in u"ÙÚŨṸṴÜṲŪṺŬỤŮŲǓṶỦƯỮỰỬ":
+            self.trans[char] = u"U"
+        for char in u"ùúũṹṵüṳūṻŭụůųǔṷủưữựửʉ":
+            self.trans[char] = u"u"
+        for char in u"ȔŰǛǗǕǙ":
+            self.trans[char] = u"Ü"
+        for char in u"ȕűǜǘǖǚ":
+            self.trans[char] = u"ü"
+        self.trans[u"Û"] = u"Ux"
+        self.trans[u"û"] = u"ux"
+        self.trans[u"Ȗ"] = u"Û"
+        self.trans[u"ȗ"] = u"û"
+        self.trans[u"Ừ"] = u"Ù"
+        self.trans[u"ừ"] = u"ù"
+        self.trans[u"Ứ"] = u"Ú"
+        self.trans[u"ứ"] = u"ú"
+        for char in u"ṼṾ":
+            self.trans[char] = u"V"
+        for char in u"ṽṿ":
+            self.trans[char] = u"v"
+        for char in u"ẀẂŴẄẆẈ":
+            self.trans[char] = u"W"
+        for char in u"ẁẃŵẅẇẉ":
+            self.trans[char] = u"w"
+        for char in u"ẊẌ":
+            self.trans[char] = u"X"
+        for char in u"ẋẍ":
+            self.trans[char] = u"x"
+        for char in u"ỲÝŶŸỸȲẎỴỶƳ":
+            self.trans[char] = u"Y"
+        for char in u"ỳýŷÿỹȳẏỵỷƴ":
+            self.trans[char] = u"y"
+        for char in u"ŹẐŻẒŽẔƵȤ":
+            self.trans[char] = u"Z"
+        for char in u"źẑżẓžẕƶȥ":
+            self.trans[char] = u"z"
+        self.trans[u"ɀ"] = u"zv"
+        
+        # Latin: extended Latin alphabet
+        self.trans[u"ɑ"] = u"a"
+        for char in u"ÆǼǢ":
+            self.trans[char] = u"AE"
+        for char in u"æǽǣ":
+            self.trans[char] = u"ae"
+        self.trans[u"Ð"] = u"Dh"
+        self.trans[u"ð"] = u"dh"
+        for char in u"ƎƏƐ":
+            self.trans[char] = u"E"
+        for char in u"ǝəɛ":
+            self.trans[char] = u"e"
+        for char in u"ƔƢ":
+            self.trans[char] = u"G"
+        for char in u"ᵷɣƣᵹ":
+            self.trans[char] = u"g"
+        self.trans[u"Ƅ"] = u"H"
+        self.trans[u"ƅ"] = u"h"
+        self.trans[u"Ƕ"] = u"Wh"
+        self.trans[u"ƕ"] = u"wh"
+        self.trans[u"Ɩ"] = u"I"
+        self.trans[u"ɩ"] = u"i"
+        self.trans[u"Ŋ"] = u"Ng"
+        self.trans[u"ŋ"] = u"ng"
+        self.trans[u"Œ"] = u"OE"
+        self.trans[u"œ"] = u"oe"
+        self.trans[u"Ɔ"] = u"O"
+        self.trans[u"ɔ"] = u"o"
+        self.trans[u"Ȣ"] = u"Ou"
+        self.trans[u"ȣ"] = u"ou"
+        self.trans[u"Ƽ"] = u"Q"
+        for char in u"ĸƽ":
+            self.trans[char] = u"q"
+        self.trans[u"ȹ"] = u"qp"
+        self.trans[u""] = u"r"
+        self.trans[u"ſ"] = u"s"
+        self.trans[u"ß"] = u"ss"
+        self.trans[u"Ʃ"] = u"Sh"
+        for char in u"ʃᶋ":
+            self.trans[char] = u"sh"
+        self.trans[u"Ʉ"] = u"U"
+        self.trans[u"ʉ"] = u"u"
+        self.trans[u"Ʌ"] = u"V"
+        self.trans[u"ʌ"] = u"v"
+        for char in u"ƜǷ":
+            self.trans[char] = u"W"
+        for char in u"ɯƿ":
+            self.trans[char] = u"w"
+        self.trans[u"Ȝ"] = u"Y"
+        self.trans[u"ȝ"] = u"y"
+        self.trans[u"IJ"] = u"IJ"
+        self.trans[u"ij"] = u"ij"
+        self.trans[u"Ƨ"] = u"Z"
+        for char in u"ʮƨ":
+            self.trans[char] = u"z"
+        self.trans[u"Ʒ"] = u"Zh"
+        self.trans[u"ʒ"] = u"zh"
+        self.trans[u"Ǯ"] = u"Dzh"
+        self.trans[u"ǯ"] = u"dzh"
+        for char in u"ƸƹʔˀɁɂ":
+            self.trans[char] = u"'"
+        for char in u"Þ":
+            self.trans[char] = u"Th"
+        for char in u"þ":
+            self.trans[char] = u"th"
+        for char in u"Cʗǃ":
+            self.trans[char] = u"!"
 
-    #Punctuation and typography
-    if char in u"«»“”„¨":
-        return u'"'
-    if char in u"‘’′":
-        return u"'"
-    if char == u"•":
-        return u"*"
-    if char == u"@":
-        return u"(at)"
-    if char == u"¤":
-        return u"$"
-    if char == u"¢":
-        return u"c"
-    if char == u"€":
-        return u"E"
-    if char == u"£":
-        return u"L"
-    if char == u"¥":
-        return u"yen"
-    if char == u"†":
-        return u"+"
-    if char == u"‡":
-        return u"++"
-    if char == u"°":
-        return u":"
-    if char == u"¡":
-        return u"!"
-    if char == u"¿":
-        return u"?"
-    if char == u"‰":
-        return u"o/oo"
-    if char == u"‱":
-        return u"o/ooo"
-    if char in u"¶§":
-        return u">"
-    if char in u"…":
-        return u"..."
-    if char in u"‒–—―":
-        return u"-"
-    if char in u"·":
-        return u" "
-    if char == u"¦":
-        return u"|"
-    if char == u"⁂":
-        return u"***"
-    if char == u"◊":
-        return u"<>"
-    if char == u"‽":
-        return u"?!"
-    if char == u"؟":
-        return u";-)"
-    
+        #Punctuation and typography
+        for char in u"«»“”„¨":
+            self.trans[char] = u'"'
+        for char in u"‘’′":
+            self.trans[char] = u"'"
+        self.trans[u"•"] = u"*"
+        self.trans[u"@"] = u"(at)"
+        self.trans[u"¤"] = u"$"
+        self.trans[u"¢"] = u"c"
+        self.trans[u"€"] = u"E"
+        self.trans[u"£"] = u"L"
+        self.trans[u"¥"] = u"yen"
+        self.trans[u"†"] = u"+"
+        self.trans[u"‡"] = u"++"
+        self.trans[u"°"] = u":"
+        self.trans[u"¡"] = u"!"
+        self.trans[u"¿"] = u"?"
+        self.trans[u"‰"] = u"o/oo"
+        self.trans[u"‱"] = u"o/ooo"
+        for char in u"¶§":
+            self.trans[char] = u">"
+        for char in u"…":
+            self.trans[char] = u"..."
+        for char in u"‒–—―":
+            self.trans[char] = u"-"
+        for char in u"·":
+            self.trans[char] = u" "
+        self.trans[u"¦"] = u"|"
+        self.trans[u"⁂"] = u"***"
+        self.trans[u"◊"] = u"<>"
+        self.trans[u"‽"] = u"?!"
+        self.trans[u"؟"] = u";-)"      
 
-    # Cyrillic
-    if char == u"А":
-        return u"A"
-    if char == u"а":
-        return u"a"
-    if char == u"Б":
-        return u"B"
-    if char == u"б":
-        return u"b"
-    if char == u"В":
-        return u"V"
-    if char == u"в":
-        return u"v"
-    if char == u"Г":
-        return u"G"
-    if char == u"г":
-        return u"g"
-    if char == u"Д":
-        return u"D"
-    if char == u"д":
-        return u"d"
-    if char == u"Е":
-        return u"E"
-    if char == u"е":
-        return u"e"
-    if char == u"Ж":
-        return u"Zh"
-    if char == u"ж":
-        return u"zh"
-    if char == u"З":
-        return u"Z"
-    if char == u"з":
-        return u"z"
-    if char == u"И":
-        return u"I"
-    if char == u"и":
-        return u"i"
-    if char == u"Й":
-        return u"J"
-    if char == u"й":
-        return u"j"
-    if char == u"К":
-        return u"K"
-    if char == u"к":
-        return u"k"
-    if char == u"Л":
-        return u"L"
-    if char == u"л":
-        return u"l"
-    if char == u"М":
-        return u"M"
-    if char == u"м":
-        return u"m"
-    if char == u"Н":
-        return u"N"
-    if char == u"н":
-        return u"n"
-    if char == u"О":
-        return u"O"
-    if char == u"о":
-        return u"o"
-    if char == u"П":
-        return u"P"
-    if char == u"п":
-        return u"p"
-    if char == u"Р":
-        return u"R"
-    if char == u"р":
-        return u"r"
-    if char == u"С":
-        return u"S"
-    if char == u"с":
-        return u"s"
-    if char == u"Т":
-        return u"T"
-    if char == u"т":
-        return u"t"
-    if char in u"У":
-        return u"U"
-    if char == u"у":
-        return u"u"
-    if char == u"Ф":
-        return u"F"
-    if char == u"ф":
-        return u"f"
-    if char in u"ХΧ":
-        if prev.lower() == prev:
-            return u"Kh"
-        else:
-            return u"KH"
-    if char == u"х":
-        return u"kh"
-    if char == u"Ц":
-        return u"C"
-    if char == u"ц":
-        return u"c"
-    if char == u"Ч":
-        return u"Ch"
-    if char == u"ч":
-        return u"ch"
-    if char == u"Ш":
-        return u"Sh"
-    if char == u"ш":
-        return u"sh"
-    if char == u"Щ":
-        return u"Shch"
-    if char == u"щ":
-        return u"shch"
-    if char in u"Ьь":
-        return u"'"
-    if char in u"Ъъ":
-        return '"'
-    if char == u"Ю":
-        return u"Yu"
-    if char == u"ю":
-        return u"yu"
-    if char == u"Я":
-        return u"Ya"
-    if char == u"я":
-        return u"ya"
-    # Additional Cyrillic letters, most occuring in only one or a few languages
-    if char == u"Ы":
-        return u"Y"
-    if char == u"ы":
-        return u"y"
-    if char == u"Ё":
-        return u"Ë"
-    if char == u"ё":
-        return u"ë"
-    if char in u"ЭЀ":
-        return u"È"
-    if char in u"эѐ":
-        return u"è"
-    if char == u"І":
-        return u"I"
-    if char == u"і":
-        return u"i"
-    if char == u"Ї":
-        return u"Ji"
-    if char == u"ї":
-        return u"ji"
-    if char == u"Є":
-        return u"Je"
-    if char == u"є":
-        return u"je"
-    if char in u"ҐҜ":
-        return u"G"
-    if char in u"ґҝ":
-        return u"g"
-    if char == u"Ђ":
-        return u"Dj"
-    if char == u"ђ":
-        return u"dj"
-    if char in u"ЈӤҊ":
-        return u"J"
-    if char in u"јӥҋ":
-        return u"j"
-    if char == u"Ӣ":
-        return u"Y"
-    if char == u"ӣ":
-        return u"y"
-    if char == u"Љ":
-        return u"Lj"
-    if char == u"љ":
-        return u"lj"
-    if char == u"Њ":
-        return u"Nj"
-    if char == u"њ":
-        return u"nj"
-    if char == u"Ћ":
-        return u"Cj"
-    if char == u"ћ":
-        return u"cj"
-    if char in u"ЏӁӜҶ":
-        return u"Dzh"
-    if char in u"џӂӝҷ":
-        return u"dzh"
-    if char == u"Җ":
-        return u"Zhj"
-    if char == u"җ":
-        return u"zhj"
-    if char in u"ЅӞӠӋҸ":
-        return u"Dz"
-    if char in u"ѕӟӡӌҹ":
-        return u"dz"
-    if char == u"Ѓ":
-        return u"Gj"
-    if char == u"ѓ":
-        return u"gj"
-    if char == u"Ќ":
-        return u"Kj"
-    if char == u"ќ":
-        return u"kj"
-    if char in u"ҒӶҔ":
-        return u"G"
-    if char in u"ғӷҕ":
-        return u"g"
-    if char == u"Ӣ":
-        return u"Ii"
-    if char == u"ӣ":
-        return u"ii"
-    if char in u"ҚҞҠӃ":
-        return u"Q"
-    if char == u"қҟҡӄ":
-        return u"q"
-    if char == u"Ӯ":
-        return u"U"
-    if char == u"ӯ":
-        return u"u"
-    if char == u"Ҳ":
-        return u"H"
-    if char == u"ҳ":
-        return u"h"
-    if char == u"Ҷ":
-        return u"Dz"
-    if char == u"ҷ":
-        return u"dz"
-    if char in u"ӨӪ":
-        return u"Ô"
-    if char in u"өӫ":
-        return u"ô"
-    if char == u"Ү":
-        return u"Y"
-    if char == u"ү":
-        return u"y"
-    if char == u"Һ":
-        return u"H"
-    if char == u"һ":
-        return u"h"
-    if char in u"ӘӔ":
-        return u"AE"
-    if char == u"ә":
-        return u"ae"
-    if char == u"ӚӬ":
-        return u"Ë"
-    if char == u"ӛӭ":
-        return u"ë"
-    if char == u"Җ":
-        return u"Zhj"
-    if char == u"җ":
-        return u"zhj"
-    if char == u"ҢҤӉӇ":
-        return u"Ng"
-    if char == u"ңҥӊӈ":
-        return u"ng"
-    if char == u"Ұ":
-        return u"U"
-    if char == u"ұ":
-        return u"u"
-    if char == u"ў":
-        return u"ù"
-    if char == u"Ў":
-        return u"Ù"
-    if char == u"ѝ":
-        return u"ì"
-    if char == u"Ѝ":
-        return u"Ì"
-    if char == u"Ӑ":
-        return u"A"
-    if char == u"ă":
-        return u"a"
-    if char == u"Ӓ":
-        return u"Ä"
-    if char == u"ä":
-        return u"ä"
-    if char in u"ӖѢҌ":
-        return u"E"
-    if char in u"ӗѣҍ":
-        return u"e"
-    if char == u"ҼҾ":
-        return u"Ts"
-    if char == u"ҽҿ":
-        return u"ts"
-    if char == u"Ҙ":
-        return u"Dh"
-    if char == u"ҙ":
-        return u"dh"
-    if char in u"Ӏӏ":
-        return u""
-    if char == u"Ӆ":
-        return u"L"
-    if char == u"ӆ":
-        return u"l"
-    if char == u"Ӎ":
-        return u"M"
-    if char == u"ӎ":
-        return u"m"
-    if char == u"Ӧ":
-        return u"Ö"
-    if char == u"ӧ":
-        return u"ö"
-    if char == u"Ҩ":
-        return u"u"
-    if char == u"ҩ":
-        return u"u"
-    if char == u"Ҧ":
-        return u"Ph"
-    if char == u"ҧ":
-        return u"ph"
-    if char == u"Ҏ":
-        return u"R"
-    if char == u"ҏ":
-        return u"r"
-    if char == u"Ҫ":
-        return u"Th"
-    if char == u"ҫ":
-        return u"th"
-    if char == u"Ҭ":
-        return u"T"
-    if char == u"ҭ":
-        return u"t"
-    if char in u"ӲӰҮ":
-        return u"Ü"
-    if char in u"ӳӱү":
-        return u"ü"
-    if char == u"Ӯ":
-        return u"Û"
-    if char == u"ӯ":
-        return u"û"
-    if char == u"ҰӸ":
-        return u"U"
-    if char == u"ұӹ":
-        return u"u"
-    if char == u"Ҵ":
-        return u"Tts"
-    if char == u"ҵ":
-        return u"tts"
-    if char == u"Ӵ":
-        return u"Ch"
-    if char == u"ӵ":
-        return u"ch"
+        # Cyrillic
+        self.trans.update({u"А" : u"A", u"а" : u"a", u"Б" : u"B", u"б" : u"b",
+                      u"В" : u"V", u"в" : u"v", u"Г" : u"G", u"г" : u"g",
+                      u"Д" : u"D", u"д" : u"d", u"Е" : u"E", u"е" : u"e",
+                      u"Ж" : u"Zh", u"ж" : u"zh", u"З" : u"Z", u"з" : u"z",
+                      u"И" : u"I", u"и" : u"i", u"Й" : u"J", u"й" : u"j",
+                      u"К" : u"K", u"к" : u"k", u"Л" : u"L", u"л" : u"l",
+                      u"М" : u"M", u"м" : u"m", u"Н" : u"N", u"н" : u"n",
+                      u"О" : u"O", u"о" : u"o", u"П" : u"P", u"п" : u"p",
+                      u"Р" : u"R", u"р" : u"r", u"С" : u"S", u"с" : u"s",
+                      u"Т" : u"T", u"т" : u"t", u"У" : u"U", u"у" : u"u",
+                      u"Ф" : u"F", u"ф" : u"f", u"х" : u"kh", u"Ц" : u"C",
+                      u"ц" : u"c", u"Ч" : u"Ch", u"ч" : u"ch", u"Ш" : u"Sh",
+                      u"ш" : u"sh", u"Щ" : u"Shch", u"щ" : u"shch", u"Ь" : u"'",
+                      u"ь" : "'", u"Ъ" : u'"', u"ъ" : '"', u"Ю" : u"Yu",
+                      u"ю" : u"yu", u"Я" : u"Ya", u"я" : u"ya", u"Х" : u"Kh",
+                      u"Χ" : u"Kh"})
 
-    # Archaic Cyrillic letters
-    if char == u"Ѹ":
-        return u"Ou"
-    if char == u"ѹ":
-        return u"ou"
-    if char in u"ѠѺ":
-        return u"O"
-    if char in u"ѡѻ":
-        return u"o"
-    if char == u"Ѿ":
-        return u"Ot"
-    if char == u"ѿ":
-        return u"ot"
-    if char == u"Ѣ":
-        return u"E"
-    if char == u"ѣ":
-        return u"e"
-    if char in u"ѤѦ":
-        return u"Ei"
-    if char in u"ѥѧ":
-        return u"ei"
-    if char == u"Ѫ":
-        return u"Ai"
-    if char == u"ѫ":
-        return u"ai"
-    if char == u"Ѯ":
-        return u"X"
-    if char == u"ѯ":
-        return u"x"
-    if char == u"Ѱ":
-        return u"Ps"
-    if char == u"ѱ":
-        return u"ps"
-    if char == u"Ѳ":
-        return u"Th"
-    if char == u"ѳ":
-        return u"th"
-    if char in u"ѴѶ":
-        return u"Ü"
-    if char == u"ѵ":
-        return u"ü"
-    
+        # Additional Cyrillic letters, most occurring in only one or a few languages
+        self.trans.update({u"Ы" : u"Y", u"ы" : u"y", u"Ё" : u"Ë", u"ё" : u"ë",
+                      u"Э" : u"È", u"Ѐ" : u"È", u"э" : u"è", u"ѐ" : u"è",
+                      u"І" : u"I", u"і" : u"i", u"Ї" : u"Ji", u"ї" : u"ji",
+                      u"Є" : u"Je", u"є" : u"je", u"Ґ" : u"G", u"Ҝ" : u"G",
+                      u"ґ" : u"g", u"ҝ" : u"g", u"Ђ" : u"Dj", u"ђ" : u"dj",
+                      u"Ӣ" : u"Y", u"ӣ" : u"y", u"Љ" : u"Lj", u"љ" : u"lj",
+                      u"Њ" : u"Nj", u"њ" : u"nj", u"Ћ" : u"Cj", u"ћ" : u"cj",
+                      u"Җ" : u"Zhj", u"җ" : u"zhj", u"Ѓ" : u"Gj", u"ѓ" : u"gj",
+                      u"Ќ" : u"Kj", u"ќ" : u"kj", u"Ӣ" : u"Ii", u"ӣ" : u"ii",
+                      u"Ӯ" : u"U", u"ӯ" : u"u", u"Ҳ" : u"H", u"ҳ" : u"h",
+                      u"Ҷ" : u"Dz",u"ҷ" : u"dz", u"Ө" :u"Ô", u"Ӫ" : u"Ô",
+                      u"ө" : u"ô", u"ӫ" : u"ô", u"Ү": u"Y", u"ү": u"y", u"Һ": u"H",
+                      u"һ": u"h", u"Ә": u"AE", u"Ӕ": u"AE", u"ә": u"ae",
+                      u"Ӛ": u"Ë", u"Ӭ": u"Ë", u"ӛ": u"ë", u"ӭ": u"ë", u"Җ": u"Zhj",
+                      u"җ": u"zhj", u"Ұ": u"U", u"ұ": u"u", u"ў": u"ù", u"Ў": u"Ù",
+                      u"ѝ": u"ì", u"Ѝ": u"Ì", u"Ӑ": u"A", u"ă": u"a", u"Ӓ": u"Ä",
+                      u"ä": u"ä", u"Ҽ" : u"Ts", u"Ҿ": u"Ts", u"ҽ": u"ts", u"ҿ": u"ts",
+                      u"Ҙ": u"Dh", u"ҙ": u"dh", u"Ӏ": u"", u"ӏ": u"", u"Ӆ": u"L",
+                      u"ӆ": u"l", u"Ӎ": u"M", u"ӎ": u"m", u"Ӧ": u"Ö", u"ӧ": u"ö",
+                      u"Ҩ": u"u", u"ҩ": u"u", u"Ҧ": u"Ph", u"ҧ": u"ph", u"Ҏ": u"R",
+                      u"ҏ": u"r", u"Ҫ": u"Th", u"ҫ": u"th", u"Ҭ": u"T", u"ҭ": u"t",
+                      u"Ӯ": u"Û", u"ӯ": u"û", u"Ұ": u"U", u"Ӹ": u"U", u"ұ": u"u",
+                      u"ӹ": u"u", u"Ҵ": u"Tts", u"ҵ": u"tts", u"Ӵ": u"Ch", u"ӵ": u"ch"})
+              
+        for char in u"ЈӤҊ":
+            self.trans[char] = u"J"
+        for char in u"јӥҋ":
+            self.trans[char] = u"j"
+        for char in u"ЏӁӜҶ":
+            self.trans[char] = u"Dzh"
+        for char in u"џӂӝҷ":
+            self.trans[char] = u"dzh"
+        for char in u"ЅӞӠӋҸ":
+            self.trans[char] = u"Dz"
+        for char in u"ѕӟӡӌҹ":
+            self.trans[char] = u"dz"
+        for char in u"ҒӶҔ":
+            self.trans[char] = u"G"
+        for char in u"ғӷҕ":
+            self.trans[char] = u"g"
+        for char in u"ҚҞҠӃ":
+            self.trans[char] = u"Q"
+        for char in u"қҟҡӄ":
+            self.trans[char] = u"q"
+        for char in u"ҢҤӉӇ":
+            self.trans[char] = u"Ng"
+        for char in u"ңҥӊӈ":
+            self.trans[char] = u"ng"
+        for char in u"ӖѢҌ":
+            self.trans[char] = u"E"
+        for char in u"ӗѣҍ":
+            self.trans[char] = u"e"
+        for char in u"ӲӰҮ":
+            self.trans[char] = u"Ü"
+        for char in u"ӳӱү":
+            self.trans[char] = u"ü"
 
-    # Hebrew alphabet
-    if char in u"אע":
-        return u"'"
-    if char == u"ב":
-        return u"b"
-    if char == u"ג":
-        return u"g"
-    if char == u"ד":
-        return u"d"
-    if char == u"ה":
-        return u"h"
-    if char == u"ו":
-        return u"v"
-    if char == u"ז":
-        return u"z"
-    if char == u"ח":
-        return u"kh"
-    if char == u"ט":
-        return u"t"
-    if char == u"י":
-        return u"y"
-    if char in u"ךכ":
-        return u"k"
-    if char == u"ל":
-        return u"l"
-    if char in u"םמ":
-        return u"m"
-    if char in u"ןנ":
-        return u"n"
-    if char == u"ס":
-        return u"s"
-    if char in u"ףפ":
-        return u"ph"
-    if char in u"ץצ":
-        return u"ts"
-    if char == u"ק":
-        return u"q"
-    if char == u"ר":
-        return u"r"
-    if char == u"ש":
-        return u"sh"
-    if char == u"ת":
-        return u"th"
-    
-    # Arab alphabet
-    if char in u"اﺍﺎ":
-        return u"a"
-    if char in u"بﺏﺐﺒﺑ":
-        return u"b"
-    if char in u"تﺕﺖﺘﺗ":
-        return u"t"
-    if char in u"ثﺙﺚﺜﺛ":
-        return u"th"
-    if char in u"جﺝﺞﺠﺟ":
-        return u"g"
-    if char in u"حﺡﺢﺤﺣ":
-        return u"h"
-    if char in u"خﺥﺦﺨﺧ":
-        return u"kh"
-    if char in u"دﺩﺪ":
-        return u"d"
-    if char in u"ذﺫﺬ":
-        return u"dh"
-    if char in u"رﺭﺮ":
-        return u"r"
-    if char in u"زﺯﺰ":
-        return u"z"
-    if char in u"سﺱﺲﺴﺳ":
-        return u"s"
-    if char in u"شﺵﺶﺸﺷ":
-        return u"sh"
-    if char in u"صﺹﺺﺼﺻ":
-        return u"s"
-    if char in u"ضﺽﺾﻀﺿ":
-        return u"d"
-    if char in u"طﻁﻂﻄﻃ":
-        return u"t"
-    if char in u"ظﻅﻆﻈﻇ":
-        return u"z"
-    if char in u"عﻉﻊﻌﻋ":
-        return u"'"
-    if char in u"غﻍﻎﻐﻏ":
-        return u"gh"
-    if char in u"فﻑﻒﻔﻓ":
-        return u"f"
-    if char in u"قﻕﻖﻘﻗ":
-        return u"q"
-    if char in u"كﻙﻚﻜﻛک":
-        return u"k"
-    if char in u"لﻝﻞﻠﻟ":
-        return u"l"
-    if char in u"مﻡﻢﻤﻣ":
-        return u"m"
-    if char in u"نﻥﻦﻨﻧ":
-        return u"n"
-    if char in u"هﻩﻪﻬﻫ":
-        return u"h"
-    if char in u"وﻭﻮ":
-        return u"w"
-    if char in u"یيﻱﻲﻴﻳ":
-        return u"y"
-    # Arabic - additional letters, modified letters and ligatures
-    if char == u"ﺀ":
-        return u"'"
-    if char in u"آﺁﺂ":
-        return u"'a"
-    if char in u"ةﺓﺔ":
-        return u"th"
-    if char in u"ىﻯﻰ":
-        return u"á"
-    if char in u"یﯼﯽﯿﯾ":
-        return u"y"
-    if char == u"؟":
-        return u"?"
-    # Arabic - ligatures
-    if char in u"ﻻﻼ":
-        return u"la"
-    if char == u"ﷲ":
-        return u"llah"
-    if char in u"إأ":
-        return u"a'"
-    if char == u"ؤ":
-        return u"w'"
-    if char == u"ئ":
-        return u"y'"
-    if char == u"◌":
-        return prev
-    if char in u"◌◌":
-        return u"" # indicates absence of vowels
-    # Arabic vowels
-    if char == u"◌":
-        return u"a"
-    if char == u"◌":
-        return u"u"
-    if char == u"◌":
-        return u"i"
-    if char == u"◌":
-        return u"a"
-    if char == u"◌":
-        return u"ay"
-    if char == u"◌":
-        return u"ay"
-    if char == u"◌":
-        return u"u"
-    if char == u"◌":
-        return u"iy"
-    # Arab numerals
-    if char in u"٠۰":
-        return u"0"
-    if char in u"١۱":
-        return u"1"
-    if char in u"٢۲":
-        return u"2"
-    if char in u"٣۳":
-        return u"3"
-    if char in u"٤۴":
-        return u"4"
-    if char in u"٥۵":
-        return u"5"
-    if char in u"٦۶":
-        return u"6"
-    if char in u"٧۷":
-        return u"7"
-    if char in u"٨۸":
-        return u"8"
-    if char in u"٩۹":
-        return u"9"
-    # Perso-Arabic
-    if char in u"پﭙﭙپ":
-        return u"p"
-    if char in u"چچچچ":
-        return u"ch"
-    if char in u"ژژ":
-        return u"zh"
-    if char in u"گﮔﮕﮓ":
-        return u"g"
+        # Archaic Cyrillic letters
+        self.trans.update({u"Ѹ": u"Ou", u"ѹ": u"ou", u"Ѡ": u"O", u"Ѻ": u"O", u"ѡ": u"o",
+                      u"ѻ": u"o", u"Ѿ": u"Ot", u"ѿ": u"ot", u"Ѣ": u"E", u"ѣ": u"e",
+                      u"Ѥ": u"Ei", u"Ѧ": u"Ei", u"ѥ": u"ei", u"ѧ": u"ei", u"Ѫ": u"Ai",
+                      u"ѫ": u"ai", u"Ѯ": u"X", u"ѯ": u"x", u"Ѱ": u"Ps", u"ѱ": u"ps",
+                      u"Ѳ": u"Th", u"ѳ": u"th", u"Ѵ": u"Ü", u"Ѷ": u"Ü", u"ѵ": u"ü"})
 
-    # Greek
-    if char == u"Α":
-        return u"A"
-    if char == u"α":
-        return u"a"
-    if char == u"Β":
-        return u"B"
-    if char == u"β":
-        return u"b"
-    if char == u"Γ":
-        return u"G"
-    if char == u"γ":
-        return u"g"
-    if char == u"Δ":
-        return u"D"
-    if char == u"δ":
-        return u"d"
-    if char == u"Ε":
-        return u"E"
-    if char == u"ε":
-        return u"e"
-    if char == u"Ζ":
-        return u"Z"
-    if char == u"ζ":
-        return u"z"
-    if char == u"Η":
-        return u"I"
-    if char == u"η":
-        return u"i"
-    if char == u"Θ":
-        if prev.lower() == prev:
-            return u"Th"
-        else:
-            return u"TH"
-    if char == u"θ":
-        return u"th"
-    if char == u"Ι":
-        return u"I"
-    if char == u"ι":
-        return u"i"
-    if char == u"Κ":
-        return u"K"
-    if char == u"κ":
-        return u"k"
-    if char == u"Λ":
-        return u"L"
-    if char == u"λ":
-        return u"l"
-    if char == u"Μ":
-        return u"M"
-    if char == u"μ":
-        return u"m"
-    if char == u"Ν":
-        return u"N"
-    if char == u"ν":
-        return u"n"
-    if char == u"Ξ":
-        return u"X"
-    if char == u"ξ":
-        return u"x"
-    if char == u"Ο":
-        return u"O"
-    if char == u"ο":
-        return u"o"
-    if char == u"Π":
-        return u"P"
-    if char == u"π":
-        return u"p"
-    if char == u"Ρ":
-        return u"R"
-    if char == u"ρ":
-        return u"r"
-    if char == u"Σ":
-        return u"S"
-    if char in u"σς":
-        return u"s"
-    if char == u"Τ":
-        return u"T"
-    if char == u"τ":
-        return u"t"
-    if char == u"Υ":
-        return u"Y"
-    if char == u"υ":
-        return u"y"
-    if char == u"Φ":
-        return u"F"
-    if char == u"φ":
-        return u"f"
-    if char == u"Ψ":
-        if prev.lower() == prev:
-            return u"Ps"
-        else:
-            return u"PS"
-    if char == u"ψ":
-        return u"ps"
-    if char == u"Ω":
-        return u"O"
-    if char == u"ω":
-        return u"o"
-    # Greek: Special and old characters
-    if char == u"ϗ":
-        return u"&"
-    if char == u"Ϛ":
-        if prev.lower() == prev:
-            return u"St"
-        else:
-            return u"ST"
-    if char == u"ϛ":
-        return u"st"
-    if char in u"ϘϞ":
-        return u"Q"
-    if char in u"ϙϟ":
-        return u"q"
-    if char == u"Ϻ":
-        return u"S"
-    if char == u"ϻ":
-        return u"s"
-    if char == u"Ϡ":
-        if prev.lower() == prev:
-            return u"Ss"
-        else:
-            return u"SS"
-    if char == u"ϡ":
-        return u"ss"
-    if char == u"Ϸ":
-        if prev.lower() == prev:
-            return u"Sh"
-        else:
-            return u"SH"
-    if char == u"ϸ":
-        return u"sh"
-    if char == u"·":
-        return u":"
-    # Greek: Accented characters
-    if char == u"Ά":
-        return u"Á"
-    if char == u"ά":
-        return u"á"
-    if char in u"ΈΉ":
-        return u"É"
-    if char in u"έή":
-        return u"é"
-    if char == u"Ί":
-        return u"Í"
-    if char == u"ί":
-        return u"í"
-    if char == u"Ϊ":
-        return u"Ï"
-    if char in u"ϊΐ":
-        return u"ï"
-    if char == u"Ό":
-        return u"Ó"
-    if char == u"ό":
-        return u"ó"
-    if char == u"Ύ":
-        return u"Ý"
-    if char == u"ύ":
-        return u"ý"
-    if char == u"Ϋ":
-        return u"Y"
-    if char in u"ϋΰ":
-        return u"ÿ"
-    if char == u"Ώ":
-        return u"Ó"
-    if char == u"ώ":
-        return u"ó"
+        # Hebrew alphabet
+        for char in u"אע":
+            self.trans[char] = u"'"
+        self.trans[u"ב"] = u"b"
+        self.trans[u"ג"] = u"g"
+        self.trans[u"ד"] = u"d"
+        self.trans[u"ה"] = u"h"
+        self.trans[u"ו"] = u"v"
+        self.trans[u"ז"] = u"z"
+        self.trans[u"ח"] = u"kh"
+        self.trans[u"ט"] = u"t"
+        self.trans[u"י"] = u"y"
+        for char in u"ךכ":
+            self.trans[char] = u"k"
+        self.trans[u"ל"] = u"l"
+        for char in u"םמ":
+            self.trans[char] = u"m"
+        for char in u"ןנ":
+            self.trans[char] = u"n"
+        self.trans[u"ס"] = u"s"
+        for char in u"ףפ":
+            self.trans[char] = u"ph"
+        for char in u"ץצ":
+            self.trans[char] = u"ts"
+        self.trans[u"ק"] = u"q"
+        self.trans[u"ר"] = u"r"
+        self.trans[u"ש"] = u"sh"
+        self.trans[u"ת"] = u"th"
+        
+        # Arab alphabet
+        for char in u"اﺍﺎ":
+            self.trans[char] = u"a"
+        for char in u"بﺏﺐﺒﺑ":
+            self.trans[char] = u"b"
+        for char in u"تﺕﺖﺘﺗ":
+            self.trans[char] = u"t"
+        for char in u"ثﺙﺚﺜﺛ":
+            self.trans[char] = u"th"
+        for char in u"جﺝﺞﺠﺟ":
+            self.trans[char] = u"g"
+        for char in u"حﺡﺢﺤﺣ":
+            self.trans[char] = u"h"
+        for char in u"خﺥﺦﺨﺧ":
+            self.trans[char] = u"kh"
+        for char in u"دﺩﺪ":
+            self.trans[char] = u"d"
+        for char in u"ذﺫﺬ":
+            self.trans[char] = u"dh"
+        for char in u"رﺭﺮ":
+            self.trans[char] = u"r"
+        for char in u"زﺯﺰ":
+            self.trans[char] = u"z"
+        for char in u"سﺱﺲﺴﺳ":
+            self.trans[char] = u"s"
+        for char in u"شﺵﺶﺸﺷ":
+            self.trans[char] = u"sh"
+        for char in u"صﺹﺺﺼﺻ":
+            self.trans[char] = u"s"
+        for char in u"ضﺽﺾﻀﺿ":
+            self.trans[char] = u"d"
+        for char in u"طﻁﻂﻄﻃ":
+            self.trans[char] = u"t"
+        for char in u"ظﻅﻆﻈﻇ":
+            self.trans[char] = u"z"
+        for char in u"عﻉﻊﻌﻋ":
+            self.trans[char] = u"'"
+        for char in u"غﻍﻎﻐﻏ":
+            self.trans[char] = u"gh"
+        for char in u"فﻑﻒﻔﻓ":
+            self.trans[char] = u"f"
+        for char in u"قﻕﻖﻘﻗ":
+            self.trans[char] = u"q"
+        for char in u"كﻙﻚﻜﻛک":
+            self.trans[char] = u"k"
+        for char in u"لﻝﻞﻠﻟ":
+            self.trans[char] = u"l"
+        for char in u"مﻡﻢﻤﻣ":
+            self.trans[char] = u"m"
+        for char in u"نﻥﻦﻨﻧ":
+            self.trans[char] = u"n"
+        for char in u"هﻩﻪﻬﻫ":
+            self.trans[char] = u"h"
+        for char in u"وﻭﻮ":
+            self.trans[char] = u"w"
+        for char in u"یيﻱﻲﻴﻳ":
+            self.trans[char] = u"y"
+        # Arabic - additional letters, modified letters and ligatures
+        self.trans[u"ﺀ"] = u"'"
+        for char in u"آﺁﺂ":
+            self.trans[char] = u"'a"
+        for char in u"ةﺓﺔ":
+            self.trans[char] = u"th"
+        for char in u"ىﻯﻰ":
+            self.trans[char] = u"á"
+        for char in u"یﯼﯽﯿﯾ":
+            self.trans[char] = u"y"
+        self.trans[u"؟"] = u"?"
+        # Arabic - ligatures
+        for char in u"ﻻﻼ":
+            self.trans[char] = u"la"
+        self.trans[u"ﷲ"] = u"llah"
+        for char in u"إأ":
+            self.trans[char] = u"a'"
+        self.trans[u"ؤ"] = u"w'"
+        self.trans[u"ئ"] = u"y'"
+        for char in u"◌◌":
+            self.trans[char] = u"" # indicates absence of vowels
+        # Arabic vowels
+        self.trans[u"◌"] = u"a"
+        self.trans[u"◌"] = u"u"
+        self.trans[u"◌"] = u"i"
+        self.trans[u"◌"] = u"a"
+        self.trans[u"◌"] = u"ay"
+        self.trans[u"◌"] = u"ay"
+        self.trans[u"◌"] = u"u"
+        self.trans[u"◌"] = u"iy"
+        # Arab numerals
+        for char in u"٠۰":
+            self.trans[char] = u"0"
+        for char in u"١۱":
+            self.trans[char] = u"1"
+        for char in u"٢۲":
+            self.trans[char] = u"2"
+        for char in u"٣۳":
+            self.trans[char] = u"3"
+        for char in u"٤۴":
+            self.trans[char] = u"4"
+        for char in u"٥۵":
+            self.trans[char] = u"5"
+        for char in u"٦۶":
+            self.trans[char] = u"6"
+        for char in u"٧۷":
+            self.trans[char] = u"7"
+        for char in u"٨۸":
+            self.trans[char] = u"8"
+        for char in u"٩۹":
+            self.trans[char] = u"9"
+        # Perso-Arabic
+        for char in u"پﭙﭙپ":
+            self.trans[char] = u"p"
+        for char in u"چچچچ":
+            self.trans[char] = u"ch"
+        for char in u"ژژ":
+            self.trans[char] = u"zh"
+        for char in u"گﮔﮕﮓ":
+            self.trans[char] = u"g"
 
-    # Japanese (katakana and hiragana)
-    if char in u"アァあ":
-        return u"a"
-    if char in u"イィい":
-        return u"i"
-    if char in u"ウう":
-        return u"u"
-    if char in u"エェえ":
-        return u"e"
-    if char in u"オォお":
-        return u"o"
-    if char in u"ャや":
-        return u"ya"
-    if char in u"ュゆ":
-        return u"yu"
-    if char in u"ョよ":
-        return u"yo"
-    if char in u"カか":
-        return u"ka"
-    if char in u"キき":
-        return u"ki"
-    if char in u"クく":
-        return u"ku"
-    if char in u"ケけ":
-        return u"ke"
-    if char in u"コこ":
-        return u"ko"
-    if char in u"サさ":
-        return u"sa"
-    if char in u"シし":
-        return u"shi"
-    if char in u"スす":
-        return u"su"
-    if char in u"セせ":
-        return u"se"
-    if char in u"ソそ":
-        return u"so"
-    if char in u"タた":
-        return u"ta"
-    if char in u"チち":
-        return u"chi"
-    if char in u"ツつ":
-        return u"tsu"
-    if char in u"テて":
-        return u"te"
-    if char in u"トと":
-        return u"to"
-    if char in u"ナな":
-        return u"na"
-    if char in u"ニに":
-        return u"ni"
-    if char in u"ヌぬ":
-        return u"nu"
-    if char in u"ネね":
-        return u"ne"
-    if char in u"ノの":
-        return u"no"
-    if char in u"ハは":
-        return u"ha"
-    if char in u"ヒひ":
-        return u"hi"
-    if char in u"フふ":
-        return u"fu"
-    if char in u"ヘへ":
-        return u"he"
-    if char in u"ホほ":
-        return u"ho"
-    if char in u"マま":
-        return u"ma"
-    if char in u"ミみ":
-        return u"mi"
-    if char in u"ムむ":
-        return u"mu"
-    if char in u"メめ":
-        return u"me"
-    if char in u"モも":
-        return u"mo"
-    if char in u"ラら":
-        return u"ra"
-    if char in u"リり":
-        return u"ri"
-    if char in u"ルる":
-        return u"ru"
-    if char in u"レれ":
-        return u"re"
-    if char in u"ロろ":
-        return u"ro"
-    if char in u"ワわ":
-        return u"wa"
-    if char in u"ヰゐ":
-        return u"wi"
-    if char in u"ヱゑ":
-        return u"we"
-    if char in u"ヲを":
-        return u"wo"
-    if char in u"ンん":
-        return u"n"
-    if char in u"ガが":
-        return u"ga"
-    if char in u"ギぎ":
-        return u"gi"
-    if char in u"グぐ":
-        return u"gu"
-    if char in u"ゲげ":
-        return u"ge"
-    if char in u"ゴご":
-        return u"go"
-    if char in u"ザざ":
-        return u"za"
-    if char in u"ジじ":
-        return u"ji"
-    if char in u"ズず":
-        return u"zu"
-    if char in u"ゼぜ":
-        return u"ze"
-    if char in u"ゾぞ":
-        return u"zo"
-    if char in u"ダだ":
-        return u"da"
-    if char in u"ヂぢ":
-        return u"dji"
-    if char in u"ヅづ":
-        return u"dzu"
-    if char in u"デで":
-        return u"de"
-    if char in u"ドど":
-        return u"do"
-    if char in u"バば":
-        return u"ba"
-    if char in u"ビび":
-        return u"bi"
-    if char in u"ブぶ":
-        return u"bu"
-    if char in u"ベべ":
-        return u"be"
-    if char in u"ボぼ":
-        return u"bo"
-    if char in u"パぱ":
-        return u"pa"
-    if char in u"ピぴ":
-        return u"pi"
-    if char in u"プぷ":
-        return u"pu"
-    if char in u"ペぺ":
-        return u"pe"
-    if char in u"ポぽ":
-        return u"po"
-    if char in u"ヴゔ":
-        return u"vu"
-    if char == u"ヷ":
-        return u"va"
-    if char == u"ヸ":
-        return u"vi"
-    if char == u"ヹ":
-        return u"ve"
-    if char == u"ヺ":
-        return u"vo"
-    if char == u"ッ":
-        return trans(next)[0]
+        # Greek
+        self.trans.update({u"Α": u"A", u"α": u"a", u"Β": u"B", u"β": u"b", u"Γ": u"G",
+                      u"γ": u"g", u"Δ": u"D", u"δ": u"d", u"Ε": u"E", u"ε": u"e",
+                      u"Ζ": u"Z", u"ζ": u"z", u"Η": u"I", u"η": u"i", u"θ": u"th",
+                      u"Θ": u"Th", u"Ι": u"I", u"ι": u"i", u"Κ": u"K", u"κ": u"k",
+                      u"Λ": u"L", u"λ": u"l", u"Μ": u"M", u"μ": u"m", u"Ν": u"N",
+                      u"ν": u"n", u"Ξ": u"X", u"ξ": u"x", u"Ο": u"O", u"ο": u"o",
+                      u"Π": u"P", u"π": u"p", u"Ρ": u"R", u"ρ": u"r", u"Σ": u"S",
+                      u"σ": u"s", u"ς": u"s", u"Τ": u"T", u"τ": u"t", u"Υ": u"Y",
+                      u"υ": u"y", u"Φ": u"F", u"φ": u"f", u"Ψ": u"Ps", u"ψ": u"ps",
+                      u"Ω": u"O", u"ω": u"o", u"ϗ": u"&", u"Ϛ": u"St", u"ϛ": u"st",
+                      u"Ϙ": u"Q", u"Ϟ": u"Q", u"ϙ": u"q", u"ϟ": u"q", u"Ϻ": u"S",
+                      u"ϻ": u"s", u"Ϡ": u"Ss", u"ϡ": u"ss", u"Ϸ": u"Sh", u"ϸ": u"sh",
+                      u"·": u":", u"Ά": u"Á", u"ά": u"á", u"Έ": u"É", u"Ή": u"É",
+                      u"έ": u"é", u"ή": u"é", u"Ί": u"Í", u"ί": u"í", u"Ϊ": u"Ï",
+                      u"ϊ": u"ï", u"ΐ": u"ï", u"Ό": u"Ó", u"ό": u"ó", u"Ύ": u"Ý",
+                      u"ύ": u"ý", u"Ϋ": u"Y", u"ϋ": u"ÿ", u"ΰ": u"ÿ", u"Ώ": u"Ó",
+                      u"ώ": u"ó"})
 
-    # Japanese and Chinese punctuation and typography
-    if char == u"・·":
-        return u" "
-    if char == u"々仝ヽヾゝゞ〱〲〳〵〴〵":
-        return prev
-    if char in u"〃『』《》":
-        return u'"'
-    if char in u"「」〈〉〘〙〚〛":
-        return u"'"
-    if char in u"(〔":
-        return u"("
-    if char in u")〕":
-        return u")"
-    if char in u"[【〖":
-        return u"["
-    if char in u"]】〗":
-        return u"]"
-    if char == u"{":
-        return u"{"
-    if char == u"}":
-        return u"}"
-    if char == u"っ":
-        return u":"
-    if char == u"ー":
-        return u"h"
-    if char == u"゛":
-        return u"'"
-    if char == u"゜":
-        return u"p"
-    if char == u"。":
-        return u". "
-    if char == u"、":
-        return u", "
-    if char == u"・":
-        return u" "
-    if char == u"〆":
-        return u"shime"
-    if char == u"〜":
-        return u"-"
-    if char == u"…":
-        return u"..."
-    if char == u"‥":
-        return u".."
-    if char == u"ヶ":
-        return u"months"
-    if char in u"•◦":
-        return u"_"
-    if char in u"※*":
-        return u"*"
-    if char == u"Ⓧ":
-        return u"(X)"
-    if char == u"Ⓨ":
-        return u"(Y)"
-    if char == u"!":
-        return u"!"
-    if char == u"?":
-        return u"?"
-    if char == u";":
-        return u";"
-    if char == u":":
-        return u":"
-    if char == u"。":
-        return u"."
-    if char in u",、":
-        return u","
+        # Japanese (katakana and hiragana)
+        for char in u"アァあ":
+            self.trans[char] = u"a"
+        for char in u"イィい":
+            self.trans[char] = u"i"
+        for char in u"ウう":
+            self.trans[char] = u"u"
+        for char in u"エェえ":
+            self.trans[char] = u"e"
+        for char in u"オォお":
+            self.trans[char] = u"o"
+        for char in u"ャや":
+            self.trans[char] = u"ya"
+        for char in u"ュゆ":
+            self.trans[char] = u"yu"
+        for char in u"ョよ":
+            self.trans[char] = u"yo"
+        for char in u"カか":
+            self.trans[char] = u"ka"
+        for char in u"キき":
+            self.trans[char] = u"ki"
+        for char in u"クく":
+            self.trans[char] = u"ku"
+        for char in u"ケけ":
+            self.trans[char] = u"ke"
+        for char in u"コこ":
+            self.trans[char] = u"ko"
+        for char in u"サさ":
+            self.trans[char] = u"sa"
+        for char in u"シし":
+            self.trans[char] = u"shi"
+        for char in u"スす":
+            self.trans[char] = u"su"
+        for char in u"セせ":
+            self.trans[char] = u"se"
+        for char in u"ソそ":
+            self.trans[char] = u"so"
+        for char in u"タた":
+            self.trans[char] = u"ta"
+        for char in u"チち":
+            self.trans[char] = u"chi"
+        for char in u"ツつ":
+            self.trans[char] = u"tsu"
+        for char in u"テて":
+            self.trans[char] = u"te"
+        for char in u"トと":
+            self.trans[char] = u"to"
+        for char in u"ナな":
+            self.trans[char] = u"na"
+        for char in u"ニに":
+            self.trans[char] = u"ni"
+        for char in u"ヌぬ":
+            self.trans[char] = u"nu"
+        for char in u"ネね":
+            self.trans[char] = u"ne"
+        for char in u"ノの":
+            self.trans[char] = u"no"
+        for char in u"ハは":
+            self.trans[char] = u"ha"
+        for char in u"ヒひ":
+            self.trans[char] = u"hi"
+        for char in u"フふ":
+            self.trans[char] = u"fu"
+        for char in u"ヘへ":
+            self.trans[char] = u"he"
+        for char in u"ホほ":
+            self.trans[char] = u"ho"
+        for char in u"マま":
+            self.trans[char] = u"ma"
+        for char in u"ミみ":
+            self.trans[char] = u"mi"
+        for char in u"ムむ":
+            self.trans[char] = u"mu"
+        for char in u"メめ":
+            self.trans[char] = u"me"
+        for char in u"モも":
+            self.trans[char] = u"mo"
+        for char in u"ラら":
+            self.trans[char] = u"ra"
+        for char in u"リり":
+            self.trans[char] = u"ri"
+        for char in u"ルる":
+            self.trans[char] = u"ru"
+        for char in u"レれ":
+            self.trans[char] = u"re"
+        for char in u"ロろ":
+            self.trans[char] = u"ro"
+        for char in u"ワわ":
+            self.trans[char] = u"wa"
+        for char in u"ヰゐ":
+            self.trans[char] = u"wi"
+        for char in u"ヱゑ":
+            self.trans[char] = u"we"
+        for char in u"ヲを":
+            self.trans[char] = u"wo"
+        for char in u"ンん":
+            self.trans[char] = u"n"
+        for char in u"ガが":
+            self.trans[char] = u"ga"
+        for char in u"ギぎ":
+            self.trans[char] = u"gi"
+        for char in u"グぐ":
+            self.trans[char] = u"gu"
+        for char in u"ゲげ":
+            self.trans[char] = u"ge"
+        for char in u"ゴご":
+            self.trans[char] = u"go"
+        for char in u"ザざ":
+            self.trans[char] = u"za"
+        for char in u"ジじ":
+            self.trans[char] = u"ji"
+        for char in u"ズず":
+            self.trans[char] = u"zu"
+        for char in u"ゼぜ":
+            self.trans[char] = u"ze"
+        for char in u"ゾぞ":
+            self.trans[char] = u"zo"
+        for char in u"ダだ":
+            self.trans[char] = u"da"
+        for char in u"ヂぢ":
+            self.trans[char] = u"dji"
+        for char in u"ヅづ":
+            self.trans[char] = u"dzu"
+        for char in u"デで":
+            self.trans[char] = u"de"
+        for char in u"ドど":
+            self.trans[char] = u"do"
+        for char in u"バば":
+            self.trans[char] = u"ba"
+        for char in u"ビび":
+            self.trans[char] = u"bi"
+        for char in u"ブぶ":
+            self.trans[char] = u"bu"
+        for char in u"ベべ":
+            self.trans[char] = u"be"
+        for char in u"ボぼ":
+            self.trans[char] = u"bo"
+        for char in u"パぱ":
+            self.trans[char] = u"pa"
+        for char in u"ピぴ":
+            self.trans[char] = u"pi"
+        for char in u"プぷ":
+            self.trans[char] = u"pu"
+        for char in u"ペぺ":
+            self.trans[char] = u"pe"
+        for char in u"ポぽ":
+            self.trans[char] = u"po"
+        for char in u"ヴゔ":
+            self.trans[char] = u"vu"
+        self.trans[u"ヷ"] = u"va"
+        self.trans[u"ヸ"] = u"vi"
+        self.trans[u"ヹ"] = u"ve"
+        self.trans[u"ヺ"] = u"vo"
 
-    # Georgian
-    if char == u"ა":
-        return u"a"
-    if char == u"ბ":
-        return u"b"
-    if char == u"გ":
-        return u"g"
-    if char == u"დ":
-        return u"d"
-    if char in u"ეჱ":
-        return u"e"
-    if char == u"ვ":
-        return u"v"
-    if char == u"ზ":
-        return u"z"
-    if char == u"თ":#
-        return u"th"
-    if char == u"ი":
-        return u"i"
-    if char == u"კ":#
-        return u"k"
-    if char == u"ლ":
-        return u"l"
-    if char == u"მ":
-        return u"m"
-    if char == u"ნ":
-        return u"n"
-    if char == u"ო":
-        return u"o"
-    if char == u"პ":#
-        return u"p"
-    if char == u"ჟ":#
-        return u"zh"
-    if char == u"რ":
-        return u"r"
-    if char == u"ს":
-        return u"s"
-    if char == u"ტ":#
-        return u"t"
-    if char == u"უ":
-        return u"u"
-    if char == u"ფ":#
-        return u"ph"
-    if char == u"ქ":#
-        return u"q"
-    if char == u"ღ":#
-        return u"gh"
-    if char == u"ყ":#
-        return u"q'"
-    if char == u"შ":
-        return u"sh"
-    if char == u"ჩ":
-        return u"ch"
-    if char == u"ც":
-        return u"ts"
-    if char == u"ძ":
-        return u"dz"
-    if char == u"წ":#
-        return u"ts'"
-    if char == u"ჭ":#
-        return u"ch'"
-    if char == u"ხ":
-        return u"kh"
-    if char == u"ჯ":#
-        return u"j"
-    if char == u"ჰ":
-        return u"h"
-    if char == u"ჳ":
-        return u"w"
-    if char == u"ჵ":
-        return u"o"
-    if char == u"ჶ":
-        return u"f"
+        # Japanese and Chinese punctuation and typography
+        for char in u"・·":
+            self.trans[char] = u" "
+        for char in u"〃『』《》":
+            self.trans[char] = u'"'
+        for char in u"「」〈〉〘〙〚〛":
+            self.trans[char] = u"'"
+        for char in u"(〔":
+            self.trans[char] = u"("
+        for char in u")〕":
+            self.trans[char] = u")"
+        for char in u"[【〖":
+            self.trans[char] = u"["
+        for char in u"]】〗":
+            self.trans[char] = u"]"
+        for char in u"{":
+            self.trans[char] = u"{"
+        for char in u"}":
+            self.trans[char] = u"}"
+        for char in u"っ":
+            self.trans[char] = u":"
+        for char in u"ー":
+            self.trans[char] = u"h"
+        for char in u"゛":
+            self.trans[char] = u"'"
+        for char in u"゜":
+            self.trans[char] = u"p"
+        for char in u"。":
+            self.trans[char] = u". "
+        for char in u"、":
+            self.trans[char] = u", "
+        for char in u"・":
+            self.trans[char] = u" "
+        for char in u"〆":
+            self.trans[char] = u"shime"
+        for char in u"〜":
+            self.trans[char] = u"-"
+        for char in u"…":
+            self.trans[char] = u"..."
+        for char in u"‥":
+            self.trans[char] = u".."
+        for char in u"ヶ":
+            self.trans[char] = u"months"
+        for char in u"•◦":
+            self.trans[char] = u"_"
+        for char in u"※*":
+            self.trans[char] = u"*"
+        for char in u"Ⓧ":
+            self.trans[char] = u"(X)"
+        for char in u"Ⓨ":
+            self.trans[char] = u"(Y)"
+        for char in u"!":
+            self.trans[char] = u"!"
+        for char in u"?":
+            self.trans[char] = u"?"
+        for char in u";":
+            self.trans[char] = u";"
+        for char in u":":
+            self.trans[char] = u":"
+        for char in u"。":
+            self.trans[char] = u"."
+        for char in u",、":
+            self.trans[char] = u","
 
-    # Devanagari
-    if char in u"पप":
-        return u"p"
-    if char in u"अ":
-        return u"a"
-    if char in u"आा":
-        return u"aa"
-    if char == u"प":
-        return u"pa"
-    if char in u"इि":
-        return u"i"
-    if char in u"ईी":
-        return u"ii"
-    if char in u"उु":
-        return u"u"
-    if char in u"ऊू":
-        return u"uu"
-    if char in u"एे":
-        return u"e"
-    if char in u"ऐै":
-        return u"ai"
-    if char in u"ओो":
-        return u"o"
-    if char in u"औौ":
-        return u"au"
-    if char in u"ऋृर":
-        return u"r"
-    if char in u"ॠॄ":
-        return u"rr"
-    if char in u"ऌॢल":
-        return u"l"
-    if char in u"ॡॣ":
-        return u"ll"
-    if char == u"क":
-        return u"k"
-    if char == u"ख":
-        return u"kh"
-    if char == u"ग":
-        return u"g"
-    if char == u"घ":
-        return u"gh"
-    if char == u"ङ":
-        return u"ng"
-    if char == u"च":
-        return u"c"
-    if char == u"छ":
-        return u"ch"
-    if char == u"ज":
-        return u"j"
-    if char == u"झ":
-        return u"jh"
-    if char == u"ञ":
-        return u"ñ"
-    if char in u"टत":
-        return u"t"
-    if char in u"ठथ":
-        return u"th"
-    if char in u"डद":
-        return u"d"
-    if char in u"ढध":
-        return u"dh"
-    if char in u"णन":
-        return u"n"
-    if char == u"फ":
-        return u"ph"
-    if char == u"ब":
-        return u"b"
-    if char == u"भ":
-        return u"bh"
-    if char == u"म":
-        return u"m"
-    if char == u"य":
-        return u"y"
-    if char == u"व":
-        return u"v"
-    if char == u"श":
-        return u"sh"
-    if char in u"षस":
-        return u"s"
-    if char == u"ह":
-        return u"h"
-    if char == u"क":
-        return u"x"
-    if char == u"त":
-        return u"tr"
-    if char == u"ज":
-        return u"gj"
-    if char == u"क़":
-        return u"q"
-    if char == u"फ":
-        return u"f"
-    if char == u"ख":
-        return u"hh"
-    if char == u"H":
-        return u"gh"
-    if char == u"ज":
-        return u"z"
-    if char in u"डढ":
-        return u"r"
-    # Devanagari ligatures (possibly incomplete and/or incorrect)
-    if char == u"ख्":
-        return u"khn"
-    if char == u"त":
-        return u"tn"
-    if char == u"द्":
-        return u"dn"
-    if char == u"श":
-        return u"cn"
-    if char == u"ह्":
-        return u"fn"
-    if char in u"अँ":
-        return u"m"
-    if char in u"॒॑":
-        return u""
-    if char == u"०":
-        return u"0"
-    if char == u"१":
-        return u"1"
-    if char == u"२":
-        return u"2"
-    if char == u"३":
-        return u"3"
-    if char == u"४":
-        return u"4"
-    if char == u"५":
-        return u"5"
-    if char == u"६":
-        return u"6"
-    if char == u"७":
-        return u"7"
-    if char == u"८":
-        return u"8"
-    if char == u"९":
-        return u"9"
+        # Georgian
+        for char in u"ა":
+            self.trans[char] = u"a"
+        for char in u"ბ":
+            self.trans[char] = u"b"
+        for char in u"გ":
+            self.trans[char] = u"g"
+        for char in u"დ":
+            self.trans[char] = u"d"
+        for char in u"ეჱ":
+            self.trans[char] = u"e"
+        for char in u"ვ":
+            self.trans[char] = u"v"
+        for char in u"ზ":
+            self.trans[char] = u"z"
+        for char in u"თ":#
+            self.trans[char] = u"th"
+        for char in u"ი":
+            self.trans[char] = u"i"
+        for char in u"კ":#
+            self.trans[char] = u"k"
+        for char in u"ლ":
+            self.trans[char] = u"l"
+        for char in u"მ":
+            self.trans[char] = u"m"
+        for char in u"ნ":
+            self.trans[char] = u"n"
+        for char in u"ო":
+            self.trans[char] = u"o"
+        for char in u"პ":#
+            self.trans[char] = u"p"
+        for char in u"ჟ":#
+            self.trans[char] = u"zh"
+        for char in u"რ":
+            self.trans[char] = u"r"
+        for char in u"ს":
+            self.trans[char] = u"s"
+        for char in u"ტ":#
+            self.trans[char] = u"t"
+        for char in u"უ":
+            self.trans[char] = u"u"
+        for char in u"ფ":#
+            self.trans[char] = u"ph"
+        for char in u"ქ":#
+            self.trans[char] = u"q"
+        for char in u"ღ":#
+            self.trans[char] = u"gh"
+        for char in u"ყ":#
+            self.trans[char] = u"q'"
+        for char in u"შ":
+            self.trans[char] = u"sh"
+        for char in u"ჩ":
+            self.trans[char] = u"ch"
+        for char in u"ც":
+            self.trans[char] = u"ts"
+        for char in u"ძ":
+            self.trans[char] = u"dz"
+        for char in u"წ":#
+            self.trans[char] = u"ts'"
+        for char in u"ჭ":#
+            self.trans[char] = u"ch'"
+        for char in u"ხ":
+            self.trans[char] = u"kh"
+        for char in u"ჯ":#
+            self.trans[char] = u"j"
+        for char in u"ჰ":
+            self.trans[char] = u"h"
+        for char in u"ჳ":
+            self.trans[char] = u"w"
+        for char in u"ჵ":
+            self.trans[char] = u"o"
+        for char in u"ჶ":
+            self.trans[char] = u"f"
 
-    # Armenian
-    if char == u"Ա":
-        return u"A"
-    if char == u"ա":
-        return u"a"
-    if char == u"Բ":
-        return u"B"
-    if char == u"բ":
-        return u"b"
-    if char == u"Գ":
-        return u"G"
-    if char == u"գ":
-        return u"g"
-    if char == u"Դ":
-        return u"D"
-    if char == u"դ":
-        return u"d"
-    if char == u"Ե":
-        return u"Je"
-    if char == u"ե":
-        return u"e"
-    if char == u"Զ":
-        return u"Z"
-    if char == u"զ":
-        return u"z"
-    if char == u"Է":
-        return u"É"
-    if char == u"է":
-        return u"é"
-    if char == u"Ը":
-        return u"Ë"
-    if char == u"ը":
-        return u"ë"
-    if char == u"Թ":
-        return u"Th"
-    if char == u"թ":
-        return u"th"
-    if char == u"Ժ":
-        return u"Zh"
-    if char == u"ժ":
-        return u"zh"
-    if char == u"Ի":
-        return u"I"
-    if char == u"ի":
-        return u"i"
-    if char == u"Լ":
-        return u"L"
-    if char == u"լ":
-        return u"l"
-    if char == u"Խ":
-        return u"Ch"
-    if char == u"խ":
-        return u"ch"
-    if char == u"Ծ":
-        return u"Ts"
-    if char == u"ծ":
-        return u"ts"
-    if char == u"Կ":
-        return u"K"
-    if char == u"կ":
-        return u"k"
-    if char == u"Հ":
-        return u"H"
-    if char == u"հ":
-        return u"h"
-    if char == u"Ձ":
-        return u"Dz"
-    if char == u"ձ":
-        return u"dz"
-    if char == u"Ղ":
-        return u"R"
-    if char == u"ղ":
-        return u"r"
-    if char == u"Ճ":
-        return u"Cz"
-    if char == u"ճ":
-        return u"cz"
-    if char == u"Մ":
-        return u"M"
-    if char == u"մ":
-        return u"m"
-    if char == u"Յ":
-        return u"J"
-    if char == u"յ":
-        return u"j"
-    if char == u"Ն":
-        return u"N"
-    if char == u"ն":
-        return u"n"
-    if char == u"Շ":
-        return u"S"
-    if char == u"շ":
-        return u"s"
-    if char == u"Շ":
-        return u"Vo"
-    if char == u"շ":
-        return u"o"
-    if char == u"Չ":
-        return u"Tsh"
-    if char == u"չ":
-        return u"tsh"
-    if char == u"Պ":
-        return u"P"
-    if char == u"պ":
-        return u"p"
-    if char == u"Ջ":
-        return u"Dz"
-    if char == u"ջ":
-        return u"dz"
-    if char == u"Ռ":
-        return u"R"
-    if char == u"ռ":
-        return u"r"
-    if char == u"Ս":
-        return u"S"
-    if char == u"ս":
-        return u"s"
-    if char == u"Վ":
-        return u"V"
-    if char == u"վ":
-        return u"v"
-    if char == u"Տ":
-        return u"T'"
-    if char == u"տ":
-        return u"t'"
-    if char == u"Ր":
-        return u"R"
-    if char == u"ր":
-        return u"r"
-    if char == u"Ց":
-        return u"Tsh"
-    if char == u"ց":
-        return u"tsh"
-    if char == u"Ւ":
-        return u"V"
-    if char == u"ւ":
-        return u"v"
-    if char == u"Փ":
-        return u"Ph"
-    if char == u"փ":
-        return u"ph"
-    if char == u"Ք":
-        return u"Kh"
-    if char == u"ք":
-        return u"kh"
-    if char == u"Օ":
-        return u"O"
-    if char == u"օ":
-        return u"o"
-    if char == u"Ֆ":
-        return u"F"
-    if char == u"ֆ":
-        return u"f"
-    if char == u"և":
-        return u"&"
-    if char == u"՟":
-        return u"."
-    if char == u"՞":
-        return u"?"
-    if char == u"՝":
-        return u";"
-    if char == u"՛":
-        return u""
+        # Devanagari
+        for char in u"पप":
+            self.trans[char] = u"p"
+        for char in u"अ":
+            self.trans[char] = u"a"
+        for char in u"आा":
+            self.trans[char] = u"aa"
+        for char in u"प":
+            self.trans[char] = u"pa"
+        for char in u"इि":
+            self.trans[char] = u"i"
+        for char in u"ईी":
+            self.trans[char] = u"ii"
+        for char in u"उु":
+            self.trans[char] = u"u"
+        for char in u"ऊू":
+            self.trans[char] = u"uu"
+        for char in u"एे":
+            self.trans[char] = u"e"
+        for char in u"ऐै":
+            self.trans[char] = u"ai"
+        for char in u"ओो":
+            self.trans[char] = u"o"
+        for char in u"औौ":
+            self.trans[char] = u"au"
+        for char in u"ऋृर":
+            self.trans[char] = u"r"
+        for char in u"ॠॄ":
+            self.trans[char] = u"rr"
+        for char in u"ऌॢल":
+            self.trans[char] = u"l"
+        for char in u"ॡॣ":
+            self.trans[char] = u"ll"
+        for char in u"क":
+            self.trans[char] = u"k"
+        for char in u"ख":
+            self.trans[char] = u"kh"
+        for char in u"ग":
+            self.trans[char] = u"g"
+        for char in u"घ":
+            self.trans[char] = u"gh"
+        for char in u"ङ":
+            self.trans[char] = u"ng"
+        for char in u"च":
+            self.trans[char] = u"c"
+        for char in u"छ":
+            self.trans[char] = u"ch"
+        for char in u"ज":
+            self.trans[char] = u"j"
+        for char in u"झ":
+            self.trans[char] = u"jh"
+        for char in u"ञ":
+            self.trans[char] = u"ñ"
+        for char in u"टत":
+            self.trans[char] = u"t"
+        for char in u"ठथ":
+            self.trans[char] = u"th"
+        for char in u"डद":
+            self.trans[char] = u"d"
+        for char in u"ढध":
+            self.trans[char] = u"dh"
+        for char in u"णन":
+            self.trans[char] = u"n"
+        for char in u"फ":
+            self.trans[char] = u"ph"
+        for char in u"ब":
+            self.trans[char] = u"b"
+        for char in u"भ":
+            self.trans[char] = u"bh"
+        for char in u"म":
+            self.trans[char] = u"m"
+        for char in u"य":
+            self.trans[char] = u"y"
+        for char in u"व":
+            self.trans[char] = u"v"
+        for char in u"श":
+            self.trans[char] = u"sh"
+        for char in u"षस":
+            self.trans[char] = u"s"
+        for char in u"ह":
+            self.trans[char] = u"h"
+        for char in u"क":
+            self.trans[char] = u"x"
+        for char in u"त":
+            self.trans[char] = u"tr"
+        for char in u"ज":
+            self.trans[char] = u"gj"
+        for char in u"क़":
+            self.trans[char] = u"q"
+        for char in u"फ":
+            self.trans[char] = u"f"
+        for char in u"ख":
+            self.trans[char] = u"hh"
+        for char in u"H":
+            self.trans[char] = u"gh"
+        for char in u"ज":
+            self.trans[char] = u"z"
+        for char in u"डढ":
+            self.trans[char] = u"r"
+        # Devanagari ligatures (possibly incomplete and/or incorrect)
+        for char in u"ख्":
+            self.trans[char] = u"khn"
+        for char in u"त":
+            self.trans[char] = u"tn"
+        for char in u"द्":
+            self.trans[char] = u"dn"
+        for char in u"श":
+            self.trans[char] = u"cn"
+        for char in u"ह्":
+            self.trans[char] = u"fn"
+        for char in u"अँ":
+            self.trans[char] = u"m"
+        for char in u"॒॑":
+            self.trans[char] = u""
+        for char in u"०":
+            self.trans[char] = u"0"
+        for char in u"१":
+            self.trans[char] = u"1"
+        for char in u"२":
+            self.trans[char] = u"2"
+        for char in u"३":
+            self.trans[char] = u"3"
+        for char in u"४":
+            self.trans[char] = u"4"
+        for char in u"५":
+            self.trans[char] = u"5"
+        for char in u"६":
+            self.trans[char] = u"6"
+        for char in u"७":
+            self.trans[char] = u"7"
+        for char in u"८":
+            self.trans[char] = u"8"
+        for char in u"९":
+            self.trans[char] = u"9"
 
-    # Tamil
-    if char == u"க்":
-        return u"k"
-    if char in u"ஙண்ந்ன்":
-        return u"n"
-    if char == u"ச":
-        return u"c"
-    if char == u"ஞ்":
-        return u"ñ"
-    if char == u"ட்":
-        return u"th"
-    if char == u"த":
-        return u"t"
-    if char == u"ப":
-        return u"p"
-    if char == u"ம்":
-        return u"m"
-    if char == u"ய்":
-        return u"y"
-    if char in u"ர்ழ்ற":
-        return u"r"
-    if char in u"ல்ள":
-        return u"l"
-    if char == u"வ்":
-        return u"v"
-    if char == u"ஜ":
-        return u"j"
-    if char == u"ஷ":
-        return u"sh"
-    if char == u"ஸ":
-        return u"s"
-    if char == u"ஹ":
-        return u"h"
-    if char == u"க்ஷ":
-        return u"x"
-    if char == u"அ":
-        return u"a"
-    if char == u"ஆ":
-        return u"aa"
-    if char == u"இ":
-        return u"i"
-    if char == u"ஈ":
-        return u"ii"
-    if char == u"உ":
-        return u"u"
-    if char == u"ஊ":
-        return u"uu"
-    if char == u"எ":
-        return u"e"
-    if char == u"ஏ":
-        return u"ee"
-    if char == u"ஐ":
-        return u"ai"
-    if char == u"ஒ":
-        return u"o"
-    if char == u"ஓ":
-        return u"oo"
-    if char == u"ஔ":
-        return u"au"
-    if char == u"ஃ":
-        return ""
+        # Armenian
+        for char in u"Ա":
+            self.trans[char] = u"A"
+        for char in u"ա":
+            self.trans[char] = u"a"
+        for char in u"Բ":
+            self.trans[char] = u"B"
+        for char in u"բ":
+            self.trans[char] = u"b"
+        for char in u"Գ":
+            self.trans[char] = u"G"
+        for char in u"գ":
+            self.trans[char] = u"g"
+        for char in u"Դ":
+            self.trans[char] = u"D"
+        for char in u"դ":
+            self.trans[char] = u"d"
+        for char in u"Ե":
+            self.trans[char] = u"Je"
+        for char in u"ե":
+            self.trans[char] = u"e"
+        for char in u"Զ":
+            self.trans[char] = u"Z"
+        for char in u"զ":
+            self.trans[char] = u"z"
+        for char in u"Է":
+            self.trans[char] = u"É"
+        for char in u"է":
+            self.trans[char] = u"é"
+        for char in u"Ը":
+            self.trans[char] = u"Ë"
+        for char in u"ը":
+            self.trans[char] = u"ë"
+        for char in u"Թ":
+            self.trans[char] = u"Th"
+        for char in u"թ":
+            self.trans[char] = u"th"
+        for char in u"Ժ":
+            self.trans[char] = u"Zh"
+        for char in u"ժ":
+            self.trans[char] = u"zh"
+        for char in u"Ի":
+            self.trans[char] = u"I"
+        for char in u"ի":
+            self.trans[char] = u"i"
+        for char in u"Լ":
+            self.trans[char] = u"L"
+        for char in u"լ":
+            self.trans[char] = u"l"
+        for char in u"Խ":
+            self.trans[char] = u"Ch"
+        for char in u"խ":
+            self.trans[char] = u"ch"
+        for char in u"Ծ":
+            self.trans[char] = u"Ts"
+        for char in u"ծ":
+            self.trans[char] = u"ts"
+        for char in u"Կ":
+            self.trans[char] = u"K"
+        for char in u"կ":
+            self.trans[char] = u"k"
+        for char in u"Հ":
+            self.trans[char] = u"H"
+        for char in u"հ":
+            self.trans[char] = u"h"
+        for char in u"Ձ":
+            self.trans[char] = u"Dz"
+        for char in u"ձ":
+            self.trans[char] = u"dz"
+        for char in u"Ղ":
+            self.trans[char] = u"R"
+        for char in u"ղ":
+            self.trans[char] = u"r"
+        for char in u"Ճ":
+            self.trans[char] = u"Cz"
+        for char in u"ճ":
+            self.trans[char] = u"cz"
+        for char in u"Մ":
+            self.trans[char] = u"M"
+        for char in u"մ":
+            self.trans[char] = u"m"
+        for char in u"Յ":
+            self.trans[char] = u"J"
+        for char in u"յ":
+            self.trans[char] = u"j"
+        for char in u"Ն":
+            self.trans[char] = u"N"
+        for char in u"ն":
+            self.trans[char] = u"n"
+        for char in u"Շ":
+            self.trans[char] = u"S"
+        for char in u"շ":
+            self.trans[char] = u"s"
+        for char in u"Շ":
+            self.trans[char] = u"Vo"
+        for char in u"շ":
+            self.trans[char] = u"o"
+        for char in u"Չ":
+            self.trans[char] = u"Tsh"
+        for char in u"չ":
+            self.trans[char] = u"tsh"
+        for char in u"Պ":
+            self.trans[char] = u"P"
+        for char in u"պ":
+            self.trans[char] = u"p"
+        for char in u"Ջ":
+            self.trans[char] = u"Dz"
+        for char in u"ջ":
+            self.trans[char] = u"dz"
+        for char in u"Ռ":
+            self.trans[char] = u"R"
+        for char in u"ռ":
+            self.trans[char] = u"r"
+        for char in u"Ս":
+            self.trans[char] = u"S"
+        for char in u"ս":
+            self.trans[char] = u"s"
+        for char in u"Վ":
+            self.trans[char] = u"V"
+        for char in u"վ":
+            self.trans[char] = u"v"
+        for char in u"Տ":
+            self.trans[char] = u"T'"
+        for char in u"տ":
+            self.trans[char] = u"t'"
+        for char in u"Ր":
+            self.trans[char] = u"R"
+        for char in u"ր":
+            self.trans[char] = u"r"
+        for char in u"Ց":
+            self.trans[char] = u"Tsh"
+        for char in u"ց":
+            self.trans[char] = u"tsh"
+        for char in u"Ւ":
+            self.trans[char] = u"V"
+        for char in u"ւ":
+            self.trans[char] = u"v"
+        for char in u"Փ":
+            self.trans[char] = u"Ph"
+        for char in u"փ":
+            self.trans[char] = u"ph"
+        for char in u"Ք":
+            self.trans[char] = u"Kh"
+        for char in u"ք":
+            self.trans[char] = u"kh"
+        for char in u"Օ":
+            self.trans[char] = u"O"
+        for char in u"օ":
+            self.trans[char] = u"o"
+        for char in u"Ֆ":
+            self.trans[char] = u"F"
+        for char in u"ֆ":
+            self.trans[char] = u"f"
+        for char in u"և":
+            self.trans[char] = u"&"
+        for char in u"՟":
+            self.trans[char] = u"."
+        for char in u"՞":
+            self.trans[char] = u"?"
+        for char in u"՝":
+            self.trans[char] = u";"
+        for char in u"՛":
+            self.trans[char] = u""
 
-    # Bengali
-    if char == u"অ":
-        return u"ô"
-    if char in u"আা":
-        return u"a"
-    if char in u"ইিঈী":
-        return u"i"
-    if char in u"উুঊূ":
-        return u"u"
-    if char in u"ঋৃ":
-        return u"ri"
-    if char in u"এেয়":
-        return u"e"
-    if char in u"ঐৈ":
-        return u"oi"
-    if char in u"ওো":
-        return u"o"
-    if char in u"ঔৌ":
-        return "ou"
-    if char == u"্":
-        return u""
-    if char == u"ৎ":
-        return u"t"
-    if char == u"ং":
-        return u"n"
-    if char == u"ঃ":
-        return u"h"
-    if char == u"ঁ":
-        return u"ñ"
-    if char == u"ক":
-        return u"k"
-    if char == u"খ":
-        return u"kh"
-    if char == u"গ":
-        return u"g"
-    if char == u"ঘ":
-        return u"gh"
-    if char == u"ঙ":
-        return u"ng"
-    if char == u"চ":
-        return u"ch"
-    if char == u"ছ":
-        return u"chh"
-    if char in u"জ":
-        return u"j"
-    if char == u"ঝ":
-        return u"jh"
-    if char == u"ঞ":
-        return u"n"
-    if char in u"টত":
-        return u"t"
-    if char in u"ঠথ":
-        return u"th"
-    if char in u"ডদ":
-        return u"d"
-    if char in u"ঢধ":
-        return u"dh"
-    if char in u"ণন":
-        return u"n"
-    if char == u"প":
-        return u"p"
-    if char == u"ফ":
-        return u"ph"
-    if char == u"ব":
-        return u"b"
-    if char == u"ভ":
-        return u"bh"
-    if char == u"ম":
-        return u"m"
-    if char == u"য":
-        return u"dzh"
-    if char == u"র":
-        return u"r"
-    if char == u"ল":
-        return u"l"
-    if char == u"শ":
-        return u"s"
-    if char == u"হ":
-        return u"h"
-    if char == u"য়":
-        return u"-"
-    if char == u"ড়":
-        return u"r"
-    if char == u"ঢ":
-        return u"rh"
-    if char == u"০":
-        return u"0"
-    if char == u"১":
-        return u"1"
-    if char == u"২":
-        return u"2"
-    if char == u"৩":
-        return u"3"
-    if char == u"৪":
-        return u"4"
-    if char == u"৫":
-        return u"5"
-    if char == u"৬":
-        return u"6"
-    if char == u"৭":
-        return u"7"
-    if char == u"৮":
-        return u"8"
-    if char == u"৯":
-        return u"9"
-    
-    # Thai (because of complications of the alphabet, transliterations
-    #       are very imprecise here)
-    if char == u"ก":
-        return u"k"
-    if char in u"ขฃคฅฆ":
-        return u"kh"
-    if char == u"ง":
-        return u"ng"
-    if char in u"จฉชฌ":
-        return u"ch"
-    if char in u"ซศษส":
-        return u"s"
-    if char in u"ญย":
-        return u"y"
-    if char in u"ฎด":
-        return u"d"
-    if char in u"ฏต":
-        return u"t"
-    if char in u"ฐฑฒถทธ":
-        return u"th"
-    if char in u"ณน":
-        return u"n"
-    if char == u"บ":
-        return u"b"
-    if char == u"ป":
-        return u"p"
-    if char in u"ผพภ":
-        return u"ph"
-    if char in u"ฝฟ":
-        return u"f"
-    if char in u"ม":
-        return u"m"
-    if char == u"ร":
-        return u"r"
-    if char == u"ฤ":
-        return u"rue"
-    if char in u"ๅ":
-        return u":"
-    if char in u"ลฬ":
-        return u"l"
-    if char == u"ฦ":
-        return u"lue"
-    if char == u"ว":
-        return u"w"
-    if char in u"หฮ":
-        return u"h"
-    if char == u"อ":
-        return u""
-    if char == u"ร":
-        return u"ü"
-    if char == u"ว":
-        return u"ua"
-    if char in u"อว–โิ":
-        return u"o"
-    if char in u"ะัา":
-        return u"a"
-    if char in u"ว":
-        return u"u"
-    if char == u"ำ":
-        return u"am"
-    if char == u"ิ":
-        return u"i"
-    if char == u"ี":
-        return u"i:"
-    if char == u"ึ":
-        return u"ue"
-    if char == u"ื":
-        return u"ue:"
-    if char == u"ุ":
-        return u"u"
-    if char == u"ู":
-        return u"u:"
-    if char in u"เ็":
-        return u"e"
-    if char == u"แ":
-        return u"ae"
-    if char in u"ใไ":
-        return u"ai"
-    if char in u"่้๊๋็์":
-        return u""
-    if char in u"ฯ":
-        return u"."
-    if char in u"ๆ":
-        return u"(2)"    
-    
-    return default
+        # Tamil
+        for char in u"க்":
+            self.trans[char] = u"k"
+        for char in u"ஙண்ந்ன்":
+            self.trans[char] = u"n"
+        for char in u"ச":
+            self.trans[char] = u"c"
+        for char in u"ஞ்":
+            self.trans[char] = u"ñ"
+        for char in u"ட்":
+            self.trans[char] = u"th"
+        for char in u"த":
+            self.trans[char] = u"t"
+        for char in u"ப":
+            self.trans[char] = u"p"
+        for char in u"ம்":
+            self.trans[char] = u"m"
+        for char in u"ய்":
+            self.trans[char] = u"y"
+        for char in u"ர்ழ்ற":
+            self.trans[char] = u"r"
+        for char in u"ல்ள":
+            self.trans[char] = u"l"
+        for char in u"வ்":
+            self.trans[char] = u"v"
+        for char in u"ஜ":
+            self.trans[char] = u"j"
+        for char in u"ஷ":
+            self.trans[char] = u"sh"
+        for char in u"ஸ":
+            self.trans[char] = u"s"
+        for char in u"ஹ":
+            self.trans[char] = u"h"
+        for char in u"க்ஷ":
+            self.trans[char] = u"x"
+        for char in u"அ":
+            self.trans[char] = u"a"
+        for char in u"ஆ":
+            self.trans[char] = u"aa"
+        for char in u"இ":
+            self.trans[char] = u"i"
+        for char in u"ஈ":
+            self.trans[char] = u"ii"
+        for char in u"உ":
+            self.trans[char] = u"u"
+        for char in u"ஊ":
+            self.trans[char] = u"uu"
+        for char in u"எ":
+            self.trans[char] = u"e"
+        for char in u"ஏ":
+            self.trans[char] = u"ee"
+        for char in u"ஐ":
+            self.trans[char] = u"ai"
+        for char in u"ஒ":
+            self.trans[char] = u"o"
+        for char in u"ஓ":
+            self.trans[char] = u"oo"
+        for char in u"ஔ":
+            self.trans[char] = u"au"
+        for char in u"ஃ":
+            self.trans[char] = ""
+
+        # Bengali
+        for char in u"অ":
+            self.trans[char] = u"ô"
+        for char in u"আা":
+            self.trans[char] = u"a"
+        for char in u"ইিঈী":
+            self.trans[char] = u"i"
+        for char in u"উুঊূ":
+            self.trans[char] = u"u"
+        for char in u"ঋৃ":
+            self.trans[char] = u"ri"
+        for char in u"এেয়":
+            self.trans[char] = u"e"
+        for char in u"ঐৈ":
+            self.trans[char] = u"oi"
+        for char in u"ওো":
+            self.trans[char] = u"o"
+        for char in u"ঔৌ":
+            self.trans[char] = "ou"
+        for char in u"্":
+            self.trans[char] = u""
+        for char in u"ৎ":
+            self.trans[char] = u"t"
+        for char in u"ং":
+            self.trans[char] = u"n"
+        for char in u"ঃ":
+            self.trans[char] = u"h"
+        for char in u"ঁ":
+            self.trans[char] = u"ñ"
+        for char in u"ক":
+            self.trans[char] = u"k"
+        for char in u"খ":
+            self.trans[char] = u"kh"
+        for char in u"গ":
+            self.trans[char] = u"g"
+        for char in u"ঘ":
+            self.trans[char] = u"gh"
+        for char in u"ঙ":
+            self.trans[char] = u"ng"
+        for char in u"চ":
+            self.trans[char] = u"ch"
+        for char in u"ছ":
+            self.trans[char] = u"chh"
+        for char in u"জ":
+            self.trans[char] = u"j"
+        for char in u"ঝ":
+            self.trans[char] = u"jh"
+        for char in u"ঞ":
+            self.trans[char] = u"n"
+        for char in u"টত":
+            self.trans[char] = u"t"
+        for char in u"ঠথ":
+            self.trans[char] = u"th"
+        for char in u"ডদ":
+            self.trans[char] = u"d"
+        for char in u"ঢধ":
+            self.trans[char] = u"dh"
+        for char in u"ণন":
+            self.trans[char] = u"n"
+        for char in u"প":
+            self.trans[char] = u"p"
+        for char in u"ফ":
+            self.trans[char] = u"ph"
+        for char in u"ব":
+            self.trans[char] = u"b"
+        for char in u"ভ":
+            self.trans[char] = u"bh"
+        for char in u"ম":
+            self.trans[char] = u"m"
+        for char in u"য":
+            self.trans[char] = u"dzh"
+        for char in u"র":
+            self.trans[char] = u"r"
+        for char in u"ল":
+            self.trans[char] = u"l"
+        for char in u"শ":
+            self.trans[char] = u"s"
+        for char in u"হ":
+            self.trans[char] = u"h"
+        for char in u"য়":
+            self.trans[char] = u"-"
+        for char in u"ড়":
+            self.trans[char] = u"r"
+        for char in u"ঢ":
+            self.trans[char] = u"rh"
+        for char in u"০":
+            self.trans[char] = u"0"
+        for char in u"১":
+            self.trans[char] = u"1"
+        for char in u"২":
+            self.trans[char] = u"2"
+        for char in u"৩":
+            self.trans[char] = u"3"
+        for char in u"৪":
+            self.trans[char] = u"4"
+        for char in u"৫":
+            self.trans[char] = u"5"
+        for char in u"৬":
+            self.trans[char] = u"6"
+        for char in u"৭":
+            self.trans[char] = u"7"
+        for char in u"৮":
+            self.trans[char] = u"8"
+        for char in u"৯":
+            self.trans[char] = u"9"
+        
+        # Thai (because of complications of the alphabet, transliterations
+        #       are very imprecise here)
+        for char in u"ก":
+            self.trans[char] = u"k"
+        for char in u"ขฃคฅฆ":
+            self.trans[char] = u"kh"
+        for char in u"ง":
+            self.trans[char] = u"ng"
+        for char in u"จฉชฌ":
+            self.trans[char] = u"ch"
+        for char in u"ซศษส":
+            self.trans[char] = u"s"
+        for char in u"ญย":
+            self.trans[char] = u"y"
+        for char in u"ฎด":
+            self.trans[char] = u"d"
+        for char in u"ฏต":
+            self.trans[char] = u"t"
+        for char in u"ฐฑฒถทธ":
+            self.trans[char] = u"th"
+        for char in u"ณน":
+            self.trans[char] = u"n"
+        for char in u"บ":
+            self.trans[char] = u"b"
+        for char in u"ป":
+            self.trans[char] = u"p"
+        for char in u"ผพภ":
+            self.trans[char] = u"ph"
+        for char in u"ฝฟ":
+            self.trans[char] = u"f"
+        for char in u"ม":
+            self.trans[char] = u"m"
+        for char in u"ร":
+            self.trans[char] = u"r"
+        for char in u"ฤ":
+            self.trans[char] = u"rue"
+        for char in u"ๅ":
+            self.trans[char] = u":"
+        for char in u"ลฬ":
+            self.trans[char] = u"l"
+        for char in u"ฦ":
+            self.trans[char] = u"lue"
+        for char in u"ว":
+            self.trans[char] = u"w"
+        for char in u"หฮ":
+            self.trans[char] = u"h"
+        for char in u"อ":
+            self.trans[char] = u""
+        for char in u"ร":
+            self.trans[char] = u"ü"
+        for char in u"ว":
+            self.trans[char] = u"ua"
+        for char in u"อว–โิ":
+            self.trans[char] = u"o"
+        for char in u"ะัา":
+            self.trans[char] = u"a"
+        for char in u"ว":
+            self.trans[char] = u"u"
+        for char in u"ำ":
+            self.trans[char] = u"am"
+        for char in u"ิ":
+            self.trans[char] = u"i"
+        for char in u"ี":
+            self.trans[char] = u"i:"
+        for char in u"ึ":
+            self.trans[char] = u"ue"
+        for char in u"ื":
+            self.trans[char] = u"ue:"
+        for char in u"ุ":
+            self.trans[char] = u"u"
+        for char in u"ู":
+            self.trans[char] = u"u:"
+        for char in u"เ็":
+            self.trans[char] = u"e"
+        for char in u"แ":
+            self.trans[char] = u"ae"
+        for char in u"ใไ":
+            self.trans[char] = u"ai"
+        for char in u"่้๊๋็์":
+            self.trans[char] = u""
+        for char in u"ฯ":
+            self.trans[char] = u"."
+        for char in u"ๆ":
+            self.trans[char] = u"(2)"
+              
+              
+    def transliterate(self, char, default="?", prev="-", next="-"):
+        """Return the transliteration of a single character.
+
+        Arguments:
+        char    -- the character to transliterate
+        default -- value returned when no transliteration is known
+        prev    -- value returned for characters that stand for "whatever
+                   came before" (the Arabic placeholder and the Japanese
+                   iteration marks handled below)
+        next    -- the character following char; only consulted for the
+                   Japanese small tsu, which doubles the first letter of
+                   the next character's transliteration
+
+        The lookup table self.trans is consulted first; the remaining
+        branches cover characters whose result depends on context.
+        """
+        if char in self.trans:
+            return self.trans[char]
+        #Arabic
+        if char == u"◌":
+            return prev
+        #Japanese
+        if char == u"ッ":
+            # The following character may be unknown or may legitimately
+            # map to the empty string; indexing [0] on an empty result
+            # would raise IndexError, so fall back to the caller's
+            # default in that case.
+            following = self.transliterate(next, default=u"")
+            if following:
+                return following[0]
+            return default
+        if char in u"々仝ヽヾゝゞ〱〲〳〵〴〵":
+            return prev
+        return default
+





More information about the Pywikipedia-l mailing list