http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10047
Revision: 10047 Author: a_engels Date: 2012-03-26 09:35:37 +0000 (Mon, 26 Mar 2012) Log Message: ----------- 1. Use the configured console encoding for transliteration purposes, even if the encoding attribute is overridden in a subclass. 2. Allow multi-step transliteration (if x transliterates to y, but y cannot be displayed either, then use the transliteration of y as the transliteration of x). 3. Add transliteration for the Lao alphabet.
Modified Paths: -------------- trunk/pywikipedia/userinterfaces/terminal_interface_base.py trunk/pywikipedia/userinterfaces/transliteration.py
Modified: trunk/pywikipedia/userinterfaces/terminal_interface_base.py =================================================================== --- trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2012-03-26 08:10:48 UTC (rev 10046) +++ trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2012-03-26 09:35:37 UTC (rev 10047) @@ -10,7 +10,7 @@ import traceback, re, sys import wikipedia
-transliterator = transliteration.transliterator() +transliterator = transliteration.transliterator(config.console_encoding)
colors = [ 'default', @@ -40,6 +40,7 @@ self.stdout = sys.stdout self.stderr = sys.stderr self.encoding = config.console_encoding + self.realencoding = config.console_encoding
def printNonColorized(self, text, targetStream): # We add *** after the text as a whole if anything needed to be colorized. @@ -69,7 +70,7 @@ # Encode our unicode string in the encoding used by the user's console, # and decode it back to unicode. Then we can see which characters # can't be represented in the console encoding. - codecedText = text.encode(self.encoding, 'replace').decode(self.encoding) + codecedText = text.encode(self.realencoding, 'replace').decode(self.realencoding) transliteratedText = '' # Note: A transliteration replacement might be longer than the original # character, e.g. ч is transliterated to ch.
Modified: trunk/pywikipedia/userinterfaces/transliteration.py =================================================================== --- trunk/pywikipedia/userinterfaces/transliteration.py 2012-03-26 08:10:48 UTC (rev 10046) +++ trunk/pywikipedia/userinterfaces/transliteration.py 2012-03-26 09:35:37 UTC (rev 10047) @@ -3,7 +3,7 @@
class transliterator(object): - def __init__(self): + def __init__(self, encoding): self.trans = {} for char in u"ÀÁÂẦẤẪẨẬÃĀĂẰẮẴẶẲȦǠẠḀȂĄǍẢ": self.trans[char] = u"A" @@ -1594,7 +1594,76 @@ self.trans[u"౼"] = u"1/16" self.trans[u"౽"] = u"1/8" self.trans[u"౾"] = u"3/16" - + # Lao - note: pronounciation in initial position is used; + # different pronounciation in final position is ignored + self.trans[u"ກ"] = "k" + for char in u"ຂຄ": + self.trans[char] = "kh" + self.trans[u"ງ"] = "ng" + self.trans[u"ຈ"] = "ch" + for char in u"ສຊ": + self.trans[char] = "s" + self.trans[u"ຍ"] = "ny" + self.trans[u"ດ"] = "d" + self.trans[u"ຕ"] = "t" + for char in u"ຖທ": + self.trans[char] = "th" + self.trans[u"ນ"] = "n" + self.trans[u"ບ"] = "b" + self.trans[u"ປ"] = "p" + for char in u"ຜພ": + self.trans[char] = "ph" + for char in u"ຝຟ": + self.trans[char] = "f" + for char in u"ມໝ": + self.trans[char] = "m" + self.trans[u"ຢ"] = "y" + for char in u"ຣຼ": + self.trans[char] = "r" + for char in u"ລຼ": + self.trans[char] = "l" + self.trans[u"ວ"] = "v" + for char in u"ຮ": + self.trans[char] = "h" + self.trans[u"ອ"] = "'" + for char in u"ະັ": + self.trans[char] = "a" + self.trans[u"ິ"] = "i" + self.trans[u"ຶ"] = "ue" + self.trans[u"ຸ"] = "u" + self.trans[u"ເ"] = u"é" + self.trans[u"ແ"] = u"è" + for char in u"ໂົາໍ": + self.trans[char] = "o" + self.trans[u"ຽ"] = "ia" + self.trans[u"ເຶ"] = "uea" + self.trans[u"ຍ"] = "i" + for char in u"ໄໃ": + self.trans[char] = "ai" + self.trans[u"ຳ"] = "am" + self.trans[u"າ"] = "aa" + self.trans[u"ີ"] = "ii" + self.trans[u"ື"] = "yy" + self.trans[u"ູ"] = "uu" + self.trans[u"ເ"] = "e" + self.trans[u"ແ"] = "ei" + self.trans[u"໐"] = "0" + self.trans[u"໑"] = "1" + self.trans[u"໒"] = "2" + self.trans[u"໓"] = "3" + self.trans[u"໔"] = "4" + self.trans[u"໕"] = "5" + self.trans[u"໖"] = "6" + self.trans[u"໗"] = "7" + self.trans[u"໘"] = "8" + self.trans[u"໙"] = "9" + for char in self.trans: + value = self.trans[char] + if value == "?": continue + while value.encode(encoding, 
'replace').decode(encoding) == "?" and value in self.trans: + value = self.trans[value] + self.trans[char] = value + def transliterate(self, char, default="?", prev="-", next="-"): if char in self.trans: return self.trans[char] @@ -1606,5 +1675,11 @@ return self.transliterate(next)[0] if char in u"々仝ヽヾゝゞ〱〲〳〵〴〵": return prev + #Lao + if char == u"ຫ": + if next in u"ງຍນຣລຼຼວ": + return "" + else: + return "h" return default
pywikipedia-svn@lists.wikimedia.org