http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10187
Revision: 10187 Author: valhallasw Date: 2012-05-05 19:22:03 +0000 (Sat, 05 May 2012) Log Message: ----------- Added transliteration feature, and added a hint for people running on Windows.
Based on r10048; fixes the FixMe for r10043 and resolves the feature request referenced below.
Reference: pywikipediabot-Feature Requests-3516383
Modified Paths: -------------- trunk/pywikipedia/config.py trunk/pywikipedia/userinterfaces/terminal_interface_base.py
Modified: trunk/pywikipedia/config.py =================================================================== --- trunk/pywikipedia/config.py 2012-05-05 17:14:00 UTC (rev 10186) +++ trunk/pywikipedia/config.py 2012-05-05 19:22:03 UTC (rev 10187) @@ -108,6 +108,19 @@ #we get "StdioOnnaStick instance has no attribute 'encoding'" console_encoding = None
+# The encoding the user would like to see text transliterated to. This can be +# set to a charset (e.g. 'ascii', 'iso-8859-1' or 'cp850'), and we will output +# only characters that exist in that charset. However, the characters will be +# output using console_encoding. +# If this is not defined on Windows, we emit a Warning explaining the user +# to either switch to a Unicode-able font and use +# transliteration_target = None +# or to keep using raster fonts and set +# transliteration_target = console_encoding +# After emitting the warning, this last option will be set. + +transliteration_target = 'not set' + # The encoding in which textfiles are stored, which contain lists of page # titles. The most used is: 'utf-8'. 'utf-8-sig' recognizes BOM but it is # available on Python 2.5 or higher. For a complete list please see: @@ -598,6 +611,15 @@ else: console_encoding = 'iso-8859-1'
+# Fix up transliteration_target +if transliteration_target == 'not set': + if __sys.platform == 'win32': + transliteration_target = console_encoding + print "WARNING: Running on Windows and transliteration_target is not set." + print "Please see http://www.mediawiki.org/wiki/Manual:Pywikipediabot/Windows" + else: + transliteration_target = None + # Save base_dir for use by other modules base_dir = _base_dir if _verbose:
Modified: trunk/pywikipedia/userinterfaces/terminal_interface_base.py =================================================================== --- trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2012-05-05 17:14:00 UTC (rev 10186) +++ trunk/pywikipedia/userinterfaces/terminal_interface_base.py 2012-05-05 19:22:03 UTC (rev 10187) @@ -40,6 +40,7 @@ self.stdout = sys.stdout self.stderr = sys.stderr self.encoding = config.console_encoding + self.transliteration_target = config.transliteration_target
def printNonColorized(self, text, targetStream): # We add *** after the text as a whole if anything needed to be colorized. @@ -69,7 +70,12 @@ # Encode our unicode string in the encoding used by the user's console, # and decode it back to unicode. Then we can see which characters # can't be represented in the console encoding. + # We need to take min(console_encoding, transliteration_target) + # the first is what the terminal is capable of + # the second is how unicode-y the user would like the output codecedText = text.encode(self.encoding, 'replace').decode(self.encoding) + if self.transliteration_target: + codecedText = codecedText.encode(self.transliteration_target, 'replace').decode(self.transliteration_target) transliteratedText = '' # Note: A transliteration replacement might be longer than the original # character, e.g. ч is transliterated to ch.
pywikipedia-svn@lists.wikimedia.org