Revision: 6112 Author: btongminh Date: 2008-11-22 19:42:46 +0000 (Sat, 22 Nov 2008)
Log Message: ----------- Add Unicode BIDI chars to whitespace list.
Modified Paths: -------------- trunk/pywikipedia/commonsdelinker/delinker.py
Modified: trunk/pywikipedia/commonsdelinker/delinker.py =================================================================== --- trunk/pywikipedia/commonsdelinker/delinker.py 2008-11-21 14:15:01 UTC (rev 6111) +++ trunk/pywikipedia/commonsdelinker/delinker.py 2008-11-22 19:42:46 UTC (rev 6112) @@ -47,7 +47,10 @@
import wikipedia import config - + +# FIXME: They should be defined *somewhere* in the Python library, not? +WHITESPACE = u' \t\u200e\u200f\u202a\u202a\u202b\u202c\u202d\u202e' + def wait_callback(object): output(u'%s Connection has been lost in %s. Attempting reconnection.' % (threading.currentThread(), repr(object)), False) if hasattr(object, 'error'): @@ -255,7 +258,7 @@ if prev in ('', '\r', '\n') and replacement is None: # Kill all spaces after end while (end + 1) < len(new_text): - if new_text[end + 1] in (' ', '\t'): + if new_text[end + 1] in WHITESPACE: end += 1 else: break