SVN: [5515] trunk/pywikipedia - Pywikipedia-l

5 Jun 2008

Revision: 5515
Author:   nicdumz
Date:     2008-06-05 16:47:43 +0000 (Thu, 05 Jun 2008)

Log Message:
-----------
Those hardcoded \u200E should never appear in our code.
\u200E is a Unicode control character (LEFT-TO-RIGHT MARK) used in left-to-right languages. This means that
the regexes using it weren't working on right-to-left languages, such as ar:

Modified Paths:
--------------
    trunk/pywikipedia/welcome.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/welcome.py
===================================================================

--- trunk/pywikipedia/welcome.py	2008-06-04 20:56:45 UTC (rev 5514)
+++ trunk/pywikipedia/welcome.py	2008-06-05 16:47:43 UTC (rev 5515)
@@ -400,7 +400,7 @@
       
     reg =  u'\(<a href=\"' + re.escape(wsite.path()) 
     reg +=
u'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&amp;redlink=1)\"'
% talk
-    reg += u'.*?\) (?P<reason>.*?) \u200E </li>'
+    reg += u'.*?\) (?P<reason>.*?) . </li>'
     p = re.compile(reg, re.UNICODE)
     
     for x in p.finditer(raw):

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2008-06-04 20:56:45 UTC (rev 5514)
+++ trunk/pywikipedia/wikipedia.py	2008-06-05 16:47:43 UTC (rev 5515)
@@ -4659,7 +4659,7 @@
             path = self.longpages_address(n=number)
             get_throttle()
             html = self.getUrl(path)
-            entryR = re.compile(ur'<li>\(<a href=".+?"
title=".+?">hist</a>\) ‎<a href=".+?"
title="(?P<title>.+?)">.+?</a>
‎\[(?P<length>\d+)(.+?)\]</li>')
+            entryR = re.compile(ur'<li>\(<a href=".+?"
title=".+?">hist</a>\) .<a href=".+?"
title="(?P<title>.+?)">.+?</a>
.\[(?P<length>\d+)(.+?)\]</li>', re.UNICODE)
             for m in entryR.finditer(html):
                 title = m.group('title')
                 length = int(m.group('length'))
@@ -4678,7 +4678,7 @@
             path = self.shortpages_address(n = number)
             get_throttle()
             html = self.getUrl(path)
-            entryR = re.compile(ur'<li>\(<a href=".+?"
title=".+?">hist</a>\) ‎<a href=".+?"
title="(?P<title>.+?)">.+?</a>
‎\[(?P<length>\d+)(.+?)\]</li>')
+            entryR = re.compile(ur'<li>\(<a href=".+?"
title=".+?">hist</a>\) .<a href=".+?"
title="(?P<title>.+?)">.+?</a>
.\[(?P<length>\d+)(.+?)\]</li>', re.UNICODE)
             for m in entryR.finditer(html):
                 title = m.group('title')
                 length = int(m.group('length'))