Revision: 5157
Author: cosoleto
Date: 2008-03-24 10:38:55 +0000 (Mon, 24 Mar 2008)
Log Message:
-----------
* Fix in the XML parser for sites that use authentication: in that case 'data'
isn't a Unicode string
* Undo r4965 and other related revisions for an ar.wiki fix, because the new code
generates either a useless regex
(http://lists.wikimedia.org/pipermail/pywikipedia-l/2008-February/002141.html)
or an RE compilation error ('sre_constants.error: multiple repeat', latest revision)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-03-23 21:35:08 UTC (rev 5156)
+++ trunk/pywikipedia/wikipedia.py 2008-03-24 10:38:55 UTC (rev 5157)
@@ -2674,9 +2674,9 @@
data = response.read()
else:
response, data = self.site.postForm(address, predata)
- # The XML parser doesn't expect a Unicode string, but an encoded one,
- # so we'll encode it back.
- data = data.encode(self.site.encoding())
+ # The XML parser doesn't expect a Unicode string, but an encoded one,
+ # so we'll encode it back.
+ data = data.encode(self.site.encoding())
get_throttle.setDelay(time.time() - now)
return data
@@ -4941,26 +4941,18 @@
Group 1 in the regex match object will be the target title.
"""
- redDefault = 'redirect'
- red = 'redirect'
- if self.lang == 'ar':
- red = u"تحويل"
+
try:
- if redDefault == red:
- redirKeywords = [red] + self.family.redirect[self.lang]
- redirKeywordsR = r'(?:redirect|' +
'|'.join(redirKeywords) + ')' # always redirect as default
- else:
- redirKeywords = [red] + self.family.redirect[self.lang]
- redirKeywordsR = r'(?:redirect|' + redDefault +
'|'.join(redirKeywords) + ')'
+ redirKeywords = [u'redirect'] + self.family.redirect[self.lang]
+ redirKeywordsR = r'(?:' + '|'.join(redirKeywords) +
')'
except KeyError:
# no localized keyword for redirects
- if redDefault == red:
- redirKeywordsR = r'%s' % red
- else:
- redirKeywordsR = r'(?:%s|%s)' % (red, redDefault)
+ redirKeywordsR = r'redirect'
+
# A redirect starts with hash (#), followed by a keyword, then
# arbitrary stuff, then a wikilink. The wikilink may contain
# a label, although this is not useful.
+
return re.compile(r'#' + redirKeywordsR +
'.*?\[\[(.*?)(?:\|.*?)?\]\]',
re.IGNORECASE | re.UNICODE | re.DOTALL)