Revision: 5157 Author: cosoleto Date: 2008-03-24 10:38:55 +0000 (Mon, 24 Mar 2008)
Log Message: ----------- * Fix in XML parser for sites that use authentication, where 'data' isn't a Unicode string * Undo r4965 and other related revisions about an ar.wiki fix, because the new code generates a regex that is not useful (http://lists.wikimedia.org/pipermail/pywikipedia-l/2008-February/002141.html) or an RE compilation error ('sre_constants.error: multiple repeat', latest revision)
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-03-23 21:35:08 UTC (rev 5156) +++ trunk/pywikipedia/wikipedia.py 2008-03-24 10:38:55 UTC (rev 5157) @@ -2674,9 +2674,9 @@ data = response.read() else: response, data = self.site.postForm(address, predata) - # The XML parser doesn't expect a Unicode string, but an encoded one, - # so we'll encode it back. - data = data.encode(self.site.encoding()) + # The XML parser doesn't expect a Unicode string, but an encoded one, + # so we'll encode it back. + data = data.encode(self.site.encoding()) get_throttle.setDelay(time.time() - now) return data
@@ -4941,26 +4941,18 @@ Group 1 in the regex match object will be the target title.
""" - redDefault = 'redirect' - red = 'redirect' - if self.lang == 'ar': - red = u"تحويل" + try: - if redDefault == red: - redirKeywords = [red] + self.family.redirect[self.lang] - redirKeywordsR = r'(?:redirect|' + '|'.join(redirKeywords) + ')' # always redirect as default - else: - redirKeywords = [red] + self.family.redirect[self.lang] - redirKeywordsR = r'(?:redirect|' + redDefault + '|'.join(redirKeywords) + ')' + redirKeywords = [u'redirect'] + self.family.redirect[self.lang] + redirKeywordsR = r'(?:' + '|'.join(redirKeywords) + ')' except KeyError: # no localized keyword for redirects - if redDefault == red: - redirKeywordsR = r'%s' % red - else: - redirKeywordsR = r'(?:%s|%s)' % (red, redDefault) + redirKeywordsR = r'redirect' + # A redirect starts with hash (#), followed by a keyword, then # arbitrary stuff, then a wikilink. The wikilink may contain # a label, although this is not useful. + return re.compile(r'#' + redirKeywordsR + '.*?[[(.*?)(?:|.*?)?]]', re.IGNORECASE | re.UNICODE | re.DOTALL)