Revision: 6224 Author: wikipedian Date: 2009-01-05 19:27:29 +0000 (Mon, 05 Jan 2009)
Log Message: ----------- bugfix: XML reader: don't crash on articles whose titles start with a parenthesis, and also find selflinks in articles whose titles start with a non-ASCII character.
Modified Paths: -------------- trunk/pywikipedia/selflink.py
Modified: trunk/pywikipedia/selflink.py
===================================================================
--- trunk/pywikipedia/selflink.py 2009-01-05 17:41:08 UTC (rev 6223)
+++ trunk/pywikipedia/selflink.py 2009-01-05 19:27:29 UTC (rev 6224)
@@ -71,9 +71,12 @@
         mysite = wikipedia.getSite()
         dump = xmlreader.XmlDump(self.xmlFilename)
         for entry in dump.parse():
-            title = re.escape(entry.title)
-            if not mysite.nocapitalize:
-                title = '[%s%s]%s' % (title[0].lower(), title[0].upper(), title[1:])
+            if mysite.nocapitalize:
+                title = re.escape(entry.title)
+            else:
+                title = '[%s%s]%s' % (re.escape(entry.title[0].lower()),
+                                      re.escape(entry.title[0].upper()),
+                                      re.escape(entry.title[1:]))
             selflinkR = re.compile(r'[[' + title + '(|[^]]*)?]]')
             if selflinkR.search(entry.text):
                 yield wikipedia.Page(mysite, entry.title)