[Pywikipedia-l] SVN: [6224] trunk/pywikipedia/selflink.py
wikipedian at svn.wikimedia.org
wikipedian at svn.wikimedia.org
Mon Jan 5 19:27:29 UTC 2009
Revision: 6224
Author: wikipedian
Date: 2009-01-05 19:27:29 +0000 (Mon, 05 Jan 2009)
Log Message:
-----------
bugfix: XML reader: don't crash on articles whose titles start with a
parenthesis, and also find selflinks in articles whose titles start with a
non-ASCII character.
Modified Paths:
--------------
trunk/pywikipedia/selflink.py
Modified: trunk/pywikipedia/selflink.py
===================================================================
--- trunk/pywikipedia/selflink.py 2009-01-05 17:41:08 UTC (rev 6223)
+++ trunk/pywikipedia/selflink.py 2009-01-05 19:27:29 UTC (rev 6224)
@@ -71,9 +71,12 @@
mysite = wikipedia.getSite()
dump = xmlreader.XmlDump(self.xmlFilename)
for entry in dump.parse():
- title = re.escape(entry.title)
- if not mysite.nocapitalize:
- title = '[%s%s]%s' % (title[0].lower(), title[0].upper(), title[1:])
+ if mysite.nocapitalize:
+ title = re.escape(entry.title)
+ else:
+ title = '[%s%s]%s' % (re.escape(entry.title[0].lower()),
+ re.escape(entry.title[0].upper()),
+ re.escape(entry.title[1:]))
selflinkR = re.compile(r'\[\[' + title + '(\|[^\]]*)?\]\]')
if selflinkR.search(entry.text):
yield wikipedia.Page(mysite, entry.title)
More information about the Pywikipedia-l
mailing list