http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9488
Revision: 9488
Author: xqt
Date: 2011-09-03 08:32:33 +0000 (Sat, 03 Sep 2011)
Log Message:
-----------
raise InvalidTitle if u'\ufffd' in title; minor updates from rewrite branch
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2011-09-03 08:23:27 UTC (rev 9487)
+++ trunk/pywikipedia/wikipedia.py 2011-09-03 08:32:33 UTC (rev 9488)
@@ -268,16 +268,14 @@
# restriction affects us or not
self._editrestriction = False
- if site is None:
- site = getSite()
- elif type(site) in [str, unicode]:
+ if site is None or isinstance(site, basestring):
site = getSite(site)
-
self._site = site
if not insite:
insite = site
+ # Clean up the name, it can come from anywhere.
# Convert HTML entities to unicode
t = html2unicode(title)
@@ -285,7 +283,7 @@
# Sometimes users copy the link to a site from one to another.
# Try both the source site and the destination site to decode.
try:
- t = url2unicode(t, site = insite, site2 = site)
+ t = url2unicode(t, site=insite, site2=site)
except UnicodeDecodeError:
raise InvalidTitle(u'Bad page title : %s' % t)
@@ -296,13 +294,15 @@
# (which might result in information loss).
t = unicodedata.normalize('NFC', t)
- # Clean up the name, it can come from anywhere.
- # Replace underscores by spaces, also multiple spaces and underscores with a single space
+ if u'\ufffd' in t:
+ raise InvalidTitle("Title contains illegal char (\\uFFFD)")
+
+ # Replace underscores by spaces
t = t.replace(u"_", u" ")
- while u" " in t:
- t = t.replace(u" ", u" ")
+ # replace multiple spaces with a single space
+ while u" " in t: t = t.replace(u" ", u" ")
# Strip spaces at both ends
- t = t.strip(u" ")
+ t = t.strip()
# Remove left-to-right and right-to-left markers.
t = t.replace(u'\u200e', '').replace(u'\u200f',
'')
# leading colon implies main namespace instead of the default