Revision: 5155
Author:   russblau
Date:     2008-03-23 20:01:02 +0000 (Sun, 23 Mar 2008)
Log Message:
-----------
Improve title parsing (.strip() without arguments may remove some Unicode characters that are valid in wiki page titles).
Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-03-23 14:45:18 UTC (rev 5154) +++ trunk/pywikipedia/wikipedia.py 2008-03-23 20:01:02 UTC (rev 5155) @@ -331,7 +331,7 @@ while u" " in t: t = t.replace(u" ", u" ") # Strip spaces at both ends - t = t.strip() + t = t.strip(u" ") # Remove left-to-right and right-to-left markers. t = t.replace(u'\u200e', '').replace(u'\u200f', '') # leading colon implies main namespace instead of the default @@ -403,21 +403,20 @@
sectionStart = t.find(u'#') if sectionStart >= 0: - self._section = t[sectionStart+1 : ].strip() + self._section = t[sectionStart+1 : ].lstrip(" ") self._section = sectionencode(self._section, self.site().encoding()) if not self._section: self._section = None - t = t[ : sectionStart].strip() + t = t[ : sectionStart].rstrip(" ") else: self._section = None
if t: if not self.site().nocapitalize: - t = t[0].upper() + t[1:] + t = t[:1].upper() + t[1:]
# reassemble the title from its parts - if self._namespace != 0: t = self.site().namespace(self._namespace) + u':' + t if self._section: @@ -1518,7 +1517,8 @@
for match in Rlink.finditer(thistxt): title = match.group('title') - if title.strip().startswith("#"): + title = title.replace("_", " ").strip(" ") + if title.startswith("#"): # this is an internal section link continue if not self.site().isInterwikiLink(title): @@ -4892,12 +4892,12 @@ of the link refers to this site's own family and/or language.
""" - s = s.strip().lstrip(":") + s = s.replace("_", " ").strip(" ").lstrip(":") if not ':' in s: return False first, rest = s.split(':',1) # interwiki codes are case-insensitive - first = first.lower().strip() + first = first.lower().strip(" ") # commons: forwards interlanguage links to wikipedia:, etc. if self.family.interwiki_forward: interlangTargetFamily = Family(self.family.interwiki_forward)