jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/165712 )
Change subject: [IMPROV] Link: parser rewritten
[IMPROV] Link: parser rewritten
Instead of repeatedly stripping the string, the parser now only iterates over it and never modifies it until the very last moment, after all prefixes have been analysed.
Change-Id: Ib93b32de6dafa9edb062df16ac55bfe580410088
---
M pywikibot/page.py
1 file changed, 22 insertions(+), 19 deletions(-)
Approvals:
  jenkins-bot: Verified
  Xqt: Looks good to me, approved
diff --git a/pywikibot/page.py b/pywikibot/page.py index dc83ae4..b96e15c 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -4983,40 +4983,38 @@ self._site = self._source self._namespace = self._defaultns self._is_interwiki = False - t = self._text ns_prefix = False
- # This code was adapted from Title.php : secureAndSplit() - # + old_position = 0 if self._text.find(':') != 0 else 1 + colon_position = self._text.find(':', old_position) first_other_site = None - while u":" in t: - # Initial colon indicates main namespace rather than default - if t.startswith(u":"): - self._namespace = self._site.namespaces[0] - # remove the colon but continue processing - # remove any subsequent whitespace - t = t.lstrip(u":").lstrip(u" ") - continue - - prefix = t[:t.index(u":")].lower() + while colon_position >= 0: + prefix = self._text[old_position:colon_position].lower() + # All spaces after a prefix are discarded + colon_position += 1 + while (len(self._text) > colon_position and + self._text[colon_position] == ' '): + colon_position += 1 ns = self._site.namespaces.lookup_name(prefix) if ns: - # Ordinary namespace - t = t[t.index(u":"):].lstrip(u":").lstrip(u" ") + if len(self._text) <= colon_position: + raise pywikibot.InvalidTitle( + "'{0}' has no title.".format(self._text)) self._namespace = ns ns_prefix = True + old_position = colon_position break + try: newsite = self._site.interwiki(prefix) except KeyError: break # text before : doesn't match any known prefix except SiteDefinitionError as e: raise SiteDefinitionError( - u'{0} is not a local page on {1}, and the interwiki prefix ' - '{2} is not supported by Pywikibot!:\n{3}'.format( - self._text, self._site, prefix, e)) + '{0} is not a local page on {1}, and the interwiki ' + 'prefix {2} is not supported by Pywikibot!\n{3}' + .format(self._text, self._site, prefix, e)) else: - t = t[t.index(u":"):].lstrip(u":").lstrip(u" ") if first_other_site: if not self._site.local_interwiki(prefix): raise pywikibot.InvalidTitle( @@ -5027,6 +5025,11 @@ first_other_site = newsite self._site = newsite self._is_interwiki = True + old_position = colon_position + colon_position = self._text.find(':', old_position) + + # Remove any namespaces/interwiki prefixes + t = self._text[old_position:]
if u"#" in t: t, sec = t.split(u'#', 1)
pywikibot-commits@lists.wikimedia.org