Revision: 8201
Author: russblau
Date: 2010-05-24 16:13:06 +0000 (Mon, 24 May 2010)
Log Message:
-----------
Fix Link.parse_site method so that it doesn't have to log in to the target site to identify an interwiki link.
Modified Paths:
--------------
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/page.py =================================================================== --- branches/rewrite/pywikibot/page.py 2010-05-22 16:14:13 UTC (rev 8200) +++ branches/rewrite/pywikibot/page.py 2010-05-24 16:13:06 UTC (rev 8201) @@ -2108,48 +2108,38 @@ self._text = t
def parse_site(self): - """Parse only enough text to determine the host site.""" + """Parse only enough text to determine which site the link points to.
+ This method does not parse anything after the first ":"; links + with multiple interwiki prefixes (such as "wikt:fr:Parlais") need + to be re-parsed on the first linked wiki to get the actual site. + + @return: tuple of (familyname, languagecode) for the linked site. + + """ t = self._text - self._site = self._source - firstPass = True + fam = self._source.family + code = self._source.code while u":" in t: # Initial colon if t.startswith(u":"): # remove the colon but continue processing # remove any subsequent whitespace t = t.lstrip(u":").lstrip(u" ") - continue - fam = self._site.family prefix = t[ :t.index(u":")].lower() # part of text before : - ns = self._site.ns_index(prefix) + ns = self._source.ns_index(prefix) if ns: - # Ordinary namespace - return - if prefix in fam.langs.keys()\ - or prefix in fam.get_known_families(site=self._site): - # looks like an interwiki link - if not firstPass: - return - t = t[t.index(u":"): ].lstrip(u": ") # part of text after : - if prefix in fam.langs.keys(): - newsite = pywikibot.Site(prefix, fam) - else: - otherlang = self._site.code - familyName = fam.get_known_families(site=self._site)[prefix] - if familyName in ['commons', 'meta']: - otherlang = familyName - try: - newsite = pywikibot.Site(otherlang, familyName) - except ValueError: - return - # Redundant interwiki prefix to the local wiki - if newsite == self._site: - firstPass = False - continue - self._site = newsite - else: - return # text before : doesn't match any known prefix + # The prefix is a namespace in the source wiki + return (fam.name, code) + if prefix in fam.langs: + # prefix is a language code within the source wiki family + return (fam.name, prefix) + known = fam.get_known_families(site=self._source) + if prefix in known: + # prefix is a different wiki family + return (known[prefix], code) + break + return (fam.name, code) # text before : doesn't match any known prefix
def parse(self): """Parse text; called internally when accessing attributes""" @@ -2267,7 +2257,7 @@ @property def site(self): if not hasattr(self, "_site"): - self.parse_site() + self.parse() return self._site
@property
Modified: branches/rewrite/pywikibot/site.py =================================================================== --- branches/rewrite/pywikibot/site.py 2010-05-22 16:14:13 UTC (rev 8200) +++ branches/rewrite/pywikibot/site.py 2010-05-24 16:13:06 UTC (rev 8201) @@ -337,7 +337,8 @@ belonging to a different site, this method returns True.
""" - return (pywikibot.Link(s, self).site != self) + linkfam, linkcode = pywikibot.Link(s, self).parse_site() + return (linkfam != self.family.name or linkcode != self.code)
def redirectRegex(self, pattern=None): """Return a compiled regular expression matching on redirect pages. @@ -3132,4 +3133,3 @@ else: cache.append(title) yield Page(self, title) -
pywikipedia-svn@lists.wikimedia.org