jenkins-bot has submitted this change and it was merged.
Change subject: [FIX] Allow dynamic interwiki links ......................................................................
[FIX] Allow dynamic interwiki links
This uses the 'interwikimap' siteinfo to determine the Site an interwiki prefix redirects to. A Family supports 'from_url', which returns the 'code' (an entry in 'langs') to which the URL belongs. If no code of that family supports that URL, 'None' is returned.
Because both the test and wikipedia families support the URL test.wikipedia.org, the test family always returns None, so the wikipedia family is chosen. This is then consistent with the test wikidata instance, which is in the wikidata family and not in a separate test family.
To get the family for a specific URL it might be necessary to iterate through all families, which are loaded in the process. Each site has a local cache which holds the Site represented by a specific interwiki prefix. There is also a global cache which stores the family name and code for each URL, so that later searches for the URL don't require iterating through all families.
This also adds a parameter "url" to the pywikibot.Site function, which can only be set when neither "code" nor "fam" is set.
The code used in 'nice_get_address' should be returned when the URL is parsed by 'from_url'. This also fixes all link tests, so that the 'show_failures' feature could be removed.
It also changes the exception from Error to InvalidTitle when no title is given and the link has a namespace or no interwiki link. It also only throws an InvalidTitle if an interwiki link links to a non-local page via a local interwiki link. Previously, all interwiki links which referred to their own site weren't allowed.
Change-Id: Iec4f32fdefde15b2330dbb191fb3bbb0d04803e2 --- M pywikibot/__init__.py M pywikibot/families/test_family.py M pywikibot/family.py M pywikibot/page.py M pywikibot/site.py M tests/interwiki_link_tests.py M tests/link_tests.py 7 files changed, 354 insertions(+), 288 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/__init__.py b/pywikibot/__init__.py index 5257f65..2e569c0 100644 --- a/pywikibot/__init__.py +++ b/pywikibot/__init__.py @@ -459,9 +459,10 @@
_sites = {} +_url_cache = {} # The code/fam pair for each URL
-def Site(code=None, fam=None, user=None, sysop=None, interface=None): +def Site(code=None, fam=None, user=None, sysop=None, interface=None, url=None): """A factory method to obtain a Site object.
Site objects are cached and reused by this method. @@ -479,12 +480,40 @@ @type sysop: unicode @param interface: site interface (override config.site_interface) @type interface: string + @param url: Instead of code and fam, does try to get a Site based on the + URL. Still requires that the family supporting that URL exists. + @type url: string """ + # Either code and fam or only url + assert(not url or (not code and not fam)) _logger = "wiki"
- # Fallback to config defaults - code = code or config.mylang - fam = fam or config.family + if url: + if url in _url_cache: + cached = _url_cache[url] + if cached: + code = cached[0] + fam = cached[1] + else: + raise Error("Unknown URL '{0}'.".format(url)) + else: + # Iterate through all families and look, which does apply to + # the given URL + for fam in config.family_files: + family = pywikibot.family.Family.load(fam) + code = family.from_url(url) + if code: + _url_cache[url] = (code, fam) + break + else: + _url_cache[url] = None + # TODO: As soon as AutoFamily is ready, try and use an + # AutoFamily + raise Error("Unknown URL '{0}'.".format(url)) + else: + # Fallback to config defaults + code = code or config.mylang + fam = fam or config.family interface = interface or config.site_interface
# config.usernames is initialised with a dict for each family name diff --git a/pywikibot/families/test_family.py b/pywikibot/families/test_family.py index 27d5f4b..c2cc459 100644 --- a/pywikibot/families/test_family.py +++ b/pywikibot/families/test_family.py @@ -13,3 +13,6 @@ self.langs = { 'test': 'test.wikipedia.org', } + + def from_url(self, url): + return None # Don't accept this, but 'test' of 'wikipedia' diff --git a/pywikibot/family.py b/pywikibot/family.py index 1bfd558..6765176 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -1035,6 +1035,64 @@ def nice_get_address(self, code, title): return '%s%s' % (self.nicepath(code), title)
+ def _get_path_regex(self): + """ + Return a regex matching the path after the domain. + + It is using L{Family.path} and L{Family.nicepath} with code set to + 'None'. If that returns a KeyError (L{Family.scriptpath} probably + using the C{langs} dictionary) it retries it with the key from + L{Family.langs} if it only contains one entry and throws an Error + otherwise. In that case the Family instance should overwrite this + method or supply code independent methods. + + @raise Error: If it's not possible to automatically get a code + independent regex. + """ + def _get_coded_path_regex(code): + return ('(?:' + re.escape(self.path(code) + '/') + '|' + + re.escape(self.nicepath(code)) + ')') + try: + return _get_coded_path_regex(None) + except KeyError: + # Probably automatically generated family + if len(self.langs) == 1: + return _get_coded_path_regex(next(iter(self.langs.keys()))) + else: + raise Error('Pywikibot is unable to generate an automatic ' + 'path regex for the family {0}. It is recommended ' + 'to overwrite "_get_path_regex" in that ' + 'family.'.format(self.name)) + + def from_url(self, url): + """ + Return whether this family matches the given url. + + The protocol must match, if it is present in the URL. It must match + URLs generated via C{self.langs} and L{Family.nice_get_address} or + L{Family.path}. + + It uses L{Family._get_path_regex} to generate a regex defining the path + after the domain. + + @return: The language code of the url. None if that url is not from + this family. 
+ @rtype: str or None + """ + url_match = re.match(r'(?:(https?)://|//)?(.*){0}' + '$1'.format(self._get_path_regex()), url) + if not url_match: + return None + for code, domain in self.langs.items(): + if domain == url_match.group(2): + break + else: + return None + if url_match.group(1) and url_match.group(1) != self.protocol(code): + return None + else: + return code + def dbName(self, code): # returns the name of the MySQL database return '%s%s' % (code, self.name) diff --git a/pywikibot/page.py b/pywikibot/page.py index 70f8dd1..d4f4c08 100644 --- a/pywikibot/page.py +++ b/pywikibot/page.py @@ -44,7 +44,7 @@ from pywikibot import config from pywikibot.family import Family from pywikibot.site import Namespace -from pywikibot.exceptions import AutoblockUser, UserActionRefuse +from pywikibot.exceptions import AutoblockUser, UserActionRefuse, NoSuchSite from pywikibot.tools import ComparableMixin, deprecated, deprecate_arg from pywikibot import textlib
@@ -4028,7 +4028,7 @@
# This code was adapted from Title.php : secureAndSplit() # - firstPass = True + first_other_site = None while u":" in t: # Initial colon indicates main namespace rather than default if t.startswith(u":"): @@ -4038,50 +4038,37 @@ t = t.lstrip(u":").lstrip(u" ") continue
- fam = self._site.family prefix = t[:t.index(u":")].lower() ns = self._site.ns_index(prefix) if ns: # Ordinary namespace t = t[t.index(u":"):].lstrip(u":").lstrip(u" ") + # 'namespace:' is not a valid title + if not t: + raise pywikibot.InvalidTitle( + "'{0}' has no title.".format(self._text)) self._namespace = ns break - if prefix in list(fam.langs.keys())\ - or prefix in fam.get_known_families(site=self._site): - # looks like an interwiki link - if not firstPass: - # Can't make a local interwiki link to an interwiki link. - raise pywikibot.Error( - "Improperly formatted interwiki link '%s'" - % self._text) - t = t[t.index(u":"):].lstrip(u":").lstrip(u" ") - if prefix in list(fam.langs.keys()): - newsite = pywikibot.Site(prefix, fam) - else: - otherlang = self._site.code - familyName = fam.get_known_families(site=self._site)[prefix] - if familyName in ['commons', 'meta']: - otherlang = familyName - try: - newsite = pywikibot.Site(otherlang, familyName) - except ValueError: - raise pywikibot.Error( - """\ -%s is not a local page on %s, and the %s family is -not supported by PyWikiBot!""" - % (self._text, self._site, familyName)) - - # Redundant interwiki prefix to the local wiki - if newsite == self._site: - if not t: - # Can't have an empty self-link - raise pywikibot.InvalidTitle( - "Invalid link title: '%s'" % self._text) - firstPass = False - continue - self._site = newsite + try: + newsite = self._site.interwiki(prefix) + except KeyError: + break # text before : doesn't match any known prefix + except NoSuchSite: + raise pywikibot.Error( + '{0} is not a local page on {1}, and the interwiki prefix ' + '{2} is not supported by PyWikiBot!'.format( + self._text, self._site, prefix)) else: - break # text before : doesn't match any known prefix + t = t[t.index(u":"):].lstrip(u":").lstrip(u" ") + if first_other_site: + if not self._site.local_interwiki(prefix): + raise pywikibot.InvalidTitle( + '{0} links to a non local site {1} via an ' + 'interwiki link to 
{2}.'.format( + self._text, newsite, first_other_site)) + elif newsite != self._source: + first_other_site = newsite + self._site = newsite
if u"#" in t: t, sec = t.split(u'#', 1) @@ -4119,6 +4106,12 @@ if self._namespace != -1 and len(t) > 255: raise pywikibot.InvalidTitle("(over 255 bytes): '%s'" % t)
+ # "empty" local links can only be self-links + # with a fragment identifier. + if not self._text.strip(): + raise pywikibot.InvalidTitle("The link does not contain a page " + "title") + if hasattr(self._site.namespaces()[self._namespace], 'case'): case = self._site.namespaces()[self._namespace].case else: @@ -4126,13 +4119,6 @@
if case == 'first-letter': t = t[:1].upper() + t[1:] - - # Can't make a link to a namespace alone... - # "empty" local links can only be self-links - # with a fragment identifier. - if not t and self._site == self._source and self._namespace != 0: - raise pywikibot.Error("Invalid link (no page title): '%s'" - % self._text)
self._title = t
diff --git a/pywikibot/site.py b/pywikibot/site.py index 17c47aa..09a915b 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -542,6 +542,55 @@ return [lang for lang in self.languages() if lang[:1].upper() + lang[1:] not in nsnames]
+ def interwiki(self, prefix): + """ + Return the site for a corresponding interwiki prefix. + + @raise NoSuchSite: if the url given in the interwiki table doesn't + match any of the existing families. + @raise KeyError: if the prefix is not an interwiki prefix. + """ + # _iw_sites is a local cache to return a APISite instance depending + # on the interwiki prefix of that site + if not hasattr(self, '_iw_sites'): + self._iw_sites = {} + if prefix in self._iw_sites: + site = self._iw_sites[prefix] + else: + for interwiki in self.siteinfo['interwikimap']: + if interwiki['prefix'] == prefix: + break + else: + raise KeyError( + "'{0}' is not an interwiki prefix.".format(prefix)) + try: + site = (pywikibot.Site(url=interwiki['url']), + 'local' in interwiki) + except Error: + site = (None, False) + self._iw_sites[prefix] = site + if site[0]: + return site[0] + else: + raise NoSuchSite( + "No family/site found for prefix '{0}'".format(prefix)) + + def local_interwiki(self, prefix): + """ + Return whether the interwiki prefix is local. + + A local interwiki prefix is handled by the target site like a normal + link. So if that link also contains an interwiki link it does follow + it as long as it's a local link. + + @raise NoSuchSite: if the url given in the interwiki table doesn't + match any of the existing families. + @raise KeyError: if the prefix is not an interwiki prefix. + """ + # Request if necessary + self.interwiki(prefix) + return self._iw_sites[prefix][1] + def ns_index(self, namespace): """Given a namespace name, return its int index, or None if invalid.""" for ns in self.namespaces(): diff --git a/tests/interwiki_link_tests.py b/tests/interwiki_link_tests.py index 6b75ab7..2b34015 100644 --- a/tests/interwiki_link_tests.py +++ b/tests/interwiki_link_tests.py @@ -8,6 +8,7 @@
from pywikibot import config2 as config from pywikibot.page import Link +from pywikibot.exceptions import InvalidTitle from tests.aspects import unittest, TestCase
@@ -46,6 +47,44 @@ self.assertEqual(link.namespace, 1)
+class TestInterwikiLinksToNonLocalSites(TestCase): + + """Tests for interwiki links to non local sites.""" + + sites = { + 'wp': { + 'family': 'wikipedia', + 'code': 'en' + }, + 'tw': { + 'family': 'i18n', + 'code': 'i18n' + } + } + + def test_direct_non_local(self): + link = Link('translatewiki:Main Page', self.get_site('wp')) + link.parse() + self.assertEqual(link.site, self.get_site('tw')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 0) + + def test_indirect_non_local(self): + link = Link('en:translatewiki:Main Page', self.get_site('wp')) + link.parse() + self.assertEqual(link.site, self.get_site('tw')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 0) + + def test_via_local_non_local(self): + link = Link('de:translatewiki:Main Page', self.get_site('wp')) + self.assertRaisesRegex( + InvalidTitle, + "de:translatewiki:Main Page links to a non local site i18n:i18n " + "via an interwiki link to wikipedia:de", + link.parse) + + if __name__ == '__main__': try: unittest.main() diff --git a/tests/link_tests.py b/tests/link_tests.py index aa89306..b1ca6d0 100644 --- a/tests/link_tests.py +++ b/tests/link_tests.py @@ -6,13 +6,11 @@ # __version__ = '$Id$'
-import os import pywikibot from pywikibot import config2 as config from pywikibot.page import Link +from pywikibot.exceptions import Error, InvalidTitle from tests.aspects import unittest, TestCase - -show_failures = os.environ.get('PYWIKIBOT2_TEST_SHOW_FAILURE', '0') == '1'
# ---- The first set of tests are explicit links, starting with a ':'.
@@ -210,50 +208,20 @@ config.mylang = 'en' config.family = 'wikisource' link = Link(':en:wikipedia:Main Page') - if show_failures: - link.parse() - else: - self.assertRaisesRegex( - pywikibot.Error, - "Improperly formatted interwiki link ':en:wikipedia:Main Page'", - link.parse) - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('enws')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 0) - else: - try: - link.title - except pywikibot.Error as e: - self.assertEqual(str(e), "Improperly formatted interwiki link ':en:wikipedia:Main Page'") + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_code(self): """Test ':en:wikipedia:Main Page' on enwp is namespace 1.""" config.mylang = 'en' config.family = 'wikisource' link = Link(':en:wikipedia:Talk:Main Page') - if show_failures: - link.parse() - else: - self.assertRaisesRegex( - pywikibot.Error, - "Improperly formatted interwiki link ':en:wikipedia:Talk:Main Page'", - link.parse) - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('enws')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 1) - else: - try: - link.title - except pywikibot.Error as e: - self.assertEqual(str(e), "Improperly formatted interwiki link ':en:wikipedia:Talk:Main Page'") + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 1)
def test_fully_qualified_NS0_family(self): """Test ':wikipedia:en:Main Page' on enws is namespace 0.""" @@ -296,16 +264,9 @@ config.family = 'wikidata' link = Link(':en:wikipedia:Main Page') link.parse() - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('wikidata')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 4) - else: - self.assertEqual(link.title, 'En:wikipedia:Main Page') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 4)
def test_fully_qualified_NS1_code(self): """Test ':en:wikipedia:Talk:Main Page' on wikidata is namespace 4.""" @@ -313,52 +274,29 @@ config.family = 'wikidata' link = Link(':en:wikipedia:Talk:Main Page') link.parse() - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('wikidata')) - if show_failures: - self.assertEqual(link.title, 'Talk:Main Page') - self.assertEqual(link.namespace, 4) - else: - self.assertEqual(link.title, 'En:wikipedia:Talk:Main Page') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Talk:Main Page') + self.assertEqual(link.namespace, 4)
def test_fully_qualified_NS0_family(self): """Test ':wikipedia:en:Main Page' on wikidata is namespace 0.""" config.mylang = 'wikidata' config.family = 'wikidata' link = Link(':wikipedia:en:Main Page') - if show_failures: - link.parse() - self.assertEqual(link.site, self.get_site('enwp')) - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 0) - else: - self.assertRaisesRegex( - pywikibot.NoSuchSite, - 'Language wikidata does not exist in family wikipedia', - link.parse) # very bad + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_family(self): """Test ':wikipedia:en:Talk:Main Page' on wikidata is namespace 1.""" config.mylang = 'wikidata' config.family = 'wikidata' link = Link(':wikipedia:en:Talk:Main Page') - if show_failures: - link.parse() - else: - self.assertRaisesRegex( - pywikibot.NoSuchSite, - 'Language wikidata does not exist in family wikipedia', - link.parse) # very bad - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('wikidata')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 1) + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 1)
class TestFullyQualifiedNoLangFamilyExplicitLinkParser(TestCase): @@ -384,32 +322,20 @@ config.mylang = 'en' config.family = 'wikipedia' link = Link(':testwiki:wikidata:Q6') - if show_failures: - link.parse() - self.assertEqual(link.site, self.get_site('wikidata')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 0) - else: - self.assertRaisesRegex( - pywikibot.Error, - 'Family testwiki does not exist', - link.parse) # very bad + link.parse() + self.assertEqual(link.site, self.get_site('wikidata')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_code(self): """Test ':testwiki:wikidata:Talk:Q6' on enwp is namespace 1.""" config.mylang = 'en' config.family = 'wikipedia' link = Link(':testwiki:wikidata:Talk:Q6') - if show_failures: - link.parse() - self.assertEqual(link.site, self.get_site('wikidata')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 1) - else: - self.assertRaisesRegex( - pywikibot.Error, - 'Family testwiki does not exist', - link.parse) # very bad + link.parse() + self.assertEqual(link.site, self.get_site('wikidata')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 1)
def test_fully_qualified_NS0_family(self): """Test ':wikidata:testwiki:Q6' on enwp is namespace 0.""" @@ -417,14 +343,9 @@ config.family = 'wikipedia' link = Link(':wikidata:testwiki:Q6') link.parse() - if show_failures: - self.assertEqual(link.site, self.get_site('test.wp')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 0) - else: - self.assertEqual(link.site, self.get_site('enwp')) - self.assertEqual(link.title, 'Wikidata:testwiki:Q6') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, self.get_site('test.wp')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_family(self): """Test ':wikidata:testwiki:Talk:Q6' on enwp is namespace 1.""" @@ -432,14 +353,9 @@ config.family = 'wikipedia' link = Link(':wikidata:testwiki:Talk:Q6') link.parse() - if show_failures: - self.assertEqual(link.site, self.get_site('test.wp')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 1) - else: - self.assertEqual(link.site, self.get_site('enwp')) - self.assertEqual(link.title, 'Wikidata:testwiki:Talk:Q6') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, self.get_site('test.wp')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 1)
class TestFullyQualifiedOneSiteFamilyExplicitLinkParser(TestCase): @@ -643,50 +559,20 @@ config.mylang = 'en' config.family = 'wikisource' link = Link('en:wikipedia:Main Page') - if show_failures: - link.parse() - else: - self.assertRaisesRegex( - pywikibot.Error, - "Improperly formatted interwiki link 'en:wikipedia:Main Page'", - link.parse) - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('enws')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 0) - else: - try: - link.title - except pywikibot.Error as e: - self.assertEqual(str(e), "Improperly formatted interwiki link 'en:wikipedia:Main Page'") + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_code(self): """Test 'en:wikipedia:Main Page' on enws is namespace 1.""" config.mylang = 'en' config.family = 'wikisource' link = Link('en:wikipedia:Talk:Main Page') - if show_failures: - link.parse() - else: - self.assertRaisesRegex( - pywikibot.Error, - "Improperly formatted interwiki link 'en:wikipedia:Talk:Main Page'", - link.parse) - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('enws')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 1) - else: - try: - link.title - except pywikibot.Error as e: - self.assertEqual(str(e), "Improperly formatted interwiki link 'en:wikipedia:Talk:Main Page'") + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 1)
def test_fully_qualified_NS0_family(self): """Test 'wikipedia:en:Main Page' on enws is namespace 0.""" @@ -729,16 +615,9 @@ config.family = 'wikidata' link = Link('en:wikipedia:Main Page') link.parse() - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('wikidata')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 4) - else: - self.assertEqual(link.title, 'En:wikipedia:Main Page') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 4)
def test_fully_qualified_NS1_code(self): """Test 'en:wikipedia:Talk:Main Page' on wikidata is not namespace 1.""" @@ -746,52 +625,29 @@ config.family = 'wikidata' link = Link('en:wikipedia:Talk:Main Page') link.parse() - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('wikidata')) - if show_failures: - self.assertEqual(link.title, 'Talk:Main Page') - self.assertEqual(link.namespace, 4) - else: - self.assertEqual(link.title, 'En:wikipedia:Talk:Main Page') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Talk:Main Page') + self.assertEqual(link.namespace, 4)
def test_fully_qualified_NS0_family(self): """Test 'wikipedia:en:Main Page' on wikidata is namespace 0.""" config.mylang = 'wikidata' config.family = 'wikidata' link = Link('wikipedia:en:Main Page') - if show_failures: - link.parse() - self.assertEqual(link.site, self.get_site('enwp')) - self.assertEqual(link.namespace, 0) - self.assertEqual(link.title, 'Main Page') - else: - self.assertRaisesRegex( - pywikibot.NoSuchSite, - 'Language wikidata does not exist in family wikipedia', - link.parse) # very bad + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.namespace, 0) + self.assertEqual(link.title, 'Main Page')
def test_fully_qualified_NS1_family(self): """Test 'wikipedia:en:Talk:Main Page' on wikidata is namespace 1.""" config.mylang = 'wikidata' config.family = 'wikidata' link = Link('wikipedia:en:Talk:Main Page') - if show_failures: - link.parse() - else: - self.assertRaisesRegex( - pywikibot.NoSuchSite, - 'Language wikidata does not exist in family wikipedia', - link.parse) # very bad - if show_failures: - self.assertEqual(link.site, self.get_site('enwp')) - else: - self.assertEqual(link.site, self.get_site('wikidata')) - if show_failures: - self.assertEqual(link.title, 'Main Page') - self.assertEqual(link.namespace, 1) + link.parse() + self.assertEqual(link.site, self.get_site('enwp')) + self.assertEqual(link.title, 'Main Page') + self.assertEqual(link.namespace, 1)
class TestFullyQualifiedNoLangFamilyImplicitLinkParser(TestCase): @@ -805,32 +661,20 @@ config.mylang = 'en' config.family = 'wikipedia' link = Link('testwiki:wikidata:Q6') - if show_failures: - link.parse() - self.assertEqual(link.site, pywikibot.Site('wikidata', 'wikidata')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 0) - else: - self.assertRaisesRegex( - pywikibot.Error, - 'Family testwiki does not exist', - link.parse) # very bad + link.parse() + self.assertEqual(link.site, pywikibot.Site('wikidata', 'wikidata')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_code(self): """Test 'testwiki:wikidata:Talk:Q6' on enwp is namespace 1.""" config.mylang = 'en' config.family = 'wikipedia' link = Link('testwiki:wikidata:Talk:Q6') - if show_failures: - link.parse() - self.assertEqual(link.site, pywikibot.Site('wikidata', 'wikidata')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 1) - else: - self.assertRaisesRegex( - pywikibot.Error, - 'Family testwiki does not exist', - link.parse) # very bad + link.parse() + self.assertEqual(link.site, pywikibot.Site('wikidata', 'wikidata')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 1)
def test_fully_qualified_NS0_family(self): """Test 'wikidata:testwiki:Q6' on enwp is namespace 0.""" @@ -838,14 +682,9 @@ config.family = 'wikipedia' link = Link('wikidata:testwiki:Q6') link.parse() - if show_failures: - self.assertEqual(link.site, pywikibot.Site('test', 'wikipedia')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 0) - else: - self.assertEqual(link.site, pywikibot.Site('en', 'wikipedia')) - self.assertEqual(link.title, 'Wikidata:testwiki:Q6') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, pywikibot.Site('test', 'wikipedia')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 0)
def test_fully_qualified_NS1_family(self): """Test 'wikidata:testwiki:Talk:Q6' on enwp is namespace 1.""" @@ -853,14 +692,9 @@ config.family = 'wikipedia' link = Link('wikidata:testwiki:Talk:Q6') link.parse() - if show_failures: - self.assertEqual(link.site, pywikibot.Site('test', 'wikipedia')) - self.assertEqual(link.title, 'Q6') - self.assertEqual(link.namespace, 1) - else: - self.assertEqual(link.site, pywikibot.Site('en', 'wikipedia')) - self.assertEqual(link.title, 'Wikidata:testwiki:Talk:Q6') - self.assertEqual(link.namespace, 0) + self.assertEqual(link.site, pywikibot.Site('test', 'wikipedia')) + self.assertEqual(link.title, 'Q6') + self.assertEqual(link.namespace, 1)
class TestFullyQualifiedOneSiteFamilyImplicitLinkParser(TestCase): @@ -910,6 +744,74 @@ self.assertEqual(link.namespace, 1)
+class TestEmptyTitle(TestCase): + + """Test links which contain no title.""" + + family = 'wikipedia' + code = 'en' + + def test_interwiki_mainpage(self): + """Test that Link allow links without a title to the main page.""" + link = Link('en:', self.get_site()) + link.parse() + self.assertEqual(link.site, self.get_site()) + self.assertEqual(link.title, '') + self.assertEqual(link.namespace, 0) + + def test_interwiki_namespace_without_title(self): + """Test that Link doesn't allow links without a title.""" + link = Link('en:Help:', self.get_site()) + self.assertRaisesRegex( + InvalidTitle, "'en:Help:' has no title.", link.parse) + + def test_no_text(self): + """Test that Link doesn't allow empty.""" + link = Link('', self.get_site()) + self.assertRaisesRegex( + InvalidTitle, "The link does not contain a page title", link.parse) + + def test_namespace_lookalike(self): + """Test that Link does only detect valid namespaces.""" + link = Link('CAT:', self.get_site()) + link.parse() + self.assertEqual(link.site, self.get_site()) + self.assertEqual(link.title, 'CAT:') + self.assertEqual(link.namespace, 0) + + link = Link('en:CAT:', self.get_site()) + link.parse() + self.assertEqual(link.site, self.get_site()) + self.assertEqual(link.title, 'CAT:') + self.assertEqual(link.namespace, 0) + + +class TestInvalidInterwikiLinks(TestCase): + + """Test links to non-wikis.""" + + family = 'wikipedia' + code = 'en' + + def test_non_wiki_prefix(self): + """Test that Link fails if the interwiki prefix is not a wiki.""" + link = Link('bugzilla:1337') + self.assertRaisesRegex( + Error, + 'bugzilla:1337 is not a local page on wikipedia:en, and the ' + 'interwiki prefix bugzilla is not supported by PyWikiBot!', + link.parse) + + def test_other_wiki_prefix(self): + """Test that Link fails if the interwiki prefix is a unknown family.""" + link = Link('bulba:this-will-never-work') + self.assertRaisesRegex( + Error, + 'bulba:this-will-never-work is not a local page on wikipedia:en, ' + 'and 
the interwiki prefix bulba is not supported by PyWikiBot!', + link.parse) + + if __name__ == '__main__': try: unittest.main()
pywikibot-commits@lists.wikimedia.org