jenkins-bot has submitted this change and it was merged.
Change subject: Add Namespace class ......................................................................
Add Namespace class
Replaces the current implicit data structure of site._namespaces with a class whose attributes have self-explanatory names and whose methods simplify working with namespaces.
The class also stores other attributes provided by API siprop=namespaces.
This changeset does not implement new Namespace semantics in other parts of pywikibot, so as to ensure that existing usage of site._namespaces is supported by the Namespace class.
Change-Id: I0cce21e6161031861c0056dc5498f47bc26e1cc8 --- M pywikibot/site.py M tests/dry_api_tests.py M tests/dry_site_tests.py A tests/namespace_tests.py M tests/site_tests.py M tests/wikibase_tests.py 6 files changed, 506 insertions(+), 47 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved XZise: Looks good to me, but someone else must approve Mpaa: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/site.py b/pywikibot/site.py index b70ad2d..060776b 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -20,6 +20,7 @@ import re import sys from distutils.version import LooseVersion as LV +from collections import Iterable import threading import time import urllib @@ -119,6 +120,276 @@ raise Error("Family %s does not exist" % fam) _families[fam] = myfamily.Family() return _families[fam] + + +class Namespace(Iterable): + + """ Namespace site data object. + + This is backwards compatible with the structure of entries + in site._namespaces which were a list of + [customised namespace, + canonical namespace name?, + namespace alias*] + + If the canonical_name is not provided for a namespace between -2 + and 15, the MediaWiki 1.14+ built-in names are used. + Enable use_image_name to use built-in names from MediaWiki 1.13 + and earlier as the details. + + Image and File are aliases of each other by default. + + If only one of canonical_name and custom_name are available, both + properties will have the same value. + """ + + # These are the MediaWiki built-in names for MW 1.14+. + # Namespace prefixes are always case-insensitive, but the + # canonical forms are capitalized. + canonical_namespaces = { + -2: u"Media", + -1: u"Special", + 0: u"", + 1: u"Talk", + 2: u"User", + 3: u"User talk", + 4: u"Project", + 5: u"Project talk", + 6: u"File", + 7: u"File talk", + 8: u"MediaWiki", + 9: u"MediaWiki talk", + 10: u"Template", + 11: u"Template talk", + 12: u"Help", + 13: u"Help talk", + 14: u"Category", + 15: u"Category talk", + } + + def __init__(self, id, canonical_name=None, custom_name=None, + aliases=None, use_image_name=False, **kwargs): + """Constructor. 
+ + @param custom_name: Name defined in server LocalSettings.php + @type custom_name: unicode + @param canonical_name: Canonical name + @type canonical_name: str + @param aliases: Aliases + @type aliases: list of unicode + @param use_image_name: Use 'Image' as default canonical + for 'File' namespace + @param use_image_name: bool + + """ + self.id = id + + if aliases is None: + self.aliases = list() + else: + self.aliases = aliases + + if not canonical_name and id in self.canonical_namespaces: + if use_image_name: + if id == 6: + canonical_name = u'Image' + elif id == 7: + canonical_name = u"Image talk" + + if not canonical_name: + canonical_name = self.canonical_namespaces[id] + + assert(custom_name is not None or canonical_name is not None) + + self.custom_name = custom_name if custom_name is not None else canonical_name + self.canonical_name = canonical_name if canonical_name is not None else custom_name + + if not aliases: + if id in (6, 7): + if use_image_name: + alias = u'File' + else: + alias = u'Image' + if id == 7: + alias += u' talk' + self.aliases = [alias] + else: + self.aliases = list() + else: + self.aliases = aliases + + self.info = kwargs + + def __getattr__(self, attr): + """Look for undefined attributes in info.""" + if attr in self.info: + return self.info[attr] + else: + raise AttributeError("%s instance has no attribute '%s'" + % (self.__class__.__name__, attr)) + + def _distinct(self): + if self.custom_name == self.canonical_name: + return [self.canonical_name] + self.aliases + else: + return [self.custom_name, self.canonical_name] + self.aliases + + def _contains_lowercase_name(self, name): + """Determine a lowercase normalised name is a name of this namespace. + + """ + return name in [x.lower() for x in self._distinct()] + + def __contains__(self, item): + """Determine if item is a name of this namespace. + + The comparison is case insensitive, and item may have a single + colon on one or both sides of the name. 
+ + @param item: name to check + @type item: basestring + """ + if item == '' and self.id == 0: + return True + + name = Namespace.normalize_name(item) + if not name: + return False + + return self._contains_lowercase_name(name.lower()) + + def __len__(self): + """Obtain length of the iterable.""" + if self.custom_name == self.canonical_name: + return len(self.aliases) + 1 + else: + return len(self.aliases) + 2 + + def __iter__(self): + """Return an iterator.""" + return iter(self._distinct()) + + def __getitem__(self, index): + """Obtain an item from the iterable.""" + if self.custom_name != self.canonical_name: + if index == 0: + return self.custom_name + else: + index -= 1 + + if index == 0: + return self.canonical_name + else: + return self.aliases[index - 1] + + def __str__(self): + """Return a string representation.""" + if self.id == 0: + return ':' + elif self.id in (6, 14): + return ':' + self.canonical_name + ':' + else: + return self.canonical_name + ':' + + def __unicode__(self): + """Return a unicode string representation.""" + if self.id == 0: + return u':' + elif self.id in (6, 14): + return u':' + self.custom_name + u':' + else: + return u'' + self.custom_name + u':' + + def __index__(self): + return self.id + + def __eq__(self, other): + """Compare whether two namespace objects are equal.""" + if isinstance(other, int): + return self.id == other + elif isinstance(other, Namespace): + return self.id == other.id + elif isinstance(other, basestring): + return other in self + elif other is None: + return self.id == 0 + + def __ne__(self, other): + """Compare whether two namespace objects are not equal.""" + if self.id == other.id: + return False + else: + return True + + def __cmp__(self, other): + """Compare two namespace ids.""" + if self.id == other.id: + return 0 + elif self.id > other.id: + return 1 + else: + return -1 + + def __repr__(self): + """Return a reconstructable representation.""" + return '%s(id=%d, custom_name=%r, canonical_name=%r, 
aliases=%r, ' \ + 'kwargs=%r)' \ + % (self.__class__.__name__, self.id, self.custom_name, + self.canonical_name, self.aliases, self.info) + + @staticmethod + def builtin_namespaces(use_image_name=False): + """Return a dict of the builtin namespaces.""" + return dict([(i, Namespace(i, use_image_name=use_image_name)) + for i in range(-2, 16)]) + + @staticmethod + def normalize_name(name): + """Remove an optional colon before and after name. + + TODO: reject illegal characters. + """ + if name == '': + return '' + + parts = name.split(':', 4) + count = len(parts) + if count > 3: + return False + elif count == 3: + if parts[2] != '': + return False + + # Discard leading colon + if count >= 2 and parts[0] == '' and parts[1]: + return parts[1] + elif parts[0]: + return parts[0] + return False + + @staticmethod + def lookup_name(name, namespaces=None): + """Find the namespace for a name. + + @param name: Name of the namespace. + @param namespaces: namespaces to search + default: builtins only + @type namespaces: dict of Namespace + @return: Namespace or None + """ + if not namespaces: + namespaces = Namespace.builtin_namespaces() + + name = Namespace.normalize_name(name) + if name is False: + return None + name = name.lower() + + for namespace in namespaces.values(): + if namespace._contains_lowercase_name(name): + return namespace + + return None
class BaseSite(object): @@ -293,6 +564,9 @@
def namespaces(self): """Return dict of valid namespaces on this wiki.""" + if not hasattr(self, '_namespaces'): + use_image_name = LV(self.version()) < LV("1.14") + self._namespaces = Namespace.builtin_namespaces(use_image_name) return self._namespaces
def ns_normalize(self, value): @@ -638,33 +912,6 @@ def __init__(self, code, fam=None, user=None, sysop=None): """ Constructor. """ BaseSite.__init__(self, code, fam, user, sysop) - self._namespaces = { - # These are the MediaWiki built-in names, which always work. - # Localized names are loaded later upon accessing the wiki. - # Namespace prefixes are always case-insensitive, but the - # canonical forms are capitalized - -2: [u"Media"], - -1: [u"Special"], - 0: [u""], - 1: [u"Talk"], - 2: [u"User"], - 3: [u"User talk"], - 4: [u"Project"], - 5: [u"Project talk"], - 6: [u"Image"], - 7: [u"Image talk"], - 8: [u"MediaWiki"], - 9: [u"MediaWiki talk"], - 10: [u"Template"], - 11: [u"Template talk"], - 12: [u"Help"], - 13: [u"Help talk"], - 14: [u"Category"], - 15: [u"Category talk"], - } - if LV(self.version()) >= LV("1.14"): - self._namespaces[6] = [u"File"] - self._namespaces[7] = [u"File talk"] self._msgcache = {} self._loginstatus = LoginStatus.NOT_ATTEMPTED return @@ -1219,31 +1466,39 @@ self._siteinfo = sidata['general']
nsdata = sidata['namespaces'] + + self._namespaces = {} + + # In MW 1.14, API siprop 'namespaces' added 'canonical', + # and Image became File with Image as an alias. + # For versions lower than 1.14, APISite needs to override + # the defaults defined in Namespace. + is_mw114 = LV(self.version()) >= LV('1.14') + for nskey in nsdata: ns = int(nskey) - # this is the preferred form so it goes at front of list - self._namespaces.setdefault(ns, []).insert(0, nsdata[nskey]["*"]) + custom_name = None + canonical_name = None + if ns == 0: + canonical_name = nsdata[nskey].pop('*') + custom_name = canonical_name + else: + custom_name = nsdata[nskey].pop('*') + if is_mw114: + canonical_name = nsdata[nskey].pop('canonical')
- if LV(self.version()) >= LV("1.14"): - # nsdata["0"] has no canonical key. - # canonical ns -2 to 15 are hard coded in self._namespaces - # do not get them from API result to avoid canonical duplicates - if -2 <= ns <= 15: - continue - if 'canonical' not in nsdata[nskey]: - pywikibot.warning( - u'namespace %s without a canonical name. Misconfigured?' - % self._namespaces[ns][0]) - continue - self._namespaces.setdefault(ns, []).append(nsdata[nskey]["canonical"]) + # Remove the 'id' from nsdata + nsdata[nskey].pop('id') + namespace = Namespace(ns, canonical_name, custom_name, + use_image_name=is_mw114, **nsdata[nskey]) + + self._namespaces[ns] = namespace
if 'namespacealiases' in sidata: aliasdata = sidata['namespacealiases'] for item in aliasdata: - if item["*"] in self._namespaces[int(item['id'])]: - continue - # this is a less preferred form so it goes at the end - self._namespaces[int(item['id'])].append(item["*"]) + ns = int(item['id']) + self._namespaces[ns].aliases.append(item['*'])
if 'extensions' in sidata: self._extensions = sidata['extensions'] diff --git a/tests/dry_api_tests.py b/tests/dry_api_tests.py index 9bc44d2..c6cddf8 100644 --- a/tests/dry_api_tests.py +++ b/tests/dry_api_tests.py @@ -69,6 +69,9 @@ self._user = 'anon' pywikibot.site.BaseSite.__init__(self, 'mock', MockFamily())
+ def version(self): + return '1.13' # pre 1.14 + def languages(self): return ['mock']
diff --git a/tests/dry_site_tests.py b/tests/dry_site_tests.py index 16f247e..709d569 100644 --- a/tests/dry_site_tests.py +++ b/tests/dry_site_tests.py @@ -53,6 +53,7 @@ self._logged_in_as = None self.obsolete = False super(TestMustBe, self).setUp() + self.version = lambda: '1.13' # pre 1.14
def login(self, sysop): # mock call diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py new file mode 100644 index 0000000..b6fe44f --- /dev/null +++ b/tests/namespace_tests.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +""" +Tests for the Namespace class. +""" +# +# (C) Pywikibot team, 2014 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' + +from collections import Iterable +from pywikibot.site import Namespace +from tests.utils import PywikibotTestCase, unittest + +import sys +if sys.version_info[0] > 2: + basestring = (str, ) + + +class TestNamespaceObject(PywikibotTestCase): + """Test cases for Namespace class.""" + + # These should work in any MW wiki + builtin_ids = { + 'Media': -2, + 'Special': -1, + '': 0, + 'Talk': 1, + 'User': 2, + 'User talk': 3, + 'Project': 4, + 'Project talk': 5, + 'File': 6, + 'File talk': 7, + 'MediaWiki': 8, + 'MediaWiki talk': 9, + 'Template': 10, + 'Template talk': 11, + 'Help': 12, + 'Help talk': 13, + 'Category': 14, + 'Category talk': 15, + } + + old_builtin_ids = { + 'Image': 6, + 'Image talk': 7, + } + + all_builtin_ids = dict(builtin_ids.items() + old_builtin_ids.items()) + + def testNamespaceTypes(self): + """Test cases for methods manipulating namespace names""" + + ns = Namespace.builtin_namespaces(use_image_name=False) + + self.assertType(ns, dict) + self.assertTrue(all(x in ns for x in range(0, 16))) + + self.assertTrue(all(isinstance(key, int) + for key in ns)) + self.assertTrue(all(isinstance(val, Iterable) + for val in ns.values())) + self.assertTrue(all(isinstance(name, basestring) + for val in ns.values() + for name in val)) + + self.assertTrue(all(isinstance(Namespace.lookup_name(b, ns), Namespace) + for b in self.builtin_ids)) + + self.assertTrue(all(Namespace.lookup_name(b, ns).id == self.all_builtin_ids[b] + for b in self.all_builtin_ids)) + + ns = Namespace.builtin_namespaces(use_image_name=True) + + self.assertTrue(all(isinstance(Namespace.lookup_name(b, ns), 
Namespace) + for b in self.builtin_ids)) + + self.assertTrue(all(Namespace.lookup_name(b, ns).id == self.all_builtin_ids[b] + for b in self.all_builtin_ids)) + + def testNamespaceConstructor(self): + kwargs = {u'case': u'first-letter'} + y = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File', + aliases=[u'Image', u'Immagine'], **kwargs) + + self.assertEquals(y.id, 6) + self.assertEquals(y.custom_name, u'dummy') + self.assertEquals(y.canonical_name, u'File') + + self.assertNotEquals(y.custom_name, u'Dummy') + self.assertNotEquals(y.canonical_name, u'file') + + self.assertIn(u'Image', y.aliases) + self.assertIn(u'Immagine', y.aliases) + + self.assertEquals(len(y), 4) + self.assertEquals(list(y), ['dummy', u'File', u'Image', u'Immagine']) + self.assertEquals(y.case, u'first-letter') + + def testNamespaceNameCase(self): + """Namespace names are always case-insensitive.""" + kwargs = {u'case': u'first-letter'} + y = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File', + aliases=[u'Image', u'Immagine'], **kwargs) + self.assertIn(u'dummy', y) + self.assertIn(u'Dummy', y) + self.assertIn(u'file', y) + self.assertIn(u'File', y) + self.assertIn(u'image', y) + self.assertIn(u'Image', y) + self.assertIn(u'immagine', y) + self.assertIn(u'Immagine', y) + + def testNamespaceToString(self): + ns = Namespace.builtin_namespaces(use_image_name=False) + + self.assertEquals(str(ns[0]), ':') + self.assertEquals(str(ns[1]), 'Talk:') + self.assertEquals(str(ns[6]), ':File:') + + self.assertEquals(unicode(ns[0]), u':') + self.assertEquals(unicode(ns[1]), u'Talk:') + self.assertEquals(unicode(ns[6]), u':File:') + + kwargs = {u'case': u'first-letter'} + y = Namespace(id=6, custom_name=u'ملف', canonical_name=u'File', + aliases=[u'Image', u'Immagine'], **kwargs) + + self.assertEquals(str(y), ':File:') + self.assertEquals(unicode(y), u':ملف:') + + def testNamespaceCompare(self): + a = Namespace(id=0, canonical_name=u'') + + self.assertEquals(a, 0) + self.assertEquals(a, '') + 
self.assertEquals(a, None) + + x = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File', + aliases=[u'Image', u'Immagine']) + y = Namespace(id=6, custom_name=u'ملف', canonical_name=u'File', + aliases=[u'Image', u'Immagine']) + z = Namespace(id=7, custom_name=u'dummy', canonical_name=u'File', + aliases=[u'Image', u'Immagine']) + + self.assertEquals(x, x) + self.assertEquals(x, y) + self.assertNotEquals(x, a) + self.assertNotEquals(x, z) + + self.assertEquals(x, 6) + self.assertEquals(x, u'dummy') + self.assertEquals(x, u'Dummy') + self.assertEquals(x, u'file') + self.assertEquals(x, u'File') + self.assertEquals(x, u':File') + self.assertEquals(x, u':File:') + self.assertEquals(x, u'File:') + self.assertEquals(x, u'image') + self.assertEquals(x, u'Image') + + self.assertEquals(y, u'ملف') + + self.assertTrue(a < x) + self.assertTrue(x > a) + self.assertTrue(z > x) + + def testNamespaceNormalizeName(self): + self.assertEquals(Namespace.normalize_name(u'File'), u'File') + self.assertEquals(Namespace.normalize_name(u':File'), u'File') + self.assertEquals(Namespace.normalize_name(u'File:'), u'File') + self.assertEquals(Namespace.normalize_name(u':File:'), u'File') + + self.assertEquals(Namespace.normalize_name(u''), u'') + + self.assertEquals(Namespace.normalize_name(u':'), False) + self.assertEquals(Namespace.normalize_name(u'::'), False) + self.assertEquals(Namespace.normalize_name(u':::'), False) + self.assertEquals(Namespace.normalize_name(u':File::'), False) + self.assertEquals(Namespace.normalize_name(u'::File:'), False) + self.assertEquals(Namespace.normalize_name(u'::File::'), False) + + +if __name__ == '__main__': + try: + unittest.main() + except SystemExit: + pass diff --git a/tests/site_tests.py b/tests/site_tests.py index 4550618..6b9dd33 100644 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -11,6 +11,7 @@
from distutils.version import LooseVersion as LV +from collections import Iterable import pywikibot from tests.utils import PywikibotTestCase, unittest
@@ -115,19 +116,26 @@ self.assertType(mysite.ns_normalize("project"), basestring) self.assertTrue(all(isinstance(key, int) for key in ns)) - self.assertTrue(all(isinstance(val, list) + self.assertTrue(all(isinstance(val, Iterable) for val in ns.values())) self.assertTrue(all(isinstance(name, basestring) for val in ns.values() for name in val)) self.assertTrue(all(isinstance(mysite.namespace(key), basestring) for key in ns)) - self.assertTrue(all(isinstance(mysite.namespace(key, True), list) + self.assertTrue(all(isinstance(mysite.namespace(key, True), Iterable) for key in ns)) self.assertTrue(all(isinstance(item, basestring) for key in ns for item in mysite.namespace(key, True)))
+ def testNamespaceCase(self): + site = pywikibot.Site('en', 'wiktionary') + main_namespace = site.namespaces()[0] + self.assertEquals(main_namespace.case, 'case-sensitive') + user_namespace = site.namespaces()[2] + self.assertEquals(user_namespace.case, 'first-letter') + def testApiMethods(self): """Test generic ApiSite methods"""
diff --git a/tests/wikibase_tests.py b/tests/wikibase_tests.py index 9ef249e..f30ded4 100644 --- a/tests/wikibase_tests.py +++ b/tests/wikibase_tests.py @@ -40,6 +40,8 @@ if not site.has_transcluded_data: return repo = site.data_repository() + item_namespace = repo.namespaces()[0] + self.assertEqual(item_namespace.defaultcontentmodel, 'wikibase-item') item = pywikibot.ItemPage.fromPage(mainpage) self.assertType(item, pywikibot.ItemPage) self.assertEqual(item.getID(), 'Q5296')
pywikibot-commits@lists.wikimedia.org