jenkins-bot has submitted this change and it was merged.
Change subject: Allow pagegen filtering using namespace names ......................................................................
Allow pagegen filtering using namespace names
The ability to filter using namespace names was removed from the isbn script in 31c07b2, but still exists in other scripts. Namespace names are more user-friendly as users interact with these names regularly in titles, while the namespace numbers are less known, especially namespaces which are not frequently used.
NamespaceFilterPageGenerator behaviour when site is not provided is modified from 'no validation of int namespaces' to become 'validation using the namespaces of the default site', which will at least be equivalent for the builtins.
Equality of Namespace(0) with None is no longer supported.
Exceptions raised by Site methods during namespace validation are now defined behaviour.
Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0 --- M pywikibot/data/api.py M pywikibot/pagegenerators.py M pywikibot/site.py M scripts/add_text.py M scripts/commonscat.py M scripts/interwiki.py M scripts/isbn.py M tests/namespace_tests.py M tests/pagegenerators_tests.py 9 files changed, 374 insertions(+), 96 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 3366eaf..5225322 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -1401,8 +1401,14 @@ def set_namespace(self, namespaces): """Set a namespace filter on this query.
- @param namespaces: Either an int or a list of ints - + @param namespaces: namespace identifiers to limit query results + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool, or more than one namespace + if the API module does not support multiple namespaces """ assert(self.limited_module) # some modules do not have a prefix param = self.site._paraminfo.parameter(self.limited_module, 'namespace') @@ -1414,15 +1420,13 @@ if isinstance(namespaces, basestring): namespaces = namespaces.split('|')
- try: - iter(namespaces) - except TypeError: - namespaces = [namespaces] - - namespaces = [str(namespace) for namespace in namespaces] + # Use Namespace id (int) here; Request will cast int to str + namespaces = [ns.id for ns in + pywikibot.site.Namespace.resolve(namespaces, + self.site.namespaces)] if 'multi' not in param and len(namespaces) != 1: - raise pywikibot.Error(u'{0} module does not support multiple ' - 'namespaces.'.format(self.limited_module)) + raise TypeError(u'{0} module does not support multiple namespaces' + .format(self.limited_module))
self.request[self.prefix + "namespace"] = namespaces
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 2294b46..d863e18 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -29,6 +29,7 @@ import time
import pywikibot + from pywikibot import date, config, i18n from pywikibot.tools import ( deprecated, @@ -38,6 +39,7 @@ ) from pywikibot.comms import http import pywikibot.data.wikidataquery as wdquery +from pywikibot.site import Namespace
if sys.version_info[0] > 2: basestring = (str, ) @@ -106,7 +108,10 @@
-namespaces Filter the page generator to only yield pages in the -namespace specified namespaces. Separate multiple namespace --ns numbers with commas. Example "-ns:0,2,4" +-ns numbers or names with commas. + Examples: + -ns:0,2,4 + -ns:Help,MediaWiki If used with -newpages, -namepace/ns must be provided before -newpages. If used with -recentchanges, efficiency is improved if @@ -258,7 +263,7 @@ @type site: L{pywikibot.site.BaseSite} """ self.gens = [] - self.namespaces = [] + self._namespaces = [] self.step = None self.limit = None self.articlefilter_list = [] @@ -270,12 +275,41 @@ """ Generator site.
- @return: Site given to constructor, otherwise the default Site. + The generator site should not be accessed until after the global + arguments have been handled, otherwise the default Site may be changed + by global arguments, which will cause this cached value to be stale. + + @return: Site given to constructor, otherwise the default Site at the + time this property is first accessed. @rtype: L{pywikibot.site.BaseSite} """ if not self._site: self._site = pywikibot.Site() return self._site + + @property + def namespaces(self): + """ + List of Namespace parameters. + + Converts int or string namespaces to Namespace objects and + change the storage to immutable once it has been accessed. + + The resolving and validation of namespace command line arguments + is performed in this method, as it depends on the site property + which is lazy loaded to avoid being cached before the global + arguments are handled. + + @return: namespaces selected using arguments + @rtype: list of Namespace + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool + """ + if isinstance(self._namespaces, list): + self._namespaces = frozenset( + Namespace.resolve(self._namespaces, self.site.namespaces)) + return self._namespaces
def getCombinedGenerator(self, gen=None): """Return the combination of all accumulated generators. @@ -296,7 +330,8 @@ else: if self.namespaces: self.gens[i] = NamespaceFilterPageGenerator(self.gens[i], - self.namespaces) + self.namespaces, + self.site) if self.limit: self.gens[i] = itertools.islice(self.gens[i], self.limit) if len(self.gens) == 0: @@ -463,6 +498,11 @@ u'Please enter the local file name:') gen = TextfilePageGenerator(textfilename, site=self.site) elif arg.startswith('-namespace') or arg.startswith('-ns'): + if isinstance(self._namespaces, frozenset): + pywikibot.warning('Cannot handle arg %s as namespaces can not ' + 'be altered after a generator is created.' + % arg) + return True value = None if arg.startswith('-ns:'): value = arg[len('-ns:'):] @@ -473,13 +513,7 @@ if not value: value = pywikibot.input( u'What namespace are you filtering on?') - try: - self.namespaces.extend( - [int(ns) for ns in value.split(",")] - ) - except ValueError: - pywikibot.output(u'Invalid namespaces argument: %s' % value) - return False + self._namespaces += value.split(",") return True elif arg.startswith('-step'): if len(arg) == len('-step'): @@ -1046,35 +1080,36 @@ """ A generator yielding pages from another generator in given namespaces.
- The namespace list can contain both integers (namespace numbers) and - strings/unicode strings (namespace names). + If a site is provided, the namespaces are validated using the namespaces + of that site, otherwise the namespaces are validated using the default + site.
NOTE: API-based generators that have a "namespaces" parameter perform namespace filtering more efficiently than this generator.
- @param namespaces: list of namespace numbers to limit results - @type namespaces: list of int - @param site: Site for generator results, only needed if - namespaces contains namespace names. + @param namespaces: list of namespace identifiers to limit results + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. + @param site: Site for generator results; mandatory if + namespaces contains namespace names. Defaults to the default site. @type site: L{pywikibot.site.BaseSite} + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool, or more than one namespace + if the API module does not support multiple namespaces """ - if isinstance(namespaces, (int, basestring)): - namespaces = [namespaces] - # convert namespace names to namespace numbers - for i in range(len(namespaces)): - ns = namespaces[i] - if isinstance(ns, basestring): - try: - # namespace might be given as str representation of int - index = int(ns) - except ValueError: - # FIXME: deprecate providing strings as namespaces - if site is None: - site = pywikibot.Site() - index = site.getNamespaceIndex(ns) - if index is None: - raise ValueError(u'Unknown namespace: %s' % ns) - namespaces[i] = index + # As site was only required if the namespaces contain strings, dont + # attempt to use the config selected site unless the initial attempt + # at resolving the namespaces fails. 
+ try: + namespaces = Namespace.resolve(namespaces, + site.namespaces if site else + pywikibot.Site().namespaces) + except KeyError as e: + pywikibot.log('Failed resolving namespaces:') + pywikibot.exception(e) + raise + for page in generator: if page.namespace() in namespaces: yield page diff --git a/pywikibot/site.py b/pywikibot/site.py index c4cd53d..4851c65 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -323,8 +323,6 @@ return self.id == other.id elif isinstance(other, basestring): return other in self - elif other is None: - return self.id == 0
def __ne__(self, other): """Compare whether two namespace objects are not equal.""" @@ -396,9 +394,10 @@
@staticmethod def lookup_name(name, namespaces=None): - """Find the namespace for a name. + """Find the Namespace for a name.
@param name: Name of the namespace. + @type name: basestring @param namespaces: namespaces to search default: builtins only @type namespaces: dict of Namespace @@ -417,6 +416,61 @@ return namespace
return None + + @staticmethod + def resolve(identifiers, namespaces=None): + """ + Resolve namespace identifiers to obtain Namespace objects. + + Identifiers may be any value for which int() produces a valid + namespace id, except bool, or any string which Namespace.lookup_name + successfully finds. A numerical string is resolved as an integer. + + @param identifiers: namespace identifiers + @type identifiers: iterable of basestring or Namespace key, + or a single instance of those types + @param namespaces: namespaces to search (default: builtins only) + @type namespaces: dict of Namespace + @return: list of Namespace objects in the same order as the + identifiers + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool + """ + if not namespaces: + namespaces = Namespace.builtin_namespaces() + + if isinstance(identifiers, (basestring, Namespace)): + identifiers = [identifiers] + else: + # convert non-iterators to single item list + try: + iter(identifiers) + except TypeError: + identifiers = [identifiers] + + # lookup namespace names, and assume anything else is a key. + # int(None) raises TypeError; however, bool needs special handling. + result = [NotImplemented if isinstance(ns, bool) else + Namespace.lookup_name(ns, namespaces) + if isinstance(ns, basestring) + and not ns.lstrip('-').isdigit() else + namespaces[int(ns)] if int(ns) in namespaces + else None + for ns in identifiers] + + if NotImplemented in result: + raise TypeError('identifiers contains inappropriate types: %r' + % identifiers) + + # Namespace.lookup_name returns None if the name is not recognised + if None in result: + raise KeyError(u'Namespace identifier(s) not recognised: %s' + % u','.join([str(identifier) for identifier, ns + in zip(identifiers, result) + if ns is None])) + + return result
class BaseSite(ComparableMixin): @@ -1512,13 +1566,19 @@ @type type_arg: str @param namespaces: if not None, limit the query to namespaces in this list - @type namespaces: int, or list of ints + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param step: if not None, limit each API call to this many items @type step: int @param total: if not None, limit the generator to yielding this many items in total @type total: int - + @return: iterable with parameters set + @rtype: QueryGenerator + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ if type_arg is not None: gen = gen_class(type_arg, site=self, **args) @@ -2661,11 +2721,16 @@ both (no filtering). @param namespaces: If present, only return links from the namespaces in this list. + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param step: Limit on number of pages to retrieve per API query. @param total: Maximum number of pages to retrieve in total. @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ bltitle = page.title(withSection=False).encode(self.encoding()) blargs = {"gbltitle": bltitle} @@ -2714,9 +2779,14 @@ None, return both (no filtering). @param namespaces: If present, only return links from the namespaces in this list. + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. 
@param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ eiargs = {"geititle": page.title(withSection=False).encode(self.encoding())} @@ -2731,7 +2801,18 @@ def pagereferences(self, page, followRedirects=False, filterRedirects=None, withTemplateInclusion=True, onlyTemplateInclusion=False, namespaces=None, step=None, total=None, content=False): - """Convenience method combining pagebacklinks and page_embeddedin.""" + """ + Convenience method combining pagebacklinks and page_embeddedin. + + @param namespaces: If present, only return links from the namespaces + in this list. + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool + """ if onlyTemplateInclusion: return self.page_embeddedin(page, namespaces=namespaces, filterRedirects=filterRedirects, @@ -2756,12 +2837,16 @@ """Iterate internal wikilinks contained (or transcluded) on page.
@param namespaces: Only iterate pages in these namespaces (default: all) - @type namespaces: list of ints + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param follow_redirects: if True, yields the target of any redirects, rather than the redirect page @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ plargs = {} if hasattr(page, "_pageid"): @@ -2814,9 +2899,16 @@ content=False): """Iterate templates transcluded (not just linked) on the page.
+ @param namespaces: Only iterate pages in these namespaces + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param content: if True, load the current content of each iterated page (default False)
+ @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ tltitle = page.title(withSection=False).encode(self.encoding()) tlgen = self._generator(api.PageGenerator, type_arg="templates", @@ -2836,7 +2928,9 @@ subcategories, use namespaces=[6] to yield image files, etc. Note, however, that the iterated values are always Page objects, even if in the Category or Image namespace. - @type namespaces: list of ints + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param sortby: determines the order in which results are generated, valid values are "sortkey" (default, results ordered by category sort key) or "timestamp" (results ordered by time page was @@ -2858,7 +2952,9 @@ @type endsort: str @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ if category.namespace() != 14: raise Error( @@ -3115,7 +3211,7 @@ @param start: Start at this title (page need not exist). @param prefix: Only yield pages starting with this string. @param namespace: Iterate pages from this (single) namespace - (default: 0) + @type namespace: int or Namespace. 
@param filterredir: if True, only yield redirects; if False (and not None), only yield non-redirects (default: yield both) @param filterlanglinks: if True, only yield pages with language links; @@ -3135,11 +3231,10 @@ @param includeredirects: DEPRECATED, use filterredir instead @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: the namespace identifier was not resolved + @raises TypeError: the namespace identifier has an inappropriate + type such as bool, or an iterable with more than one namespace """ - if not isinstance(namespace, (int, Namespace)): - raise Error("allpages: only one namespace permitted.") - if includeredirects is not None: if includeredirects: if includeredirects == "only": @@ -3196,18 +3291,18 @@ @param start: Start at this title (page need not exist). @param prefix: Only yield pages starting with this string. @param namespace: Iterate pages from this (single) namespace - (default: 0) + @type namespace: int or Namespace @param unique: If True, only iterate each link title once (default: iterate once for each linking page) @param fromids: if True, include the pageid of the page containing each link (default: False) as the '_fromid' attribute of the Page; cannot be combined with unique - + @raises KeyError: the namespace identifier was not resolved + @raises TypeError: the namespace identifier has an inappropriate + type such as bool, or an iterable with more than one namespace """ if unique and fromids: raise Error("alllinks: unique and fromids cannot both be True.") - if not isinstance(namespace, (int, Namespace)): - raise Error("alllinks: only one namespace permitted.") algen = self._generator(api.ListGenerator, type_arg="alllinks", alnamespace=int(namespace), alfrom=start, step=step, total=total) @@ -3409,11 +3504,17 @@ @param image: the image to search for (FilePage need not exist on the wiki) @type image: FilePage + @param namespaces: If present, only iterate pages in these 
namespaces + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param filterredir: if True, only yield redirects; if False (and not None), only yield non-redirects (default: yield both) @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ iuargs = dict(giutitle=image.title(withSection=False)) if filterredir is not None: @@ -3435,10 +3536,13 @@ @param user: only iterate entries that match this user name @param page: only iterate entries affecting this page @param namespace: namespace to retrieve logevents from + @type namespace: int or Namespace @param start: only iterate entries from and after this Timestamp @param end: only iterate entries up to and through this Timestamp @param reverse: if True, iterate oldest entries first (default: newest) - + @raises KeyError: the namespace identifier was not resolved + @raises TypeError: the namespace identifier has an inappropriate + type such as bool, or an iterable with more than one namespace """ if start and end: self.assert_valid_iter_params('logevents', start, end, reverse) @@ -3474,6 +3578,10 @@ @type end: pywikibot.Timestamp @param reverse: if True, start with oldest changes (default: newest) @type reverse: bool + @param namespaces: only iterate pages in these namespaces + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. 
@param pagelist: iterate changes to pages in this list only @param pagelist: list of Pages @param changetype: only iterate changes of this type ("edit" for @@ -3502,7 +3610,9 @@ @type user: basestring|list @param excludeuser: if not None, exclude edits by this user or users @type excludeuser: basestring|list - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ if start and end: self.assert_valid_iter_params('recentchanges', start, end, reverse) @@ -3565,14 +3675,17 @@ @type searchstring: unicode @param where: Where to search; value must be "text" or "titles" (many wikis do not support title search) - @param namespaces: search only in these namespaces (defaults to 0) - @type namespaces: list of ints, or an empty list to signal all - namespaces + @param namespaces: search only in these namespaces (defaults to all) + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param getredirects: if True, include redirects in results. Since version MediaWiki 1.23 it will always return redirects. @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ if not searchstring: raise Error("search: searchstring cannot be empty") @@ -3604,11 +3717,17 @@ @param start: Iterate contributions starting at this Timestamp @param end: Iterate contributions ending at this Timestamp @param reverse: Iterate oldest contributions first (default: newest) + @param namespaces: only iterate pages in these namespaces + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. 
@param showMinor: if True, iterate only minor edits; if False and not None, iterate only non-minor edits (default: iterate both) @param top_only: if True, iterate only edits which are the latest revision - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ if not (user or userprefix): raise Error( @@ -3647,13 +3766,19 @@ @param start: Iterate revisions starting at this Timestamp @param end: Iterate revisions ending at this Timestamp @param reverse: Iterate oldest revisions first (default: newest) + @param namespaces: only iterate pages in these namespaces + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param showMinor: if True, only list minor edits; if False (and not None), only list non-minor edits @param showBot: if True, only list bot edits; if False (and not None), only list non-bot edits @param showAnon: if True, only list anon edits; if False (and not None), only list non-anon edits - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ if start and end: self.assert_valid_iter_params('watchlist_revs', start, end, reverse) @@ -3784,11 +3909,16 @@
@param total: the maximum number of pages to iterate (default: 1) @param namespaces: only iterate pages in these namespaces. + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. @param redirects: if True, include only redirect pages in results (default: include only non-redirects) @param content: if True, load the current content of each iterated page (default False) - + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ rngen = self._generator(api.PageGenerator, type_arg="random", namespaces=namespaces, step=step, total=total, @@ -4734,6 +4864,13 @@ timestamp (unicode), length (int), an empty unicode string, username or IP address (str), comment (unicode).
+ @param namespaces: only iterate pages in these namespaces + @type namespaces: iterable of basestring or Namespace key, + or a single instance of those types. May be a '|' separated + list of namespace identifiers. + @raises KeyError: a namespace identifier was not resolved + @raises TypeError: a namespace identifier has an inappropriate + type such as NoneType or bool """ # TODO: update docstring
diff --git a/scripts/add_text.py b/scripts/add_text.py index 20a0d56..69ab777 100644 --- a/scripts/add_text.py +++ b/scripts/add_text.py @@ -358,7 +358,7 @@ if index % 2 == 1 and index > 0: namespaces += [index] generator = pagegenerators.NamespaceFilterPageGenerator( - generator, namespaces) + generator, namespaces, site) for page in generator: (text, newtext, always) = add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True, diff --git a/scripts/commonscat.py b/scripts/commonscat.py index 3dee15c..13031a5 100755 --- a/scripts/commonscat.py +++ b/scripts/commonscat.py @@ -543,14 +543,16 @@ genFactory.handleArg(arg)
if checkcurrent: + site = pywikibot.Site() primaryCommonscat, commonscatAlternatives = \ CommonscatBot.getCommonscatTemplate( - pywikibot.Site().code) + site.code) generator = pagegenerators.NamespaceFilterPageGenerator( pagegenerators.ReferringPageGenerator( - pywikibot.Page(pywikibot.Site(), - u'Template:' + primaryCommonscat), - onlyTemplateInclusion=True), ns) + pywikibot.Page(site, u'Template:' + primaryCommonscat), + onlyTemplateInclusion=True), + ns, + site)
if not generator: generator = genFactory.getCombinedGenerator() diff --git a/scripts/interwiki.py b/scripts/interwiki.py index 396a085..342cd5e 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -2601,7 +2601,8 @@ hintlessPageGen = genFactory.getCombinedGenerator() if hintlessPageGen: if len(namespaces) > 0: - hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator(hintlessPageGen, namespaces) + hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator( + hintlessPageGen, namespaces, site) # we'll use iter() to create make a next() function available. bot.setPageGenerator(iter(hintlessPageGen), number=number, until=until) elif warnfile: diff --git a/scripts/isbn.py b/scripts/isbn.py index 50663ac..fa7f472 100755 --- a/scripts/isbn.py +++ b/scripts/isbn.py @@ -11,12 +11,6 @@
&params;
--namespace:n Number or name of namespace to process. The parameter can be - used multiple times. It works in combination with all other - parameters, except for the -start parameter. If you e.g. - want to iterate over all categories starting at M, use - -start:Category:M. - Furthermore, the following command line parameters are supported:
-to13 Converts all ISBN-10 codes to ISBN-13. diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py index c33a6e3..703e4c2 100644 --- a/tests/namespace_tests.py +++ b/tests/namespace_tests.py @@ -143,7 +143,7 @@
self.assertEqual(a, 0) self.assertEqual(a, '') - self.assertEqual(a, None) + self.assertNotEqual(a, None)
x = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File', aliases=[u'Image', u'Immagine']) @@ -173,6 +173,9 @@ self.assertLess(a, x) self.assertGreater(x, a) self.assertGreater(z, x) + + self.assertIn(6, [x, y, z]) + self.assertNotIn(8, [x, y, z])
def testNamespaceNormalizeName(self): self.assertEqual(Namespace.normalize_name(u'File'), u'File') @@ -211,6 +214,63 @@ b = eval(repr(a)) self.assertEqual(a, b)
+ def test_resolve(self): + namespaces = Namespace.builtin_namespaces(use_image_name=False) + main_ns = namespaces[0] + file_ns = namespaces[6] + special_ns = namespaces[-1] + + self.assertEqual(Namespace.resolve([6]), [file_ns]) + self.assertEqual(Namespace.resolve(['File']), [file_ns]) + self.assertEqual(Namespace.resolve(['6']), [file_ns]) + self.assertEqual(Namespace.resolve([file_ns]), [file_ns]) + + self.assertEqual(Namespace.resolve([file_ns, special_ns]), + [file_ns, special_ns]) + self.assertEqual(Namespace.resolve([file_ns, file_ns]), + [file_ns, file_ns]) + + self.assertEqual(Namespace.resolve(6), [file_ns]) + self.assertEqual(Namespace.resolve('File'), [file_ns]) + self.assertEqual(Namespace.resolve('6'), [file_ns]) + self.assertEqual(Namespace.resolve(file_ns), [file_ns]) + + self.assertEqual(Namespace.resolve(0), [main_ns]) + self.assertEqual(Namespace.resolve('0'), [main_ns]) + + self.assertEqual(Namespace.resolve(-1), [special_ns]) + self.assertEqual(Namespace.resolve('-1'), [special_ns]) + + self.assertEqual(Namespace.resolve('File:'), [file_ns]) + self.assertEqual(Namespace.resolve(':File'), [file_ns]) + self.assertEqual(Namespace.resolve(':File:'), [file_ns]) + + self.assertEqual(Namespace.resolve('Image:'), [file_ns]) + self.assertEqual(Namespace.resolve(':Image'), [file_ns]) + self.assertEqual(Namespace.resolve(':Image:'), [file_ns]) + + self.assertRaises(TypeError, Namespace.resolve, [True]) + self.assertRaises(TypeError, Namespace.resolve, [False]) + self.assertRaises(TypeError, Namespace.resolve, [None]) + self.assertRaises(TypeError, Namespace.resolve, True) + self.assertRaises(TypeError, Namespace.resolve, False) + self.assertRaises(TypeError, Namespace.resolve, None) + + self.assertRaises(KeyError, Namespace.resolve, -10) + self.assertRaises(KeyError, Namespace.resolve, '-10') + self.assertRaises(KeyError, Namespace.resolve, 'foo') + self.assertRaises(KeyError, Namespace.resolve, ['foo']) + + self.assertRaisesRegex(KeyError, + r'Namespace 
identifier(s) not recognised: -10', + Namespace.resolve, [-10, 0]) + self.assertRaisesRegex(KeyError, + r'Namespace identifier(s) not recognised: foo', + Namespace.resolve, [0, 'foo']) + self.assertRaisesRegex(KeyError, + r'Namespace identifier(s) not recognised: -10,-11', + Namespace.resolve, [-10, 0, -11]) +
if __name__ == '__main__': try: diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py index 32f3fda..bb7a965 100755 --- a/tests/pagegenerators_tests.py +++ b/tests/pagegenerators_tests.py @@ -79,17 +79,23 @@
def test_NamespaceFilterPageGenerator(self): self.assertFunction("NamespaceFilterPageGenerator") - gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site) - gen = pagegenerators.NamespaceFilterPageGenerator(gen, 0) + site = self.site + gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site) + gen = pagegenerators.NamespaceFilterPageGenerator(gen, 0, site) self.assertEqual(len(tuple(gen)), 3) - gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site) - gen = pagegenerators.NamespaceFilterPageGenerator(gen, 1) + gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site) + gen = pagegenerators.NamespaceFilterPageGenerator(gen, 1, site) self.assertEqual(len(tuple(gen)), 4) - gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site) - gen = pagegenerators.NamespaceFilterPageGenerator(gen, 10) + gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site) + gen = pagegenerators.NamespaceFilterPageGenerator(gen, 10, site) self.assertEqual(len(tuple(gen)), 6) - gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site) - gen = pagegenerators.NamespaceFilterPageGenerator(gen, (1, 10)) + gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site) + gen = pagegenerators.NamespaceFilterPageGenerator(gen, (1, 10), site) + self.assertEqual(len(tuple(gen)), 10) + gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site) + gen = pagegenerators.NamespaceFilterPageGenerator(gen, + ('Talk', 'Template'), + site) self.assertEqual(len(tuple(gen)), 10)
def test_RegexFilterPageGenerator(self): @@ -345,6 +351,45 @@ self.assertTrue(all(isinstance(item, pywikibot.ItemPage) for item in gen))
+class DryFactoryGeneratorTest(TestCase): + + """Dry tests for pagegenerators.GeneratorFactory.""" + + family = 'wikipedia' + code = 'en' + + dry = True + + def test_one_namespace(self): + gf = pagegenerators.GeneratorFactory(site=self.get_site()) + gf.handleArg('-ns:2') + self.assertEqual(gf.namespaces, set([2])) + + def test_two_namespaces(self): + gf = pagegenerators.GeneratorFactory(site=self.get_site()) + gf.handleArg('-ns:2') + gf.handleArg('-ns:Talk') + self.assertEqual(gf.namespaces, set([2, 1])) + + def test_two_named_namespaces(self): + gf = pagegenerators.GeneratorFactory(site=self.get_site()) + gf.handleArg('-ns:Talk,File') + self.assertEqual(gf.namespaces, set([1, 6])) + + def test_two_numeric_namespaces(self): + gf = pagegenerators.GeneratorFactory(site=self.get_site()) + gf.handleArg('-ns:1,6') + self.assertEqual(gf.namespaces, set([1, 6])) + + def test_immutable_namespaces_on_read(self): + gf = pagegenerators.GeneratorFactory(site=self.get_site()) + gf.handleArg('-ns:1,6') + self.assertEqual(gf.namespaces, set([1, 6])) + self.assertIsInstance(gf.namespaces, frozenset) + gf.handleArg('-ns:0') + self.assertEqual(gf.namespaces, set([1, 6])) + + class TestFactoryGenerator(DefaultSiteTestCase):
"""Test pagegenerators.GeneratorFactory."""