jenkins-bot has submitted this change and it was merged.
Change subject: Add orderless page title assert to fix broken test
......................................................................
Add orderless page title assert to fix broken test
Split assertPagelistTitles into assertPageTitlesEqual and
assertPageTitlesCountEqual, with semantics similar to assertEqual
and assertCountEqual; however, the generator is only consumed until
it has yielded more items than the expected number of titles.
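For illustration only, a minimal sketch (hypothetical dry test, not
part of this change) of how the two asserts are intended to be used:
# assertPageTitlesEqual checks content and order; assertPageTitlesCountEqual
# ignores order. Neither consumes more than len(titles) + 2 items from gen.
import pywikibot
from tests.aspects import TestCase

class TitleAssertExample(TestCase):

    """Illustrative test of the new page title asserts."""

    family = 'wikipedia'
    code = 'en'
    dry = True

    def test_titles(self):
        pages = (pywikibot.Page(self.site, t) for t in ['Foo', 'Bar'])
        # exact order required
        self.assertPageTitlesEqual(pages, ['Foo', 'Bar'], site=self.site)
        pages = (pywikibot.Page(self.site, t) for t in ['Bar', 'Foo'])
        # any order accepted, e.g. the output of a preloading generator
        self.assertPageTitlesCountEqual(pages, ['Foo', 'Bar'], site=self.site)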
Create new test module 'tests' to test features of the test system.
It is disabled by default.
Fix pagegenerators tests which misused the assertPagelistTitles 'titles' arg.
Change the reflinks xml tests to use assertPageTitlesCountEqual,
as the use of preloading means the order can vary.
Make the xmlreader asserts more precise so they verify there are
no ordering problems in xmlreader, and fix a unicode bug
in an expectedFailure test.
Change-Id: I00ceb32ac468b98f418d0fbe7ab1f0748e15b352
---
M tests/__init__.py
M tests/aspects.py
M tests/http_tests.py
M tests/pagegenerators_tests.py
M tests/reflinks_tests.py
A tests/tests_tests.py
M tests/xmlreader_tests.py
7 files changed, 163 insertions(+), 69 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/tests/__init__.py b/tests/__init__.py
index 2634a8b..f72d451 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -58,6 +58,7 @@
library_test_modules = [
'deprecation',
+ 'tests',
'date',
'mediawikiversion',
'ipregex',
@@ -97,7 +98,8 @@
]
disabled_test_modules = [
- 'ui',
+ 'tests', # tests of the tests package
+ 'ui', # these tests haven't been designed to be run in the test runner.
]
disabled_tests = {
diff --git a/tests/aspects.py b/tests/aspects.py
index 6915e39..86d1341 100644
--- a/tests/aspects.py
+++ b/tests/aspects.py
@@ -29,11 +29,11 @@
UITestCase:
Not integrated; direct subclass of unittest.TestCase.
"""
-import collections
-import time
-import sys
-import os
import inspect
+import itertools
+import os
+import sys
+import time
import pywikibot
@@ -106,6 +106,56 @@
self.assertIn(page.namespace(), namespaces,
"%s not in namespace %r" % (page, namespaces))
+ def _get_gen_pages(self, gen, count=None, site=None):
+ """
+ Get pages from gen, asserting they are Page from site.
+
+ Iterates over at most two items more than count: the Page after
+ count is included if it exists, followed by a Page with the title
+ '...' if further items remain in the iterator.
+
+ @param gen: Page generator
+ @type gen: generator of Page
+ @param count: number of pages to get
+ @type count: int
+ @param site: Site of expected pages
+ @type site: APISite
+ """
+ original_iter = iter(gen)
+
+ gen = itertools.islice(original_iter, 0, count)
+
+ gen_pages = list(gen)
+
+ try:
+ gen_pages.append(next(original_iter))
+ next(original_iter)
+ if not site:
+ site = gen_pages[0].site
+ gen_pages.append(pywikibot.Page(site, '...'))
+ except StopIteration:
+ pass
+
+ for page in gen_pages:
+ self.assertIsInstance(page, pywikibot.Page)
+ if site:
+ self.assertEqual(page.site, site)
+
+ return gen_pages
+
+ def _get_gen_titles(self, gen, count, site=None):
+ gen_pages = self._get_gen_pages(gen, count, site)
+ gen_titles = [page.title() for page in gen_pages]
+ return gen_titles
+
+ def _get_canonical_titles(self, titles, site=None):
+ if site:
+ titles = [pywikibot.Link(title, site).canonical_title()
+ for title in titles]
+ elif not isinstance(titles, list):
+ titles = list(titles)
+ return titles
+
def assertPagesInNamespaces(self, gen, namespaces):
"""
Assert that generator returns Pages all in namespaces.
@@ -147,42 +197,41 @@
else:
self.assertEqual(set(page_namespaces), namespaces)
- def assertPagelistTitles(self, gen, titles, site=None):
+ def assertPageTitlesEqual(self, gen, titles, site=None):
"""
Test that pages in gen match expected titles.
- If the expected titles is a tuple, assert that the generator yields
- pages with the same number and order of titles.
+ Only iterates to the length of titles plus two.
@param gen: Page generator
@type gen: generator of Page
@param titles: Expected titles
- @type titles: tuple or list
+ @type titles: iterator
+ @param site: Site of expected pages
+ @type site: APISite
"""
- is_tuple = isinstance(titles, tuple)
- if site:
- titles = [pywikibot.Link(title, site).canonical_title()
- for title in titles]
- if is_tuple:
- titles = tuple(titles)
+ titles = self._get_canonical_titles(titles, site)
+ gen_titles = self._get_gen_titles(gen, len(titles), site)
+ self.assertEqual(gen_titles, titles)
- if is_tuple:
- working_set = collections.deque(titles)
+ def assertPageTitlesCountEqual(self, gen, titles, site=None):
+ """
+ Test that pages in gen match expected titles, regardless of order.
- for page in gen:
- self.assertIsInstance(page, pywikibot.Page)
- if site:
- self.assertEqual(page.site, site)
+ Only iterates to the length of titles plus two.
- title = page.title()
- self.assertIn(title, titles)
- if is_tuple:
- self.assertIn(title, working_set)
- self.assertEqual(title, working_set[0])
- working_set.popleft()
+ @param gen: Page generator
+ @type gen: generator of Page
+ @param titles: Expected titles
+ @type titles: iterator
+ @param site: Site of expected pages
+ @type site: APISite
+ """
+ titles = self._get_canonical_titles(titles, site)
+ gen_titles = self._get_gen_titles(gen, len(titles), site)
+ self.assertCountEqual(gen_titles, titles)
- if is_tuple:
- self.assertEqual(working_set, collections.deque([]))
+ assertPagelistTitles = assertPageTitlesEqual
class TestLoggingMixin(TestCaseBase):
diff --git a/tests/http_tests.py b/tests/http_tests.py
index 1a23179..4b1a885 100644
--- a/tests/http_tests.py
+++ b/tests/http_tests.py
@@ -67,22 +67,6 @@
self.assertIn('<html lang="mul"', r)
-class HttpServerProblemTestCase(TestCase):
-
- """Test HTTP status 502 causes this test class to be skipped."""
-
- sites = {
- '502': {
- 'hostname': 'http://getstatuscode.com/502',
- }
- }
-
- def test_502(self):
- """Test a HTTP 502 response using http://getstatuscode.com/502."""
- self.fail('The test framework should skip this test.')
- pass
-
-
class HttpsCertificateTestCase(TestCase):
"""HTTPS certificate test."""
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 31d0ec3..00eb2fb 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -28,7 +28,6 @@
)
from tests.thread_tests import GeneratorIntersectTestCase
-
en_wp_page_titles = (
# just a bunch of randomly selected titles for English Wikipedia tests
u"Eastern Sayan",
@@ -116,11 +115,11 @@
gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
gen = pagegenerators.RegexFilterPageGenerator(gen, ['/doc', '/meta'],
quantifier='all')
- self.assertPagelistTitles(gen, ())
+ self.assertPagelistTitles(gen, [])
gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
gen = pagegenerators.RegexFilterPageGenerator(gen, ['Template', '/meta'],
quantifier='all')
- self.assertPagelistTitles(gen, ('Template:Template/Meta'))
+ self.assertPagelistTitles(gen, ('Template:Template/Meta', ))
gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
gen = pagegenerators.RegexFilterPageGenerator(gen, ['template', '/meta'],
quantifier='any')
diff --git a/tests/reflinks_tests.py b/tests/reflinks_tests.py
index d5dda0a..34499ba 100644
--- a/tests/reflinks_tests.py
+++ b/tests/reflinks_tests.py
@@ -133,7 +133,13 @@
class TestReferencesBotConstructor(TestCase):
- """Test reflinks with non-write patching (if the testpage exists)."""
+ """
+ Test reflinks with run() removed.
+
+ These tests can't verify the order of the pages in the XML
+ as the constructor is given a preloading generator.
+ See APISite.preloadpages for details.
+ """
family = 'wikipedia'
code = 'en'
@@ -159,34 +165,31 @@
def test_xml_simple(self):
main('-xml:' + os.path.join(_xml_data_dir, 'dummy-reflinks.xml'))
gen = self.constructor_args[0]
- pages = list(gen)
- self.assertPagelistTitles(pages, (u'Fake page', u'Talk:Fake page'),
- site=self.get_site())
+ self.assertPageTitlesCountEqual(gen, [u'Fake page', u'Talk:Fake page'],
+ site=self.get_site())
def test_xml_one_namespace(self):
main('-xml:' + os.path.join(_xml_data_dir, 'dummy-reflinks.xml'),
'-namespace:1')
gen = self.constructor_args[0]
pages = list(gen)
- self.assertPagelistTitles(pages, (u'Talk:Fake page', ),
+ self.assertPagelistTitles(pages, [u'Talk:Fake page'],
site=self.get_site())
def test_xml_multiple_namespace_ids(self):
main('-xml:' + os.path.join(_xml_data_dir, 'dummy-reflinks.xml'),
'-namespace:0', '-namespace:1', '-xmlstart:Fake page')
gen = self.constructor_args[0]
- pages = list(gen)
- self.assertPagelistTitles(pages, (u'Fake page', u'Talk:Fake page'),
- site=self.get_site())
+ self.assertPageTitlesCountEqual(gen, [u'Fake page', u'Talk:Fake page'],
+ site=self.get_site())
@unittest.expectedFailure
def test_xml_multiple_namespace_ids_2(self):
main('-xml:' + os.path.join(_xml_data_dir, 'dummy-reflinks.xml'),
'-namespace:0,1', '-xmlstart:Fake page')
gen = self.constructor_args[0]
- pages = list(gen)
- self.assertPagelistTitles(pages, (u'Fake page', u'Talk:Fake page'),
- site=self.get_site())
+ self.assertPageTitlesCountEqual(gen, [u'Fake page', u'Talk:Fake page'],
+ site=self.get_site())
@unittest.expectedFailure
def test_xml_start_prefix(self):
@@ -194,7 +197,7 @@
'-namespace:1', '-xmlstart:Fake')
gen = self.constructor_args[0]
pages = list(gen)
- self.assertPagelistTitles(pages, (u'Talk:Fake page', ),
+ self.assertPagelistTitles(pages, [u'Talk:Fake page'],
site=self.get_site())
@unittest.expectedFailure
@@ -203,7 +206,7 @@
'-namespace:1', '-xmlstart:Fake_page')
gen = self.constructor_args[0]
pages = list(gen)
- self.assertPagelistTitles(pages, (u'Talk:Fake page', ),
+ self.assertPagelistTitles(pages, [u'Talk:Fake page'],
site=self.get_site())
def test_xml_namespace_name(self):
@@ -211,7 +214,7 @@
'-namespace:Talk', '-xmlstart:Fake page')
gen = self.constructor_args[0]
pages = list(gen)
- self.assertPagelistTitles(pages, (u'Talk:Fake page', ),
+ self.assertPagelistTitles(pages, [u'Talk:Fake page'],
site=self.get_site())
diff --git a/tests/tests_tests.py b/tests/tests_tests.py
new file mode 100755
index 0000000..6ac9c69
--- /dev/null
+++ b/tests/tests_tests.py
@@ -0,0 +1,56 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""Tests for the tests package."""
+#
+# (C) Pywikibot team, 2014
+#
+# Distributed under the terms of the MIT license.
+__version__ = '$Id$'
+
+import pywikibot
+
+from tests.aspects import unittest, TestCase
+from tests.utils import allowed_failure
+
+
+class HttpServerProblemTestCase(TestCase):
+
+ """Test HTTP status 502 causes this test class to be skipped."""
+
+ sites = {
+ '502': {
+ 'hostname': 'http://getstatuscode.com/502',
+ }
+ }
+
+ def test_502(self):
+ """Test a HTTP 502 response using http://getstatuscode.com/502."""
+ self.fail('The test framework should skip this test.')
+ pass
+
+
+class TestPageAssert(TestCase):
+
+ """Test page assertion methods."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ dry = True
+
+ @allowed_failure
+ def test_assertPageTitlesEqual(self):
+ """Test assertPageTitlesEqual shows the second page title and '...'."""
+ pages = [pywikibot.Page(self.site, 'Foo'),
+ pywikibot.Page(self.site, 'Bar'),
+ pywikibot.Page(self.site, 'Baz')]
+ self.assertPageTitlesEqual(pages,
+ ['Foo'],
+ self.site)
+
+
+if __name__ == "__main__":
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
diff --git a/tests/xmlreader_tests.py b/tests/xmlreader_tests.py
index 3681e19..85dbbe8 100644
--- a/tests/xmlreader_tests.py
+++ b/tests/xmlreader_tests.py
@@ -13,7 +13,6 @@
from tests import _data_dir
from tests.aspects import unittest, TestCase
-from tests.utils import allowed_failure
_xml_data_dir = os.path.join(_data_dir, 'xml')
@@ -90,34 +89,36 @@
def test_pair(self):
entries = self._get_entries('pair-0.10.xml', allrevisions=True)
self.assertEqual(4, len(entries))
- self.assertTrue(all(entry.title.endswith(u"Çullu, Agdam")
- for entry in entries))
self.assertTrue(all(entry.username == 'Carlossuarez46'
for entry in entries))
self.assertTrue(all(entry.isredirect is False for entry in entries))
- articles = [entry for entry in entries if entry.ns == "0"]
- talks = [entry for entry in entries if entry.ns == "1"]
+ articles = entries[0:2]
+ talks = entries[2:4]
self.assertEqual(2, len(articles))
self.assertTrue(all(entry.id == "19252820" for entry in articles))
+ self.assertTrue(all(entry.title == u"Çullu, Agdam"
+ for entry in articles))
self.assertTrue(all(u'Çullu, Quzanlı' in entry.text
for entry in articles))
self.assertEqual(articles[0].text, u'#REDIRECT [[Çullu, Quzanlı]]')
self.assertEqual(2, len(talks))
self.assertTrue(all(entry.id == "19252824" for entry in talks))
+ self.assertTrue(all(entry.title == u"Talk:Çullu, Agdam"
+ for entry in talks))
self.assertEqual(talks[1].text, '{{DisambigProject}}')
self.assertEqual(talks[1].comment, 'proj')
- @allowed_failure
def test_edit_summary_decoding(self):
"""Test edit summaries are decoded."""
entries = self._get_entries('pair-0.10.xml', allrevisions=True)
articles = [entry for entry in entries if entry.ns == "0"]
+ # It does not decode the edit summary
self.assertEqual(articles[0].comment,
- 'moved [[Çullu, Agdam]] to [[Çullu, Quzanlı]]: dab')
+ u'moved [[Çullu, Agdam]] to [[Çullu, Quzanlı]]: dab')
if __name__ == '__main__':
--
To view, visit https://gerrit.wikimedia.org/r/181983
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I00ceb32ac468b98f418d0fbe7ab1f0748e15b352
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>
Build Update for wikimedia/pywikibot-core
-------------------------------------
Build: #1858
Status: Still Failing
Duration: 43 minutes and 6 seconds
Commit: ac5a4a9 (master)
Author: jenkins-bot
Message: Updated pywikibot/core
Project: pywikibot/i18n 75ba3020be6171481a31be6046e1f0828095c0f3
Use interwiki links for qqq messages
qqq messages used http external links which could
be replaced with interwiki links to WMF projects.
Change-Id: I7319053be018cb597801959d39cc571f67549294
View the changeset: https://github.com/wikimedia/pywikibot-core/compare/6db4dd6dc1b1...ac5a4a91…
View the full build log and details: https://travis-ci.org/wikimedia/pywikibot-core/builds/45341607
--
You can configure recipients for build notifications in your .travis.yml file. See http://docs.travis-ci.com/user/notifications
jenkins-bot has submitted this change and it was merged.
Change subject: Use interwiki links for qqq messages
......................................................................
Use interwiki links for qqq messages
qqq messages used http external links which could
be replaced with interwiki links to WMF projects.
Change-Id: I7319053be018cb597801959d39cc571f67549294
---
M category.py
M category/qqq.json
M isbn.py
M isbn/qqq.json
M thirdparty.py
M thirdparty/qqq.json
6 files changed, 6 insertions(+), 6 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/category.py b/category.py
index 1277b68..503a0fe 100644
--- a/category.py
+++ b/category.py
@@ -31,7 +31,7 @@
'category-removing': u'Edit summary. Parameters:\n* %(oldcat)s - old category name',
'category-was-moved': u'{{doc-important|Do not translate "[[:Category:%(newcat)s|%(title)s]]"}}',
'category-section-title': u'Section title for keeping page history',
- 'category-listifying': u'Definition of [http://meta.wikimedia.org/wiki/Pywikipediabot/category.py#Syntax listify] - make a list of all of the articles that are in a category.\n\n*Variable "%(fromcat)s" = the category to make a list of in the listify option.\n*Variable "%(num)d" is probably a number.\n*You may use PLURAL tag like (<code><nowiki>{{PLURAL:%(num)d|1 entry|%(num)d entries}}</nowiki></code>)\nDo not translate the variables.',
+ 'category-listifying': u'Definition of [[mw:Manual:Pywikibot/category.py#Syntax listify] - make a list of all of the articles that are in a category.\n\n*Variable "%(fromcat)s" = the category to make a list of in the listify option.\n*Variable "%(num)d" is probably a number.\n*You may use PLURAL tag like (<code><nowiki>{{PLURAL:%(num)d|1 entry|%(num)d entries}}</nowiki></code>)\nDo not translate the variables.',
},
# Author: Csisc
'aeb': {
diff --git a/category/qqq.json b/category/qqq.json
index 3a723ae..88f1880 100644
--- a/category/qqq.json
+++ b/category/qqq.json
@@ -16,5 +16,5 @@
"category-removing": "Edit summary. Parameters:\n* %(oldcat)s - old category name",
"category-was-moved": "{{doc-important|Do not translate \"[[:Category:%(newcat)s|%(title)s]]\"}}",
"category-section-title": "Section title for keeping page history",
- "category-listifying": "Definition of [http://meta.wikimedia.org/wiki/Pywikipediabot/category.py#Syntax listify] - make a list of all of the articles that are in a category.\n\n*Variable \"%(fromcat)s\" = the category to make a list of in the listify option.\n*Variable \"%(num)d\" is probably a number.\n*You may use PLURAL tag like (<code><nowiki>{{PLURAL:%(num)d|1 entry|%(num)d entries}}</nowiki></code>)\nDo not translate the variables."
+ "category-listifying": "Definition of [[mw:Manual:Pywikibot/category.py#Syntax listify] - make a list of all of the articles that are in a category.\n\n*Variable \"%(fromcat)s\" = the category to make a list of in the listify option.\n*Variable \"%(num)d\" is probably a number.\n*You may use PLURAL tag like (<code><nowiki>{{PLURAL:%(num)d|1 entry|%(num)d entries}}</nowiki></code>)\nDo not translate the variables."
}
diff --git a/isbn.py b/isbn.py
index b56aa93..3aab3d8 100644
--- a/isbn.py
+++ b/isbn.py
@@ -7,7 +7,7 @@
# Author: Lloffiwr
# Author: Xqt
'qqq': {
- 'isbn-formatting': u'Edit summary when the bot fixes [http://en.wikipedia.org/wiki/International_Standard_Book_Number ISBN] number formatting.',
+ 'isbn-formatting': u'Edit summary when the bot fixes [[w:International Standard Book Number|ISBN]] number formatting.',
},
# Author: Csisc
'aeb': {
diff --git a/isbn/qqq.json b/isbn/qqq.json
index b0cef69..c7952d4 100644
--- a/isbn/qqq.json
+++ b/isbn/qqq.json
@@ -4,5 +4,5 @@
"Lloffiwr"
]
},
- "isbn-formatting": "Edit summary when the bot fixes [http://en.wikipedia.org/wiki/International_Standard_Book_Number ISBN] number formatting."
+ "isbn-formatting": "Edit summary when the bot fixes [[w:International Standard Book Number|ISBN]] number formatting."
}
diff --git a/thirdparty.py b/thirdparty.py
index 338fb28..f857e38 100644
--- a/thirdparty.py
+++ b/thirdparty.py
@@ -30,7 +30,7 @@
'qqq': {
'thirdparty-drtrigonbot-sum_disc-summary-head': u'Summary line first part.\n{{Identical|Bot}}',
'thirdparty-drtrigonbot-sum_disc-parse-nonhuman': u'Message part indicating the fact that no human editor is given for a page.',
- 'thirdparty-drtrigonbot-sum_disc-parse-start': u'Message header for bot report output. Is just a timestamp format and language independent.\n\nTimestamp format markers are as in the "strftime" C function. See "man 3 strftime" on any Linux/Unix system, or http://www.manpagez.com/man/3/strftime/',
+ 'thirdparty-drtrigonbot-sum_disc-parse-start': u'Message header for bot report output. Is just a timestamp format and language independent.\n\nTimestamp format markers are as in the "strftime" C function. See "man 3 strftime" on any Linux/Unix system, or [[w:strftime|strftime]]',
'thirdparty-drtrigonbot-sum_disc-parse': u'Message for reporting of new or changed discussion pages (sections within pages).',
'thirdparty-drtrigonbot-sum_disc-parse-warning': u'Message for reporting of unexpected behaviour or situations to the user.',
'thirdparty-drtrigonbot-sum_disc-notify-new': u'First part of message if discussion is new.',
diff --git a/thirdparty/qqq.json b/thirdparty/qqq.json
index 7830e22..a523bd6 100644
--- a/thirdparty/qqq.json
+++ b/thirdparty/qqq.json
@@ -10,7 +10,7 @@
},
"thirdparty-drtrigonbot-sum_disc-summary-head": "Summary line first part.\n{{Identical|Bot}}",
"thirdparty-drtrigonbot-sum_disc-parse-nonhuman": "Message part indicating the fact that no human editor is given for a page.",
- "thirdparty-drtrigonbot-sum_disc-parse-start": "Message header for bot report output. Is just a timestamp format and language independent.\n\nTimestamp format markers are as in the \"strftime\" C function. See \"man 3 strftime\" on any Linux/Unix system, or http://www.manpagez.com/man/3/strftime/",
+ "thirdparty-drtrigonbot-sum_disc-parse-start": "Message header for bot report output. Is just a timestamp format and language independent.\n\nTimestamp format markers are as in the \"strftime\" C function. See \"man 3 strftime\" on any Linux/Unix system, or [[w:strftime|strftime]]",
"thirdparty-drtrigonbot-sum_disc-parse": "Message for reporting of new or changed discussion pages (sections within pages).",
"thirdparty-drtrigonbot-sum_disc-parse-warning": "Message for reporting of unexpected behaviour or situations to the user.",
"thirdparty-drtrigonbot-sum_disc-notify-new": "First part of message if discussion is new.",
--
To view, visit https://gerrit.wikimedia.org/r/181862
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7319053be018cb597801959d39cc571f67549294
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/i18n
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: Allow pagegen filtering using namespace names
......................................................................
Allow pagegen filtering using namespace names
The ability to filter using namespace names was removed from the
isbn script in 31c07b2, but still exists in other scripts.
Namespace names are more user-friendly as users interact with
these names regularly in titles, while the namespace numbers are
less known, especially namespaces which are not frequently used.
NamespaceFilterPageGenerator behaviour when site is not provided
is changed from 'no validation of int namespaces' to 'validation
using the namespaces of the default site', which is at least
equivalent for the builtin namespaces.
Equality of Namespace(0) with None is no longer supported.
Exceptions raised during Site methods for namespace validation
are now defined behaviour.
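For illustration only, a hedged sketch of the new namespace handling
(hypothetical titles; not part of this change):
# Identifiers may be ints, numeric strings, namespace names or Namespace
# objects; unknown names raise KeyError, None/bool raise TypeError.
import pywikibot
from pywikibot import pagegenerators
from pywikibot.site import Namespace

site = pywikibot.Site('en', 'wikipedia')

# resolve mixed identifiers to Namespace objects
namespaces = Namespace.resolve(['Talk', '6', 0], site.namespaces)

gen = pagegenerators.PagesFromTitlesGenerator(
    ['Foo', 'Talk:Foo', 'File:Foo.jpg'], site)
# filter using namespace names; the site argument validates them
gen = pagegenerators.NamespaceFilterPageGenerator(gen, ('Talk', 'File'), site)
for page in gen:
    print(page.title())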
Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M pywikibot/site.py
M scripts/add_text.py
M scripts/commonscat.py
M scripts/interwiki.py
M scripts/isbn.py
M tests/namespace_tests.py
M tests/pagegenerators_tests.py
9 files changed, 374 insertions(+), 96 deletions(-)
Approvals:
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index 3366eaf..5225322 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -1401,8 +1401,14 @@
def set_namespace(self, namespaces):
"""Set a namespace filter on this query.
- @param namespaces: Either an int or a list of ints
-
+ @param namespaces: namespace identifiers to limit query results
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool, or more than one namespace
+ if the API module does not support multiple namespaces
"""
assert(self.limited_module) # some modules do not have a prefix
param = self.site._paraminfo.parameter(self.limited_module, 'namespace')
@@ -1414,15 +1420,13 @@
if isinstance(namespaces, basestring):
namespaces = namespaces.split('|')
- try:
- iter(namespaces)
- except TypeError:
- namespaces = [namespaces]
-
- namespaces = [str(namespace) for namespace in namespaces]
+ # Use Namespace id (int) here; Request will cast int to str
+ namespaces = [ns.id for ns in
+ pywikibot.site.Namespace.resolve(namespaces,
+ self.site.namespaces)]
if 'multi' not in param and len(namespaces) != 1:
- raise pywikibot.Error(u'{0} module does not support multiple '
- 'namespaces.'.format(self.limited_module))
+ raise TypeError(u'{0} module does not support multiple namespaces'
+ .format(self.limited_module))
self.request[self.prefix + "namespace"] = namespaces
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 2294b46..d863e18 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -29,6 +29,7 @@
import time
import pywikibot
+
from pywikibot import date, config, i18n
from pywikibot.tools import (
deprecated,
@@ -38,6 +39,7 @@
)
from pywikibot.comms import http
import pywikibot.data.wikidataquery as wdquery
+from pywikibot.site import Namespace
if sys.version_info[0] > 2:
basestring = (str, )
@@ -106,7 +108,10 @@
-namespaces Filter the page generator to only yield pages in the
-namespace specified namespaces. Separate multiple namespace
--ns numbers with commas. Example "-ns:0,2,4"
+-ns numbers or names with commas.
+ Examples:
+ -ns:0,2,4
+ -ns:Help,MediaWiki
If used with -newpages, -namespace/ns must be provided
before -newpages.
If used with -recentchanges, efficiency is improved if
@@ -258,7 +263,7 @@
@type site: L{pywikibot.site.BaseSite}
"""
self.gens = []
- self.namespaces = []
+ self._namespaces = []
self.step = None
self.limit = None
self.articlefilter_list = []
@@ -270,12 +275,41 @@
"""
Generator site.
- @return: Site given to constructor, otherwise the default Site.
+ The generator site should not be accessed until after the global
+ arguments have been handled, otherwise the default Site may be changed
+ by global arguments, which will cause this cached value to be stale.
+
+ @return: Site given to constructor, otherwise the default Site at the
+ time this property is first accessed.
@rtype: L{pywikibot.site.BaseSite}
"""
if not self._site:
self._site = pywikibot.Site()
return self._site
+
+ @property
+ def namespaces(self):
+ """
+ List of Namespace parameters.
+
+ Converts int or string namespaces to Namespace objects and
+ changes the storage to immutable once it has been accessed.
+
+ The resolving and validation of namespace command line arguments
+ is performed in this method, as it depends on the site property
+ which is lazy loaded to avoid being cached before the global
+ arguments are handled.
+
+ @return: namespaces selected using arguments
+ @rtype: list of Namespace
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
+ """
+ if isinstance(self._namespaces, list):
+ self._namespaces = frozenset(
+ Namespace.resolve(self._namespaces, self.site.namespaces))
+ return self._namespaces
def getCombinedGenerator(self, gen=None):
"""Return the combination of all accumulated generators.
@@ -296,7 +330,8 @@
else:
if self.namespaces:
self.gens[i] = NamespaceFilterPageGenerator(self.gens[i],
- self.namespaces)
+ self.namespaces,
+ self.site)
if self.limit:
self.gens[i] = itertools.islice(self.gens[i], self.limit)
if len(self.gens) == 0:
@@ -463,6 +498,11 @@
u'Please enter the local file name:')
gen = TextfilePageGenerator(textfilename, site=self.site)
elif arg.startswith('-namespace') or arg.startswith('-ns'):
+ if isinstance(self._namespaces, frozenset):
+ pywikibot.warning('Cannot handle arg %s as namespaces can not '
+ 'be altered after a generator is created.'
+ % arg)
+ return True
value = None
if arg.startswith('-ns:'):
value = arg[len('-ns:'):]
@@ -473,13 +513,7 @@
if not value:
value = pywikibot.input(
u'What namespace are you filtering on?')
- try:
- self.namespaces.extend(
- [int(ns) for ns in value.split(",")]
- )
- except ValueError:
- pywikibot.output(u'Invalid namespaces argument: %s' % value)
- return False
+ self._namespaces += value.split(",")
return True
elif arg.startswith('-step'):
if len(arg) == len('-step'):
@@ -1046,35 +1080,36 @@
"""
A generator yielding pages from another generator in given namespaces.
- The namespace list can contain both integers (namespace numbers) and
- strings/unicode strings (namespace names).
+ If a site is provided, the namespaces are validated using the namespaces
+ of that site, otherwise the namespaces are validated using the default
+ site.
NOTE: API-based generators that have a "namespaces" parameter perform
namespace filtering more efficiently than this generator.
- @param namespaces: list of namespace numbers to limit results
- @type namespaces: list of int
- @param site: Site for generator results, only needed if
- namespaces contains namespace names.
+ @param namespaces: list of namespace identifiers to limit results
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types.
+ @param site: Site for generator results; mandatory if
+ namespaces contains namespace names. Defaults to the default site.
@type site: L{pywikibot.site.BaseSite}
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool, or more than one namespace
+ if the API module does not support multiple namespaces
"""
- if isinstance(namespaces, (int, basestring)):
- namespaces = [namespaces]
- # convert namespace names to namespace numbers
- for i in range(len(namespaces)):
- ns = namespaces[i]
- if isinstance(ns, basestring):
- try:
- # namespace might be given as str representation of int
- index = int(ns)
- except ValueError:
- # FIXME: deprecate providing strings as namespaces
- if site is None:
- site = pywikibot.Site()
- index = site.getNamespaceIndex(ns)
- if index is None:
- raise ValueError(u'Unknown namespace: %s' % ns)
- namespaces[i] = index
+ # As site was only required if the namespaces contain strings, don't
+ # attempt to use the config selected site unless the initial attempt
+ # at resolving the namespaces fails.
+ try:
+ namespaces = Namespace.resolve(namespaces,
+ site.namespaces if site else
+ pywikibot.Site().namespaces)
+ except KeyError as e:
+ pywikibot.log('Failed resolving namespaces:')
+ pywikibot.exception(e)
+ raise
+
for page in generator:
if page.namespace() in namespaces:
yield page
diff --git a/pywikibot/site.py b/pywikibot/site.py
index c4cd53d..4851c65 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -323,8 +323,6 @@
return self.id == other.id
elif isinstance(other, basestring):
return other in self
- elif other is None:
- return self.id == 0
def __ne__(self, other):
"""Compare whether two namespace objects are not equal."""
@@ -396,9 +394,10 @@
@staticmethod
def lookup_name(name, namespaces=None):
- """Find the namespace for a name.
+ """Find the Namespace for a name.
@param name: Name of the namespace.
+ @type name: basestring
@param namespaces: namespaces to search
default: builtins only
@type namespaces: dict of Namespace
@@ -417,6 +416,61 @@
return namespace
return None
+
+ @staticmethod
+ def resolve(identifiers, namespaces=None):
+ """
+ Resolve namespace identifiers to obtain Namespace objects.
+
+ Identifiers may be any value for which int() produces a valid
+ namespace id, except bool, or any string which Namespace.lookup_name
+ successfully finds. A numerical string is resolved as an integer.
+
+ @param identifiers: namespace identifiers
+ @type identifiers: iterable of basestring or Namespace key,
+ or a single instance of those types
+ @param namespaces: namespaces to search (default: builtins only)
+ @type namespaces: dict of Namespace
+ @return: list of Namespace objects in the same order as the
+ identifiers
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
+ """
+ if not namespaces:
+ namespaces = Namespace.builtin_namespaces()
+
+ if isinstance(identifiers, (basestring, Namespace)):
+ identifiers = [identifiers]
+ else:
+ # convert non-iterators to single item list
+ try:
+ iter(identifiers)
+ except TypeError:
+ identifiers = [identifiers]
+
+ # lookup namespace names, and assume anything else is a key.
+ # int(None) raises TypeError; however, bool needs special handling.
+ result = [NotImplemented if isinstance(ns, bool) else
+ Namespace.lookup_name(ns, namespaces)
+ if isinstance(ns, basestring)
+ and not ns.lstrip('-').isdigit() else
+ namespaces[int(ns)] if int(ns) in namespaces
+ else None
+ for ns in identifiers]
+
+ if NotImplemented in result:
+ raise TypeError('identifiers contains inappropriate types: %r'
+ % identifiers)
+
+ # Namespace.lookup_name returns None if the name is not recognised
+ if None in result:
+ raise KeyError(u'Namespace identifier(s) not recognised: %s'
+ % u','.join([str(identifier) for identifier, ns
+ in zip(identifiers, result)
+ if ns is None]))
+
+ return result
class BaseSite(ComparableMixin):
@@ -1512,13 +1566,19 @@
@type type_arg: str
@param namespaces: if not None, limit the query to namespaces in this
list
- @type namespaces: int, or list of ints
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param step: if not None, limit each API call to this many items
@type step: int
@param total: if not None, limit the generator to yielding this many
items in total
@type total: int
-
+ @return: iterable with parameters set
+ @rtype: QueryGenerator
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
if type_arg is not None:
gen = gen_class(type_arg, site=self, **args)
@@ -2661,11 +2721,16 @@
both (no filtering).
@param namespaces: If present, only return links from the namespaces
in this list.
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param step: Limit on number of pages to retrieve per API query.
@param total: Maximum number of pages to retrieve in total.
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
bltitle = page.title(withSection=False).encode(self.encoding())
blargs = {"gbltitle": bltitle}
@@ -2714,9 +2779,14 @@
None, return both (no filtering).
@param namespaces: If present, only return links from the namespaces
in this list.
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
eiargs = {"geititle":
page.title(withSection=False).encode(self.encoding())}
@@ -2731,7 +2801,18 @@
def pagereferences(self, page, followRedirects=False, filterRedirects=None,
withTemplateInclusion=True, onlyTemplateInclusion=False,
namespaces=None, step=None, total=None, content=False):
- """Convenience method combining pagebacklinks and page_embeddedin."""
+ """
+ Convenience method combining pagebacklinks and page_embeddedin.
+
+ @param namespaces: If present, only return links from the namespaces
+ in this list.
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
+ """
if onlyTemplateInclusion:
return self.page_embeddedin(page, namespaces=namespaces,
filterRedirects=filterRedirects,
@@ -2756,12 +2837,16 @@
"""Iterate internal wikilinks contained (or transcluded) on page.
@param namespaces: Only iterate pages in these namespaces (default: all)
- @type namespaces: list of ints
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param follow_redirects: if True, yields the target of any redirects,
rather than the redirect page
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
plargs = {}
if hasattr(page, "_pageid"):
@@ -2814,9 +2899,16 @@
content=False):
"""Iterate templates transcluded (not just linked) on the page.
+ @param namespaces: Only iterate pages in these namespaces
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param content: if True, load the current content of each iterated page
(default False)
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
tltitle = page.title(withSection=False).encode(self.encoding())
tlgen = self._generator(api.PageGenerator, type_arg="templates",
@@ -2836,7 +2928,9 @@
subcategories, use namespaces=[6] to yield image files, etc. Note,
however, that the iterated values are always Page objects, even
if in the Category or Image namespace.
- @type namespaces: list of ints
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param sortby: determines the order in which results are generated,
valid values are "sortkey" (default, results ordered by category
sort key) or "timestamp" (results ordered by time page was
@@ -2858,7 +2952,9 @@
@type endsort: str
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
if category.namespace() != 14:
raise Error(
@@ -3115,7 +3211,7 @@
@param start: Start at this title (page need not exist).
@param prefix: Only yield pages starting with this string.
@param namespace: Iterate pages from this (single) namespace
- (default: 0)
+ @type namespace: int or Namespace.
@param filterredir: if True, only yield redirects; if False (and not
None), only yield non-redirects (default: yield both)
@param filterlanglinks: if True, only yield pages with language links;
@@ -3135,11 +3231,10 @@
@param includeredirects: DEPRECATED, use filterredir instead
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: the namespace identifier was not resolved
+ @raises TypeError: the namespace identifier has an inappropriate
+ type such as bool, or an iterable with more than one namespace
"""
- if not isinstance(namespace, (int, Namespace)):
- raise Error("allpages: only one namespace permitted.")
-
if includeredirects is not None:
if includeredirects:
if includeredirects == "only":
@@ -3196,18 +3291,18 @@
@param start: Start at this title (page need not exist).
@param prefix: Only yield pages starting with this string.
@param namespace: Iterate pages from this (single) namespace
- (default: 0)
+ @type namespace: int or Namespace
@param unique: If True, only iterate each link title once (default:
iterate once for each linking page)
@param fromids: if True, include the pageid of the page containing
each link (default: False) as the '_fromid' attribute of the Page;
cannot be combined with unique
-
+ @raises KeyError: the namespace identifier was not resolved
+ @raises TypeError: the namespace identifier has an inappropriate
+ type such as bool, or an iterable with more than one namespace
"""
if unique and fromids:
raise Error("alllinks: unique and fromids cannot both be True.")
- if not isinstance(namespace, (int, Namespace)):
- raise Error("alllinks: only one namespace permitted.")
algen = self._generator(api.ListGenerator, type_arg="alllinks",
alnamespace=int(namespace), alfrom=start,
step=step, total=total)
@@ -3409,11 +3504,17 @@
@param image: the image to search for (FilePage need not exist on
the wiki)
@type image: FilePage
+ @param namespaces: If present, only iterate pages in these namespaces
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param filterredir: if True, only yield redirects; if False (and not
None), only yield non-redirects (default: yield both)
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
iuargs = dict(giutitle=image.title(withSection=False))
if filterredir is not None:
@@ -3435,10 +3536,13 @@
@param user: only iterate entries that match this user name
@param page: only iterate entries affecting this page
@param namespace: namespace to retrieve logevents from
+ @type namespace: int or Namespace
@param start: only iterate entries from and after this Timestamp
@param end: only iterate entries up to and through this Timestamp
@param reverse: if True, iterate oldest entries first (default: newest)
-
+ @raises KeyError: the namespace identifier was not resolved
+ @raises TypeError: the namespace identifier has an inappropriate
+ type such as bool, or an iterable with more than one namespace
"""
if start and end:
self.assert_valid_iter_params('logevents', start, end, reverse)
@@ -3474,6 +3578,10 @@
@type end: pywikibot.Timestamp
@param reverse: if True, start with oldest changes (default: newest)
@type reverse: bool
+ @param namespaces: only iterate pages in these namespaces
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param pagelist: iterate changes to pages in this list only
@param pagelist: list of Pages
@param changetype: only iterate changes of this type ("edit" for
@@ -3502,7 +3610,9 @@
@type user: basestring|list
@param excludeuser: if not None, exclude edits by this user or users
@type excludeuser: basestring|list
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
if start and end:
self.assert_valid_iter_params('recentchanges', start, end, reverse)
@@ -3565,14 +3675,17 @@
@type searchstring: unicode
@param where: Where to search; value must be "text" or "titles" (many
wikis do not support title search)
- @param namespaces: search only in these namespaces (defaults to 0)
- @type namespaces: list of ints, or an empty list to signal all
- namespaces
+ @param namespaces: search only in these namespaces (defaults to all)
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param getredirects: if True, include redirects in results. Since
version MediaWiki 1.23 it will always return redirects.
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
if not searchstring:
raise Error("search: searchstring cannot be empty")
@@ -3604,11 +3717,17 @@
@param start: Iterate contributions starting at this Timestamp
@param end: Iterate contributions ending at this Timestamp
@param reverse: Iterate oldest contributions first (default: newest)
+ @param namespaces: only iterate pages in these namespaces
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param showMinor: if True, iterate only minor edits; if False and
not None, iterate only non-minor edits (default: iterate both)
@param top_only: if True, iterate only edits which are the latest
revision
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
if not (user or userprefix):
raise Error(
@@ -3647,13 +3766,19 @@
@param start: Iterate revisions starting at this Timestamp
@param end: Iterate revisions ending at this Timestamp
@param reverse: Iterate oldest revisions first (default: newest)
+ @param namespaces: only iterate pages in these namespaces
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param showMinor: if True, only list minor edits; if False (and not
None), only list non-minor edits
@param showBot: if True, only list bot edits; if False (and not
None), only list non-bot edits
@param showAnon: if True, only list anon edits; if False (and not
None), only list non-anon edits
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
if start and end:
self.assert_valid_iter_params('watchlist_revs', start, end, reverse)
@@ -3784,11 +3909,16 @@
@param total: the maximum number of pages to iterate (default: 1)
@param namespaces: only iterate pages in these namespaces.
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
@param redirects: if True, include only redirect pages in results
(default: include only non-redirects)
@param content: if True, load the current content of each iterated page
(default False)
-
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
rngen = self._generator(api.PageGenerator, type_arg="random",
namespaces=namespaces, step=step, total=total,
@@ -4734,6 +4864,13 @@
timestamp (unicode), length (int), an empty unicode string, username
or IP address (str), comment (unicode).
+ @param namespaces: only iterate pages in these namespaces
+ @type namespaces: iterable of basestring or Namespace key,
+ or a single instance of those types. May be a '|' separated
+ list of namespace identifiers.
+ @raises KeyError: a namespace identifier was not resolved
+ @raises TypeError: a namespace identifier has an inappropriate
+ type such as NoneType or bool
"""
# TODO: update docstring
diff --git a/scripts/add_text.py b/scripts/add_text.py
index 20a0d56..69ab777 100644
--- a/scripts/add_text.py
+++ b/scripts/add_text.py
@@ -358,7 +358,7 @@
if index % 2 == 1 and index > 0:
namespaces += [index]
generator = pagegenerators.NamespaceFilterPageGenerator(
- generator, namespaces)
+ generator, namespaces, site)
for page in generator:
(text, newtext, always) = add_text(page, addText, summary, regexSkip,
regexSkipUrl, always, up, True,
diff --git a/scripts/commonscat.py b/scripts/commonscat.py
index 3dee15c..13031a5 100755
--- a/scripts/commonscat.py
+++ b/scripts/commonscat.py
@@ -543,14 +543,16 @@
genFactory.handleArg(arg)
if checkcurrent:
+ site = pywikibot.Site()
primaryCommonscat, commonscatAlternatives = \
CommonscatBot.getCommonscatTemplate(
- pywikibot.Site().code)
+ site.code)
generator = pagegenerators.NamespaceFilterPageGenerator(
pagegenerators.ReferringPageGenerator(
- pywikibot.Page(pywikibot.Site(),
- u'Template:' + primaryCommonscat),
- onlyTemplateInclusion=True), ns)
+ pywikibot.Page(site, u'Template:' + primaryCommonscat),
+ onlyTemplateInclusion=True),
+ ns,
+ site)
if not generator:
generator = genFactory.getCombinedGenerator()
diff --git a/scripts/interwiki.py b/scripts/interwiki.py
index 396a085..342cd5e 100755
--- a/scripts/interwiki.py
+++ b/scripts/interwiki.py
@@ -2601,7 +2601,8 @@
hintlessPageGen = genFactory.getCombinedGenerator()
if hintlessPageGen:
if len(namespaces) > 0:
- hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator(hintlessPageGen, namespaces)
+ hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator(
+ hintlessPageGen, namespaces, site)
# we'll use iter() to make a next() function available.
bot.setPageGenerator(iter(hintlessPageGen), number=number, until=until)
elif warnfile:
diff --git a/scripts/isbn.py b/scripts/isbn.py
index 50663ac..fa7f472 100755
--- a/scripts/isbn.py
+++ b/scripts/isbn.py
@@ -11,12 +11,6 @@
¶ms;
--namespace:n Number or name of namespace to process. The parameter can be
- used multiple times. It works in combination with all other
- parameters, except for the -start parameter. If you e.g.
- want to iterate over all categories starting at M, use
- -start:Category:M.
-
Furthermore, the following command line parameters are supported:
-to13 Converts all ISBN-10 codes to ISBN-13.
diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py
index c33a6e3..703e4c2 100644
--- a/tests/namespace_tests.py
+++ b/tests/namespace_tests.py
@@ -143,7 +143,7 @@
self.assertEqual(a, 0)
self.assertEqual(a, '')
- self.assertEqual(a, None)
+ self.assertNotEqual(a, None)
x = Namespace(id=6, custom_name=u'dummy', canonical_name=u'File',
aliases=[u'Image', u'Immagine'])
@@ -173,6 +173,9 @@
self.assertLess(a, x)
self.assertGreater(x, a)
self.assertGreater(z, x)
+
+ self.assertIn(6, [x, y, z])
+ self.assertNotIn(8, [x, y, z])
def testNamespaceNormalizeName(self):
self.assertEqual(Namespace.normalize_name(u'File'), u'File')
@@ -211,6 +214,63 @@
b = eval(repr(a))
self.assertEqual(a, b)
+ def test_resolve(self):
+ namespaces = Namespace.builtin_namespaces(use_image_name=False)
+ main_ns = namespaces[0]
+ file_ns = namespaces[6]
+ special_ns = namespaces[-1]
+
+ self.assertEqual(Namespace.resolve([6]), [file_ns])
+ self.assertEqual(Namespace.resolve(['File']), [file_ns])
+ self.assertEqual(Namespace.resolve(['6']), [file_ns])
+ self.assertEqual(Namespace.resolve([file_ns]), [file_ns])
+
+ self.assertEqual(Namespace.resolve([file_ns, special_ns]),
+ [file_ns, special_ns])
+ self.assertEqual(Namespace.resolve([file_ns, file_ns]),
+ [file_ns, file_ns])
+
+ self.assertEqual(Namespace.resolve(6), [file_ns])
+ self.assertEqual(Namespace.resolve('File'), [file_ns])
+ self.assertEqual(Namespace.resolve('6'), [file_ns])
+ self.assertEqual(Namespace.resolve(file_ns), [file_ns])
+
+ self.assertEqual(Namespace.resolve(0), [main_ns])
+ self.assertEqual(Namespace.resolve('0'), [main_ns])
+
+ self.assertEqual(Namespace.resolve(-1), [special_ns])
+ self.assertEqual(Namespace.resolve('-1'), [special_ns])
+
+ self.assertEqual(Namespace.resolve('File:'), [file_ns])
+ self.assertEqual(Namespace.resolve(':File'), [file_ns])
+ self.assertEqual(Namespace.resolve(':File:'), [file_ns])
+
+ self.assertEqual(Namespace.resolve('Image:'), [file_ns])
+ self.assertEqual(Namespace.resolve(':Image'), [file_ns])
+ self.assertEqual(Namespace.resolve(':Image:'), [file_ns])
+
+ self.assertRaises(TypeError, Namespace.resolve, [True])
+ self.assertRaises(TypeError, Namespace.resolve, [False])
+ self.assertRaises(TypeError, Namespace.resolve, [None])
+ self.assertRaises(TypeError, Namespace.resolve, True)
+ self.assertRaises(TypeError, Namespace.resolve, False)
+ self.assertRaises(TypeError, Namespace.resolve, None)
+
+ self.assertRaises(KeyError, Namespace.resolve, -10)
+ self.assertRaises(KeyError, Namespace.resolve, '-10')
+ self.assertRaises(KeyError, Namespace.resolve, 'foo')
+ self.assertRaises(KeyError, Namespace.resolve, ['foo'])
+
+ self.assertRaisesRegex(KeyError,
+ r'Namespace identifier\(s\) not recognised: -10',
+ Namespace.resolve, [-10, 0])
+ self.assertRaisesRegex(KeyError,
+ r'Namespace identifier\(s\) not recognised: foo',
+ Namespace.resolve, [0, 'foo'])
+ self.assertRaisesRegex(KeyError,
+ r'Namespace identifier\(s\) not recognised: -10,-11',
+ Namespace.resolve, [-10, 0, -11])
+
if __name__ == '__main__':
try:
diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index 32f3fda..bb7a965 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -79,17 +79,23 @@
def test_NamespaceFilterPageGenerator(self):
self.assertFunction("NamespaceFilterPageGenerator")
- gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, 0)
+ site = self.site
+ gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, 0, site)
self.assertEqual(len(tuple(gen)), 3)
- gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, 1)
+ gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, 1, site)
self.assertEqual(len(tuple(gen)), 4)
- gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, 10)
+ gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, 10, site)
self.assertEqual(len(tuple(gen)), 6)
- gen = pagegenerators.PagesFromTitlesGenerator(self.titles, self.site)
- gen = pagegenerators.NamespaceFilterPageGenerator(gen, (1, 10))
+ gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen, (1, 10), site)
+ self.assertEqual(len(tuple(gen)), 10)
+ gen = pagegenerators.PagesFromTitlesGenerator(self.titles, site)
+ gen = pagegenerators.NamespaceFilterPageGenerator(gen,
+ ('Talk', 'Template'),
+ site)
self.assertEqual(len(tuple(gen)), 10)
def test_RegexFilterPageGenerator(self):
@@ -345,6 +351,45 @@
self.assertTrue(all(isinstance(item, pywikibot.ItemPage) for item in gen))
+class DryFactoryGeneratorTest(TestCase):
+
+ """Dry tests for pagegenerators.GeneratorFactory."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ dry = True
+
+ def test_one_namespace(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:2')
+ self.assertEqual(gf.namespaces, set([2]))
+
+ def test_two_namespaces(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:2')
+ gf.handleArg('-ns:Talk')
+ self.assertEqual(gf.namespaces, set([2, 1]))
+
+ def test_two_named_namespaces(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:Talk,File')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+
+ def test_two_numeric_namespaces(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:1,6')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+
+ def test_immutable_namespaces_on_read(self):
+ gf = pagegenerators.GeneratorFactory(site=self.get_site())
+ gf.handleArg('-ns:1,6')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+ self.assertIsInstance(gf.namespaces, frozenset)
+ gf.handleArg('-ns:0')
+ self.assertEqual(gf.namespaces, set([1, 6]))
+
+
class TestFactoryGenerator(DefaultSiteTestCase):
"""Test pagegenerators.GeneratorFactory."""
--
To view, visit https://gerrit.wikimedia.org/r/179627
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ib6caa11577546e14a69bbd898860843d69d4efb0
Gerrit-PatchSet: 8
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>