jenkins-bot has submitted this change and it was merged.
Change subject: Use cmtype to restrict page type for Cat.articles
......................................................................
Use cmtype to restrict page type for Cat.articles
- Page.articles
  - change sortby default from "" to None
- APISite.categorymembers
  - add param member_type
  - change sortby default from "" to None
  - remove locals()
- add misc tests for namespace, including Namespace,
  ListGenerator and APISite._generator
Bug: T67981
Change-Id: I46a53497a86c3ab3761f9166b6c268c7a3c5f6be
---
M pywikibot/page.py
M pywikibot/site.py
M tests/api_tests.py
M tests/namespace_tests.py
M tests/site_tests.py
M tests/utils.py
M tox.ini
7 files changed, 188 insertions(+), 44 deletions(-)
Approvals:
Mpaa: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index c63af01..bb0826b 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -2196,7 +2196,7 @@
if not hasattr(self, "_subcats"):
self._subcats = []
for member in self.site.categorymembers(
- self, namespaces=[14], step=step,
+ self, member_type='subcat', step=step,
total=total, content=content):
subcat = Category(member)
self._subcats.append(subcat)
@@ -2231,7 +2231,7 @@
@deprecate_arg("startFrom", "startsort")
def articles(self, recurse=False, step=None, total=None,
- content=False, namespaces=None, sortby="",
+ content=False, namespaces=None, sortby=None,
starttime=None, endtime=None, startsort=None,
endsort=None):
"""
@@ -2271,9 +2271,6 @@
@type endsort: str
"""
- if namespaces is None:
- namespaces = [x for x in self.site.namespaces()
- if x >= 0 and x != 14]
for member in self.site.categorymembers(self,
namespaces=namespaces,
step=step, total=total,
@@ -2281,7 +2278,8 @@
starttime=starttime,
endtime=endtime,
startsort=startsort,
- endsort=endsort
+ endsort=endsort,
+ member_type=['page', 'file']
):
yield member
if total is not None:
diff --git a/pywikibot/site.py b/pywikibot/site.py
index 2a347bb..a7551c1 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -32,9 +32,8 @@
from pywikibot.tools import (
itergroup, UnicodeMixin, ComparableMixin, SelfCallDict, SelfCallString,
deprecated, deprecate_arg, deprecated_args, remove_last_args,
- redirect_func, manage_wrapping,
+ redirect_func, manage_wrapping, MediaWikiVersion,
)
-from pywikibot.tools import MediaWikiVersion
from pywikibot.throttle import Throttle
from pywikibot.data import api
from pywikibot.exceptions import (
@@ -2952,18 +2951,16 @@
step=step, total=total, g_content=content)
return tlgen
- def categorymembers(self, category, namespaces=None, sortby="",
+ def categorymembers(self, category, namespaces=None, sortby=None,
reverse=False, starttime=None, endtime=None,
startsort=None, endsort=None, step=None, total=None,
- content=False):
+ content=False, member_type=None):
"""Iterate members of specified category.
@param category: The Category to iterate.
@param namespaces: If present, only return category members from
- these namespaces. For example, use namespaces=[14] to yield
- subcategories, use namespaces=[6] to yield image files, etc. Note,
- however, that the iterated values are always Page objects, even
- if in the Category or Image namespace.
+ these namespaces. To yield subcategories or files, use
+ parameter member_type instead.
@type namespaces: iterable of basestring or Namespace key,
or a single instance of those types. May be a '|' separated
list of namespace identifiers.
@@ -2988,6 +2985,12 @@
@type endsort: str
@param content: if True, load the current content of each iterated page
(default False)
+ @type content: bool
+ @param member_type: member type; if member_type includes 'page' and is
+ used in conjunction with sortby="timestamp", the API may limit
+ results to only pages in the first 50 namespaces.
+ @type member_type: str or iterable of str; values: page, subcat, file
+
@raises KeyError: a namespace identifier was not resolved
@raises TypeError: a namespace identifier has an inappropriate
type such as NoneType or bool
@@ -3004,14 +3007,61 @@
cmargs["gcmsort"] = sortby
elif sortby:
raise ValueError(
- "categorymembers: invalid sortby value '%(sortby)s'"
- % locals())
+ "categorymembers: invalid sortby value '%s'"
+ % sortby)
if starttime and endtime and starttime > endtime:
raise ValueError(
"categorymembers: starttime must be before endtime")
if startsort and endsort and startsort > endsort:
raise ValueError(
"categorymembers: startsort must be less than endsort")
+
+ if isinstance(member_type, basestring):
+ member_type = set([member_type])
+
+ if (member_type and
+ (sortby == 'timestamp' or
+ MediaWikiVersion(self.version()) < MediaWikiVersion("1.12"))):
+ # Retrofit cmtype/member_type, available on MW API 1.12+,
+ # to use namespaces available on earlier versions.
+
+ # Convert namespaces to a known type
+ namespaces = set(Namespace.resolve(namespaces or [],
+ self.namespaces()))
+
+ if 'page' in member_type:
+ excluded_namespaces = set()
+ if 'file' not in member_type:
+ excluded_namespaces.add(6)
+ if 'subcat' not in member_type:
+ excluded_namespaces.add(14)
+
+ if namespaces:
+ if excluded_namespaces.intersect(namespaces):
+ raise ValueError(
+ 'incompatible namespaces %r and member_type %r'
+ % (namespaces, member_type))
+ # All excluded namespaces are not present in `namespaces`.
+ else:
+ # If the number of namespaces is greater than permitted by
+ # the API, it will issue a warning and use the namespaces
+ # up until the limit, which will usually be sufficient.
+ # TODO: QueryGenerator should detect when the number of
+ # namespaces requested is higher than available, and split
+ # the request into several batches.
+ excluded_namespaces.add([-1, -2])
+ namespaces = set(self.namespaces()) - excluded_namespaces
+ else:
+ if 'file' in member_type:
+ namespaces.add(6)
+ if 'subcat' in member_type:
+ namespaces.add(14)
+
+ member_type = None
+
+ if member_type:
+ cmargs['gcmtype'] = member_type
+
if reverse:
cmargs["gcmdir"] = "desc"
# API wants start/end params in opposite order if using descending
diff --git a/tests/api_tests.py b/tests/api_tests.py
index fae6537..1611e93 100644
--- a/tests/api_tests.py
+++ b/tests/api_tests.py
@@ -321,6 +321,7 @@
"Wikipedia:Disambiguation")
def setUp(self):
+ """Set up test case."""
super(TestDryPageGenerator, self).setUp()
mysite = self.get_site()
self.gen = api.PageGenerator(site=mysite,
@@ -352,8 +353,8 @@
# On a dry site, the namespace objects only have canonical names.
# Add custom_name for this site namespace, to match the live site.
- if 'Wikipedia' not in self.site._namespaces:
- self.site._namespaces[4].custom_name = 'Wikipedia'
+ if 'Wikipedia' not in self.site.namespaces:
+ self.site.namespaces[4].custom_name = 'Wikipedia'
def test_results(self):
"""Test that PageGenerator yields pages with expected attributes."""
@@ -398,6 +399,12 @@
self.gen.set_maximum_items(-1)
self.assertPagelistTitles(self.gen, self.titles)
+ def test_namespace(self):
+ """Test PageGenerator set_namespace."""
+ self.assertRaises(AssertionError, self.gen.set_namespace, 0)
+ self.assertRaises(AssertionError, self.gen.set_namespace, 1)
+ self.assertRaises(AssertionError, self.gen.set_namespace, None)
+
class TestPropertyGenerator(TestCase):
@@ -407,6 +414,7 @@
code = 'en'
def test_info(self):
+ """Test PropertyGenerator with prop 'info'."""
mainpage = self.get_mainpage()
links = list(self.site.pagelinks(mainpage, total=10))
titles = [l.title(withSection=False)
@@ -424,6 +432,7 @@
self.assertEqual(len(links), count)
def test_one_continuation(self):
+ """Test PropertyGenerator with prop 'revisions'."""
mainpage = self.get_mainpage()
links = list(self.site.pagelinks(mainpage, total=10))
titles = [l.title(withSection=False)
@@ -443,6 +452,7 @@
self.assertEqual(len(links), count)
def test_two_continuations(self):
+ """Test PropertyGenerator with prop 'revisions' and 'coordinates'."""
mainpage = self.get_mainpage()
links = list(self.site.pagelinks(mainpage, total=10))
titles = [l.title(withSection=False)
@@ -463,6 +473,7 @@
@allowed_failure
def test_many_continuations_limited(self):
+ """Test PropertyGenerator with many limited props."""
mainpage = self.get_mainpage()
links = list(self.site.pagelinks(mainpage, total=30))
titles = [l.title(withSection=False)
@@ -487,6 +498,7 @@
@allowed_failure
def test_two_continuations_limited(self):
+ """Test PropertyGenerator with many limited props and continuations."""
# FIXME: test fails
mainpage = self.get_mainpage()
links = list(self.site.pagelinks(mainpage, total=30))
@@ -527,6 +539,36 @@
self.assertEqual(len(links), count)
+class TestDryListGenerator(TestCase):
+
+ """Test ListGenerator."""
+
+ family = 'wikipedia'
+ code = 'en'
+
+ dry = True
+
+ def setUp(self):
+ """Set up test case."""
+ super(TestDryListGenerator, self).setUp()
+ mysite = self.get_site()
+ mysite._paraminfo['allpages'] = {
+ 'prefix': 'ap',
+ 'limit': {'max': 10},
+ 'namespace': {'multi': True}
+ }
+ mysite._paraminfo.query_modules_with_limits = set(['allpages'])
+ self.gen = api.ListGenerator(listaction="allpages", site=mysite)
+
+ def test_namespace_none(self):
+ """Test ListGenerator set_namespace with None."""
+ self.assertRaises(TypeError, self.gen.set_namespace, None)
+
+ def test_namespace_zero(self):
+ """Test ListGenerator set_namespace with 0."""
+ self.gen.set_namespace(0)
+
+
class TestCachedRequest(DefaultSiteTestCase):
"""Test API Request caching.
diff --git a/tests/namespace_tests.py b/tests/namespace_tests.py
index 703e4c2..81000a3 100644
--- a/tests/namespace_tests.py
+++ b/tests/namespace_tests.py
@@ -178,6 +178,7 @@
self.assertNotIn(8, [x, y, z])
def testNamespaceNormalizeName(self):
+ """Test Namespace.normalize_name."""
self.assertEqual(Namespace.normalize_name(u'File'), u'File')
self.assertEqual(Namespace.normalize_name(u':File'), u'File')
self.assertEqual(Namespace.normalize_name(u'File:'), u'File')
@@ -193,6 +194,7 @@
self.assertEqual(Namespace.normalize_name(u'::File::'), False)
def test_repr(self):
+ """Test Namespace.__repr__."""
a = Namespace(id=0, canonical_name=u'Foo')
s = repr(a)
r = "Namespace(id=0, custom_name=%r, canonical_name=%r, aliases=[])" \
@@ -215,6 +217,7 @@
self.assertEqual(a, b)
def test_resolve(self):
+ """Test Namespace.resolve."""
namespaces = Namespace.builtin_namespaces(use_image_name=False)
main_ns = namespaces[0]
file_ns = namespaces[6]
@@ -272,6 +275,38 @@
Namespace.resolve, [-10, 0, -11])
+class TestNamespaceCollections(TestCase):
+
+ """Test how Namespace interact when in collections."""
+
+ net = False
+
+ def test_set(self):
+ """Test converting sequence of Namespace to a set."""
+ namespaces = Namespace.builtin_namespaces(use_image_name=False)
+
+ self.assertTrue(all(isinstance(x, int) for x in namespaces))
+ self.assertTrue(all(isinstance(x, int) for x in namespaces.keys()))
+ self.assertTrue(all(isinstance(x, Namespace)
+ for x in namespaces.values()))
+
+ namespaces_set = set(namespaces)
+
+ self.assertEqual(len(namespaces), len(namespaces_set))
+ self.assertTrue(all(isinstance(x, int) for x in namespaces_set))
+
+ def test_set_minus(self):
+ """Test performing set minus operation on set of Namespace objects."""
+ namespaces = Namespace.builtin_namespaces(use_image_name=False)
+
+ excluded_namespaces = set([-1, -2])
+
+ positive_namespaces = set(namespaces) - excluded_namespaces
+
+ self.assertEqual(len(namespaces), len(positive_namespaces) +
+ len(excluded_namespaces))
+
+
if __name__ == '__main__':
try:
unittest.main()
diff --git a/tests/site_tests.py b/tests/site_tests.py
index d8daebf..4647074 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -298,6 +298,25 @@
if a:
self.assertEqual(a[0], mainpage)
+
+class TestSiteGenerators(DefaultSiteTestCase):
+
+ """Test cases for Site methods."""
+
+ cached = True
+
+ def test_generator_namespace(self):
+ """Test site._generator with namespaces."""
+ site = self.get_site()
+ gen = site._generator(pywikibot.data.api.PageGenerator,
+ type_arg='backlinks',
+ namespaces=None)
+ self.assertTrue('gblnamespace' not in gen.request)
+ gen = site._generator(pywikibot.data.api.PageGenerator,
+ type_arg='backlinks',
+ namespaces=1)
+ self.assertEqual(gen.request['gblnamespace'], [1])
+
def testLinkMethods(self):
"""Test site methods for getting links to and from a page."""
mysite = self.get_site()
diff --git a/tests/utils.py b/tests/utils.py
index 6698261..e9ff62b 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -89,35 +89,23 @@
"""Dummy class to use instead of L{pywikibot.data.api.ParamInfo}."""
- @property
- def modules(self):
- """Empty set."""
- return set()
+ def __init__(self, *args, **kwargs):
+ """Constructor."""
+ super(DryParamInfo, self).__init__(*args, **kwargs)
+ self.modules = set()
+ self.action_modules = set()
+ self.query_modules = set()
+ self.query_modules_with_limits = set()
+ self.prefixes = set()
- @property
- def action_modules(self):
- """Empty set."""
- return set()
-
- @property
- def query_modules(self):
- """Empty set."""
- return set()
-
- @property
- def query_modules_with_limits(self):
- """Empty set."""
- return set()
-
- @property
- def prefixes(self):
- """Empty set."""
- return set()
+ def fetch(self, modules, _init=False):
+ """Prevented method."""
+ raise Exception(u'DryParamInfo.fetch(%r, %r) prevented'
+ % (modules, _init))
def parameter(self, module, param_name):
- """Prevented method."""
- raise Exception(u'DryParamInfo.parameter(%r, %r) prevented'
- % (module, param_name))
+ """Load dry data."""
+ return self[module][param_name]
class DummySiteinfo():
@@ -125,15 +113,19 @@
"""Dummy class to use instead of L{pywikibot.site.Siteinfo}."""
def __init__(self, cache):
+ """Constructor."""
self._cache = dict((key, (item, False)) for key, item in cache.items())
def __getitem__(self, key):
+ """Get item."""
return self.get(key, False)
def __setitem__(self, key, value):
+ """Set item."""
self._cache[key] = (value, False)
def get(self, key, get_default=True, cache=True, expiry=False):
+ """Return dry data."""
# Default values are always expired, so only expiry=False doesn't force
# a reload
force = expiry is not False
@@ -152,12 +144,15 @@
raise KeyError(key)
def __contains__(self, key):
+ """Return False."""
return False
def is_recognised(self, key):
+ """Return None."""
return None
def get_requested_time(self, key):
+ """Return False."""
return False
@@ -166,6 +161,7 @@
"""Dummy class to use instead of L{pywikibot.data.api.Request}."""
def __init__(self, *args, **kwargs):
+ """Constructor."""
_original_Request.__init__(self, *args, **kwargs)
def _expired(self, dt):
@@ -177,6 +173,7 @@
return
def submit(self):
+ """Prevented method."""
raise Exception(u'DryRequest rejecting request: %r'
% self._params)
@@ -204,6 +201,7 @@
@property
def userinfo(self):
+ """Return dry data."""
return self._userinfo
def version(self):
@@ -213,6 +211,7 @@
return '1.24'
def case(self):
+ """Return case-sensitive if wiktionary."""
if self.family.name == 'wiktionary':
return 'case-sensitive'
else:
diff --git a/tox.ini b/tox.ini
index 19fe0af..93d1c4e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -94,6 +94,7 @@
scripts/watchlist.py \
setup.py \
tests/__init__.py \
+ tests/utils.py \
tests/archivebot_tests.py \
tests/category_tests.py \
tests/date_tests.py \
--
To view, visit
https://gerrit.wikimedia.org/r/179071
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I46a53497a86c3ab3761f9166b6c268c7a3c5f6be
Gerrit-PatchSet: 10
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Mpaa <mpaa.wiki(a)gmail.com>
Gerrit-Reviewer: Purodha <bli00(a)hostsharing.net>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>