jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/375719 )
Change subject: QueryGenerator: Allow filtering namespaces after API fetch ......................................................................
QueryGenerator: Allow filtering namespaces after API fetch
- Provide a way to bypass the TypeError of QueryGenerator.set_namespace when multiple namespaces are given but the module does not support multiple namespaces. - Modify QueryGenerator.__iter__ slightly to allow filtering the results after they have been fetched from the API. - Revert 5cf7fb4275847ab39cafd0e974a3c0cba9c70213. It is not necessary anymore. Both `logevents` and `logpages` now support multiple namespaces. - Add a test for the new functionality of LogEntryListGenerator.
Bug: T174899 Change-Id: Ic523845fd2b143be328ee6edc3b3c53783486026 --- M pywikibot/data/api.py M pywikibot/site.py M tests/site_tests.py 3 files changed, 38 insertions(+), 7 deletions(-)
Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py index 6dc86d6..c057b67 100644 --- a/pywikibot/data/api.py +++ b/pywikibot/data/api.py @@ -2507,6 +2507,16 @@
"""
+ # Should results be filtered during iteration according to set_namespace? + # Used if the API module does not support multiple namespaces. + # Override in subclasses by defining a function that returns True if + # the result's namespace is in self._namespaces. + _check_result_namespace = NotImplemented + + # Set of allowed namespaces will be assigned to _namespaces during + # set_namespace call. Only to be used by _check_result_namespace. + _namespaces = None + def __init__(self, **kwargs): """ Construct a QueryGenerator object. @@ -2683,8 +2693,12 @@ self.site.namespaces.resolve(namespaces)]
if 'multi' not in param and len(namespaces) != 1: - raise TypeError(u'{0} module does not support multiple namespaces' - .format(self.limited_module)) + if self._check_result_namespace is NotImplemented: + raise TypeError('{0} module does not support multiple ' + 'namespaces'.format(self.limited_module)) + else: + self._namespaces = set(namespaces) + namespaces = None
if namespaces: self.request[self.prefix + 'namespace'] = namespaces @@ -2801,7 +2815,11 @@ else: self.normalized = {} for item in resultdata: - yield self.result(item) + result = self.result(item) + if self._check_result_namespace is not NotImplemented: + if not self._check_result_namespace(result): + continue + yield result if isinstance(item, dict) and set(self.continuekey) & set(item.keys()): # if we need to count elements contained in items in # self.data["query"]["pages"], we want to count @@ -3015,6 +3033,10 @@ """Instatiate LogEntry from data from api.""" return self.entryFactory.create(pagedata)
+ def _check_result_namespace(self, result): + """Return True if result.ns() is in self._namespaces.""" + return result.ns() in self._namespaces +
class LoginManager(login.LoginManager):
diff --git a/pywikibot/site.py b/pywikibot/site.py index 2b81e4d..f637342 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -4509,8 +4509,11 @@ @type user: basestring @param page: only iterate entries affecting this page @type page: Page or basestring - @param namespace: namespace to retrieve logevents from - @type namespace: int or Namespace + @param namespace: namespace(s) to retrieve logevents from + @type namespace: int or Namespace or an iterable of them + @note: due to an API limitation, if namespace param contains multiple + namespaces, log entries from all namespaces will be fetched from + the API and will be filtered later during iteration. @param start: only iterate entries from and after this Timestamp @type start: Timestamp or ISO date string @param end: only iterate entries up to and through this Timestamp @@ -4544,8 +4547,8 @@ legen.request["leend"] = end if reverse: legen.request["ledir"] = "newer" - if namespace or namespace == 0: - legen.request["lenamespace"] = namespace + if namespace is not None: + legen.set_namespace(namespace) if tag: # Supported in version 1.16+; earlier sites will cause APIError legen.request['letag'] = tag diff --git a/tests/site_tests.py b/tests/site_tests.py index 2740a44..1a7f2b8 100644 --- a/tests/site_tests.py +++ b/tests/site_tests.py @@ -1234,6 +1234,12 @@ entry[2], long if PY2 and entry[2] > sys.maxint else int) self.assertIsInstance(entry[3], basestring)
+ def test_list_namespace(self): + """Test the deprecated site.logpages() when namespace is a list.""" + le = list(self.site.logpages(namespace=[2, 3], number=10)) + for entry in le: + self.assertIn(entry[0].namespace(), [2, 3]) + def test_logpages_dump(self): """Test the deprecated site.logpages() method using dump mode.""" le = list(self.site.logpages(number=10, dump=True))
pywikibot-commits@lists.wikimedia.org