jenkins-bot has submitted this change and it was merged.
Change subject: Implement wbsearchentities
......................................................................
Implement wbsearchentities
The 'wbsearchentities' API request is implemented as the search_entities method of the DataSite class. A WikibaseSearchItemPageGenerator was added, which yields the pages returned by the new method for the specified language code.
Bug: T68949
Change-Id: Ib7459a4b7c6bafe04d56dcd09ee0f8386711b4cf
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M pywikibot/site.py
M tests/pagegenerators_tests.py
M tests/site_tests.py
5 files changed, 230 insertions(+), 0 deletions(-)
Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified
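The following is a minimal usage sketch of the change described above. It is not part of the patch; the search term, the example site, and the printed fields are illustrative assumptions.

    import pywikibot
    from pywikibot import pagegenerators

    site = pywikibot.Site('en', 'wikipedia')
    repo = site.data_repository()

    # DataSite.search_entities yields the raw 'search' result dicts
    # returned by the wbsearchentities API module.
    for result in repo.search_entities('Rembrandt', 'en', limit=5):
        print(result['id'])

    # WikibaseSearchItemPageGenerator wraps the same call and yields
    # pywikibot.ItemPage objects instead.
    gen = pagegenerators.WikibaseSearchItemPageGenerator(
        'Rembrandt', language='en', total=5, site=site)
    for item in gen:
        print(item)

The same lookup is available on the command line of any script that accepts pagegenerators arguments, for example -searchitem:en:Rembrandt.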
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index f979b81..d5100ad 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -1464,6 +1464,110 @@
         return self._data
 
 
+class APIGenerator(object):
+
+    """Iterator that handle API responses containing lists.
+
+    The iterator will iterate each item in the query response and use the
+    continue request parameter to retrieve the next portion of items
+    automatically. If the limit attribute is set, the iterator will stop
+    after iterating that many values.
+    """
+
+    def __init__(self, action, continue_name='continue', limit_name='limit',
+                 data_name='data', **kwargs):
+        """
+        Construct an APIGenerator object.
+
+        kwargs are used to create a Request object; see that object's
+        documentation for values.
+
+        @param action: API action name.
+        @type action: str
+        @param continue_name: Name of the continue API parameter.
+        @type continue_name: str
+        @param limit_name: Name of the limit API parameter.
+        @type limit_name: str
+        @param data_name: Name of the data in API response.
+        @type data_name: str
+        """
+        kwargs['action'] = action
+        try:
+            self.site = kwargs['site']
+        except KeyError:
+            self.site = pywikibot.Site()
+            kwargs['site'] = self.site
+
+        self.continue_name = continue_name
+        self.limit_name = limit_name
+        self.data_name = data_name
+
+        self.limit = None
+        self.starting_offset = kwargs.pop(self.continue_name, 0)
+        self.request = Request(**kwargs)
+        self.request[self.limit_name] = 50
+
+    def set_query_increment(self, value):
+        """
+        Set the maximum number of items to be retrieved per API query.
+
+        If not called, the default is 50.
+
+        @param value: The value of maximum number of items to be retrieved
+            per API request to set.
+        @type value: int
+        """
+        self.request[self.limit_name] = int(value)
+        pywikibot.debug(u"%s: Set query_limit to %i."
+                        % (self.__class__.__name__, int(value)), _logger)
+
+    def set_maximum_items(self, value):
+        """
+        Set the maximum number of items to be retrieved from the wiki.
+
+        If not called, most queries will continue as long as there is
+        more data to be retrieved from the API.
+
+        @param value: The value of maximum number of items to be retrieved
+            in total to set.
+        @type value: int
+        """
+        self.limit = int(value)
+        if self.limit < self.request[self.limit_name]:
+            self.request[self.limit_name] = self.limit
+
+    def __iter__(self):
+        """Submit request and iterate the response.
+
+        Continues response as needed until limit (if defined) is reached.
+        """
+        offset = self.starting_offset
+        n = 0
+        while True:
+            self.request[self.continue_name] = offset
+            pywikibot.debug(u"%s: Request: %s" % (self.__class__.__name__,
+                                                  self.request), _logger)
+            data = self.request.submit()
+
+            n_items = len(data[self.data_name])
+            pywikibot.debug(u"%s: Retrieved %d items" % (
+                self.__class__.__name__, n_items), _logger)
+            if n_items > 0:
+                for item in data[self.data_name]:
+                    yield item
+                    n += 1
+                    if self.limit is not None and n >= self.limit:
+                        pywikibot.debug(u"%s: Stopped iterating due to "
+                                        u"exceeding item limit." %
+                                        self.__class__.__name__, _logger)
+                        return
+                offset += n_items
+            else:
+                pywikibot.debug(u"%s: Stopped iterating due to empty list in "
+                                u"response." % self.__class__.__name__, _logger)
+                break
+
+
 class QueryGenerator(object):
 
"""Base class for iterators that handle responses to API action=query. diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 6f01605..792e092 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -199,6 +199,12 @@ -wikidataquery Takes a WikidataQuery query string like claim[31:12280] and works on the resulting pages.
+-searchitem       Takes a search string and works on Wikibase pages that
+                  contain it.
+                  Argument can be given as "-searchitem:text", where text
+                  is the string to look for, or "-searchitem:lang:text", where
+                  lang is the language to search items in.
+
 -random           Work on random pages returned by [[Special:Random]].
                   Can also be given as "-random:n" where n is the number
                   of pages to be returned, otherwise the default is 10 pages.
@@ -668,6 +674,15 @@
             imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle,
                                                            self.site))
             gen = ImagesPageGenerator(imagelinksPage)
+        elif arg.startswith('-searchitem'):
+            text = arg[len('-searchitem:'):]
+            if not text:
+                text = pywikibot.input(u'Text to look for:')
+            params = text.split(':')
+            text = params[-1]
+            lang = params[0] if len(params) == 2 else None
+            gen = WikibaseSearchItemPageGenerator(text, language=lang,
+                                                  site=self.site)
         elif arg.startswith('-search'):
             mediawikiQuery = arg[8:]
             if not mediawikiQuery:
@@ -2258,6 +2273,33 @@
         yield pywikibot.Page(pywikibot.Link(link, site))
 
 
+def WikibaseSearchItemPageGenerator(text, language=None, total=None, site=None):
+    """
+    Generate pages that contain the provided text.
+
+    @param text: Text to look for.
+    @type text: str
+    @param language: Code of the language to search in. If not specified,
+        value from pywikibot.config.data_lang is used.
+    @type language: str
+    @param total: Maximum number of pages to retrieve in total, or None in
+        case of no limit.
+    @type total: int or None
+    @param site: Site for generator results.
+    @type site: L{pywikibot.site.BaseSite}
+    """
+    if site is None:
+        site = pywikibot.Site()
+    if language is None:
+        language = site.lang
+    repo = site.data_repository()
+
+    data = repo.search_entities(text, language, limit=total, site=site)
+    pywikibot.output(u'retrieved %d items' % len(list(data)))
+    for item in data:
+        yield pywikibot.ItemPage(repo, item['id'])
+
+
 if __name__ == "__main__":
     pywikibot.output(u'Pagegenerators cannot be run as script - are you '
                      u'looking for listpages.py?')

diff --git a/pywikibot/site.py b/pywikibot/site.py
index 5454267..0e2009e 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -5676,6 +5676,31 @@
         result = self.editEntity({}, data, bot=bot, **kwargs)
         return pywikibot.ItemPage(self, result['entity']['id'])
 
+    def search_entities(self, search, language, limit=None, **kwargs):
+        """
+        Search for pages or properties that contain the given text.
+
+        @param search: Text to find.
+        @type search: str
+        @param language: Language to search in.
+        @type language: str
+        @param limit: Maximum number of pages to retrieve in total, or None in
+            case of no limit.
+        @type limit: int or None
+        @return: 'search' list from API output.
+        """
+        lang_codes = [lang['code'] for lang in self._siteinfo.get('languages')]
+        if language not in lang_codes:
+            raise ValueError(u'Data site used does not support provided '
+                             u'language.')
+
+        gen = api.APIGenerator('wbsearchentities', data_name='search',
+                               search=search, language=language, **kwargs)
+        gen.set_query_increment(50)
+        if limit is not None:
+            gen.set_maximum_items(limit)
+        return gen
+
     # deprecated BaseSite methods
     def fam(self):
         raise NotImplementedError

diff --git a/tests/pagegenerators_tests.py b/tests/pagegenerators_tests.py
index ae7e2d8..2dec8dd 100755
--- a/tests/pagegenerators_tests.py
+++ b/tests/pagegenerators_tests.py
@@ -657,6 +657,25 @@
         gen = gf.getCombinedGenerator()
         self.assertEqual(len(set(gen)), 1)
 
+    def test_searchitem(self):
+        """Test -searchitem."""
+        gf = pagegenerators.GeneratorFactory(site=self.site)
+        gf.handleArg('-searchitem:abc')
+        gen = gf.getCombinedGenerator()
+        self.assertGreater(len(set(gen)), 0)
+
+    def test_searchitem_language(self):
+        """Test -searchitem with custom language specified."""
+        gf = pagegenerators.GeneratorFactory(site=self.site)
+        gf.handleArg('-searchitem:pl:abc')
+        gen = gf.getCombinedGenerator()
+        pages = set(gen)
+        gf = pagegenerators.GeneratorFactory(site=self.site)
+        gf.handleArg('-searchitem:en:abc')
+        gen = gf.getCombinedGenerator()
+        pages2 = set(gen)
+        self.assertNotEqual(pages, pages2)
+
 
class TestLogeventsFactoryGenerator(DefaultSiteTestCase):
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 55e3d3f..760c965 100644
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -2103,6 +2103,46 @@
         self.assertEqual(item.id, 'Q5296')
 
 
+class TestDataSiteSearchEntities(WikidataTestCase):
+
+    """Test DataSite.search_entities."""
+
+    def test_general(self):
+        """Test basic search_entities functionality."""
+        datasite = self.get_repo()
+        pages = datasite.search_entities('abc', 'en', limit=50,
+                                         site=self.get_site())
+        self.assertGreater(len(list(pages)), 0)
+        self.assertLessEqual(len(list(pages)), 50)
+        pages = datasite.search_entities('alphabet', 'en', type='property',
+                                         limit=50, site=self.get_site())
+        self.assertGreater(len(list(pages)), 0)
+        self.assertLessEqual(len(list(pages)), 50)
+
+    def test_continue(self):
+        """Test that continue parameter in search_entities works."""
+        datasite = self.get_repo()
+        kwargs = {'limit': 50, 'site': self.get_site()}
+        pages = datasite.search_entities('Rembrandt', 'en', **kwargs)
+        kwargs['continue'] = 1
+        pages_continue = datasite.search_entities('Rembrandt', 'en', **kwargs)
+        self.assertNotEqual(list(pages), list(pages_continue))
+
+    def test_language_lists(self):
+        """Test that languages returned by paraminfo and MW are the same."""
+        site = self.get_site()
+        lang_codes = site._paraminfo.parameter('wbsearchentities',
+                                               'language')['type']
+        lang_codes2 = [lang['code'] for lang in site._siteinfo.get('languages')]
+        self.assertEqual(lang_codes, lang_codes2)
+
+    def test_invalid_language(self):
+        """Test behavior of search_entities with invalid language provided."""
+        datasite = self.get_repo()
+        self.assertRaises(ValueError, datasite.search_entities, 'abc',
+                          'invalidlanguage')
+
+
 class TestSametitleSite(TestCase):
 
"""Test APISite.sametitle on sites with known behaviour."""