jenkins-bot has submitted this change and it was merged.
Change subject: WikidataSPARQLPageGenerator should not always remove duplicates
......................................................................
WikidataSPARQLPageGenerator should not always remove duplicates
get_items builds a set from the results in order to avoid duplicates.
This has the unfortunate side effect of discarding the order of the
results of every query, including those with an ORDER BY clause.
The result_type argument is added to allow the caller to change the
default set() to a list() to preserve order and duplicates.
Bug: T141348
Change-Id: I19873439e82f1a74797d1d2b2ac253086b38721e
---
M pywikibot/data/sparql.py
M pywikibot/pagegenerators.py
M tests/sparql_tests.py
3 files changed, 45 insertions(+), 17 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py
index 9e16d4d..dd09b21 100644
--- a/pywikibot/data/sparql.py
+++ b/pywikibot/data/sparql.py
@@ -103,21 +103,24 @@
data = self.query(query, headers=headers)
return data['boolean']
- def get_items(self, query, item_name='item'):
+ def get_items(self, query, item_name='item', result_type=set):
"""
- Retrieve set of items which satisfy given query.
+ Retrieve items which satisfy given query.
Items are returned as Wikibase IDs.
@param query: Query string. Must contain ?{item_name} as one of the projected
values.
@param item_name: Name of the value to extract
- @return: Set of item ids, e.g. Q1234
- @rtype: set
+ @param result_type: type of the iterable in which
+ SPARQL results are stored (default set)
+ @type result_type: iterable
+ @return: item ids, e.g. Q1234
+ @rtype: same as result_type
"""
res = self.select(query, full_data=True)
if res:
- return set([r[item_name].getID() for r in res])
- return set()
+ return result_type(r[item_name].getID() for r in res)
+ return result_type()
class URI(object):
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index a79a6bc..6ce7417 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -2665,12 +2665,17 @@
return WikidataPageFromItemGenerator(items_pages, site)
-def WikidataSPARQLPageGenerator(query, site=None, item_name='item', endpoint=None):
+def WikidataSPARQLPageGenerator(query, site=None,
+ item_name='item', endpoint=None,
+ result_type=set):
"""Generate pages that result from the given SPARQL query.
@param query: the SPARQL query string.
@param site: Site for generator results.
@type site: L{pywikibot.site.BaseSite}
+ @param result_type: type of the iterable in which
+ SPARQL results are stored (default set)
+ @type result_type: iterable
"""
from pywikibot.data import sparql
@@ -2682,7 +2687,9 @@
endpoint = sparql.WIKIDATA
query_object = sparql.SparqlQuery(endpoint=endpoint)
- data = query_object.get_items(query, item_name=item_name)
+ data = query_object.get_items(query,
+ item_name=item_name,
+ result_type=result_type)
items_pages = (pywikibot.ItemPage(repo, item) for item in data)
if isinstance(site, pywikibot.site.DataSite):
return items_pages
diff --git a/tests/sparql_tests.py b/tests/sparql_tests.py
index d86368e..26a0b7b 100644
--- a/tests/sparql_tests.py
+++ b/tests/sparql_tests.py
@@ -18,13 +18,21 @@
# See: https://www.w3.org/TR/2013/REC-sparql11-results-json-20130321/
-SQL_RESPONSE = """
+SQL_RESPONSE_CONTAINER = """
{
"head" : {
"vars" : [ "cat", "d", "catLabel" ]
},
"results" : {
- "bindings" : [ {
+ "bindings" : [
+ %s
+ ]
+ }
+}
+"""
+
+ITEM_Q498787 = """
+ {
"cat" : {
"type" : "uri",
"value" : "http://www.wikidata.org/entity/Q498787"
@@ -39,7 +47,11 @@
"type" : "literal",
"value" : "Muezza"
}
- }, {
+ }
+"""
+
+ITEM_Q677525 = """
+ {
"cat" : {
"type" : "uri",
"value" : "http://www.wikidata.org/entity/Q677525"
@@ -54,9 +66,7 @@
"type" : "literal",
"value" : "Orangey"
}
- } ]
- }
-}
+ }
"""
RESPONSE_TRUE = """
@@ -90,7 +100,8 @@
@patch.object(sparql.http, 'fetch')
def testQuerySelect(self, mock_method):
"""Test SELECT query."""
- mock_method.return_value = Container(SQL_RESPONSE)
+ mock_method.return_value = Container(
+ SQL_RESPONSE_CONTAINER % ("%s, %s" % (ITEM_Q498787, ITEM_Q677525)))
q = sparql.SparqlQuery()
res = q.select('SELECT * WHERE { ?x ?y ?z }')
self.assertIsInstance(res, list, 'Result is not a list')
@@ -108,7 +119,8 @@
@patch.object(sparql.http, 'fetch')
def testQuerySelectFull(self, mock_method):
"""Test SELECT query with full data."""
- mock_method.return_value = Container(SQL_RESPONSE)
+ mock_method.return_value = Container(
+ SQL_RESPONSE_CONTAINER % ("%s, %s" % (ITEM_Q498787, ITEM_Q677525)))
q = sparql.SparqlQuery()
res = q.select('SELECT * WHERE { ?x ?y ?z }', full_data=True)
self.assertIsInstance(res, list, 'Result is not a list')
@@ -130,10 +142,16 @@
@patch.object(sparql.http, 'fetch')
def testGetItems(self, mock_method):
"""Test item list retrieval via SPARQL."""
- mock_method.return_value = Container(SQL_RESPONSE)
+ mock_method.return_value = Container(
+ SQL_RESPONSE_CONTAINER % ("%s, %s, %s" % (ITEM_Q498787,
+ ITEM_Q677525,
+ ITEM_Q677525)))
q = sparql.SparqlQuery()
res = q.get_items('SELECT * WHERE { ?x ?y ?z }', 'cat')
self.assertSetEqual(res, set(['Q498787', 'Q677525']))
+ res = q.get_items('SELECT * WHERE { ?x ?y ?z }', 'cat',
+ result_type=list)
+ self.assertEqual(res, ['Q498787', 'Q677525', 'Q677525'])
@patch.object(sparql.http, 'fetch')
def testQueryAsk(self, mock_method):
--
To view, visit
https://gerrit.wikimedia.org/r/301094
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I19873439e82f1a74797d1d2b2ac253086b38721e
Gerrit-PatchSet: 9
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Dachary <loic(a)dachary.org>
Gerrit-Reviewer: Addshore <addshorewiki(a)gmail.com>
Gerrit-Reviewer: D3r1ck <alangiderick(a)gmail.com>
Gerrit-Reviewer: D3r1ck01 <alangiderick(a)gmail.com>
Gerrit-Reviewer: Dachary <loic(a)dachary.org>
Gerrit-Reviewer: Dalba <dalba.wiki(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: MtDu <justin.d128(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>