jenkins-bot has submitted this change and it was merged.
Change subject: Add SPARQL endpoint support for pywikibot
......................................................................
Add SPARQL endpoint support for pywikibot
Change-Id: I9fd08059fe2ba34bb95789e088dd7416a73daaed
---
M dev-requirements.txt
M pywikibot/README.rst
A pywikibot/data/sparql.py
M pywikibot/pagegenerators.py
A tests/sparql_tests.py
5 files changed, 400 insertions(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 81f4b4a..734953c 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -11,6 +11,7 @@
flake8
codecov
coverage
+mock ; python_version < '3'
# pywin32 & pywinauto>=0.4.0 are Win32 UI test dependencies that have been
# excluded from this file as they are quite expensive to install, and they
diff --git a/pywikibot/README.rst b/pywikibot/README.rst
index 1397683..5f77c30 100644
--- a/pywikibot/README.rst
+++ b/pywikibot/README.rst
@@ -133,6 +133,8 @@
+---------------------------+-------------------------------------------------------+
| wikistats.py | Objects representing WikiStats API |
+---------------------------+-------------------------------------------------------+
+ | sparql.py | Objects representing SPARQL query API |
+ +---------------------------+-------------------------------------------------------+
+---------------+------------------------------------------------------------------+
diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py
new file mode 100644
index 0000000..9e16d4d
--- /dev/null
+++ b/pywikibot/data/sparql.py
@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*-
+"""SPARQL Query interface."""
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import absolute_import, unicode_literals
+
+import json
+import sys
+if sys.version_info[0] > 2:
+ from urllib.parse import quote
+else:
+ from urllib2 import quote
+
+from pywikibot.comms import http
+
+WIKIDATA = 'http://query.wikidata.org/sparql'
+DEFAULT_HEADERS = {'cache-control': 'no-cache',
+ 'Accept': 'application/sparql-results+json'}
+
+
+class SparqlQuery(object):
+ """
+ SPARQL Query class.
+
+ This class allows to run SPARQL queries against any SPARQL endpoint.
+ """
+
+ def __init__(self, endpoint=WIKIDATA, entity_url='http://www.wikidata.org/entity/'):
+ """
+ Create endpoint.
+
+ @param endpoint: SPARQL endpoint URL, by default Wikidata query endpoint
+ """
+ self.endpoint = endpoint
+ self.last_response = None
+ self.entity_url = entity_url
+
+ def get_last_response(self):
+ """
+ Return last received response.
+
+ @return: Response object from last request or None
+ """
+ return self.last_response
+
+ def select(self, query, full_data=False, headers=DEFAULT_HEADERS):
+ """
+ Run SPARQL query and return the result.
+
+ The response is assumed to be in format defined by:
+ https://www.w3.org/TR/2013/REC-sparql11-results-json-20130321/
+
+ @param query: Query text
+ @type query: string
+ @param full_data: Whether return full data objects or only values
+ @type full_data: bool
+ @return: List of query results or None if query failed
+ """
+ data = self.query(query, headers=headers)
+ if data and 'results' in data:
+ result = []
+ qvars = data['head']['vars']
+ for row in data['results']['bindings']:
+ values = {}
+ for var in qvars:
+ if full_data:
+ if row[var]['type'] not in VALUE_TYPES:
+ raise ValueError('Unknown type: %s' % row[var]['type'])
+ valtype = VALUE_TYPES[row[var]['type']]
+ values[var] = valtype(row[var], entity_url=self.entity_url)
+ else:
+ values[var] = row[var]['value']
+ result.append(values)
+ return result
+ else:
+ return None
+
+ def query(self, query, headers=DEFAULT_HEADERS):
+ """
+ Run SPARQL query and return parsed JSON result.
+
+ @param query: Query text
+ @type query: string
+ """
+ url = '%s?query=%s' % (self.endpoint, quote(query))
+ self.last_response = http.fetch(url, headers=headers)
+ if not self.last_response.content:
+ return None
+ try:
+ return json.loads(self.last_response.content)
+ except ValueError:
+ return None
+
+ def ask(self, query, headers=DEFAULT_HEADERS):
+ """
+ Run SPARQL ASK query and return boolean result.
+
+ @param query: Query text
+ @type query: string
+ @rtype: bool
+ """
+ data = self.query(query, headers=headers)
+ return data['boolean']
+
+ def get_items(self, query, item_name='item'):
+ """
+ Retrieve set of items which satisfy given query.
+
+ Items are returned as Wikibase IDs.
+
+ @param query: Query string. Must contain ?{item_name} as one of the projected values.
+ @param item_name: Name of the value to extract
+ @return: Set of item ids, e.g. Q1234
+ @rtype: set
+ """
+ res = self.select(query, full_data=True)
+ if res:
+ return set([r[item_name].getID() for r in res])
+ return set()
+
+
+class URI(object):
+ """Representation of URI result type."""
+
+ def __init__(self, data, entity_url, **kwargs):
+ """
+ Create URI object.
+
+ @type data: dict
+ """
+ self.value = data.get('value')
+ self.entity_url = entity_url
+
+ def getID(self):
+ """
+ Get ID of Wikibase object identified by the URI.
+
+ @return: ID of Wikibase object, e.g. Q1234
+ """
+ urllen = len(self.entity_url)
+ if self.value.startswith(self.entity_url):
+ return self.value[urllen:]
+ else:
+ return None
+
+ def __str__(self):
+ return self.value
+
+ def __repr__(self):
+ return '<' + self.value + '>'
+
+
+class Literal(object):
+ """Representation of RDF literal result type."""
+
+ def __init__(self, data, **kwargs):
+ """
+ Create Literal object.
+
+ @type data: dict
+ """
+ self.type = data.get('datatype')
+ self.language = data.get('xml:lang')
+ self.value = data.get('value')
+
+ def __str__(self):
+ return self.value
+
+ def __repr__(self):
+ if self.type:
+ return self.value + '^^' + self.type
+ if self.language:
+ return self.value + '@' + self.language
+ return self.value
+
+
+class Bnode(object):
+ """Representation of blank node."""
+
+ def __init__(self, data, **kwargs):
+ """
+ Create Bnode.
+
+ @type data: dict
+ """
+ self.value = data['value']
+
+ def __str__(self):
+ return self.value
+
+ def __repr__(self):
+ return "_:" + self.value
+
+VALUE_TYPES = {'uri': URI, 'literal': Literal, 'bnode': Bnode}
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 2a22663..7efef9c 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -241,6 +241,12 @@
-wikidataquery Takes a WikidataQuery query string like claim[31:12280]
and works on the resulting pages.
+-sparql Takes a SPARQL SELECT query string including ?item
+ and works on the resulting pages.
+
+-sparqlendpoint Specify SPARQL endpoint URL (optional).
+ (Example : -sparqlendpoint:http://myserver.com/sparql)
+
-searchitem Takes a search string and works on Wikibase pages that
contain it.
Argument can be given as "-searchitem:text", where text
@@ -362,6 +368,7 @@
self.subpage_max_depth = None
self._site = site
self._positional_arg_name = positional_arg_name
+ self._sparql = None
@property
def site(self):
@@ -820,6 +827,14 @@
if not value:
value = pywikibot.input('WikidataQuery string:')
gen = WikidataQueryPageGenerator(value, site=self.site)
+ elif arg == '-sparqlendpoint':
+ if not value:
+ value = pywikibot.input('SPARQL endpoint:')
+ self._sparql = value
+ elif arg == '-sparql':
+ if not value:
+ value = pywikibot.input('SPARQL query:')
+ gen = WikidataSPARQLPageGenerator(value, site=self.site, endpoint=self._sparql)
elif arg == '-mysqlquery':
if not value:
value = pywikibot.input('Mysql query string:')
@@ -2581,6 +2596,38 @@
yield pywikibot.Page(pywikibot.Link(link, site))
+def WikidataSPARQLPageGenerator(query, site=None, item_name='item', endpoint=None):
+ """Generate pages that result from the given SPARQL query.
+
+ @param query: the SPARQL query string.
+ @param site: Site for generator results.
+ @type site: L{pywikibot.site.BaseSite}
+
+ """
+ from pywikibot.data import sparql
+
+ if site is None:
+ site = pywikibot.Site()
+ repo = site.data_repository()
+ if endpoint is None:
+ endpoint = sparql.WIKIDATA
+
+ query_object = sparql.SparqlQuery(endpoint=endpoint)
+ data = query_object.get_items(query, item_name=item_name)
+
+ for item in data:
+ page = pywikibot.ItemPage(repo, item)
+ if isinstance(site, pywikibot.site.DataSite):
+ yield page
+ continue
+
+ try:
+ link = page.getSitelink(site)
+ except pywikibot.NoPage:
+ continue
+ yield pywikibot.Page(pywikibot.Link(link, site))
+
+
def WikibaseSearchItemPageGenerator(text, language=None, total=None, site=None):
"""
Generate pages that contain the provided text.
diff --git a/tests/sparql_tests.py b/tests/sparql_tests.py
new file mode 100644
index 0000000..9db482c
--- /dev/null
+++ b/tests/sparql_tests.py
@@ -0,0 +1,155 @@
+# -*- coding: utf-8 -*-
+"""Test cases for the SPARQL API."""
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import absolute_import, unicode_literals
+
+import sys
+
+import pywikibot.data.sparql as sparql
+
+from tests.aspects import unittest, TestCase
+
+if sys.version_info[0] > 2:
+ from unittest.mock import patch
+else:
+ from mock import patch
+
+# See: https://www.w3.org/TR/2013/REC-sparql11-results-json-20130321/
+
+SQL_RESPONSE = """
+{
+ "head" : {
+ "vars" : [ "cat", "d", "catLabel" ]
+ },
+ "results" : {
+ "bindings" : [ {
+ "cat" : {
+ "type" : "uri",
+ "value" : "http://www.wikidata.org/entity/Q498787"
+ },
+ "d" : {
+ "datatype" : "http://www.w3.org/2001/XMLSchema#dateTime",
+ "type" : "literal",
+ "value" : "1955-01-01T00:00:00Z"
+ },
+ "catLabel" : {
+ "xml:lang" : "en",
+ "type" : "literal",
+ "value" : "Muezza"
+ }
+ }, {
+ "cat" : {
+ "type" : "uri",
+ "value" : "http://www.wikidata.org/entity/Q677525"
+ },
+ "d" : {
+ "datatype" : "http://www.w3.org/2001/XMLSchema#dateTime",
+ "type" : "literal",
+ "value" : "2015-06-22T00:00:00Z"
+ },
+ "catLabel" : {
+ "xml:lang" : "en",
+ "type" : "literal",
+ "value" : "Orangey"
+ }
+ } ]
+ }
+}
+"""
+
+RESPONSE_TRUE = """
+{
+ "head" : { },
+ "boolean" : true
+}
+"""
+
+RESPONSE_FALSE = """
+{
+ "head" : { },
+ "boolean" : false
+}
+"""
+
+
+class TestContainer(object):
+ """Simple test container for return values."""
+
+ def __init__(self, value):
+ """Create container."""
+ self.content = value
+
+
+class TestSparql(TestCase):
+ """Test SPARQL queries."""
+
+ net = False
+
+ @patch.object(sparql.http, 'fetch')
+ def testQuerySelect(self, mock_method):
+ """Test SELECT query."""
+ mock_method.return_value = TestContainer(SQL_RESPONSE)
+ q = sparql.SparqlQuery()
+ res = q.select('SELECT * WHERE { ?x ?y ?z }')
+ self.assertIsInstance(res, list, 'Result is not a list')
+ self.assertEqual(len(res), 2)
+
+ self.assertDictEqual(res[0],
+ {'cat': 'http://www.wikidata.org/entity/Q498787',
+ 'catLabel': 'Muezza', 'd': '1955-01-01T00:00:00Z'},
+ 'Bad result')
+ self.assertDictEqual(res[1],
+ {'cat': 'http://www.wikidata.org/entity/Q677525',
+ 'catLabel': 'Orangey', 'd': '2015-06-22T00:00:00Z'},
+ 'Bad result')
+
+ @patch.object(sparql.http, 'fetch')
+ def testQuerySelectFull(self, mock_method):
+ """Test SELECT query with full data."""
+ mock_method.return_value = TestContainer(SQL_RESPONSE)
+ q = sparql.SparqlQuery()
+ res = q.select('SELECT * WHERE { ?x ?y ?z }', full_data=True)
+ self.assertIsInstance(res, list, 'Result is not a list')
+ self.assertEqual(len(res), 2)
+
+ self.assertIsInstance(res[0]['cat'], sparql.URI, 'Wrong type for URI')
+ self.assertEqual(repr(res[0]['cat']), '<http://www.wikidata.org/entity/Q498787>',
+ 'Wrong URI representation')
+ self.assertEqual(res[0]['cat'].getID(), 'Q498787', 'Wrong URI ID')
+
+ self.assertIsInstance(res[0]['catLabel'], sparql.Literal, 'Wrong type for Literal')
+ self.assertEqual(repr(res[0]['catLabel']), 'Muezza@en', 'Wrong literal representation')
+
+ self.assertIsInstance(res[0]['d'], sparql.Literal, 'Wrong type for Literal')
+ self.assertEqual(repr(res[0]['d']),
+ '1955-01-01T00:00:00Z^^http://www.w3.org/2001/XMLSchema#dateTime',
+ 'Wrong URI representation')
+
+ @patch.object(sparql.http, 'fetch')
+ def testGetItems(self, mock_method):
+ """Test item list retrieval via SPARQL."""
+ mock_method.return_value = TestContainer(SQL_RESPONSE)
+ q = sparql.SparqlQuery()
+ res = q.get_items('SELECT * WHERE { ?x ?y ?z }', 'cat')
+ self.assertSetEqual(res, set(['Q498787', 'Q677525']))
+
+ @patch.object(sparql.http, 'fetch')
+ def testQueryAsk(self, mock_method):
+ """Test ASK query."""
+ mock_method.return_value = TestContainer(RESPONSE_TRUE)
+ q = sparql.SparqlQuery()
+
+ res = q.ask('ASK { ?x ?y ?z }')
+ self.assertTrue(res)
+
+ mock_method.return_value = TestContainer(RESPONSE_FALSE)
+ res = q.ask('ASK { ?x ?y ?z }')
+ self.assertFalse(res)
+
+if __name__ == '__main__':
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
--
To view, visit
https://gerrit.wikimedia.org/r/274448
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I9fd08059fe2ba34bb95789e088dd7416a73daaed
Gerrit-PatchSet: 12
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <smalyshev(a)wikimedia.org>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Multichill <maarten(a)mdammers.nl>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: Smalyshev <smalyshev(a)wikimedia.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>