jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/319857 )
Change subject: Make sparql use Site to access sparql endpoint and entity_url ......................................................................
Make sparql use Site to access sparql endpoint and entity_url
Introduces a repo parameter which takes any DataSite and gets its sparql endpoint and entity_url. Still supports explicit endpoint and entity_url for full flexibility.
As before, this defaults to Wikidata, but it now relies on the Wikidata Site object rather than a hard-coded endpoint and entity_url.
The same change is implemented in pagegenerators.
Note that this change is breaking for anyone who had overridden endpoint but not entity_url (i.e. running their own endpoint but loaded with Wikidata data). This was an undocumented feature and these users will now receive an Error.
Bug: T159956 Change-Id: I057335a56398a7d5f6c8c811233a002228b73093 --- M pywikibot/data/sparql.py M pywikibot/pagegenerators.py M pywikibot/site.py M tests/sparql_tests.py 4 files changed, 64 insertions(+), 15 deletions(-)
Approvals: jenkins-bot: Verified Xqt: Looks good to me, approved
diff --git a/pywikibot/data/sparql.py b/pywikibot/data/sparql.py index e95c916..b577bb0 100644 --- a/pywikibot/data/sparql.py +++ b/pywikibot/data/sparql.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- """SPARQL Query interface.""" # +# (C) Pywikibot team, 2016-2017 +# # Distributed under the terms of the MIT license. # from __future__ import absolute_import, unicode_literals @@ -12,10 +14,10 @@ else: from urllib2 import quote
+from pywikibot import Site, Error from pywikibot.comms import http from pywikibot.tools import UnicodeMixin, py2_encode_utf_8
-WIKIDATA = 'http://query.wikidata.org/sparql' DEFAULT_HEADERS = {'cache-control': 'no-cache', 'Accept': 'application/sparql-results+json'}
@@ -27,15 +29,44 @@ This class allows to run SPARQL queries against any SPARQL endpoint. """
- def __init__(self, endpoint=WIKIDATA, entity_url='http://www.wikidata.org/entity/'): + def __init__(self, endpoint=None, entity_url=None, repo=None): """ Create endpoint.
- @param endpoint: SPARQL endpoint URL, by default Wikidata query endpoint + @param endpoint: SPARQL endpoint URL + @type endpoint: string + @param entity_url: URL prefix for any entities returned in a query. + @type entity_url: string + @param repo: The Wikibase site which we want to run queries on. If + provided this overrides any value in endpoint and entity_url. + Defaults to Wikidata. + @type repo: pywikibot.site.DataSite """ - self.endpoint = endpoint + # default to Wikidata + if not repo and not endpoint: + repo = Site('wikidata', 'wikidata') + + if repo: + try: + self.endpoint = repo.sparql_endpoint + self.entity_url = repo.concept_base_uri + except NotImplementedError: + raise NotImplementedError( + 'Wiki version must be 1.28-wmf.23 or newer to ' + 'automatically extract the sparql endpoint. ' + 'Please provide the endpoint and entity_url ' + 'parameters instead of a repo.') + if not self.endpoint: + raise Error('The site {0} does not provide a sparql endpoint.' + .format(repo)) + else: + if not entity_url: + raise Error('If initialised with an endpoint the entity_url ' + 'must be provided.') + self.endpoint = endpoint + self.entity_url = entity_url + self.last_response = None - self.entity_url = entity_url
def get_last_response(self): """ diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py index 61dacdb..1152def 100644 --- a/pywikibot/pagegenerators.py +++ b/pywikibot/pagegenerators.py @@ -40,13 +40,13 @@ from pywikibot.tools import ( deprecated, deprecated_args, - redirect_func, - issue_deprecation_warning, - itergroup, DequeGenerator, - intersect_generators, - IteratorNextMixin, filter_unique, + intersect_generators, + issue_deprecation_warning, + IteratorNextMixin, + itergroup, + redirect_func, )
from pywikibot import date, config, i18n, xmlreader @@ -2727,7 +2727,16 @@ site = pywikibot.Site() repo = site.data_repository() if endpoint is None: - endpoint = sparql.WIKIDATA + try: + endpoint = repo.sparql_endpoint + except NotImplementedError: + raise NotImplementedError( + 'Wiki version must be 1.28-wmf.23 or newer to automatically ' + 'extract the sparql endpoint. Please provide the endpoint ' + 'parameter as well.') + if not endpoint: + pywikibot.error('The site {0} does not provide a sparql endpoint.' + .format(repo))
query_object = sparql.SparqlQuery(endpoint=endpoint) data = query_object.get_items(query, diff --git a/pywikibot/site.py b/pywikibot/site.py index f2724d9..5da6345 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -7057,6 +7057,17 @@ % self)
@property + @need_version("1.28-wmf.3") + def sparql_endpoint(self): + """ + Return the sparql endpoint url, if any has been set. + + @return: sparql endpoint url + @rtype: str|None + """ + return self.siteinfo['general'].get('wikibase-sparql') + + @property @need_version("1.28-wmf.23") def concept_base_uri(self): """ diff --git a/tests/sparql_tests.py b/tests/sparql_tests.py index 02c76d6..c392d57 100644 --- a/tests/sparql_tests.py +++ b/tests/sparql_tests.py @@ -12,7 +12,7 @@ import pywikibot.data.sparql as sparql from pywikibot.tools import UnicodeType
-from tests.aspects import unittest, TestCase +from tests.aspects import unittest, TestCase, WikidataTestCase
if sys.version_info[0] > 2: from unittest.mock import patch @@ -95,10 +95,8 @@ self.content = value
-class TestSparql(TestCase): +class TestSparql(WikidataTestCase): """Test SPARQL queries.""" - - net = False
@patch.object(sparql.http, 'fetch') def testQuerySelect(self, mock_method):
pywikibot-commits@lists.wikimedia.org