Xqt has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1225151?usp=email )
Change subject: Add WikiWho API support to pywikibot
......................................................................
Add WikiWho API support to pywikibot
This adds support for accessing WikiWho API to get token-level provenance annotations for Wikipedia articles. The implementation includes:
- New get_annotations() method to retrieve WikiWho data
- Support for 15 Wikipedia language editions (ar, de, en, es, eu, fr, hu, id, it, ja, nl, pl, pt, tr, zh)
- Helper methods for WikiWho API URL construction and validation
- Comprehensive test coverage for the new functionality
The WikiWho API provides token-level authorship information showing who added each token in an article and when, which is useful for article provenance analysis.
Bug: T414071
Change-Id: Id5024134d98ead21b9d34ce705aeaeb7669ccf85
---
M pywikibot/page/_toolforge.py
M tests/wikiblame_tests.py
2 files changed, 143 insertions(+), 0 deletions(-)
Approvals:
  Xqt: Verified; Looks good to me, approved
diff --git a/pywikibot/page/_toolforge.py b/pywikibot/page/_toolforge.py
index e6c5820..d00d983 100644
--- a/pywikibot/page/_toolforge.py
+++ b/pywikibot/page/_toolforge.py
@@ -11,7 +11,9 @@
 import collections
 import re
+import urllib.parse
 from http import HTTPStatus
+from typing import Any
 from warnings import warn
import pywikibot @@ -28,6 +30,12 @@ #: Supported wikipedia site codes WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'
+ #: Supported WikiWho API language codes + WIKIWHO_CODES = ( + 'ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id', 'it', 'ja', 'nl', 'pl', + 'pt', 'tr', 'zh' + ) + def _check_wh_supported(self) -> None: """Check if WikiHistory is supported.""" if self.site.family.name != 'wikipedia': @@ -45,6 +53,45 @@ if not self.exists(): raise pywikibot.exceptions.NoPageError(self)
+ def _check_wikiwho_supported(self) -> None: + """Check if WikiWho API is supported. + + .. versionadded:: 11.0 + + :raise NotImplementedError: unsupported site, language, or namespace + :raise NoPageError: page does not exist + """ + if self.site.family.name != 'wikipedia': + raise NotImplementedError( + 'WikiWho API is implemented for wikipedia family only') + + if (code := self.site.code) not in self.WIKIWHO_CODES: + raise NotImplementedError( + f'WikiWho API is not implemented for wikipedia:{code}') + + if (ns := self.namespace()) != 0: + raise NotImplementedError( + f'WikiWho API is not implemented for {ns} namespace') + + if not self.exists(): + raise pywikibot.exceptions.NoPageError(self) + + def _build_wikiwho_url(self, endpoint: str) -> str: + """Build WikiWho API URL for the given endpoint. + + .. versionadded:: 11.0 + + :param endpoint: API endpoint (all_content, rev_content, + edit_persistence) + :return: Complete API URL + """ + article_title = self.title(with_ns=False, with_section=False) + encoded_title = urllib.parse.quote(article_title, safe='') + base_url = 'https://wikiwho-api.wmcloud.org' + url = (f'{base_url}/{self.site.code}/api/v1.0.0-beta/{endpoint}/' + f'{encoded_title}/') + return url + @deprecated('authorsship', since='9.3.0') @deprecated_args(onlynew=None) # since 9.2.0 def main_authors(self) -> collections.Counter[str, int]: @@ -207,3 +254,56 @@ break
return {user: (chars, percent) for user, chars, percent in result} + + def get_annotations(self) -> dict[str, Any]: + """Get WikiWho annotations for article revisions. + + This method uses the public WikiWho API to get token-level + provenance annotations showing who added each token in the article. + + Sample: + + >>> import pywikibot + >>> site = pywikibot.Site('wikipedia:en') + >>> page = pywikibot.Page(site, 'Python (programming language)') + >>> data = page.get_annotations() # doctest: +SKIP + >>> data['article_title'] # doctest: +SKIP + 'Python (programming language)' + + .. important:: Only implemented for main namespace pages and only + Wikipedias of :attr:`WIKIWHO_CODES` are supported. + .. versionadded:: 11.0 + .. seealso:: + - https://wikiwho-api.wmcloud.org + - https://www.mediawiki.org/wiki/WikiWho + + :return: Dictionary containing article_title, page_id, and revisions + with token-level annotations + + :raise NotImplementedError: unsupported site, language, or namespace + :raise NoPageError: page does not exist + :raise pywikibot.exceptions.ServerError: WikiWho API error + :raise requests.exceptions.HTTPError: HTTP error from WikiWho API + """ + self._check_wikiwho_supported() + + url = self._build_wikiwho_url('all_content') + url = f'{url}?editor=true&o_rev_id=true' + + r = pywikibot.comms.http.fetch(url) + + if r.status_code != HTTPStatus.OK: + r.raise_for_status() + + try: + data = r.json() + except Exception as e: + raise pywikibot.exceptions.ServerError( + f'Failed to parse WikiWho API response: {e}') + + if 'Error' in data or 'error' in data: + error_msg = data.get('Error') or data.get('error', 'Unknown error') + raise pywikibot.exceptions.ServerError( + f'WikiWho API error: {error_msg}') + + return data diff --git a/tests/wikiblame_tests.py b/tests/wikiblame_tests.py index ced4a57..b4d3e30 100644 --- a/tests/wikiblame_tests.py +++ b/tests/wikiblame_tests.py @@ -63,6 +63,49 @@ self.assertGreaterEqual(chars, 100) self.assertGreaterEqual(pct, 5.0)
+ def test_wikiwho_exceptions(self) -> None: + """Test that get_annotations fails for unsupported configurations.""" + en_site = pywikibot.Site('wikipedia:en') + page = pywikibot.Page(en_site, 'NonExistentPageXYZ123') + with self.assertRaisesRegex(pywikibot.exceptions.NoPageError, + "doesn't exist"): + page.get_annotations() + + page = pywikibot.Page(en_site, 'Talk:Wikipedia') + with self.assertRaisesRegex( + NotImplementedError, + 'WikiWho API is not implemented for Talk: namespace'): + page.get_annotations() + + page = pywikibot.Page(pywikibot.Site('wikipedia:ru'), + 'Python') + with self.assertRaisesRegex( + NotImplementedError, + 'WikiWho API is not implemented for wikipedia:ru'): + page.get_annotations() + + def test_wikiwho_url_construction(self) -> None: + """Test WikiWho URL construction.""" + page = pywikibot.Page(pywikibot.Site('wikipedia:en'), 'Test') + url = page._build_wikiwho_url('all_content') + expected = ('https://wikiwho-api.wmcloud.org/en/api/v1.0.0-beta/' + 'all_content/Test/') + self.assertEqual(url, expected) + + page = pywikibot.Page(pywikibot.Site('wikipedia:en'), + 'Python (programming language)') + url = page._build_wikiwho_url('all_content') + self.assertIn('Python%20%28programming%20language%29', url) + + def test_wikiwho_supported_languages(self) -> None: + """Test that WIKIWHO_CODES contains expected languages.""" + from pywikibot.page._toolforge import WikiBlameMixin + codes = WikiBlameMixin.WIKIWHO_CODES + expected_langs = ['ar', 'de', 'en', 'es', 'eu', 'fr', 'hu', 'id', + 'it', 'ja', 'nl', 'pl', 'pt', 'tr', 'zh'] + for lang in expected_langs: + self.assertIn(lang, codes) +
if __name__ == '__main__': with suppress(SystemExit):
pywikibot-commits@lists.wikimedia.org