jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/699536 )
Change subject: [IMPR] Add WikiBlame support to Pywikibot ......................................................................
[IMPR] Add WikiBlame support to Pywikibot
- add main_authors() method to BasePage class which gives the 5 topmost editors based on the current blamed text - provide some tests
Change-Id: I25f1b23cfaa88f02c1721a7032da4938da1777b7 --- M docs/api_ref/pywikibot.page.rst M pywikibot/CONTENT.rst M pywikibot/page/_pages.py A pywikibot/page/_toolforge.py M tests/__init__.py A tests/wikiblame_tests.py 6 files changed, 157 insertions(+), 1 deletion(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/docs/api_ref/pywikibot.page.rst b/docs/api_ref/pywikibot.page.rst index 18e7eb7..f145976 100644 --- a/docs/api_ref/pywikibot.page.rst +++ b/docs/api_ref/pywikibot.page.rst @@ -24,3 +24,9 @@
.. automodule:: page._revision :synopsis: Object representing page revision + +:mod:`page._toolforge` module +--------------------------------- + +.. automodule:: page._toolforge + :synopsis: Object representing interface to toolforge tools \ No newline at end of file diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst index 88fb524..096ce26 100644 --- a/pywikibot/CONTENT.rst +++ b/pywikibot/CONTENT.rst @@ -130,6 +130,8 @@ +----------------------------+------------------------------------------------------+ | _revision.py | Object representing page revision | +----------------------------+------------------------------------------------------+ + | _toolforge.py | BasePage interface to toolforge tools | + +----------------------------+------------------------------------------------------+ | _user.py | Object representing a wiki user | +----------------------------+------------------------------------------------------+ | _wikibase.py | Objects representing wikibase structures | diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py index 1ead289..cc11304 100644 --- a/pywikibot/page/_pages.py +++ b/pywikibot/page/_pages.py @@ -49,6 +49,7 @@ ) from pywikibot.page._decorators import allow_asynchronous from pywikibot.page._links import BaseLink, Link +from pywikibot.page._toolforge import WikiBlameMixin from pywikibot.site import Namespace, NamespaceArgType from pywikibot.tools import ( ComparableMixin, @@ -2149,7 +2150,7 @@ return link
-class Page(BasePage): +class Page(BasePage, WikiBlameMixin):
"""Page: A MediaWiki page."""
diff --git a/pywikibot/page/_toolforge.py b/pywikibot/page/_toolforge.py new file mode 100644 index 0000000..4dccfdb --- /dev/null +++ b/pywikibot/page/_toolforge.py @@ -0,0 +1,110 @@ +"""Object representing interface to toolforge tools. + +.. versionadded:: 7.7 +""" +# +# (C) Pywikibot team, 2022 +# +# Distributed under the terms of the MIT license. +# +import collections +import re + +from typing import Optional + +import pywikibot + +from pywikibot import config + + +class WikiBlameMixin: + + """Page mixin for main authorship. + + .. versionadded:: 7.7 + """ + + #: Supported wikipedia site codes + WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco' + + def _check_wh_supported(self): + """Check if WikiHistory is supported.""" + if self.site.family.name != 'wikipedia': + raise NotImplementedError( + 'main_authors method is implemented for wikipedia family only') + + if self.site.code not in self.WIKIBLAME_CODES: + raise NotImplementedError( + 'main_authors method is not implemented for wikipedia:{}' + .format(self.site.code)) + + if self.namespace() != pywikibot.site.Namespace.MAIN: + raise NotImplementedError( + 'main_authors method is implemented for main namespace only') + + if not self.exists(): + raise pywikibot.exceptions.NoPageError(self) + + def main_authors(self, *, + onlynew: Optional[bool] = None) -> collections.Counter: + """Retrieve the 5 topmost main authors of an article. + + This method uses WikiHistory to retrieve the text based main + authorship. + + Sample: + + >>> import pywikibot + >>> site = pywikibot.Site('wikipedia:nds') + >>> page = pywikibot.Page(site, 'Python (Programmeerspraak)') + >>> auth = page.main_authors(onlynew=False) + >>> auth + Counter({'RebeccaBreu': 99, 'Slomox': 1}) + + .. note:: Only implemented for main namespace pages. + .. note:: Only wikipedias of :attr:`WIKIBLAME_CODES` are supported. + .. 
seealso:: + - https://wikihistory.toolforge.org + - https://de.wikipedia.org/wiki/Wikipedia:Technik/Cloud/wikihistory + + :param onlynew: If False, use the cached values. If True, + calculate the Counter data which can take some time; it may + fail with TimeoutError after ``config.max_retries``. If None + it calculates new data like for True but uses data from + cache if new data cannot be calculated in meantime. + :return: Number of edits for each username + :raise NotImplementedError: unsupported site or unsupported namespace + :raise pywikibot.exceptions.NoPageError: The page does not exist + :raise pywikibot.exceptions.TimeoutError: Maximum retries exceeded + """ + baseurl = 'https://wikihistory.toolforge.org' + pattern = (r'><bdi>(?P<author>.+?)</bdi></a>\s' + r'\((?P<percent>\d{1,3})&') + + self._check_wh_supported() + + url = baseurl + '/wiki/getauthors.php?wiki={}wiki&page_id={}'.format( + self.site.code, self.pageid) + if onlynew: + url += '&onlynew=1' + + for current_retries in range(config.max_retries): + r = pywikibot.comms.http.fetch(url) + if r.status_code != 200: + r.raise_for_status() + + if 'Timeout' not in r.text: # window.setTimeout in result + return collections.Counter( + {user: int(cnt) + for user, cnt in re.findall(pattern, r.text)}) + + delay = pywikibot.config.retry_wait * 2 ** current_retries + pywikibot.warning('WikiHistory timeout.\n' + 'Waiting {:.1f} seconds before retrying.' 
+ .format(delay)) + pywikibot.sleep(delay) + if onlynew is None and current_retries >= config.max_retries - 2: + url += '&onlynew=1' + + raise pywikibot.exceptions.TimeoutError( + 'Maximum retries attempted without success.') diff --git a/tests/__init__.py b/tests/__init__.py index 82303c3..ccac997 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -133,6 +133,7 @@ 'user', 'wikibase', 'wikibase_edit', + 'wikiblame', 'wikistats', 'xmlreader' } diff --git a/tests/wikiblame_tests.py b/tests/wikiblame_tests.py new file mode 100644 index 0000000..06f59b2 --- /dev/null +++ b/tests/wikiblame_tests.py @@ -0,0 +1,36 @@ +"""Tests for the WikiHistoryMixin.""" +# +# (C) Pywikibot team, 2022 +# +# Distributed under the terms of the MIT license. +# +import unittest + +from contextlib import suppress + +import pywikibot + +from tests.aspects import TestCase + + +class TestWikiBlameMixin(TestCase): + + """Test WikiBlameMixin using nds wiki.""" + + family = 'wikipedia' + code = 'nds' + + def test_main_authors(self): + """Test main_authors() method.""" + page = pywikibot.Page(self.site, 'Python (Programmeerspraak)') + auth = page.main_authors(onlynew=False) + self.assertLessEqual(len(auth), 5) + self.assertLessEqual(sum(auth.values()), 100) + user, value = auth.most_common(1)[0] + self.assertEqual(user, 'RebeccaBreu') + self.assertGreater(value, 0) + + +if __name__ == '__main__': # pragma: no cover + with suppress(SystemExit): + unittest.main()