jenkins-bot submitted this change.

View Change

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Add WikiBlame support to Pywikibot

- add main_authors() method to Page class (via WikiBlameMixin) which
gives the 5 topmost editors based on the current blamed text
- provide some tests

Change-Id: I25f1b23cfaa88f02c1721a7032da4938da1777b7
---
M docs/api_ref/pywikibot.page.rst
M pywikibot/CONTENT.rst
M pywikibot/page/_pages.py
A pywikibot/page/_toolforge.py
M tests/__init__.py
A tests/wikiblame_tests.py
6 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/docs/api_ref/pywikibot.page.rst b/docs/api_ref/pywikibot.page.rst
index 18e7eb7..f145976 100644
--- a/docs/api_ref/pywikibot.page.rst
+++ b/docs/api_ref/pywikibot.page.rst
@@ -24,3 +24,9 @@

.. automodule:: page._revision
:synopsis: Object representing page revision
+
+:mod:`page.\_toolforge` module
+---------------------------------
+
+.. automodule:: page._toolforge
+ :synopsis: Object representing interface to toolforge tools
\ No newline at end of file
diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst
index 88fb524..096ce26 100644
--- a/pywikibot/CONTENT.rst
+++ b/pywikibot/CONTENT.rst
@@ -130,6 +130,8 @@
+----------------------------+------------------------------------------------------+
| _revision.py | Object representing page revision |
+----------------------------+------------------------------------------------------+
+ | _toolforge.py | BasePage interface to toolforge tools |
+ +----------------------------+------------------------------------------------------+
| _user.py | Object representing a wiki user |
+----------------------------+------------------------------------------------------+
| _wikibase.py | Objects representing wikibase structures |
diff --git a/pywikibot/page/_pages.py b/pywikibot/page/_pages.py
index 1ead289..cc11304 100644
--- a/pywikibot/page/_pages.py
+++ b/pywikibot/page/_pages.py
@@ -49,6 +49,7 @@
)
from pywikibot.page._decorators import allow_asynchronous
from pywikibot.page._links import BaseLink, Link
+from pywikibot.page._toolforge import WikiBlameMixin
from pywikibot.site import Namespace, NamespaceArgType
from pywikibot.tools import (
ComparableMixin,
@@ -2149,7 +2150,7 @@
return link


-class Page(BasePage):
+class Page(BasePage, WikiBlameMixin):

"""Page: A MediaWiki page."""

diff --git a/pywikibot/page/_toolforge.py b/pywikibot/page/_toolforge.py
new file mode 100644
index 0000000..4dccfdb
--- /dev/null
+++ b/pywikibot/page/_toolforge.py
@@ -0,0 +1,110 @@
+"""Object representing interface to toolforge tools.
+
+.. versionadded:: 7.7
+"""
+#
+# (C) Pywikibot team, 2022
+#
+# Distributed under the terms of the MIT license.
+#
+import collections
+import re
+
+from typing import Optional
+
+import pywikibot
+
+from pywikibot import config
+
+
+class WikiBlameMixin:
+
+ """Page mixin for main authorship.
+
+ .. versionadded:: 7.7
+ """
+
+ #: Supported wikipedia site codes
+ WIKIBLAME_CODES = 'als', 'bar', 'de', 'en', 'it', 'nds', 'sco'
+
+ def _check_wh_supported(self):
+ """Check if WikiHistory is supported."""
+ if self.site.family.name != 'wikipedia':
+ raise NotImplementedError(
+ 'main_authors method is implemented for wikipedia family only')
+
+ if self.site.code not in self.WIKIBLAME_CODES:
+ raise NotImplementedError(
+ 'main_authors method is not implemented for wikipedia:{}'
+ .format(self.site.code))
+
+ if self.namespace() != pywikibot.site.Namespace.MAIN:
+ raise NotImplementedError(
+ 'main_authors method is implemented for main namespace only')
+
+ if not self.exists():
+ raise pywikibot.exceptions.NoPageError(self)
+
+ def main_authors(self, *,
+ onlynew: Optional[bool] = None) -> collections.Counter:
+ """Retrieve the 5 topmost main authors of an article.
+
+ This method uses WikiHistory to retrieve the text based main
+ authorship.
+
+ Sample:
+
+ >>> import pywikibot
+ >>> site = pywikibot.Site('wikipedia:nds')
+ >>> page = pywikibot.Page(site, 'Python (Programmeerspraak)')
+ >>> auth = page.main_authors(onlynew=False)
+ >>> auth
+ Counter({'RebeccaBreu': 99, 'Slomox': 1})
+
+ .. note:: Only implemented for main namespace pages.
+ .. note:: Only wikipedias of :attr:`WIKIBLAME_CODES` are supported.
+ .. seealso::
+ - https://wikihistory.toolforge.org
+ - https://de.wikipedia.org/wiki/Wikipedia:Technik/Cloud/wikihistory
+
+ :param onlynew: If False, use the cached values. If True,
+ calculate the Counter data which can take some time; it may
+ fail with TimeoutError after ``config.max_retries``. If None
+ it calculates new data like for True but uses data from
+ cache if new data cannot be calculated in the meantime.
+ :return: Authorship percentage for each username
+ :raise NotImplementedError: unsupported site or unsupported namespace
+ :raise pywikibot.exceptions.NoPageError: The page does not exist
+ :raise pywikibot.exceptions.TimeoutError: Maximum retries exceeded
+ """
+ baseurl = 'https://wikihistory.toolforge.org'
+ pattern = (r'><bdi>(?P<author>.+?)</bdi></a>\s'
+ r'\((?P<percent>\d{1,3})&')
+
+ self._check_wh_supported()
+
+ url = baseurl + '/wiki/getauthors.php?wiki={}wiki&page_id={}'.format(
+ self.site.code, self.pageid)
+ if onlynew:
+ url += '&onlynew=1'
+
+ for current_retries in range(config.max_retries):
+ r = pywikibot.comms.http.fetch(url)
+ if r.status_code != 200:
+ r.raise_for_status()
+
+ if 'Timeout' not in r.text: # window.setTimeout in result
+ return collections.Counter(
+ {user: int(cnt)
+ for user, cnt in re.findall(pattern, r.text)})
+
+ delay = pywikibot.config.retry_wait * 2 ** current_retries
+ pywikibot.warning('WikiHistory timeout.\n'
+ 'Waiting {:.1f} seconds before retrying.'
+ .format(delay))
+ pywikibot.sleep(delay)
+ if onlynew is None and current_retries >= config.max_retries - 2:
+ url += '&onlynew=1'
+
+ raise pywikibot.exceptions.TimeoutError(
+ 'Maximum retries attempted without success.')
diff --git a/tests/__init__.py b/tests/__init__.py
index 82303c3..ccac997 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -133,6 +133,7 @@
'user',
'wikibase',
'wikibase_edit',
+ 'wikiblame',
'wikistats',
'xmlreader'
}
diff --git a/tests/wikiblame_tests.py b/tests/wikiblame_tests.py
new file mode 100644
index 0000000..06f59b2
--- /dev/null
+++ b/tests/wikiblame_tests.py
@@ -0,0 +1,36 @@
+"""Tests for the WikiBlameMixin."""
+#
+# (C) Pywikibot team, 2022
+#
+# Distributed under the terms of the MIT license.
+#
+import unittest
+
+from contextlib import suppress
+
+import pywikibot
+
+from tests.aspects import TestCase
+
+
+class TestWikiBlameMixin(TestCase):
+
+ """Test WikiBlameMixin using nds wiki."""
+
+ family = 'wikipedia'
+ code = 'nds'
+
+ def test_main_authors(self):
+ """Test main_authors() method."""
+ page = pywikibot.Page(self.site, 'Python (Programmeerspraak)')
+ auth = page.main_authors(onlynew=False)
+ self.assertLessEqual(len(auth), 5)
+ self.assertLessEqual(sum(auth.values()), 100)
+ user, value = auth.most_common(1)[0]
+ self.assertEqual(user, 'RebeccaBreu')
+ self.assertGreater(value, 0)
+
+
+if __name__ == '__main__': # pragma: no cover
+ with suppress(SystemExit):
+ unittest.main()

To view, visit change 699536. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I25f1b23cfaa88f02c1721a7032da4938da1777b7
Gerrit-Change-Number: 699536
Gerrit-PatchSet: 21
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: JJMC89 <JJMC89.Wikimedia@gmail.com>
Gerrit-Reviewer: Wurgl <heisewurgl@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: DannyS712 <dannys712.wiki@gmail.com>
Gerrit-CC: Meno25 <meno25mail@gmail.com>
Gerrit-MessageType: merged