jenkins-bot has submitted this change and it was merged.
Change subject: Add compare method using mediawiki's action=compare.
Add compare method using mediawiki's action=compare.
This method queries the server to get the difference between any two revisions and returns an HTML string. The html_comparator function in the diff module then parses this result.
Change-Id: If776435e96a88bd4bee56a6af1f74e759370b7f5 --- M pywikibot/diff.py M pywikibot/site.py M requirements.txt 3 files changed, 74 insertions(+), 0 deletions(-)
Approvals: XZise: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/diff.py b/pywikibot/diff.py index 09fdfdf..9bde69f 100644 --- a/pywikibot/diff.py +++ b/pywikibot/diff.py @@ -14,6 +14,10 @@ from itertools import zip_longest else: from itertools import izip_longest as zip_longest +try: + from bs4 import BeautifulSoup +except ImportError as bserror: + BeautifulSoup = False
import pywikibot from pywikibot.backports import format_range_unified # introduced in 2.7.2 @@ -347,3 +351,29 @@ text = ''.join(text_list)
return text + + +def html_comparator(compare_string): + """List of added and deleted contexts from 'action=compare' html string. + + This function is useful when combineds with site.py's "compare" method. + Site.compare() returns HTML that is useful for displaying on a page. + Here we use BeautifulSoup to get the un-HTML-ify the context of changes. + Finally we present the added and deleted contexts. + @param compare_string: HTML string from mediawiki API + @type compare_string: str + @return: deleted and added list of contexts + @rtype: dict + """ + # check if BeautifulSoup imported + if not BeautifulSoup: + raise bserror # should have been raised and stored earlier. + + comparands = {'deleted-context': [], 'added-context': []} + soup = BeautifulSoup(compare_string) + for change_type, css_class in (('deleted-context', 'diff-deletedline'), ('added-context', 'diff-addedline')): + crutons = soup.find_all('td', class_=css_class) + for cruton in crutons: + cruton_string = ''.join(cruton.strings) + comparands[change_type].append(cruton_string) + return comparands diff --git a/pywikibot/site.py b/pywikibot/site.py index a6ed308..222145d 100644 --- a/pywikibot/site.py +++ b/pywikibot/site.py @@ -5372,6 +5372,47 @@ return self.allpages(namespace=namespaces[0], protect_level=level, protect_type=type, total=total)
+ @need_version("1.18") + def compare(self, old, diff): + """Corresponding method to the 'action=compare' API action. + + See: https://en.wikipedia.org/w/api.php?action=help&modules=compare + Use pywikibot.diff's html_comparator() method to parse result. + @param old: starting revision ID, title, Page, or Revision + @type old: int, str, pywikibot.Page, or pywikibot.Page.Revision + @param diff: ending revision ID, title, Page, or Revision + @type diff: int, str, pywikibot.Page, or pywikibot.Page.Revision + @return: Returns an HTML string of a diff between two revisions. + @rtype: str + """ + # check old and diff types + def get_param(item): + if isinstance(item, basestring): + return 'title', item + elif isinstance(item, pywikibot.Page): + return 'title', item.title() + elif isinstance(item, int): + return 'rev', item + elif isinstance(item, pywikibot.page.Revision): + return 'rev', item.revid + else: + return None + + old = get_param(old) + if not old: + raise TypeError('old parameter is of invalid type') + diff = get_param(diff) + if not diff: + raise TypeError('diff parameter is of invalid type') + + params = {'from{0}'.format(old[0]): old[1], + 'to{0}'.format(diff[0]): diff[1]} + + req = api.Request(site=self, action='compare', **params) + data = req.submit() + comparison = data['compare']['*'] + return comparison +
class DataSite(APISite):
diff --git a/requirements.txt b/requirements.txt index f537b59..e0f5680 100644 --- a/requirements.txt +++ b/requirements.txt @@ -59,3 +59,6 @@
# scripts/script_wui.py depends on Lua, which is not available using pip # but can be obtained from: https://github.com/bastibe/lunatic-python + +# core HTML comparison parser in diff module +beautifulsoup4
pywikibot-commits@lists.wikimedia.org