jenkins-bot has submitted this change and it was merged.
Change subject: add scripts/interwikidata.py ......................................................................
add scripts/interwikidata.py
It's interwiki.py but for wikis which work with Wikibase.
Bug: T57025 Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8 --- A scripts/interwikidata.py A tests/interwikidata_tests.py 2 files changed, 319 insertions(+), 0 deletions(-)
Approvals: John Vandenberg: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/interwikidata.py b/scripts/interwikidata.py new file mode 100644 index 0000000..406a8de --- /dev/null +++ b/scripts/interwikidata.py @@ -0,0 +1,211 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +Script to handle interwiki links based on Wikibase. + +This script connects pages to Wikibase items using language links on the page. +If multiple language links are present, and they are connected to different +items, the bot skips. After connecting the page to an item, language links +can be removed from the page. + +These command line parameters can be used to specify which pages to work on: + +&params; + +Furthermore, the following command line parameters are supported: + +-clean Clean pages. + +-create Create items only. +""" + +# (C) Pywikibot team, 2015 +# +# Distributed under the terms of the MIT license. +# +from __future__ import unicode_literals, absolute_import + +__version__ = '$Id$' +# + +import pywikibot + +from pywikibot import pagegenerators, output, warning +from pywikibot.bot import ExistingPageBot, SingleSiteBot, suggest_help + +# This is required for the text that is shown when you run this script +# with the parameter -help. +docuReplacements = { + '&params;': pagegenerators.parameterHelp, +} + +# Allowed namespaces. 
main, project, template, category +namespaces = [0, 4, 10, 14] + +# TODO: Some templates on pages, like csd and afd templates, +# should cause the bot to skip the page + + +class IWBot(ExistingPageBot, SingleSiteBot): + + """The bot for interwiki.""" + + def __init__(self, generator, site, **kwargs): + """Construct the bot.""" + self.availableOptions.update({ + 'clean': False, + 'create': False, + 'summary': None, + 'ignore_ns': False + }) + super(IWBot, self).__init__(generator=generator, site=site, **kwargs) + if not self.site.has_data_repository: + raise ValueError('{site} does not have a data repository, ' + 'use interwiki.py instead.'.format( + site=self.site)) + self.repo = site.data_repository() + if not self.getOption('summary'): + self.options['summary'] = pywikibot.i18n.twtranslate( + site, 'interwikidata-clean-summary', fallback=True) + + def treat_page(self): + """Check page.""" + if (self.current_page.namespace() not in namespaces and + not self.getOption('ignore_ns')): + output('{page} is not in allowed namespaces, skipping' + .format(page=self.current_page.title( + asLink=True))) + return False + self.iwlangs = pywikibot.textlib.getLanguageLinks( + self.current_page.text, insite=self.current_page.site) + if not self.iwlangs: + output('No interlanguagelinks on {page}'.fromat( + page=self.current_page.title(asLink=True))) + return False + try: + item = pywikibot.ItemPage.fromPage(self.current_page) + except pywikibot.NoPage: + item = None + + if item is None: + item = self.try_to_add() + if self.getOption('create') and item is None: + item = self.create_item() + + self.current_item = item + if item and self.getOption('clean'): + self.clean_page() + + def create_item(self): + """Create item in repo for current_page.""" + data = {'sitelinks': + {self.site.dbName(): + {'site': self.site.dbName(), + 'title': self.current_page.title()} + }, + 'labels': + {self.site.lang: + {'language': self.site.lang, + 'value': self.current_page.title()} + } + } + summary 
= (u'Bot: New item with sitelink from %s' + % self.current_page.title(asLink=True, insite=self.repo)) + + item = pywikibot.ItemPage(self.repo) + item.editEntity(data, new='item', summary=summary) + output('Created item {item}'.format(item=item.getID())) + return item + + def handle_complicated(self): + """ + Handle pages when they have interwiki conflict. + + When this method returns True it means conflict has resolved + and it's okay to clean old interwiki links. + This method should change self.current_item and fix conflicts. + Change it in subclasses. + """ + return False + + def clean_page(self): + """Clean interwiki links from the page.""" + if not self.iwlangs: + return + dbnames = [iw_site.dbName() for iw_site in self.iwlangs] + if set(dbnames) < set(self.current_item.sitelinks.keys()): + if not self.handle_complicated(): + warning('Interwiki conflict in %s, skipping...' % + self.current_page.title(asLink=True)) + return False + output('Cleaning up the page') + new_text = pywikibot.textlib.removeLanguageLinks( + self.current_page.text, site=self.current_page.site) + self.put_current(new_text, summary=self.getOption('summary')) + + def try_to_add(self): + """Add current page in repo.""" + wd_data = set() + for iw_page in self.iwlangs.values(): + try: + wd_data.add(pywikibot.ItemPage.fromPage(iw_page)) + except pywikibot.NoPage: + warning('Interwiki %s does not exist, skipping...' % + iw_page.title(asLink=True)) + continue + except pywikibot.InvalidTitle: + warning('Invalid title %s, skipping...' % + iw_page.title(asLink=True)) + continue + if len(wd_data) != 1: + warning('Interwiki conflict in %s, skipping...' % + self.current_page.title(asLink=True)) + return False + item = list(wd_data).pop() + output('Adding link to %s' % item.title()) + item.setSitelink(self.current_page) + return item + + +def main(*args): + """ + Process command line arguments and invoke bot. + + If args is an empty list, sys.argv is used. 
+ + @param args: command line arguments + @type args: list of unicode + """ + generator = None + clean = False + create = False + always = False + local_args = pywikibot.handle_args(args) + genFactory = pagegenerators.GeneratorFactory() + options = {} + for arg in local_args: + if arg == '-clean': + clean = True + elif arg == '-create': + create = True + elif arg.startswith('-always'): + always = True + else: + genFactory.handleArg(arg) + + site = pywikibot.Site() + + options = {'always': always, 'create': create, 'clean': clean} + if not generator: + generator = genFactory.getCombinedGenerator() + if generator: + generator = pagegenerators.PreloadingGenerator(generator) + bot = IWBot(generator, site, **options) + bot.run() + else: + suggest_help(missing_generator=True) + return False + + +if __name__ == '__main__': + main() diff --git a/tests/interwikidata_tests.py b/tests/interwikidata_tests.py new file mode 100644 index 0000000..81c94d1 --- /dev/null +++ b/tests/interwikidata_tests.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +"""Tests for scripts/interwikidata.py.""" +# +# (C) Pywikibot team, 2015 +# +# Distributed under the terms of the MIT license. 
+# +from __future__ import unicode_literals, absolute_import + +__version__ = '$Id$' + +import pywikibot + +from scripts import interwikidata +from pywikibot import Link + +from tests.aspects import unittest, SiteAttributeTestCase + + +class DummyBot(interwikidata.IWBot): + + """A dummy bot to prevent editing in production wikis.""" + + def __init__(self, *args, **kwargs): + """Initiate the class.""" + super(DummyBot, self).__init__(*args, **kwargs) + + def put_current(self): + """Prevent editing.""" + return False + + def create_item(self): + """Prevent creating items.""" + return False + + def try_to_add(self): + """Prevent adding sitelinks to items.""" + return None + + +class TestInterwikidataBot(SiteAttributeTestCase): + + """Test Interwikidata.""" + + sites = { + 'en': { + 'family': 'wikipedia', + 'code': 'en', + }, + 'fa': { + 'family': 'wikipedia', + 'code': 'fa', + }, + 'wt': { + 'family': 'wiktionary', + 'code': 'en', + }, + } + + def test_main(self): + """Test main function interwikidata.py.""" + # The main function should return False when no generator is defined. + self.assertFalse(interwikidata.main()) + + def test_iw_bot(self): + """Test IWBot class.""" + page = pywikibot.Page(self.en, 'User:Ladsgroup') + text = page.get() + + # The page looks as excpected. + self.assertEqual(len(page.langlinks()), 1) + iw_link = page.langlinks()[0] + self.assertIsInstance(iw_link, Link) + self.assertEqual(iw_link.canonical_title(), 'کاربر:Ladsgroup') + self.assertEqual(iw_link.site, self.fa) + + repo = self.en.data_repository() + bot = DummyBot(generator=[page], site=self.en, ignore_ns=True) + bot.run() + + # Repo and site should not change during a run. + self.assertEqual(bot.repo, repo) + self.assertEqual(bot.site, self.en) + + # Test iwlangs method. 
+ self.assertIn(self.fa, bot.iwlangs) + self.assertEqual(Link.fromPage(bot.iwlangs[self.fa]), iw_link) + + page2 = pywikibot.Page(self.en, 'User:Ladsgroup') + self.assertEqual(page2.get(), text) + + self.assertFalse(bot.handle_complicated()) + + def test_without_repo(self): + """Test throwing error when site does not have a data repo.""" + wt_page = pywikibot.Page(self.wt, 'User:Ladsgroup') + self.assertRaises(ValueError, DummyBot, generator=[wt_page], site=self.wt) + + self.assertRaises(ValueError, interwikidata.main, + '-page:User:Ladsgroup', '-lang:fa', + '-family:wiktionary') + + +if __name__ == '__main__': + try: + unittest.main() + except SystemExit: + pass
pywikibot-commits@lists.wikimedia.org