jenkins-bot has submitted this change and it was merged.
Change subject: add scripts/interwikidata.py
......................................................................
add scripts/interwikidata.py
It's interwiki.py but for wikis which work with Wikibase.
Bug: T57025
Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8
---
A scripts/interwikidata.py
A tests/interwikidata_tests.py
2 files changed, 319 insertions(+), 0 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/interwikidata.py b/scripts/interwikidata.py
new file mode 100644
index 0000000..406a8de
--- /dev/null
+++ b/scripts/interwikidata.py
@@ -0,0 +1,211 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+Script to handle interwiki links based on Wikibase.
+
+This script connects pages to Wikibase items using language links on the page.
+If multiple language links are present, and they are connected to different
+items, the bot skips. After connecting the page to an item, language links
+can be removed from the page.
+
+These command line parameters can be used to specify which pages to work on:
+
+&params;
+
+Furthermore, the following command line parameters are supported:
+
+-clean            Clean pages.
+
+-create           Create items only.
+"""
+
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import unicode_literals, absolute_import
+
+__version__ = '$Id$'
+#
+
+import pywikibot
+
+from pywikibot import pagegenerators, output, warning
+from pywikibot.bot import ExistingPageBot, SingleSiteBot, suggest_help
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+# The key has to match the placeholder used in the module docstring above
+# character for character, otherwise the generator help is not substituted.
+docuReplacements = {
+    '&params;': pagegenerators.parameterHelp,
+}
+
+# Allowed namespaces. main, project, template, category
+namespaces = [0, 4, 10, 14]
+
+# TODO: Some templates on pages, like csd and afd templates,
+# should cause the bot to skip the page
+
+
+class IWBot(ExistingPageBot, SingleSiteBot):
+
+    """
+    The bot for interwiki.
+
+    For each page it reads the language links from the page text, looks up
+    the Wikibase item of the page (attaching the page to the item shared by
+    the linked pages, or creating a new item with the 'create' option) and,
+    with the 'clean' option, removes the language links from the page.
+    """
+
+    def __init__(self, generator, site, **kwargs):
+        """
+        Construct the bot.
+
+        @param generator: pages to work on
+        @param site: site the bot works on; it must have a data repository
+        @param kwargs: bot options; this class adds 'clean', 'create',
+            'summary' and 'ignore_ns'
+        @raises ValueError: the site does not have a data repository
+        """
+        self.availableOptions.update({
+            'clean': False,
+            'create': False,
+            'summary': None,
+            'ignore_ns': False
+        })
+        super(IWBot, self).__init__(generator=generator, site=site, **kwargs)
+        if not self.site.has_data_repository:
+            raise ValueError('{site} does not have a data repository, '
+                             'use interwiki.py instead.'.format(
+                                 site=self.site))
+        self.repo = site.data_repository()
+        # Fall back to the translated default summary when none was given.
+        if not self.getOption('summary'):
+            self.options['summary'] = pywikibot.i18n.twtranslate(
+                site, 'interwikidata-clean-summary', fallback=True)
+
+    def treat_page(self):
+        """
+        Check page.
+
+        Sets self.iwlangs to the language links found in the page text and
+        self.current_item to the item the page ended up connected to (or a
+        false value when there is none).
+
+        @return: False when the page is skipped, None otherwise
+        """
+        if (self.current_page.namespace() not in namespaces and
+                not self.getOption('ignore_ns')):
+            output('{page} is not in allowed namespaces, skipping'
+                   .format(page=self.current_page.title(
+                       asLink=True)))
+            return False
+        self.iwlangs = pywikibot.textlib.getLanguageLinks(
+            self.current_page.text, insite=self.current_page.site)
+        if not self.iwlangs:
+            output('No interlanguagelinks on {page}'.format(
+                page=self.current_page.title(asLink=True)))
+            return False
+        try:
+            item = pywikibot.ItemPage.fromPage(self.current_page)
+        except pywikibot.NoPage:
+            item = None
+
+        if item is None:
+            item = self.try_to_add()
+            # try_to_add() gives None when there is no item to attach to and
+            # False on a conflict; only the former may lead to a new item.
+            if self.getOption('create') and item is None:
+                item = self.create_item()
+
+        self.current_item = item
+        if item and self.getOption('clean'):
+            self.clean_page()
+
+    def create_item(self):
+        """
+        Create item in repo for current_page.
+
+        The new item gets a sitelink to the current page and a label in the
+        site's language, both set to the page title.
+
+        @return: the item that was created
+        """
+        data = {'sitelinks':
+                {self.site.dbName():
+                 {'site': self.site.dbName(),
+                  'title': self.current_page.title()}
+                 },
+                'labels':
+                {self.site.lang:
+                 {'language': self.site.lang,
+                  'value': self.current_page.title()}
+                 }
+                }
+        summary = (u'Bot: New item with sitelink from %s'
+                   % self.current_page.title(asLink=True, insite=self.repo))
+
+        item = pywikibot.ItemPage(self.repo)
+        item.editEntity(data, new='item', summary=summary)
+        output('Created item {item}'.format(item=item.getID()))
+        return item
+
+    def handle_complicated(self):
+        """
+        Handle pages when they have interwiki conflict.
+
+        When this method returns True it means the conflict has been
+        resolved and it's okay to clean old interwiki links.
+        This method should change self.current_item and fix conflicts.
+        Change it in subclasses.
+
+        @return: always False in this class
+        """
+        return False
+
+    def clean_page(self):
+        """
+        Clean interwiki links from the page.
+
+        The links are only removed when every linked site is already a
+        sitelink of self.current_item, or when handle_complicated() reports
+        that the conflict has been resolved.
+
+        @return: False on an unresolved conflict, None otherwise
+        """
+        if not self.iwlangs:
+            return
+        page_dbnames = set(iw_site.dbName() for iw_site in self.iwlangs)
+        # Links to sites the item does not know about would be lost by the
+        # clean up, so those (and only those) are a conflict.
+        not_in_item = page_dbnames - set(self.current_item.sitelinks.keys())
+        if not_in_item and not self.handle_complicated():
+            warning('Interwiki conflict in %s, skipping...' %
+                    self.current_page.title(asLink=True))
+            return False
+        output('Cleaning up the page')
+        new_text = pywikibot.textlib.removeLanguageLinks(
+            self.current_page.text, site=self.current_page.site)
+        self.put_current(new_text, summary=self.getOption('summary'))
+
+    def try_to_add(self):
+        """
+        Add current page in repo.
+
+        Collects the items of all pages linked from the current page. If
+        they all share one item, the current page is added to it as sitelink.
+
+        @return: the item the page was added to, None when none of the
+            linked pages has an item, False when they belong to more than
+            one item
+        """
+        wd_data = set()
+        for iw_page in self.iwlangs.values():
+            try:
+                wd_data.add(pywikibot.ItemPage.fromPage(iw_page))
+            except pywikibot.NoPage:
+                warning('Interwiki %s does not exist, skipping...' %
+                        iw_page.title(asLink=True))
+                continue
+            except pywikibot.InvalidTitle:
+                warning('Invalid title %s, skipping...' %
+                        iw_page.title(asLink=True))
+                continue
+        if not wd_data:
+            # Nothing to attach to. This is not a conflict: report it as
+            # None so that treat_page() can create an item if asked to.
+            return None
+        if len(wd_data) > 1:
+            warning('Interwiki conflict in %s, skipping...' %
+                    self.current_page.title(asLink=True))
+            return False
+        item = list(wd_data).pop()
+        output('Adding link to %s' % item.title())
+        item.setSitelink(self.current_page)
+        return item
+
+
+def main(*args):
+    """
+    Process command line arguments and invoke bot.
+
+    If args is an empty list, sys.argv is used.
+
+    @param args: command line arguments
+    @type args: list of unicode
+    @return: False when no page generator was specified, None otherwise
+    """
+    # Every switch is off unless it shows up on the command line.
+    options = {'always': False, 'create': False, 'clean': False}
+    local_args = pywikibot.handle_args(args)
+    genFactory = pagegenerators.GeneratorFactory()
+    for arg in local_args:
+        if arg == '-clean':
+            options['clean'] = True
+        elif arg == '-create':
+            options['create'] = True
+        elif arg.startswith('-always'):
+            options['always'] = True
+        else:
+            # Anything else is left to the page generator factory.
+            genFactory.handleArg(arg)
+
+    site = pywikibot.Site()
+
+    generator = genFactory.getCombinedGenerator()
+    if generator:
+        generator = pagegenerators.PreloadingGenerator(generator)
+        bot = IWBot(generator, site, **options)
+        bot.run()
+    else:
+        suggest_help(missing_generator=True)
+        return False
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/interwikidata_tests.py b/tests/interwikidata_tests.py
new file mode 100644
index 0000000..81c94d1
--- /dev/null
+++ b/tests/interwikidata_tests.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+"""Tests for scripts/interwikidata.py."""
+#
+# (C) Pywikibot team, 2015
+#
+# Distributed under the terms of the MIT license.
+#
+from __future__ import unicode_literals, absolute_import
+
+__version__ = '$Id$'
+
+import pywikibot
+
+from scripts import interwikidata
+from pywikibot import Link
+
+from tests.aspects import unittest, SiteAttributeTestCase
+
+
+class DummyBot(interwikidata.IWBot):
+
+    """A dummy bot to prevent editing in production wikis."""
+
+    def put_current(self, *args, **kwargs):
+        """
+        Prevent editing.
+
+        IWBot.clean_page calls this with the new text and a summary; they
+        are accepted and ignored so that the call is a no-op rather than a
+        TypeError.
+        """
+        return False
+
+    def create_item(self):
+        """Prevent creating items."""
+        return False
+
+    def try_to_add(self):
+        """Prevent adding sitelinks to items."""
+        return None
+
+
+class TestInterwikidataBot(SiteAttributeTestCase):
+
+    """Tests for the interwikidata script and its bot class."""
+
+    # 'en' and 'fa' are Wikipedia sites; 'wt' is the English Wiktionary,
+    # used by the test for a site without a data repository.
+    sites = {
+        'en': {'family': 'wikipedia', 'code': 'en'},
+        'fa': {'family': 'wikipedia', 'code': 'fa'},
+        'wt': {'family': 'wiktionary', 'code': 'en'},
+    }
+
+    def test_main(self):
+        """Check main() when it is called without a page generator."""
+        # With no generator given there is nothing to run, and main() is
+        # expected to hand back a false value.
+        result = interwikidata.main()
+        self.assertFalse(result)
+
+    def test_iw_bot(self):
+        """Run DummyBot over one page and inspect what it recorded."""
+        user_page = pywikibot.Page(self.en, 'User:Ladsgroup')
+        text_before = user_page.get()
+
+        # The page looks as expected: a single language link to fa.
+        self.assertEqual(len(user_page.langlinks()), 1)
+        iw_link = user_page.langlinks()[0]
+        self.assertIsInstance(iw_link, Link)
+        self.assertEqual(iw_link.canonical_title(), 'کاربر:Ladsgroup')
+        self.assertEqual(iw_link.site, self.fa)
+
+        expected_repo = self.en.data_repository()
+        bot = DummyBot(generator=[user_page], site=self.en, ignore_ns=True)
+        bot.run()
+
+        # Neither the repo nor the site may change during a run.
+        self.assertEqual(bot.repo, expected_repo)
+        self.assertEqual(bot.site, self.en)
+
+        # iwlangs is keyed by site and holds the linked page.
+        self.assertIn(self.fa, bot.iwlangs)
+        linked_page = bot.iwlangs[self.fa]
+        self.assertEqual(Link.fromPage(linked_page), iw_link)
+
+        # A fresh Page object still returns the text read before the run.
+        fresh_page = pywikibot.Page(self.en, 'User:Ladsgroup')
+        self.assertEqual(fresh_page.get(), text_before)
+
+        self.assertFalse(bot.handle_complicated())
+
+    def test_without_repo(self):
+        """Check that a site without a data repo raises ValueError."""
+        wt_page = pywikibot.Page(self.wt, 'User:Ladsgroup')
+        bot_kwargs = {'generator': [wt_page], 'site': self.wt}
+        self.assertRaises(ValueError, DummyBot, **bot_kwargs)
+
+        cli_args = ('-page:User:Ladsgroup', '-lang:fa', '-family:wiktionary')
+        self.assertRaises(ValueError, interwikidata.main, *cli_args)
+
+
+if __name__ == '__main__':
+    # unittest.main() finishes by raising SystemExit; it is swallowed here.
+    try:
+        unittest.main()
+    except SystemExit:
+        pass
--
To view, visit
https://gerrit.wikimedia.org/r/227454
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibbb7047d7e6be7b997577b2ea5d662bd6a361af8
Gerrit-PatchSet: 39
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)openmailbox.org>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>