jenkins-bot submitted this change.

Approvals: Xqt: Looks good to me, approved; jenkins-bot: Verified
Revert "[cleanup] remove interwiki_graph.py"

This reverts commit 078cc3d719699169a01889b7f0add0ed2cdaea8c.

Bug: T278675
Change-Id: Ibedf155ca1d0620de407d9538f7897048ea21fde
---
M .travis.yml
M docs/api_ref/pywikibot.rst
M pywikibot/CONTENT.rst
M pywikibot/README.rst
A pywikibot/interwiki_graph.py
M requirements.txt
M setup.py
M tests/__init__.py
A tests/interwiki_graph_tests.py
M tox.ini
10 files changed, 330 insertions(+), 0 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 82ec627..37e5f18 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -132,6 +132,7 @@
apt:
packages:
- djvulibre-bin
+ - graphviz
- python: '3.6'
env: LANGUAGE=en FAMILY=wpbeta PYWIKIBOT_SITE_ONLY=1 OAUTH_DOMAIN="en.wikipedia.beta.wmflabs.org"
- python: '3.6'
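
Outside CI, the graphviz package added above (which provides the dot binary that pydot invokes via graph.write(..., prog='dot') in the restored module below) would come from the system package manager, e.g. sudo apt-get install graphviz on Debian/Ubuntu; the exact package name may differ on other distributions.
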
diff --git a/docs/api_ref/pywikibot.rst b/docs/api_ref/pywikibot.rst
index 131221c..05b3f0b 100644
--- a/docs/api_ref/pywikibot.rst
+++ b/docs/api_ref/pywikibot.rst
@@ -95,6 +95,11 @@

.. automodule:: pywikibot.i18n

+pywikibot.interwiki\_graph module
+---------------------------------
+
+.. automodule:: pywikibot.interwiki_graph
+
pywikibot.logentries module
---------------------------

diff --git a/pywikibot/CONTENT.rst b/pywikibot/CONTENT.rst
index eff397d..6ef9ee8 100644
--- a/pywikibot/CONTENT.rst
+++ b/pywikibot/CONTENT.rst
@@ -45,6 +45,8 @@
| i18n.py | Helper functions for both the internal translation |
| | system and for TranslateWiki-based translations |
+----------------------------+------------------------------------------------------+
+ | interwiki_graph.py | Create graphs of interwiki links for interwiki.py |
+ +----------------------------+------------------------------------------------------+
| logentries.py | Objects representing Mediawiki log entries |
+----------------------------+------------------------------------------------------+
| logging.py | Logging and output functions |
diff --git a/pywikibot/README.rst b/pywikibot/README.rst
index 82b2943..08b668f 100644
--- a/pywikibot/README.rst
+++ b/pywikibot/README.rst
@@ -15,6 +15,7 @@
.. include:: CONTENT.rst

**External software can be used with Pywikibot:**
+ * Pydot, Pyparsing and Graphviz for use with interwiki_graph.py
* PyMySQL to access MySQL database for use with pagegenerators.py
* google to access Google Web API for use with pagegenerators.py

diff --git a/pywikibot/interwiki_graph.py b/pywikibot/interwiki_graph.py
new file mode 100644
index 0000000..2acbf85
--- /dev/null
+++ b/pywikibot/interwiki_graph.py
@@ -0,0 +1,228 @@
+"""Module with the Graphviz drawing calls."""
+#
+# (C) Pywikibot team, 2006-2021
+#
+# Distributed under the terms of the MIT license.
+#
+import itertools
+import threading
+
+from collections import Counter
+from typing import Optional
+
+import pywikibot
+
+from pywikibot import config2 as config
+
+try:
+ import pydot
+except ImportError as e:
+ pydot = e
+
+
+class GraphImpossible(Exception):
+
+ """Drawing a graph is not possible on your system."""
+
+
+class GraphSavingThread(threading.Thread):
+
+ """
+ Threaded graph renderer.
+
+ Rendering a graph can take an extremely long time, so the
+ graph is rendered in a separate thread.
+
+ TODO: Find out if several threads running in parallel
+ can slow down the system too much. Consider adding a
+ mechanism to kill a thread if it takes too long.
+ """
+
+ def __init__(self, graph, origin):
+ """Initializer."""
+ super().__init__()
+ self.graph = graph
+ self.origin = origin
+
+ def run(self):
+ """Write graphs to the data directory."""
+ for fmt in config.interwiki_graph_formats:
+ filename = config.datafilepath(
+ 'interwiki-graphs/' + getFilename(self.origin, fmt))
+ if self.graph.write(filename, prog='dot', format=fmt):
+ pywikibot.output('Graph saved as ' + filename)
+ else:
+ pywikibot.output('Graph could not be saved as ' + filename)
+
+
+class Subject:
+
+ """Data about a page with translations on multiple wikis."""
+
+ def __init__(self, origin=None):
+ """Initializer.
+
+ @param origin: the page on the 'origin' wiki
+ @type origin: pywikibot.page.Page
+ """
+ # Remember the "origin page"
+ self._origin = origin
+
+ # found_in is a dictionary where pages are keys and lists of
+ # pages are values. It stores where we found each page.
+ # As we haven't yet found a page that links to the origin page, we
+ # start with an empty list for it.
+ self.found_in = {}
+ if origin:
+ self.found_in = {origin: []}
+
+ @property
+ def origin(self):
+ """Page on the origin wiki."""
+ return self._origin
+
+ @origin.setter
+ def origin(self, value):
+ self._origin = value
+
+
+class GraphDrawer:
+
+ """Graphviz (dot) code creator."""
+
+ def __init__(self, subject):
+ """Initializer.
+
+ @param subject: page data to graph
+ @type subject: pywikibot.interwiki_graph.Subject
+
+ @raises GraphImpossible: pydot is not installed
+ """
+ if isinstance(pydot, ImportError):
+ raise GraphImpossible('pydot is not installed: {}.'.format(pydot))
+ self.graph = None
+ self.subject = subject
+
+ def getLabel(self, page):
+ """Get label for page."""
+ return '"{}:{}"'.format(page.site.code, page.title())
+
+ def _octagon_site_set(self):
+ """Build a list of sites with more than one valid page."""
+ page_list = self.subject.found_in.keys()
+
+ # Only track sites of normal pages
+ each_site = (page.site for page in page_list
+ if page.exists() and not page.isRedirectPage())
+
+ return {x[0] for x in itertools.takewhile(
+ lambda x: x[1] > 1,
+ Counter(each_site).most_common())}
+
+ def addNode(self, page):
+ """Add a node for page."""
+ node = pydot.Node(self.getLabel(page), shape='rectangle')
+ node.set_URL('"http://%s%s"'
+ % (page.site.hostname(),
+ page.site.get_address(page.title(as_url=True))))
+ node.set_style('filled')
+ node.set_fillcolor('white')
+ node.set_fontsize('11')
+ if not page.exists():
+ node.set_fillcolor('red')
+ elif page.isRedirectPage():
+ node.set_fillcolor('blue')
+ elif page.isDisambig():
+ node.set_fillcolor('orange')
+ if page.namespace() != self.subject.origin.namespace():
+ node.set_color('green')
+ node.set_style('filled,bold')
+ if page.site in self.octagon_sites:
+ # mark conflict by octagonal node
+ node.set_shape('octagon')
+ self.graph.add_node(node)
+
+ def addDirectedEdge(self, page, refPage):
+ """Add a directed edge from refPage to page."""
+ # if page was given as a hint, referrers would be [None]
+ if refPage is not None:
+ sourceLabel = self.getLabel(refPage)
+ targetLabel = self.getLabel(page)
+ edge = pydot.Edge(sourceLabel, targetLabel)
+
+ oppositeEdge = self.graph.get_edge(targetLabel, sourceLabel)
+ if oppositeEdge:
+ oppositeEdge = oppositeEdge[0]
+ oppositeEdge.set_dir('both')
+ # workaround for sf.net bug 401: prevent duplicate edges
+ # (it is unclear why duplicate edges occur)
+ # https://sourceforge.net/p/pywikipediabot/bugs/401/
+ elif self.graph.get_edge(sourceLabel, targetLabel):
+ pywikibot.error(
+ 'Tried to create duplicate edge from {} to {}'
+ .format(refPage, page))
+ # duplicate edges would be bad because then get_edge() would
+ # give a list of edges, not a single edge when we handle the
+ # opposite edge.
+ else:
+ # add edge
+ if refPage.site == page.site:
+ edge.set_color('blue')
+ elif not page.exists():
+ # mark dead links
+ edge.set_color('red')
+ elif refPage.isDisambig() != page.isDisambig():
+ # mark links between disambiguation and non-disambiguation
+ # pages
+ edge.set_color('orange')
+ if refPage.namespace() != page.namespace():
+ edge.set_color('green')
+ self.graph.add_edge(edge)
+
+ def saveGraphFile(self):
+ """Write graphs to the data directory."""
+ thread = GraphSavingThread(self.graph, self.subject.origin)
+ thread.start()
+
+ def createGraph(self):
+ """
+ Create graph of the interwiki links.
+
+ For more info see U{https://meta.wikimedia.org/wiki/Interwiki_graphs}
+ """
+ pywikibot.output('Preparing graph for {}'
+ .format(self.subject.origin.title()))
+ # create empty graph
+ self.graph = pydot.Dot()
+ # self.graph.set('concentrate', 'true')
+
+ self.octagon_sites = self._octagon_site_set()
+
+ for page in self.subject.found_in.keys():
+ # a node for each found page
+ self.addNode(page)
+ # mark start node by pointing there from a black dot.
+ firstLabel = self.getLabel(self.subject.origin)
+ self.graph.add_node(pydot.Node('start', shape='point'))
+ self.graph.add_edge(pydot.Edge('start', firstLabel))
+ for page, referrers in self.subject.found_in.items():
+ for refPage in referrers:
+ self.addDirectedEdge(page, refPage)
+ self.saveGraphFile()
+
+
+def getFilename(page, extension: Optional[str] = None) -> str:
+ """
+ Create a filename that is unique for the page.
+
+ @param page: page used to create the new filename
+ @type page: pywikibot.page.Page
+ @param extension: file extension
+ @return: filename of <family>-<lang>-<page>.<ext>
+ """
+ filename = '-'.join((page.site.family.name,
+ page.site.code,
+ page.title(as_filename=True)))
+ if extension:
+ filename += '.{}'.format(extension)
+ return filename
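
For orientation, here is a minimal usage sketch of the restored module, mirroring the fixture data in tests/interwiki_graph_tests.py further down. The Wiktionary sites and the page title 'origin' are illustrative placeholders, and a working user config plus pydot and Graphviz are assumed:

    import pywikibot
    from pywikibot import interwiki_graph

    # Placeholder pages standing in for one entry and its interwiki counterparts.
    en = pywikibot.Page(pywikibot.Site('en', 'wiktionary'), 'origin')
    fr = pywikibot.Page(pywikibot.Site('fr', 'wiktionary'), 'origin')
    pl = pywikibot.Page(pywikibot.Site('pl', 'wiktionary'), 'origin')

    # Subject(origin) starts found_in as {origin: []}; each key is a found page,
    # each value the list of pages that linked to it.
    subject = interwiki_graph.Subject(en)
    subject.found_in[en] = [fr, pl]
    subject.found_in[fr] = [en, pl]
    subject.found_in[pl] = [en, fr]

    drawer = interwiki_graph.GraphDrawer(subject)  # raises GraphImpossible without pydot
    drawer.createGraph()  # builds the dot graph and writes it via GraphSavingThread
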
diff --git a/requirements.txt b/requirements.txt
index de654f8..0a0890d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,6 +29,9 @@
# about the user
mwoauth>=0.2.4,!=0.3.1

+# core interwiki_graph.py:
+pydot >= 1.2
+
# cosmetic_changes
python-stdnum >= 1.16

diff --git a/setup.py b/setup.py
index a8378f4..8858293 100644
--- a/setup.py
+++ b/setup.py
@@ -59,6 +59,7 @@
# Core library dependencies
'eventstreams': ['sseclient!=0.0.23,!=0.0.24,>=0.0.18'],
'isbn': ['python-stdnum>=1.16'],
+ 'Graphviz': ['pydot>=1.2'],
'Google': ['google>=1.7'],
'mwparserfromhell': ['mwparserfromhell>=0.3.3'],
'Tkinter': [ # vulnerability found in Pillow<6.2.2
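
With pydot declared both in requirements.txt and as the 'Graphviz' extra above, the optional dependency can also be pulled in through the standard extras syntax, e.g. pip install "pywikibot[Graphviz]" (assuming the pywikibot distribution name); the Graphviz binaries themselves still come from the system package, as in the .travis.yml hunk above.
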
diff --git a/tests/__init__.py b/tests/__init__.py
index db64b70..2602c96 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -91,6 +91,7 @@
'flow_thanks',
'http',
'i18n',
+ 'interwiki_graph',
'interwiki_link',
'link',
'linter',
diff --git a/tests/interwiki_graph_tests.py b/tests/interwiki_graph_tests.py
new file mode 100644
index 0000000..765a360
--- /dev/null
+++ b/tests/interwiki_graph_tests.py
@@ -0,0 +1,85 @@
+#!/usr/bin/python
+"""Test Interwiki Graph functionality."""
+#
+# (C) Pywikibot team, 2015-2021
+#
+# Distributed under the terms of the MIT license.
+#
+import unittest
+
+from contextlib import suppress
+
+from pywikibot import interwiki_graph
+
+from tests.aspects import require_modules, SiteAttributeTestCase
+from tests.utils import DryPage
+
+
+@require_modules('pydot')
+class TestWiktionaryGraph(SiteAttributeTestCase):
+
+ """Tests for interwiki links to local sites."""
+
+ sites = {
+ 'enwikt': {
+ 'family': 'wiktionary',
+ 'code': 'en',
+ },
+ 'frwikt': {
+ 'family': 'wiktionary',
+ 'code': 'fr',
+ },
+ 'plwikt': {
+ 'family': 'wiktionary',
+ 'code': 'pl',
+ },
+ }
+ dry = True
+
+ @classmethod
+ def setUpClass(cls):
+ """Setup test class."""
+ super(TestWiktionaryGraph, cls).setUpClass()
+
+ cls.pages = {
+ 'en': DryPage(cls.enwikt, 'origin'),
+ 'en2': DryPage(cls.enwikt, 'origin2'),
+ 'fr': DryPage(cls.frwikt, 'origin'),
+ 'pl': DryPage(cls.plwikt, 'origin'),
+ }
+
+ def setUp(self):
+ """Setup interwiki_graph data."""
+ super().setUp()
+ data = interwiki_graph.Subject(self.pages['en'])
+ data.found_in[self.pages['en']] = [self.pages['fr'], self.pages['pl']]
+ data.found_in[self.pages['fr']] = [self.pages['en'], self.pages['pl']]
+ data.found_in[self.pages['pl']] = [self.pages['en'], self.pages['fr']]
+ self.data = data
+
+ def test_simple_graph(self):
+ """Test that GraphDrawer.createGraph does not raise exception."""
+ drawer = interwiki_graph.GraphDrawer(self.data)
+ drawer.createGraph()
+
+ def test_octagon(self):
+ """Test octagon nodes."""
+ self.data.found_in[self.pages['en2']] = [self.pages['fr']]
+ drawer = interwiki_graph.GraphDrawer(self.data)
+
+ self.assertEqual({self.pages['en'].site}, drawer._octagon_site_set())
+
+ drawer.createGraph()
+ nodes = drawer.graph.obj_dict['nodes']
+
+ for node, shape in [('"pl:origin"', 'rectangle'),
+ ('"fr:origin"', 'rectangle'),
+ ('"en:origin"', 'octagon')]:
+ with self.subTest(node=node):
+ self.assertEqual(
+ nodes[node][0]['attributes']['shape'], shape)
+
+
+if __name__ == '__main__': # pragma: no cover
+ with suppress(SystemExit):
+ unittest.main()
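
Being a standard unittest module, the restored test file can be run on its own, e.g. python -m unittest -v tests.interwiki_graph_tests from the repository root (assuming pywikibot's usual test prerequisites are in place); the @require_modules('pydot') decorator skips the whole class when pydot is missing.
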
diff --git a/tox.ini b/tox.ini
index 14d7041..04b4e0c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -130,6 +130,9 @@
pywikibot/families/* : D102
pywikibot/family.py : N802, N803, N806, N815
pywikibot/fixes.py : E241
+ pywikibot/interwiki_graph.py : N802, N803, N806
+ pywikibot/logentries.py: N802
+ pywikibot/logging.py : N803
pywikibot/login.py: N802, N816
pywikibot/page/__init__.py: N802
pywikibot/page/_collections.py: N802
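
The added per-file ignores cover the camelCase names in the restored module (N802 function names, N803 argument names, N806 local variables), so a check such as flake8 pywikibot/interwiki_graph.py is not flooded with naming warnings for the historical API.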

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ibedf155ca1d0620de407d9538f7897048ea21fde
Gerrit-Change-Number: 675527
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged