jenkins-bot submitted this change.

View Change


Approvals: Costa Shul: Looks good to me, approved jenkins-bot: Verified
Category graph generator

uses Graphviz
provides graph in formats dot, svg and html5

Change-Id: I3991a6d311686b66f1b0274c527606c290aa22e9
---
A scripts/category_graph.py
1 file changed, 202 insertions(+), 0 deletions(-)

diff --git a/scripts/category_graph.py b/scripts/category_graph.py
new file mode 100755
index 0000000..4813a3b
--- /dev/null
+++ b/scripts/category_graph.py
@@ -0,0 +1,190 @@
+#!/usr/bin/python3
+
+"""
+Visualizes category hierarchy.
+
+Generates graphical representation in formats dot, svg and html5
+of category hierarchy.
+
+usage: pwb.py graph [-style STYLE] [-depth DEPTH] [-from FROM] [-to TO]
+
+actions:
+ -from [FROM] Category name to scan, default is main category, "?" to ask.
+
+optional arguments:
+ -to TO base file name to save, "?" to ask.
+ -style STYLE graphviz style definitions in dot format.
+ -depth DEPTH maximal hierarchy depth. 2 by default.
+
+Examples:
+
+pwb.py -v graph category
+pwb.py -v graph category Life --style rankdir=BT
+"""
+
+import argparse
+import io
+from collections import defaultdict
+
+import pywikibot
+from pywikibot import config
+from pywikibot.bot import SingleSiteBot, suggest_help
+
+
+class CategoryGraphBot(SingleSiteBot):
+ """Bot to create graph of the category structure."""
+
+ def args(self, ap):
+ """Declares arguments."""
+ ap.add_argument('-from', nargs='?', default=argparse.SUPPRESS)
+ ap.add_argument('-to', nargs='?', default='')
+ ap.add_argument('-style', nargs='?', default='')
+ ap.add_argument('-depth', nargs='?', default=2)
+
+ def __init__(self, ap, args: argparse.Namespace) -> None:
+ """Initializer."""
+ super().__init__()
+ self.args = args
+ cat_title = args.__dict__.get('from')
+ if not cat_title:
+ cat_title = 'Main topic classifications'
+ if cat_title == '?':
+ cat_title = pywikibot.input(
+ 'For which category do you want to create a graph?')
+ pywikibot.output('Scanning "{}"'.format(cat_title))
+ self.cat = pywikibot.Category(self.site, cat_title)
+ self.to = args.to
+ if self.to == '?':
+ self.to = pywikibot.input(
+ 'Please enter the name of the file '
+ 'where the tree should be saved,\n'
+ 'or press enter to use category name:')
+ if not self.to:
+ self.to = cat_title.replace(' ', '_')
+ self.rev = defaultdict(list)
+ self.fw = defaultdict(list)
+ self.leaves = set()
+ self.counter = 0
+ font = 'fontname="Helvetica,Arial,sans-serif"'
+ style = 'rankdir=LR ranksep=2 concentrate=true ' + font + \
+ 'node [newrank=true shape=plaintext ' + font + ']' \
+ 'edge [arrowhead=open labeldistance=3 ' \
+ 'labelfontcolor="#00000080" ' + font + '] ' \
+ + args.style
+ self.dot = pydot.graph_from_dot_data('digraph {' + style + '}')[0]
+ self.dot.set_name('"' + cat_title + '"')
+
+ def scan_level(self, cat, level, hue=None) -> str:
+ """
+ Recursive function to fill dot graph.
+
+ Parameters:
+ * cat - the Category of the node we're currently opening.
+ * level - the current decreasing from depth to zero
+ level in the tree (for recursion), opposite of depth.
+
+ """
+ title = cat.title(with_ns=False)
+ size = 4 ** level
+ subcats = sorted(cat.subcategories())
+
+ def node():
+ subs = ', '.join([c.title(with_ns=False).replace(' ', ' ')
+ for c in subcats])
+ n = pydot.Node(title,
+ label=r'"{}\n{} C"'.
+ format(title, len(subcats)),
+ tooltip=title + '\n\n' + subs,
+ URL='https://' + self.site.code
+ + '.wikipedia.org/wiki/' + cat.title(as_url=True),
+ fontsize=int(10 * size))
+ return n
+
+ def edge(n, h):
+ minlen = n % columns + 1 if level != self.args.depth else 1
+ e = pydot.Edge(title,
+ subcat.title(with_ns=False),
+ tooltip=title + ' ⟶ '
+ + subcat.title(with_ns=False),
+ headlabel=title,
+ # distribute the graph to depth
+ minlen=minlen,
+ penwidth=round(size / 2, 2),
+ arrowsize=round(size / 4, 2),
+ color=str(round(h, 2)) + ' 1 0.7',
+ labelfontsize=int(size),
+ labelfontcolor=str(round(h, 2)) + ' 1 0.5')
+ return e
+
+ if config.verbose_output:
+ pywikibot.output('Adding ' + cat.title(with_ns=False))
+ node = node()
+ self.dot.add_node(node)
+ self.counter += 1
+ if not level or self.counter >= 1e4:
+ # because graphviz crashes on huge graphs
+ if self.counter == 1e4:
+ pywikibot.warning('Number of nodes reached limit')
+ self.leaves.add(node.get_name())
+ return
+ columns = len(subcats) // 5 + 1
+ for n, subcat in enumerate(subcats):
+ # generating different hue for color per each root branch
+ h = hue if hue is not None else (11 / 18 * n) % 1
+ e = edge(n, h)
+ self.dot.add_edge(e)
+ # repeat recursively
+ self.scan_level(subcat, level - 1, h)
+ # track graph's structure to reduse too big graph
+ self.rev[e.get_destination()].append(e.get_source())
+ self.fw[e.get_source()].append(e.get_destination())
+
+ def run(self) -> None:
+ """Main function of CategoryGraphBot."""
+ self.scan_level(self.cat, int(self.args.depth))
+ # reduce too big graph
+ if self.counter > 1000:
+ pywikibot.warning('Removing standalone subcategories '
+ 'because graph is too big')
+ for n in self.leaves:
+ while len(self.rev[n]) == 1:
+ if config.verbose_output:
+ pywikibot.output('Removing ' + n)
+ self.dot.del_edge(self.rev[n][0], n)
+ self.dot.del_node(n)
+ self.fw[self.rev[n][0]].remove(n)
+ if self.fw[self.rev[n][0]]:
+ break
+ n = self.rev[n][0]
+ pywikibot.output('Saving results')
+ pywikibot.output(self.to + '.dot')
+ self.dot.write(self.to + '.dot')
+ pywikibot.output(self.to + '.svg')
+ self.dot.write_svg(self.to + '.svg')
+ pywikibot.output(self.to + '.html')
+ header = ('<head><meta charset="UTF-8"/>'
+ '<title>' + self.cat.title(with_ns=False)
+ + '</title> </head>\n'
+ '<div style="position:absolute;">'
+ 'Zoom and drag with mouse. '
+ 'Nodes are links to Wikipedia.'
+ '</div>\n'
+ '<script '
+ 'src="https://unpkg.com/panzoom@9.4.0/dist/panzoom.min.js" '
+ 'query="#graph0" name="pz"></script>\n'
+ '<style> svg { height:100%; width:100%; } </style>\n')
+ with io.open(self.to + '.html', mode='wb') as o:
+ o.write(header.encode())
+ o.write(self.dot.create('dot', 'svg'))
+
+
+if __name__ == '__main__':
+ ap = argparse.ArgumentParser(add_help=False)
+ CategoryGraphBot.args(None, ap)
+ local_args = pywikibot.handle_args()
+ args, rest = ap.parse_known_args()
+
+ if not suggest_help(missing_action='from' not in args):
+ import pydot
+ bot = CategoryGraphBot(ap, args)
+ bot.run()

To view, visit change 760651. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I3991a6d311686b66f1b0274c527606c290aa22e9
Gerrit-Change-Number: 760651
Gerrit-PatchSet: 27
Gerrit-Owner: Costa Shul <constantine.shulyupin@gmail.com>
Gerrit-Reviewer: Costa Shul <constantine.shulyupin@gmail.com>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Xqt <info@gno.de>
Gerrit-MessageType: merged