http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10292
Revision: 10292 Author: xqt Date: 2012-06-04 13:41:38 +0000 (Mon, 04 Jun 2012) Log Message: ----------- interwiki_graph and titletranslate are libraries. Moved it to the pywikibot folder; some parts from cosmetic_changes updated from trunk
Modified Paths: -------------- branches/rewrite/scripts/cosmetic_changes.py branches/rewrite/scripts/interwiki.py
Added Paths: ----------- branches/rewrite/pywikibot/interwiki_graph.py branches/rewrite/pywikibot/titletranslate.py
Removed Paths: ------------- branches/rewrite/scripts/interwiki_graph.py branches/rewrite/scripts/titletranslate.py
Copied: branches/rewrite/pywikibot/interwiki_graph.py (from rev 10289, branches/rewrite/scripts/interwiki_graph.py) =================================================================== --- branches/rewrite/pywikibot/interwiki_graph.py (rev 0) +++ branches/rewrite/pywikibot/interwiki_graph.py 2012-06-04 13:41:38 UTC (rev 10292) @@ -0,0 +1,150 @@ +""" Module with the graphviz drawing calls """ +# +# (C) Pywikipedia bot team, 2006-2010 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' +import threading +pydotfound = True +try: + import pydot +except ImportError: + pydotfound = False +import pywikibot +from pywikibot import config2 as config + +class GraphImpossible(Exception): + "Drawing a graph is not possible on your system." + +class GraphSavingThread(threading.Thread): + """ + Rendering a graph can take extremely long. We use + multithreading because of that. + + TODO: Find out if several threads running in parallel + can slow down the system too much. Consider adding a + mechanism to kill a thread if it takes too long. + """ + + def __init__(self, graph, originPage): + threading.Thread.__init__(self) + self.graph = graph + self.originPage = originPage + + def run(self): + for format in config.interwiki_graph_formats: + filename = 'interwiki-graphs/' + getFilename(self.originPage, + format) + if self.graph.write(filename, prog = 'dot', format = format): + pywikibot.output(u'Graph saved as %s' % filename) + else: + pywikibot.output(u'Graph could not be saved as %s' % filename) + +class GraphDrawer: + def __init__(self, subject): + if not pydotfound: + raise GraphImpossible, 'pydot is not installed.' 
+ self.graph = None + self.subject = subject + + def getLabel(self, page): + return (u'"\"%s:%s\""' % (page.site.language(), + page.title())).encode('utf-8') + + def addNode(self, page): + node = pydot.Node(self.getLabel(page), shape = 'rectangle') + node.set_URL("\"http://%s%s\"" + % (page.site.hostname(), + page.site.get_address(page.urlname()))) + node.set_style('filled') + node.set_fillcolor('white') + node.set_fontsize('11') + if not page.exists(): + node.set_fillcolor('red') + elif page.isRedirectPage(): + node.set_fillcolor('blue') + elif page.isDisambig(): + node.set_fillcolor('orange') + if page.namespace() != self.subject.originPage.namespace(): + node.set_color('green') + node.set_style('filled,bold') + # if we found more than one valid page for this language: + if len(filter(lambda p: p.site == page.site and p.exists() \ + and not p.isRedirectPage(), + self.subject.foundIn.keys())) > 1: + # mark conflict by octagonal node + node.set_shape('octagon') + self.graph.add_node(node) + + def addDirectedEdge(self, page, refPage): + # if page was given as a hint, referrers would be [None] + if refPage is not None: + sourceLabel = self.getLabel(refPage) + targetLabel = self.getLabel(page) + edge = pydot.Edge(sourceLabel, targetLabel) + + oppositeEdge = self.graph.get_edge(targetLabel, sourceLabel) + if oppositeEdge: + if isinstance(oppositeEdge, list): + # bugfix for pydot >= 1.0.3 + oppositeEdge = oppositeEdge[0] + #oppositeEdge.set_arrowtail('normal') + oppositeEdge.set_dir('both') + # workaround for bug [ 1722739 ]: prevent duplicate edges + # (it is unclear why duplicate edges occur) + elif self.graph.get_edge(sourceLabel, targetLabel): + pywikibot.output( + u'BUG: Tried to create duplicate edge from %s to %s' + % (refPage.title(asLink=True), page.title(asLink=True))) + # duplicate edges would be bad because then get_edge() would + # give a list of edges, not a single edge when we handle the + # opposite edge. 
+ else: + # add edge + if refPage.site == page.site: + edge.set_color('blue') + elif not page.exists(): + # mark dead links + edge.set_color('red') + elif refPage.isDisambig() != page.isDisambig(): + # mark links between disambiguation and non-disambiguation + # pages + edge.set_color('orange') + if refPage.namespace() != page.namespace(): + edge.set_color('green') + self.graph.add_edge(edge) + + def saveGraphFile(self): + thread = GraphSavingThread(self.graph, self.subject.originPage) + thread.start() + + def createGraph(self): + """ + See http://meta.wikimedia.org/wiki/Interwiki_graphs + """ + pywikibot.output(u'Preparing graph for %s' + % self.subject.originPage.title()) + # create empty graph + self.graph = pydot.Dot() + # self.graph.set('concentrate', 'true') + for page in self.subject.foundIn.iterkeys(): + # a node for each found page + self.addNode(page) + # mark start node by pointing there from a black dot. + firstLabel = self.getLabel(self.subject.originPage) + self.graph.add_node(pydot.Node('start', shape = 'point')) + self.graph.add_edge(pydot.Edge('start', firstLabel)) + for page, referrers in self.subject.foundIn.iteritems(): + for refPage in referrers: + self.addDirectedEdge(page, refPage) + self.saveGraphFile() + +def getFilename(page, extension = None): + filename = '%s-%s-%s' % (page.site.family.name, + page.site.language(), + page.titleForFilename()) + if extension: + filename += '.%s' % extension + return filename +
Copied: branches/rewrite/pywikibot/titletranslate.py (from rev 10289, branches/rewrite/scripts/titletranslate.py) =================================================================== --- branches/rewrite/pywikibot/titletranslate.py (rev 0) +++ branches/rewrite/pywikibot/titletranslate.py 2012-06-04 13:41:38 UTC (rev 10292) @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- +# +# (C) Rob W.W. Hooft, 2003 +# (C) Yuri Astrakhan, 2005 +# (C) Pywikipedia bot team, 2003-2010 +# +# Distributed under the terms of the MIT license. +# +__version__ = '$Id$' +# +import re + +import pywikibot +import pywikibot.date as date + +def translate(page, hints = None, auto = True, removebrackets = False): + """ + Please comment your source code! --Daniel + + Does some magic stuff. Returns a list of Links. + """ + result = [] + site = page.site + if hints: + for h in hints: + if ':' not in h: + # argument given as -hint:xy where xy is a language code + codes = h + newname = '' + else: + codes, newname = h.split(':', 1) + if newname == '': + # if given as -hint:xy or -hint:xy:, assume that there should + # be a page in language xy with the same title as the page + # we're currently working on ... + ns = page.namespace() + if ns: + newname = u'%s:%s' % (site.family.namespace('_default', ns), + page.title(withNamespace=False)) + else: + # article in the main namespace + newname = page.title() + # ... 
unless we do want brackets + if removebrackets: + newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?", re.UNICODE), u" ", newname) + try: + number = int(codes) + codes = site.family.languages_by_size[:number] + except ValueError: + if codes == 'all': + codes = site.family.languages_by_size + elif codes in site.family.language_groups: + codes = site.family.language_groups[codes] + else: + codes = codes.split(',') + for newcode in codes: + if newcode in site.languages(): + if newcode != site.code: + x = pywikibot.Link(site.getSite(code=newcode), newname) + if x not in result: + result.append(x) + else: + if pywikibot.verbose: + pywikibot.output(u"Ignoring unknown language code %s" + % newcode) + + # Autotranslate dates into all other languages, the rest will come from + # existing interwiki links. + if auto: + # search inside all dictionaries for this link + dictName, value = date.getAutoFormat(page.site.code, + page.title()) + if dictName: + if not (dictName == 'yearsBC' and + page.site.code in date.maxyearBC and + value > date.maxyearBC[page.site.code]) or \ + (dictName == 'yearsAD' and + page.site.code in date.maxyearAD and + value > date.maxyearAD[page.site.code]): + pywikibot.output( + u'TitleTranslate: %s was recognized as %s with value %d' + % (page.title(), dictName, value)) + for entryLang, entry in date.formats[dictName].iteritems(): + if entryLang != page.site.code: + if dictName == 'yearsBC' and \ + entryLang in date.maxyearBC and \ + value > date.maxyearBC[entryLang]: + pass + elif dictName == 'yearsAD' and \ + entryLang in date.maxyearAD and \ + value > date.maxyearAD[entryLang]: + pass + else: + newname = entry(value) + x = pywikibot.Link( + newname, + pywikibot.getSite(code=entryLang, + fam=site.family)) + if x not in result: + result.append(x) # add new page + return result + +bcDateErrors = [u'[[ko:%d년]]'] + +def appendFormatedDates( result, dictName, value ): + for code, func in date.formats[dictName].iteritems(): + result.append( u'[[%s:%s]]' % 
(code,func(value)) ) + +def getPoisonedLinks(pl): + """Returns a list of known corrupted links that should be removed if seen + + """ + result = [] + pywikibot.output(u'getting poisoned links for %s' % pl.title()) + dictName, value = date.getAutoFormat(pl.site.code, pl.title()) + if dictName is not None: + pywikibot.output( u'date found in %s' % dictName ) + # errors in year BC + if dictName in date.bcFormats: + for fmt in bcDateErrors: + result.append( fmt % value ) + # i guess this is like friday the 13th for the years + if value == 398 and dictName == 'yearsBC': + appendFormatedDates(result, dictName, 399) + if dictName == 'yearsBC': + appendFormatedDates(result, 'decadesBC', value) + appendFormatedDates(result, 'yearsAD', value) + if dictName == 'yearsAD': + appendFormatedDates(result, 'decadesAD', value) + appendFormatedDates(result, 'yearsBC', value) + if dictName == 'centuriesBC': + appendFormatedDates(result, 'decadesBC', value * 100 + 1) + if dictName == 'centuriesAD': + appendFormatedDates(result, 'decadesAD', value * 100 + 1) + return result
Modified: branches/rewrite/scripts/cosmetic_changes.py =================================================================== --- branches/rewrite/scripts/cosmetic_changes.py 2012-06-03 22:01:00 UTC (rev 10291) +++ branches/rewrite/scripts/cosmetic_changes.py 2012-06-04 13:41:38 UTC (rev 10292) @@ -39,12 +39,12 @@ # __version__ = '$Id$' # +import sys, re import pywikibot import isbn from pywikibot import pagegenerators from pywikibot import i18n -import sys -import re +from pywikibot import config2 as config
warning = """ ATTENTION: You can run this script as a stand-alone for testing purposes. @@ -57,7 +57,13 @@ '&warning;': warning, }
-nn_iw_msg = u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->' +# Interwiki message on top of iw links +# 2nd line is a regex if needed +msg_interwiki = { + 'fr' : u'<!-- Autres langues -->', + 'nn' : (u'<!--interwiki (no, sv, da first; then other languages alphabetically by name)-->', + u'(<!-- ?interwiki \(no(?:/nb)?, ?sv, ?da first; then other languages alphabetically by name\) ?-->)') +}
# This is from interwiki.py; # move it to family file and implement global instances @@ -157,6 +163,8 @@ try: text = isbn.hyphenateIsbnNumbers(text) except isbn.InvalidIsbnException, error: + if config.verbose_output: + pywikibot.output(u"ISBN error: %s" % error) pass if self.debug: pywikibot.showDiff(oldText, text) @@ -168,7 +176,8 @@ Remove their language code prefix. """ if not self.talkpage and pywikibot.calledModuleName() <> 'interwiki': - interwikiR = re.compile(r'[[%s\s?:([^[]\n]*)]]' % self.site.lang) + interwikiR = re.compile(r'[[%s\s?:([^[]\n]*)]]' + % self.site.lang) text = interwikiR.sub(r'[[\1]]', text) return text
Modified: branches/rewrite/scripts/interwiki.py =================================================================== --- branches/rewrite/scripts/interwiki.py 2012-06-03 22:01:00 UTC (rev 10291) +++ branches/rewrite/scripts/interwiki.py 2012-06-04 13:41:38 UTC (rev 10292) @@ -346,13 +346,14 @@ import codecs import pickle import socket +import webbrowser import pywikibot from pywikibot import config from pywikibot import catlib from pywikibot import pagegenerators from pywikibot import i18n -import titletranslate, interwiki_graph -import webbrowser +from pywikibot import interwiki_graph +from pywikibot import titletranslate
docuReplacements = { '&pagegenerators_help;': pagegenerators.parameterHelp
Deleted: branches/rewrite/scripts/interwiki_graph.py =================================================================== --- branches/rewrite/scripts/interwiki_graph.py 2012-06-03 22:01:00 UTC (rev 10291) +++ branches/rewrite/scripts/interwiki_graph.py 2012-06-04 13:41:38 UTC (rev 10292) @@ -1,150 +0,0 @@ -""" Module with the graphviz drawing calls """ -# -# (C) Pywikipedia bot team, 2006-2010 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id$' -import threading -pydotfound = True -try: - import pydot -except ImportError: - pydotfound = False -import pywikibot -from pywikibot import config2 as config - -class GraphImpossible(Exception): - "Drawing a graph is not possible on your system." - -class GraphSavingThread(threading.Thread): - """ - Rendering a graph can take extremely long. We use - multithreading because of that. - - TODO: Find out if several threads running in parallel - can slow down the system too much. Consider adding a - mechanism to kill a thread if it takes too long. - """ - - def __init__(self, graph, originPage): - threading.Thread.__init__(self) - self.graph = graph - self.originPage = originPage - - def run(self): - for format in config.interwiki_graph_formats: - filename = 'interwiki-graphs/' + getFilename(self.originPage, - format) - if self.graph.write(filename, prog = 'dot', format = format): - pywikibot.output(u'Graph saved as %s' % filename) - else: - pywikibot.output(u'Graph could not be saved as %s' % filename) - -class GraphDrawer: - def __init__(self, subject): - if not pydotfound: - raise GraphImpossible, 'pydot is not installed.' 
- self.graph = None - self.subject = subject - - def getLabel(self, page): - return (u'""%s:%s""' % (page.site.language(), - page.title())).encode('utf-8') - - def addNode(self, page): - node = pydot.Node(self.getLabel(page), shape = 'rectangle') - node.set_URL(""http://%s%s%5C"" - % (page.site.hostname(), - page.site.get_address(page.urlname()))) - node.set_style('filled') - node.set_fillcolor('white') - node.set_fontsize('11') - if not page.exists(): - node.set_fillcolor('red') - elif page.isRedirectPage(): - node.set_fillcolor('blue') - elif page.isDisambig(): - node.set_fillcolor('orange') - if page.namespace() != self.subject.originPage.namespace(): - node.set_color('green') - node.set_style('filled,bold') - # if we found more than one valid page for this language: - if len(filter(lambda p: p.site == page.site and p.exists() \ - and not p.isRedirectPage(), - self.subject.foundIn.keys())) > 1: - # mark conflict by octagonal node - node.set_shape('octagon') - self.graph.add_node(node) - - def addDirectedEdge(self, page, refPage): - # if page was given as a hint, referrers would be [None] - if refPage is not None: - sourceLabel = self.getLabel(refPage) - targetLabel = self.getLabel(page) - edge = pydot.Edge(sourceLabel, targetLabel) - - oppositeEdge = self.graph.get_edge(targetLabel, sourceLabel) - if oppositeEdge: - if isinstance(oppositeEdge, list): - # bugfix for pydot >= 1.0.3 - oppositeEdge = oppositeEdge[0] - #oppositeEdge.set_arrowtail('normal') - oppositeEdge.set_dir('both') - # workaround for bug [ 1722739 ]: prevent duplicate edges - # (it is unclear why duplicate edges occur) - elif self.graph.get_edge(sourceLabel, targetLabel): - pywikibot.output( - u'BUG: Tried to create duplicate edge from %s to %s' - % (refPage.title(asLink=True), page.title(asLink=True))) - # duplicate edges would be bad because then get_edge() would - # give a list of edges, not a single edge when we handle the - # opposite edge. 
- else: - # add edge - if refPage.site == page.site: - edge.set_color('blue') - elif not page.exists(): - # mark dead links - edge.set_color('red') - elif refPage.isDisambig() != page.isDisambig(): - # mark links between disambiguation and non-disambiguation - # pages - edge.set_color('orange') - if refPage.namespace() != page.namespace(): - edge.set_color('green') - self.graph.add_edge(edge) - - def saveGraphFile(self): - thread = GraphSavingThread(self.graph, self.subject.originPage) - thread.start() - - def createGraph(self): - """ - See http://meta.wikimedia.org/wiki/Interwiki_graphs - """ - pywikibot.output(u'Preparing graph for %s' - % self.subject.originPage.title()) - # create empty graph - self.graph = pydot.Dot() - # self.graph.set('concentrate', 'true') - for page in self.subject.foundIn.iterkeys(): - # a node for each found page - self.addNode(page) - # mark start node by pointing there from a black dot. - firstLabel = self.getLabel(self.subject.originPage) - self.graph.add_node(pydot.Node('start', shape = 'point')) - self.graph.add_edge(pydot.Edge('start', firstLabel)) - for page, referrers in self.subject.foundIn.iteritems(): - for refPage in referrers: - self.addDirectedEdge(page, refPage) - self.saveGraphFile() - -def getFilename(page, extension = None): - filename = '%s-%s-%s' % (page.site.family.name, - page.site.language(), - page.titleForFilename()) - if extension: - filename += '.%s' % extension - return filename -
Deleted: branches/rewrite/scripts/titletranslate.py =================================================================== --- branches/rewrite/scripts/titletranslate.py 2012-06-03 22:01:00 UTC (rev 10291) +++ branches/rewrite/scripts/titletranslate.py 2012-06-04 13:41:38 UTC (rev 10292) @@ -1,135 +0,0 @@ -# -*- coding: utf-8 -*- -# -# (C) Rob W.W. Hooft, 2003 -# (C) Yuri Astrakhan, 2005 -# (C) Pywikipedia bot team, 2003-2010 -# -# Distributed under the terms of the MIT license. -# -__version__ = '$Id$' -# -import re - -import pywikibot -import pywikibot.date as date - -def translate(page, hints = None, auto = True, removebrackets = False): - """ - Please comment your source code! --Daniel - - Does some magic stuff. Returns a list of Links. - """ - result = [] - site = page.site - if hints: - for h in hints: - if ':' not in h: - # argument given as -hint:xy where xy is a language code - codes = h - newname = '' - else: - codes, newname = h.split(':', 1) - if newname == '': - # if given as -hint:xy or -hint:xy:, assume that there should - # be a page in language xy with the same title as the page - # we're currently working on ... - ns = page.namespace() - if ns: - newname = u'%s:%s' % (site.family.namespace('_default', ns), - page.title(withNamespace=False)) - else: - # article in the main namespace - newname = page.title() - # ... 
unless we do want brackets - if removebrackets: - newname = re.sub(re.compile(ur"\W*?(.*?)\W*?", re.UNICODE), u" ", newname) - try: - number = int(codes) - codes = site.family.languages_by_size[:number] - except ValueError: - if codes == 'all': - codes = site.family.languages_by_size - elif codes in site.family.language_groups: - codes = site.family.language_groups[codes] - else: - codes = codes.split(',') - for newcode in codes: - if newcode in site.languages(): - if newcode != site.code: - x = pywikibot.Link(site.getSite(code=newcode), newname) - if x not in result: - result.append(x) - else: - if pywikibot.verbose: - pywikibot.output(u"Ignoring unknown language code %s" - % newcode) - - # Autotranslate dates into all other languages, the rest will come from - # existing interwiki links. - if auto: - # search inside all dictionaries for this link - dictName, value = date.getAutoFormat(page.site.code, - page.title()) - if dictName: - if not (dictName == 'yearsBC' and - page.site.code in date.maxyearBC and - value > date.maxyearBC[page.site.code]) or \ - (dictName == 'yearsAD' and - page.site.code in date.maxyearAD and - value > date.maxyearAD[page.site.code]): - pywikibot.output( - u'TitleTranslate: %s was recognized as %s with value %d' - % (page.title(), dictName, value)) - for entryLang, entry in date.formats[dictName].iteritems(): - if entryLang != page.site.code: - if dictName == 'yearsBC' and \ - entryLang in date.maxyearBC and \ - value > date.maxyearBC[entryLang]: - pass - elif dictName == 'yearsAD' and \ - entryLang in date.maxyearAD and \ - value > date.maxyearAD[entryLang]: - pass - else: - newname = entry(value) - x = pywikibot.Link( - newname, - pywikibot.getSite(code=entryLang, - fam=site.family)) - if x not in result: - result.append(x) # add new page - return result - -bcDateErrors = [u'[[ko:%d년]]'] - -def appendFormatedDates( result, dictName, value ): - for code, func in date.formats[dictName].iteritems(): - result.append( u'[[%s:%s]]' % 
(code,func(value)) ) - -def getPoisonedLinks(pl): - """Returns a list of known corrupted links that should be removed if seen - - """ - result = [] - pywikibot.output(u'getting poisoned links for %s' % pl.title()) - dictName, value = date.getAutoFormat(pl.site.code, pl.title()) - if dictName is not None: - pywikibot.output( u'date found in %s' % dictName ) - # errors in year BC - if dictName in date.bcFormats: - for fmt in bcDateErrors: - result.append( fmt % value ) - # i guess this is like friday the 13th for the years - if value == 398 and dictName == 'yearsBC': - appendFormatedDates(result, dictName, 399) - if dictName == 'yearsBC': - appendFormatedDates(result, 'decadesBC', value) - appendFormatedDates(result, 'yearsAD', value) - if dictName == 'yearsAD': - appendFormatedDates(result, 'decadesAD', value) - appendFormatedDates(result, 'yearsBC', value) - if dictName == 'centuriesBC': - appendFormatedDates(result, 'decadesBC', value * 100 + 1) - if dictName == 'centuriesAD': - appendFormatedDates(result, 'decadesAD', value * 100 + 1) - return result