http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11099
Revision: 11099 Author: binbot Date: 2013-02-21 01:30:59 +0000 (Thu, 21 Feb 2013) Log Message: ----------- New script to find templates that are worth converting to Lua. It lists templates that use many parser functions.
Added Paths: ----------- trunk/pywikipedia/parserfunctioncount.py
# -*- coding: utf-8 -*-
"""
This script helps to find expensive templates that are candidates for
conversion to Lua. It counts parser functions, then orders templates by the
number of these and lists the first n titles or, alternatively, the templates
having count() >= n.

Parameters:
-start          Will start from the given title (it does not have to exist).
                Parameter may be given as "-start" or "-start:title".
                Defaults to '!'.
-first          Returns the first n results in decreasing order of number
                of hits (or without ordering if used with -nosort)
                Parameter may be given as "-first" or "-first:n".
-atleast        Returns templates with at least n hits.
                Parameter may be given as "-atleast" or "-atleast:n".
-nosort         Keeps the original order of templates. Default behaviour is
                to sort them by decreasing order of count(parserfunctions).
-save           Saves the results. The file is in the form you may upload it
                to a wikipage. May be given as "-save:<filename>".
                If it exists, titles will be appended.
-upload         Specify a page in your wiki where results will be uploaded.
                Parameter may be given as "-upload" or "-upload:title".
                Say good-bye to previous content if existed.

Precedence of evaluation: results are first sorted in decreasing order of
hits, unless nosort is switched on. Then the first n templates are taken if
first is specified, and at last atleast is evaluated. If nosort and first are
used together, the program will stop at the nth hit without scanning the rest
of the template namespace. This may be used to run it in more sessions
(continue with -start next time).

First is strict. That means if results #90-120 have the same number of parser
functions and you specify -first:100, only the first 100 will be listed (even
if atleast is used as well).

Should you specify neither first nor atleast, all templates using parser
functions will be listed.
"""

#
# (C) Bináris, 2013
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id$'
#
# Todo:
# * Using xml and xmlstart
# * Using categories
# * Error handling for uploading (anyway, that's the last action, it's only
#   for the beauty of the program, does not affect anything).
#

import codecs
import re

import wikipedia as pywikibot
from pagegenerators import \
    AllpagesPageGenerator as APG, \
    RegexFilterPageGenerator as RPG


def _parse_limit(value, minimum):
    """Convert a user supplied limit to an int, or None if it is unusable.

    value is the raw text of a -first/-atleast parameter (or None when the
    parameter was not given). Anything that is empty, not an integer, or
    smaller than minimum yields None, which means "no limit".
    """
    if not value:
        return None
    try:
        number = int(value)
    except ValueError:
        return None
    if number < minimum:
        return None
    return number


def _select_results(results, first, atleast, nosort):
    """Apply ordering, -first and -atleast (in this order) to the hits.

    results is a list of (title, count) tuples; a new list is returned.
    """
    if nosort:
        selected = list(results)
    else:
        # Decreasing number of hits, ties broken by title. A numeric key is
        # used on purpose: comparing the counts as strings misorders
        # templates as soon as the numbers differ in length.
        selected = sorted(results, key=lambda item: (-item[1], item[0]))
    if first:
        selected = selected[:first]
    if atleast:
        selected = [item for item in selected if item[1] >= atleast]
    return selected


def main(*args):
    """Scan the template namespace and report heavy parser function users.

    args are passed to pywikibot.handleArgs(); the script specific options
    are described in the module docstring. Results are always printed;
    they are additionally appended to a file (-save) and/or uploaded to a
    wiki page (-upload) when requested.
    """
    words = ['expr', 'if', 'ifeq', 'ifexpr', 'iferror', 'switch', 'ifexist',
             'time', 'timel', 'rel2abs', 'titleparts', 'len', 'pos', 'rpos',
             'sub', 'count', 'replace', 'explode', 'urldecode']
    # default is left out because it may occur within switch only.
    addwords = {
        # Write translated parser function names here.
        'hu': [u'kif', u'ha', u'haegyenlő', u'hakif', u'hahibás',
               u'halétezik', u'idő', u'hossz', u'pozíció', u'jpozíció'],
    }
    documentsubpage = {
        # You may write here a regex representing the name of template doc
        # subpages in your wiki. Defaults to /doc.
        # These subpages will be excluded for faster run.
        'de': u'(?i).*/Doku',
        'fr': u'(?i).*/Documentation',
    }
    editcomment = {
        # This will be used for uploading the list to your wiki.
        'en': u'Bot: uploading the list of templates having too many parser functions',
        'hu': u'A túl sok parserfüggvényt használó sablonok listájának feltöltése',
    }
    start = '!'
    results = []
    first = None
    atleast = None
    nosort = False
    filename = None  # The name of the file to save titles
    titlefile = None
    uploadpage = None
    count = 0

    # Handling parameters:
    for arg in pywikibot.handleArgs(*args):
        if arg == '-start':
            start = pywikibot.input(
                u'From which title do you want to continue?')
        elif arg.startswith('-start:'):
            start = arg[7:]
        elif arg == '-save':
            filename = pywikibot.input('Please enter the filename:')
        elif arg.startswith('-save:'):
            filename = arg[6:]
        elif arg == '-upload':
            uploadpage = pywikibot.input('Please enter the pagename:')
        elif arg.startswith('-upload:'):
            uploadpage = arg[8:]
        elif arg == '-first':
            first = pywikibot.input(
                'Please enter the max. number of templates to display:')
        elif arg.startswith('-first:'):
            first = arg[7:]
        elif arg == '-atleast':
            atleast = pywikibot.input(
                'Please enter the min. number of functions to display:')
        elif arg.startswith('-atleast:'):
            atleast = arg[9:]
        elif arg == '-nosort':
            nosort = True

    # Limitations for result:
    first = _parse_limit(first, 1)
    # -atleast:1 has no effect, don't waste resources.
    atleast = _parse_limit(atleast, 2)

    # File operations. The file is opened before the (long) scan so that an
    # unusable filename is reported at once rather than after the work.
    if filename:
        try:
            # This opens in strict error mode, that means bot will stop
            # on encoding errors with ValueError.
            # See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(filename, encoding='utf-8', mode='a')
        except IOError:
            pywikibot.output("%s cannot be opened for writing." % filename)
            return

    try:
        # Ready to initialize
        site = pywikibot.getSite()
        lang = site.lang
        # Adding translated function names
        words.extend(addwords.get(lang, []))
        comment = editcomment.get(lang, editcomment['en'])
        # Finding document subpage names
        docregex = documentsubpage.get(lang, u'(?i).*/doc')
        regex = re.compile(u'(?i)#(' + u'|'.join(words) + u'):')
        gen1 = APG(start=start, namespace=10, includeredirects=False,
                   site=site)
        gen = RPG(gen1, docregex, inverse=True)

        # Processing:
        pywikibot.output(u'Hold on, this will need some time. '
                         u'You will be notified by 50 templates.')
        for page in gen:
            count += 1
            title = page.title()
            if not count % 50:
                # Don't let the poor user panic in front of a black screen.
                pywikibot.output('%dth template is beeing processed: %s' %
                                 (count, title))
            try:
                text = page.get()
            except (pywikibot.NoPage, pywikibot.IsRedirectPage):
                # The page was deleted or turned into a redirect since the
                # listing was fetched; one such page must not abort a scan
                # of the whole namespace.
                continue
            functions = regex.findall(text)
            if functions:
                results.append((title, len(functions)))
                if nosort and first and len(results) == first:
                    break

        # Combing the results:
        results = _select_results(results, first, atleast, nosort)

        # Outputs:
        resultlist = u'\n'.join(
            [u'#[[%s]] (%d)' % (result[0], result[1]) for result in results])
        pywikibot.output(resultlist)
        pywikibot.output(u'%d templates were examined.' % count)
        pywikibot.output(u'%d templates were found.' % len(results))
        if titlefile and resultlist:
            # The file is opened for appending: terminate the last line so
            # that the next session does not continue on the same line.
            titlefile.write(resultlist + u'\n')
    finally:
        # Release the file even if the scan was interrupted by an error.
        if titlefile:
            titlefile.close()

    if uploadpage:
        page = pywikibot.Page(site, uploadpage)
        page.put(resultlist, comment)


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
Property changes on: trunk/pywikipedia/parserfunctioncount.py ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Author Date Id Revision
pywikipedia-svn@lists.wikimedia.org