[Pywikipedia-svn] SVN: [11099] trunk/pywikipedia/parserfunctioncount.py

21 Feb 2013

http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11099
Revision: 11099
Author:   binbot
Date:     2013-02-21 01:30:59 +0000 (Thu, 21 Feb 2013)
Log Message:
-----------
New script to find templates that are worth converting to Lua. It lists templates using many parser functions.
Added Paths:
-----------
    trunk/pywikipedia/parserfunctioncount.py
Added: trunk/pywikipedia/parserfunctioncount.py
===================================================================

--- trunk/pywikipedia/parserfunctioncount.py	                        (rev 0)
+++ trunk/pywikipedia/parserfunctioncount.py	2013-02-21 01:30:59 UTC (rev 11099)
@@ -0,0 +1,205 @@
+# -*- coding: utf-8 -*-
+"""
+This script helps to find expensive templates that are candidates for
+conversion to Lua. It counts parser functions, orders templates by their number
+and lists the first n titles or, alternatively, templates having count() >= n.
+
+Parameters:
+-start            Will start from the given title (it does not have to exist).
+                  Parameter may be given as "-start" or "-start:title".
+                  Defaults to '!'.
+-first            Returns the first n results in decreasing order of number
+                  of hits (or without ordering if used with -nosort)
+                  Parameter may be given as "-first" or "-first:n".
+-atleast          Returns templates with at least n hits.
+                  Parameter may be given as "-atleast" or "-atleast:n".
+-nosort           Keeps the original order of templates. Default behaviour is
+                  to sort them by decreasing order of count(parserfunctions).
+-save             Saves the results. The file is in the form you may upload it
+                  to a wikipage. May be given as "-save:<filename>".
+                  If it exists, titles will be appended.
+-upload           Specify a page in your wiki where results will be uploaded.
+                  Parameter may be given as "-upload" or "-upload:title".
+                  Any previous content of the page will be overwritten.
+Precedence of evaluation: results are first sorted in decreasing order of
+hits, unless nosort is switched on. Then the first n templates are taken if
+first is specified, and finally atleast is applied. If nosort and first are
+used together, the program will stop at the nth hit without scanning the rest
+of the template namespace. This may be used to run it in more sessions
+(continue with -start next time).
+First is strict. That means if results #90-120 have the same number of parser
+functions and you specify -first:100, only the first 100 will be listed (even
+if atleast is used as well).
+Should you specify neither first nor atleast, all templates using parser
+functions will be listed.
+"""
+
+#
+# (C) Bináris, 2013
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id$'
+'''
+Todo:
+* Using xml and xmlstart
+* Using categories
+* Error handling for uploading (anyway, that's the last action, it's only
+  for the beauty of the program, does not affect anything).
+'''
+
+import codecs, re
+import wikipedia as pywikibot
+from pagegenerators import \
+                    AllpagesPageGenerator as APG, \
+                    RegexFilterPageGenerator as RPG
+
+def main(*args):
+    words = ['expr', 'if', 'ifeq', 'ifexpr', 'iferror', 'switch', 'ifexist',
+             'time', 'timel', 'rel2abs', 'titleparts', 'len', 'pos', 'rpos',
+             'sub', 'count', 'replace', 'explode', 'urldecode']
+            # default is left out because it may occur within switch only.
+    addwords = {
+        # Write translated parser function names here.
+        'hu': [u'kif', u'ha', u'haegyenlő', u'hakif', u'hahibás', u'halétezik',
+               u'idő', u'hossz', u'pozíció', u'jpozíció'],
+    }
+    documentsubpage = {
+        # You may write here a regex representing the name of template doc
+        # subpages in your wiki. Defaults to /doc.
+        # These subpages will be excluded for faster run.
+        'de': ur'(?i).*/Doku',
+        'fr': ur'(?i).*/Documentation',
+    }
+    editcomment = {
+        # This will be used for uploading the list to your wiki.
+        'en': u'Bot: uploading the list of templates having too many parser functions',
+        'hu': u'A túl sok parserfüggvényt használó sablonok listájának feltöltése',
+    }
+    start = '!'
+    results = []
+    first = None
+    atleast = None
+    nosort = False
+    filename = None # The name of the file to save titles
+    titlefile = None
+    uploadpage = None
+    count = 0
+
+    # Handling parameters:
+    for arg in pywikibot.handleArgs(*args):
+        if arg == '-start':
+            start = pywikibot.input(
+                    u'From which title do you want to continue?')
+        elif arg.startswith('-start:'):
+            start = arg[7:]
+        elif arg == '-save':
+            filename = pywikibot.input('Please enter the filename:')
+        elif arg.startswith('-save:'):
+            filename = arg[6:]
+        elif arg == '-upload':
+            uploadpage = pywikibot.input('Please enter the pagename:')
+        elif arg.startswith('-upload:'):
+            uploadpage = arg[8:]
+        elif arg == '-first':
+            first = pywikibot.input(
+                'Please enter the max. number of templates to display:')
+        elif arg.startswith('-first:'):
+            first = arg[7:]
+        elif arg == '-atleast':
+            atleast = pywikibot.input(
+                'Please enter the min. number of functions to display:')
+        elif arg.startswith('-atleast:'):
+            atleast = arg[9:]
+        elif arg == '-nosort':
+            nosort = True
+
+
+    # File operations:
+    if filename:
+        try:
+            # This opens in strict error mode, that means bot will stop
+            # on encoding errors with ValueError.
+            # See http://docs.python.org/library/codecs.html#codecs.open
+            titlefile = codecs.open(filename, encoding='utf-8', mode='a')
+        except IOError:
+            pywikibot.output("%s cannot be opened for writing." % filename)
+            return
+    # Limitations for result:
+    if first:
+        try:
+            first = int(first)
+            if first < 1:
+                first = None
+        except ValueError:
+            first = None
+    if atleast:
+        try:
+            atleast = int(atleast)
+            if atleast < 2: # 1 has no effect, don't waste resources.
+                atleast = None
+        except ValueError:
+            atleast = None
+
+    # Ready to initialize
+    site = pywikibot.getSite()
+    lang = site.lang
+    try:
+        words.extend(addwords[lang]) # Adding translated function names
+    except KeyError:
+        pass
+    try:
+        comment = editcomment[lang]
+    except KeyError:
+        comment = editcomment['en']
+    try:
+        docregex = documentsubpage[lang] # Finding document subpage names
+    except KeyError:
+        docregex = ur'(?i).*/doc'
+    regex = re.compile(ur'(?i)#('+ur'|'.join(words)+'):')
+    gen1 = APG(start=start, namespace=10, includeredirects=False, site=site)
+    gen = RPG(gen1, docregex, inverse=True)
+
+    # Processing:
+    pywikibot.output(u'Hold on, this will need some time. '
+                     u'You will be notified by 50 templates.')
+    for page in gen:
+        count += 1
+        title = page.title()
+        if not count % 50:
+            # Don't let the poor user panic in front of a black screen.
+            pywikibot.output('%dth template is beeing processed: %s' %
+                (count, title))
+        text = page.get()
+        functions = regex.findall(text)
+        if functions:
+            results.append((title,len(functions)))
+        if nosort and first and len(results) == first:
+            break
+
+    # Combing the results:
+    if not nosort:
+        results.sort(key=lambda x:str(5000-x[1])+'.'+x[0])
+    if first:
+        results = results[:first]
+    if atleast:
+        results = filter(lambda x: x[1] >= atleast, results)
+
+    # Outputs:
+    resultlist = '\n'.join(
+        ['#[[%s]] (%d)' % (result[0], result[1]) for result in results])
+    pywikibot.output(resultlist)
+    pywikibot.output(u'%d templates were examined.' % count)
+    pywikibot.output(u'%d templates were found.' % len(results))
+    if titlefile:
+        titlefile.write(resultlist)
+        titlefile.close()
+    if uploadpage:
+        page = pywikibot.Page(site, uploadpage)
+        page.put(resultlist, comment)
+
+if __name__ == "__main__":  # Run only when executed as a script.
+    try:
+        main()
+    finally:
+        pywikibot.stopme()  # Runs whether or not main() raised.
Property changes on: trunk/pywikipedia/parserfunctioncount.py
___________________________________________________________________
Added: svn:eol-style
   + native
Added: svn:keywords
   + Author Date Id Revision

    

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

2010

2009

[Pywikipedia-svn] SVN: [11099] trunk/pywikipedia/parserfunctioncount.py