jenkins-bot submitted this change.
Porting parser_function_count.py from compat to core/scripts
- Added parser_function_count.py in core/scripts/ as part of
Pywikibot:Compat to Core migration.
- use SingleSiteBot, ExistingPageBot, NoRedirectPageBot classes
- use collections.Counter to hold the result
- Use magicwords for variable declaration via siteinfo.
- Use doc_subpages infrastructure of core
Bug: T66878
Change-Id: I28aafa2aa2928b9585fc825523dfc17fb808e4f9
---
M docs/scripts/scripts.rst
M scripts/README.rst
A scripts/parser_function_count.py
3 files changed, 225 insertions(+), 1 deletion(-)
diff --git a/docs/scripts/scripts.rst b/docs/scripts/scripts.rst
index 088b454..ba353c7 100644
--- a/docs/scripts/scripts.rst
+++ b/docs/scripts/scripts.rst
@@ -271,6 +271,11 @@
.. automodule:: scripts.pagefromfile
+scripts.parser\_function\_count script
+--------------------------------------
+
+.. automodule:: scripts.parser_function_count
+
scripts.patrol script
---------------------
diff --git a/scripts/README.rst b/scripts/README.rst
index b51e4fb..402ef5b 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -167,7 +167,10 @@
| | number of pages to be put on the wiki. |
+------------------------+---------------------------------------------------------+
| #pageimport.py | Import pages from a certain wiki to another. |
- +------------------------+---------------------------------------------------------+
+ +------------------------+--+------------------------------------------------------+
+ | parser_function_count.py | Find expensive templates that are subject to be |
+ | | converted to Lua. |
+ +------------------------+--+------------------------------------------------------+
| patrol.py | Obtains a list pages and marks the edits as patrolled |
| | based on a whitelist. |
+------------------------+---------------------------------------------------------+
diff --git a/scripts/parser_function_count.py b/scripts/parser_function_count.py
new file mode 100644
index 0000000..7293e5d
--- /dev/null
+++ b/scripts/parser_function_count.py
@@ -0,0 +1,216 @@
+# -*- coding: utf-8 -*-
+"""
+Used to find expensive templates that are subject to be converted to Lua.
+
+It counts parser functions and then orders templates by number of these
+and uploads the first n titles or alternatively templates having count()>n.
+
+Parameters:
+
+-start Will start from the given title (it does not have to exist).
+ Parameter may be given as "-start" or "-start:title".
+ Defaults to '!'.
+
+-first Returns the first n results in decreasing order of number
+ of hits (or without ordering if used with -nosort)
+ Parameter may be given as "-first" or "-first:n".
+
+-atleast Returns templates with at least n hits.
+ Parameter may be given as "-atleast" or "-atleast:n".
+
+-nosort Keeps the original order of templates. Default behaviour is
+ to sort them by decreasing order of count(parserfunctions).
+
+-save Saves the results. The file is in the form you may upload it
+ to a wikipage. May be given as "-save:<filename>".
+ If it exists, titles will be appended.
+
+-upload Specify a page in your wiki where results will be uploaded.
+ Parameter may be given as "-upload" or "-upload:title".
+ Say good-bye to previous content if existed.
+
+Precedence of evaluation: results are first sorted in decreasing order of
+templates, unless nosort is switched on. Then first n templates are taken if
+first is specified, and finally atleast is evaluated. If nosort and first are
+used together, the program will stop at the nth hit without scanning the rest
+of the template namespace. This may be used to run it in more sessions
+(continue with -start next time).
+
+First is strict. That means if results #90-120 have the same number of parser
+functions and you specify -first:100, only the first 100 will be listed (even
+if atleast is used as well).
+
+Should you specify neither first nor atleast, all templates using parser
+functions will be listed.
+"""
+#
+# (C) Pywikibot team, 2013-2020
+#
+# Distributed under the terms of the MIT license.
+#
+# Todo:
+# * Using xml and xmlstart
+# * Using categories
+# * Error handling for uploading (anyway, that's the last action, it's only
+# for the beauty of the program, does not affect anything).
+
+import codecs
+import re
+
+from collections import Counter
+
+import pywikibot
+
+from pywikibot.bot import ExistingPageBot, NoRedirectPageBot, SingleSiteBot
+from pywikibot import pagegenerators
+
+
+class ParserFunctionCountBot(SingleSiteBot,
+ ExistingPageBot, NoRedirectPageBot):
+
+ """Bot class used for obtaining Parser function Count."""
+
+ def __init__(self, **kwargs):
+ """Initializer."""
+ self.available_options.update({
+ 'atleast': None,
+ 'first': None,
+ 'nosort': False,
+ 'save': None,
+ 'start': '!',
+ 'upload': None,
+ })
+ super().__init__(**kwargs)
+
+ editcomment = {
+ # This will be used for uploading the list to your wiki.
+ 'en':
+ 'Bot: uploading list of templates having too many parser '
+ 'functions',
+ 'hu':
+ 'A túl sok parserfüggvényt használó sablonok listájának '
+ 'feltöltése',
+ }
+
+ # Limitations for result:
+ if self.opt.first:
+ try:
+ self.opt.first = int(self.opt.first)
+ if self.opt.first < 1:
+ self.opt.first = None
+ except ValueError:
+ self.opt.first = None
+
+ if self.opt.atleast:
+ try:
+ self.opt.atleast = int(self.opt.atleast)
+ # 1 has no effect, don't waste resources.
+ if self.opt.atleast < 2:
+ self.opt.atleast = None
+ except ValueError:
+ self.opt.atleast = None
+
+ lang = self.site.lang
+ self.summary = editcomment.get(lang, editcomment['en'])
+
+ @property
+ def generator(self):
+ """Generator."""
+ gen = self.site.allpages(start=self.option.start,
+ namespace=10, filterredir=False)
+ if self.site.doc_subpage:
+ gen = pagegenerators.RegexFilterPageGenerator(
+ gen, self.site.doc_subpage, quantifier='none')
+ return gen
+
+ def setup(self):
+ """Setup magic words, regex and result counter."""
+ pywikibot.output('Hold on, this will need some time. '
+ 'You will be notified by 50 templates.')
+ magicwords = []
+ for magic_word in self.site.siteinfo['magicwords']:
+ magicwords += magic_word['aliases']
+ self.regex = re.compile(r'#({}):'.format('|'.join(magicwords)), re.I)
+ self.results = Counter()
+
+ def treat(self, page):
+ """Process a single template."""
+ title = page.title()
+ if (self._treat_counter + 1) % 50 == 0:
+ # Don't let the poor user panic in front of a black screen.
+ pywikibot.output('{}th template is being processed: {}'
+ .format(self._treat_counter + 1, title))
+
+ text = page.text
+ functions = self.regex.findall(text)
+ if functions and (self.opt.atleast is None
+ or self.opt.atleast <= len(functions)):
+ self.results[title] = len(functions)
+
+ if self.opt.nosort and self.opt.first \
+ and len(self.results) >= self.opt.first:
+ self.stop()
+
+ def teardown(self):
+ """Final processing."""
+ resultlist = '\n'.join(
+ '# [[{result[0]}]] ({result[1]})'
+ .format(result=result)
+ for result in self.results.most_common(self.opt.first))
+ pywikibot.output(resultlist)
+ pywikibot.output('{} templates were found.'.format(len(self.results)))
+
+ # File operations:
+ if self.opt.save:
+ # This opens in strict error mode, that means bot will stop
+ # on encoding errors with ValueError.
+ # See http://docs.python.org/library/codecs.html#codecs.open
+ try:
+ with codecs.open(
+ self.opt.save, encoding='utf-8', mode='a') as f:
+ f.write(resultlist)
+ except OSError:
+ pywikibot.exception()
+
+ if self.opt.upload:
+ page = pywikibot.Page(self.site, self.opt.upload)
+ self.userPut(page, page.text, resultlist,
+ ignore_save_related_errors=True,
+ summary=self.summary)
+
+
+def main(*args):
+ """Process command line arguments and invoke ParserFunctionCountBot."""
+ local_args = pywikibot.handle_args(*args)
+ options = {}
+
+ # Parse command line arguments
+ for arg in local_args:
+ opt, _, value = arg.partition(':')
+ if not opt.startswith('-'):
+ continue
+ opt = opt[1:]
+ if opt == 'start':
+ options[opt] = value or pywikibot.input(
+ 'From which title do you want to continue?')
+ elif opt == 'save':
+ options[opt] = value or pywikibot.input(
+ 'Please enter the filename:')
+ elif opt == 'upload':
+ options[opt] = value or pywikibot.input(
+ 'Please enter the pagename:')
+ elif opt == 'first':
+ options[opt] = value or pywikibot.input(
+ 'Please enter the max. number of templates to display:')
+ elif opt == 'atleast':
+ options[opt] = value or pywikibot.input(
+ 'Please enter the min. number of functions to display:')
+ elif opt == 'nosort':
+ options[opt] = True
+
+ bot = ParserFunctionCountBot(**options)
+ bot.run()
+
+
+if __name__ == '__main__':
+ main()
To view, visit change 181724. To unsubscribe, or for help writing mail filters, visit settings.