jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/181724 )
Change subject: Porting parser_function_count.py from compat to core/scripts ......................................................................
Porting parser_function_count.py from compat to core/scripts
- Added parser_function_count.py in core/scripts/ as part of the Pywikibot:Compat to Core migration.
- Use SingleSiteBot, ExistingPageBot, NoRedirectPageBot classes.
- Use collections.Counter to hold the result.
- Use magicwords for variable declaration via siteinfo.
- Use doc_subpages infrastructure of core.
Bug: T66878 Change-Id: I28aafa2aa2928b9585fc825523dfc17fb808e4f9 --- M docs/scripts/scripts.rst M scripts/README.rst A scripts/parser_function_count.py 3 files changed, 225 insertions(+), 1 deletion(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/docs/scripts/scripts.rst b/docs/scripts/scripts.rst index 088b454..ba353c7 100644 --- a/docs/scripts/scripts.rst +++ b/docs/scripts/scripts.rst @@ -271,6 +271,11 @@
.. automodule:: scripts.pagefromfile
+scripts.parser_function_count script +-------------------------------------- + +.. automodule:: scripts.parser_function_count + scripts.patrol script ---------------------
diff --git a/scripts/README.rst b/scripts/README.rst index b51e4fb..402ef5b 100644 --- a/scripts/README.rst +++ b/scripts/README.rst @@ -167,7 +167,10 @@ | | number of pages to be put on the wiki. | +------------------------+---------------------------------------------------------+ | #pageimport.py | Import pages from a certain wiki to another. | - +------------------------+---------------------------------------------------------+ + +------------------------+--+------------------------------------------------------+ + | parser_function_count.py | Find expensive templates that are subject to be | + | | converted to Lua. | + +------------------------+--+------------------------------------------------------+ | patrol.py | Obtains a list pages and marks the edits as patrolled | | | based on a whitelist. | +------------------------+---------------------------------------------------------+ diff --git a/scripts/parser_function_count.py b/scripts/parser_function_count.py new file mode 100644 index 0000000..7293e5d --- /dev/null +++ b/scripts/parser_function_count.py @@ -0,0 +1,216 @@ +# -*- coding: utf-8 -*- +""" +Used to find expensive templates that are subject to be converted to Lua. + +It counts parser functions and then orders templates by number of these +and uploads the first n titles or alternatively templates having count()>n. + +Parameters: + +-start Will start from the given title (it does not have to exist). + Parameter may be given as "-start" or "-start:title". + Defaults to '!'. + +-first Returns the first n results in decreasing order of number + of hits (or without ordering if used with -nosort) + Parameter may be given as "-first" or "-first:n". + +-atleast Returns templates with at least n hits. + Parameter may be given as "-atleast" or "-atleast:n". + +-nosort Keeps the original order of templates. Default behaviour is + to sort them by decreasing order of count(parserfunctions). + +-save Saves the results. 
The file is in the form you may upload it + to a wikipage. May be given as "-save:<filename>". + If it exists, titles will be appended. + +-upload Specify a page in your wiki where results will be uploaded. + Parameter may be given as "-upload" or "-upload:title". + Say good-bye to previous content if existed. + +Precedence of evaluation: results are first sorted in decreasing order of +templates, unless nosort is switched on. Then first n templates are taken if +first is specified, and at last atleast is evaluated. If nosort and first are +used together, the program will stop at the nth hit without scanning the rest +of the template namespace. This may be used to run it in more sessions +(continue with -start next time). + +First is strict. That means if results #90-120 have the same number of parser +functions and you specify -first:100, only the first 100 will be listed (even +if atleast is used as well). + +Should you specify neither first nor atleast, all templates using parser +functions will be listed. +""" +# +# (C) Pywikibot team, 2013-2020 +# +# Distributed under the terms of the MIT license. +# +# Todo: +# * Using xml and xmlstart +# * Using categories +# * Error handling for uploading (anyway, that's the last action, it's only +# for the beauty of the program, does not affect anything). + +import codecs +import re + +from collections import Counter + +import pywikibot + +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot, SingleSiteBot +from pywikibot import pagegenerators + + +class ParserFunctionCountBot(SingleSiteBot, + ExistingPageBot, NoRedirectPageBot): + + """Bot class used for obtaining Parser function Count.""" + + def __init__(self, **kwargs): + """Initializer.""" + self.available_options.update({ + 'atleast': None, + 'first': None, + 'nosort': False, + 'save': None, + 'start': '!', + 'upload': None, + }) + super().__init__(**kwargs) + + editcomment = { + # This will be used for uploading the list to your wiki.
+ 'en': + 'Bot: uploading list of templates having too many parser ' + 'functions', + 'hu': + 'A túl sok parserfüggvényt használó sablonok listájának ' + 'feltöltése', + } + + # Limitations for result: + if self.opt.first: + try: + self.opt.first = int(self.opt.first) + if self.opt.first < 1: + self.opt.first = None + except ValueError: + self.opt.first = None + + if self.opt.atleast: + try: + self.opt.atleast = int(self.opt.atleast) + # 1 has no effect, don't waste resources. + if self.opt.atleast < 2: + self.opt.atleast = None + except ValueError: + self.opt.atleast = None + + lang = self.site.lang + self.summary = editcomment.get(lang, editcomment['en']) + + @property + def generator(self): + """Generator.""" + gen = self.site.allpages(start=self.option.start, + namespace=10, filterredir=False) + if self.site.doc_subpage: + gen = pagegenerators.RegexFilterPageGenerator( + gen, self.site.doc_subpage, quantifier='none') + return gen + + def setup(self): + """Setup magic words, regex and result counter.""" + pywikibot.output('Hold on, this will need some time. ' + 'You will be notified by 50 templates.') + magicwords = [] + for magic_word in self.site.siteinfo['magicwords']: + magicwords += magic_word['aliases'] + self.regex = re.compile(r'#({}):'.format('|'.join(magicwords)), re.I) + self.results = Counter() + + def treat(self, page): + """Process a single template.""" + title = page.title() + if (self._treat_counter + 1) % 50 == 0: + # Don't let the poor user panic in front of a black screen. 
+ pywikibot.output('{}th template is being processed: {}' + .format(self._treat_counter + 1, title)) + + text = page.text + functions = self.regex.findall(text) + if functions and (self.opt.atleast is None + or self.opt.atleast <= len(functions)): + self.results[title] = len(functions) + + if self.opt.nosort and self.opt.first \ + and len(self.results) >= self.opt.first: + self.stop() + + def teardown(self): + """Final processing.""" + resultlist = '\n'.join( + '# [[{result[0]}]] ({result[1]})' + .format(result=result) + for result in self.results.most_common(self.opt.first)) + pywikibot.output(resultlist) + pywikibot.output('{} templates were found.'.format(len(self.results))) + + # File operations: + if self.opt.save: + # This opens in strict error mode, that means bot will stop + # on encoding errors with ValueError. + # See http://docs.python.org/library/codecs.html#codecs.open + try: + with codecs.open( + self.opt.save, encoding='utf-8', mode='a') as f: + f.write(resultlist) + except OSError: + pywikibot.exception() + + if self.opt.upload: + page = pywikibot.Page(self.site, self.opt.upload) + self.userPut(page, page.text, resultlist, + ignore_save_related_errors=True, + summary=self.summary) + + +def main(*args): + """Process command line arguments and invoke ParserFunctionCountBot.""" + local_args = pywikibot.handle_args(*args) + options = {} + + # Parse command line arguments + for arg in local_args: + opt, _, value = arg.partition(':') + if not opt.startswith('-'): + continue + opt = opt[1:] + if opt == 'start': + options[opt] = value or pywikibot.input( + 'From which title do you want to continue?') + elif opt == 'save': + options[opt] = value or pywikibot.input( + 'Please enter the filename:') + elif opt == 'upload': + options[opt] = value or pywikibot.input( + 'Please enter the pagename:') + elif opt == 'first': + options[opt] = value or pywikibot.input( + 'Please enter the max. 
number of templates to display:') + elif opt == 'atleast': + options[opt] = value or pywikibot.input( + 'Please enter the min. number of functions to display:') + elif opt == 'nosort': + options[opt] = True + + bot = ParserFunctionCountBot(**options) + bot.run() + + +if __name__ == '__main__': + main()
pywikibot-commits@lists.wikimedia.org