jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/183266 )
Change subject: [scripts] script to wrap Commons file descriptions in language templates ......................................................................
[scripts] script to wrap Commons file descriptions in language templates
inspired by: https://commons.wikimedia.org/wiki/Commons:Bots/Requests/Dexbot_5
Signed-off-by: xqt info@gno.de Change-Id: Ib8fde813a3da2e1cf7fc35c0a31a64c58ac210a6 --- M docs/scripts/scripts.rst M scripts/README.rst A scripts/commons_information.py 3 files changed, 190 insertions(+), 0 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/docs/scripts/scripts.rst b/docs/scripts/scripts.rst index f202aa4..b72acdf 100644 --- a/docs/scripts/scripts.rst +++ b/docs/scripts/scripts.rst @@ -61,6 +61,11 @@
.. automodule:: scripts.clean_sandbox
+scripts.commons_information script +----------------------------------- + +.. automodule:: scripts.commons_information + scripts.coordinate_import script ---------------------------------
diff --git a/scripts/README.rst b/scripts/README.rst index 5835fbf..ca5e9a7 100644 --- a/scripts/README.rst +++ b/scripts/README.rst @@ -40,6 +40,8 @@ +------------------------+---------------------------------------------------------+ | clean_sandbox.py | This bot makes the cleaned of the page of tests. | +------------------------+---------------------------------------------------------+ + | commons_information.py | Insert a language template into the description field | + +------------------------+---------------------------------------------------------+ | coordinate_import.py | Coordinate importing script. | +------------------------+---------------------------------------------------------+ | cosmetic_changes.py | Can do slight modifications to a wiki page source code | diff --git a/scripts/commons_information.py b/scripts/commons_information.py new file mode 100644 index 0000000..945fa1e --- /dev/null +++ b/scripts/commons_information.py @@ -0,0 +1,183 @@ +#!/usr/bin/python +"""Insert a language template into the description field.""" +# +# (C) Pywikibot team, 2015-2021 +# +# Distributed under the terms of the MIT license. 
#
import copy

import pywikibot

from pywikibot import i18n, pagegenerators
from pywikibot.backports import Tuple
from pywikibot.bot import SingleSiteBot, ExistingPageBot
from pywikibot.tools.formatter import color_format

# mwparserfromhell is mandatory; keep the ImportError around so that
# InformationBot.setup() can re-raise it when the bot is actually started.
try:
    import mwparserfromhell
except ImportError as e:
    mwparserfromhell = e

# langdetect is optional; without it no language hints are given.
try:
    import langdetect
except ImportError:
    langdetect = None


class InformationBot(SingleSiteBot, ExistingPageBot):

    """Bot for the Information template.

    For every ``Information`` template found on a page the bot inspects
    the description parameter, lets the user correct language templates
    that langdetect disagrees with, and offers to wrap the remaining plain
    text in a language template chosen by the user.
    """

    # Category whose members (template namespace) are the valid language
    # templates.
    lang_tmp_cat = 'Language templates'
    # Accepted spellings of the description parameter name.
    desc_params = ('Description', 'description')

    comment = {
        'en': ('Bot: wrap the description parameter of Information in the '
               'appropriate language template')
    }

    def __init__(self, **kwargs):
        """Initializer.

        @param kwargs: keyword arguments passed through to the bot base
            classes
        """
        super().__init__(**kwargs)
        lang_tmp_cat = pywikibot.Category(self.site, self.lang_tmp_cat)
        # Category.articles() yields pages lazily; membership is tested
        # many times, so materialize the members once.  Testing ``in``
        # against the bare iterator would consume it and give false
        # negatives on every later check.
        self.lang_tmps = set(lang_tmp_cat.articles(namespaces=[10]))

    def get_description(self, template):
        """Get description parameter.

        @param template: a parsed Information template
        @return: the single non-empty description parameter, or None if
            there is none, it is blank, or several were found (a warning
            is emitted in the latter case)
        """
        params = [param for param in template.params
                  if param.name.strip() in self.desc_params]
        if len(params) > 1:
            pywikibot.warning('multiple description parameters found')
            return None
        if params and params[0].value.strip() != '':
            return params[0]
        return None

    @staticmethod
    def detect_langs(text):
        """Detect language from given text.

        @param text: the text to analyse
        @return: the candidates reported by langdetect.detect_langs(), or
            None if langdetect is not installed or cannot analyse the text
        """
        if langdetect is None:
            return None
        try:
            return langdetect.detect_langs(text)
        except langdetect.lang_detect_exception.LangDetectException:
            # Raised e.g. for text without usable features (digits or
            # punctuation only); treat it like "no hint available".
            return None

    def process_desc_template(self, template):
        """Process description template.

        If *template* is a known language template with exactly one
        positional parameter and langdetect is confident (probability
        above 0.9) that the text is in a different language, ask the user
        whether the template name should be replaced.

        @param template: a template found inside the description value
        @return: True if the template name was changed, False otherwise
        """
        tmp_page = pywikibot.Page(self.site, template.name.strip(), ns=10)
        if (tmp_page not in self.lang_tmps or len(template.params) != 1
                or not template.has('1')):
            return False

        lang_tmp_val = template.get('1').value.strip()
        langs = self.detect_langs(lang_tmp_val)
        if not (langs and langs[0].prob > 0.9):
            return False

        tmp_page2 = pywikibot.Page(self.site, langs[0].lang, ns=10)
        if tmp_page2 == tmp_page:
            return False

        pywikibot.output(
            '\03{{lightblue}}The language template {before!r} '
            'was found, but langdetect thinks {after!r} is the '
            'most appropriate with a probability of {prob}:'
            '\03{{default}}\n{text}'
            .format(before=tmp_page.title(with_ns=False),
                    after=tmp_page2.title(with_ns=False),
                    prob=langs[0].prob,
                    text=lang_tmp_val))
        choice = pywikibot.input_choice(
            'What to do?',
            [('Replace it', 'r'), ('Do not replace it', 'n'),
             ('Choose another', 'c')])
        if choice == 'r':
            template.name = langs[0].lang
            return True

        if choice == 'c':
            # Strip both sides so stray whitespace neither ends up in the
            # template name nor causes a spurious "changed" result.
            newlang = pywikibot.input(
                'Enter the language of the displayed text:').strip()
            if newlang and newlang != template.name.strip():
                template.name = newlang
                return True
        return False

    @staticmethod
    def replace_value(param, value):
        """Replace param with given value.

        The leading and trailing whitespace of the current value is kept
        so that the layout of the template is not disturbed.

        @param param: the parameter whose value is replaced
        @param value: the new value; it is converted to a string
        """
        lstrip = param.value.lstrip()
        lspaces = param.value[:len(param.value) - len(lstrip)]
        rspaces = lstrip[len(lstrip.rstrip()):]
        param.value = '{}{}{}'.format(lspaces, value, rspaces)

    def setup(self):
        """Raise exception if needed modules are missing."""
        if isinstance(mwparserfromhell, Exception):
            raise mwparserfromhell

    def treat_page(self):
        """Treat current page.

        Check each Information template of the current page, fix or add
        language templates in its description interactively and save the
        page if anything was changed.
        """
        page = self.current_page
        code = mwparserfromhell.parse(page.text)
        edited = False  # to prevent unwanted changes
        for template in code.ifilter_templates():
            if not page.site.sametitle(template.name.strip(), 'Information'):
                continue
            desc = self.get_description(template)
            if desc is None:
                continue
            for tmp in desc.value.filter_templates(recursive=False):
                if self.process_desc_template(tmp):
                    edited = True
            # Work on a copy with all top-level templates removed to get
            # the plain text that is not wrapped in any template yet.
            desc_clean = copy.deepcopy(desc.value)
            for tmp in desc_clean.filter_templates(recursive=False):
                # TODO: emit a debug item?
                desc_clean.remove(tmp)
            value = desc_clean.strip()
            if value == '':
                pywikibot.output('Empty description')
                continue
            pywikibot.output(value)
            langs = self.detect_langs(value)
            if langs:
                pywikibot.output(color_format(
                    '{lightblue}Hints from langdetect:{default}'))
                for language in langs:
                    # Single braces: color_format() only recognises
                    # unescaped colour fields.
                    pywikibot.output(color_format(
                        '{lightblue}{obj.lang}: {obj.prob}{default}',
                        obj=language))
            lang = pywikibot.input(
                'Enter the language of the displayed text:').strip()
            if lang != '':
                tmp_page = pywikibot.Page(page.site, lang, ns=10)
                if tmp_page not in self.lang_tmps:
                    pywikibot.warning(
                        '"{lang}" is not a valid language template on {site}'
                        .format(lang=lang, site=page.site))
                # NOTE(review): the whole description value is replaced by
                # the wrapped plain text, so top-level templates that were
                # already part of the description are not carried over.
                new = mwparserfromhell.nodes.template.Template(lang, [value])
                self.replace_value(desc, new)
                edited = True
        if edited:
            text = str(code)
            summary = i18n.translate(page.site.lang, self.comment,
                                     fallback=True)
            self.put_current(text, summary=summary)


def main(*args: Tuple[str, ...]) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    """
    local_args = pywikibot.handle_args(args)
    gen_factory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        gen_factory.handle_arg(arg)

    gen = gen_factory.getCombinedGenerator()
    if gen:
        bot = InformationBot(generator=gen)
        bot.run()
    else:
        pywikibot.bot.suggest_help(missing_generator=True)


if __name__ == '__main__':
    main()