jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[scripts] script to wrap Commons file descriptions in language templates

inspired by:
https://commons.wikimedia.org/wiki/Commons:Bots/Requests/Dexbot_5

Signed-off-by: xqt <info@gno.de>
Change-Id: Ib8fde813a3da2e1cf7fc35c0a31a64c58ac210a6
---
M docs/scripts/scripts.rst
M scripts/README.rst
A scripts/commons_information.py
3 files changed, 190 insertions(+), 0 deletions(-)

diff --git a/docs/scripts/scripts.rst b/docs/scripts/scripts.rst
index f202aa4..b72acdf 100644
--- a/docs/scripts/scripts.rst
+++ b/docs/scripts/scripts.rst
@@ -61,6 +61,11 @@

.. automodule:: scripts.clean_sandbox

+scripts.commons\_information script
+-----------------------------------
+
+.. automodule:: scripts.commons_information
+
scripts.coordinate\_import script
---------------------------------

diff --git a/scripts/README.rst b/scripts/README.rst
index 5835fbf..ca5e9a7 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -40,6 +40,8 @@
+------------------------+---------------------------------------------------------+
| clean_sandbox.py | This bot makes the cleaned of the page of tests. |
+------------------------+---------------------------------------------------------+
+ | commons_information.py | Insert a language template into the description field |
+ +------------------------+---------------------------------------------------------+
| coordinate_import.py | Coordinate importing script. |
+------------------------+---------------------------------------------------------+
| cosmetic_changes.py | Can do slight modifications to a wiki page source code |
diff --git a/scripts/commons_information.py b/scripts/commons_information.py
new file mode 100644
index 0000000..945fa1e
--- /dev/null
+++ b/scripts/commons_information.py
@@ -0,0 +1,183 @@
+#!/usr/bin/python
+"""Insert a language template into the description field."""
+#
+# (C) Pywikibot team, 2015-2021
+#
+# Distributed under the terms of the MIT license.
+#
+import copy
+
+import pywikibot
+
+from pywikibot import i18n, pagegenerators
+from pywikibot.backports import Tuple
+from pywikibot.bot import SingleSiteBot, ExistingPageBot
+from pywikibot.tools.formatter import color_format
+
+try:
+ import mwparserfromhell
+except ImportError as e:
+ mwparserfromhell = e
+
+try:
+ import langdetect
+except ImportError:
+ langdetect = None
+
+
+class InformationBot(SingleSiteBot, ExistingPageBot):
+
+    """Bot that wraps Information descriptions in language templates.
+
+    For each ``Information`` template found on the current page the bot
+    looks at its description parameter.  Language templates already used
+    inside the description are compared with the language guessed by
+    ``langdetect`` (when that package is installed); description text
+    outside any template is shown to the operator, who is asked for the
+    language template to wrap it in.
+    """
+
+    # Category whose members (in namespace 10) are accepted as language
+    # templates.
+    lang_tmp_cat = 'Language templates'
+    # Accepted spellings of the description parameter name.
+    desc_params = ('Description', 'description')
+
+    # Edit summaries, keyed by language code.
+    comment = {
+        'en': ('Bot: wrap the description parameter of Information in the '
+               'appropriate language template')
+    }
+
+    def __init__(self, **kwargs) -> None:
+        """Initializer.
+
+        Collects the language templates of the bot's site once, so that
+        later membership tests do not need to query the category again.
+        """
+        super().__init__(**kwargs)
+        lang_tmp_cat = pywikibot.Category(self.site, self.lang_tmp_cat)
+        # Category.articles() yields pages lazily.  Membership tests on a
+        # generator consume it, so only the first lookup would be
+        # reliable; a set keeps every lookup correct.
+        self.lang_tmps = set(lang_tmp_cat.articles(namespaces=[10]))
+
+    def get_description(self, template):
+        """Get description parameter.
+
+        @param template: parsed template whose parameters are inspected
+        @return: the description parameter if exactly one is present and
+            its value is not blank, otherwise None.  A warning is printed
+            when more than one description parameter is found.
+        """
+        params = [param for param in template.params
+                  if param.name.strip() in self.desc_params]
+        if len(params) > 1:
+            pywikibot.warning('multiple description parameters found')
+        elif len(params) == 1 and params[0].value.strip() != '':
+            return params[0]
+        return None
+
+    @staticmethod
+    def detect_langs(text):
+        """Detect language from given text.
+
+        @param text: text to analyse
+        @return: the candidates reported by ``langdetect.detect_langs``
+            (callers treat index 0 as the best guess), or None if
+            langdetect is not installed or cannot analyse the text
+        """
+        if langdetect is None:
+            return None
+        try:
+            return langdetect.detect_langs(text)
+        except langdetect.LangDetectException:
+            # langdetect raises when the text has no usable features
+            # (e.g. digits or punctuation only); treat that as "no hint"
+            # instead of aborting the whole bot run.
+            return None
+
+    def process_desc_template(self, template) -> bool:
+        """Process description template.
+
+        Only templates that are known language templates and carry
+        nothing but the positional parameter ``1`` are considered.  If
+        langdetect is more than 90% sure about a language whose template
+        differs from the one used, the operator is asked what to do.
+
+        @param template: template found inside the description value
+        @return: True if the template name was changed, False otherwise
+        """
+        tmp_page = pywikibot.Page(self.site, template.name.strip(), ns=10)
+        if not (tmp_page in self.lang_tmps and len(template.params) == 1
+                and template.has('1')):
+            return False
+
+        lang_tmp_val = template.get('1').value.strip()
+        langs = self.detect_langs(lang_tmp_val)
+        if not langs or langs[0].prob <= 0.9:
+            # no hint at all, or not confident enough to bother the user
+            return False
+
+        tmp_page2 = pywikibot.Page(self.site, langs[0].lang, ns=10)
+        if tmp_page2 == tmp_page:
+            # langdetect agrees with the template already in use
+            return False
+
+        pywikibot.output(
+            '\03{{lightblue}}The language template {before!r} '
+            'was found, but langdetect thinks {after!r} is the '
+            'most appropriate with a probability of {prob}:'
+            '\03{{default}}\n{text}'
+            .format(before=tmp_page.title(with_ns=False),
+                    after=tmp_page2.title(with_ns=False),
+                    prob=langs[0].prob,
+                    text=lang_tmp_val))
+        choice = pywikibot.input_choice(
+            'What to do?',
+            [('Replace it', 'r'), ('Do not replace it', 'n'),
+             ('Choose another', 'c')])
+        if choice == 'r':
+            template.name = langs[0].lang
+            return True
+
+        if choice == 'c':
+            newlang = pywikibot.input(
+                'Enter the language of the displayed text:')
+            if newlang and newlang != template.name:
+                template.name = newlang
+                return True
+        return False
+
+    @staticmethod
+    def replace_value(param, value):
+        """Replace param with given value.
+
+        The leading and trailing whitespace of the old value is kept
+        around the new one so the layout of the template is preserved.
+
+        @param param: parameter whose ``value`` attribute is replaced
+        @param value: new content to put between the kept whitespace
+        """
+        lstrip = param.value.lstrip()
+        # whatever lstrip() removed is the leading whitespace
+        lspaces = param.value[:len(param.value) - len(lstrip)]
+        # whatever rstrip() would remove is the trailing whitespace
+        rspaces = lstrip[len(lstrip.rstrip()):]
+        param.value = '{}{}{}'.format(lspaces, value, rspaces)
+
+    def setup(self) -> None:
+        """Raise exception if needed modules are missing.
+
+        The module-level import guard stores the ImportError in place of
+        the ``mwparserfromhell`` module; it is re-raised here.
+        """
+        if isinstance(mwparserfromhell, Exception):
+            raise mwparserfromhell
+
+    def treat_page(self) -> None:
+        """Treat current page.
+
+        Checks the language templates inside every Information
+        description, asks the operator for the language of any text left
+        outside of templates, and saves the page if something changed.
+        """
+        page = self.current_page
+        code = mwparserfromhell.parse(page.text)
+        edited = False  # to prevent unwanted changes
+        for template in code.ifilter_templates():
+            if not page.site.sametitle(template.name.strip(), 'Information'):
+                continue
+            desc = self.get_description(template)
+            if desc is None:
+                continue
+            # Review the language templates already present.
+            for tmp in desc.value.filter_templates(recursive=False):
+                if self.process_desc_template(tmp):
+                    edited = True
+            # Strip the templates from a copy, leaving only the bare text;
+            # the parsed page itself must not be altered by this step.
+            desc_clean = copy.deepcopy(desc.value)
+            for tmp in desc_clean.filter_templates(recursive=False):
+                # TODO: emit a debug item?
+                desc_clean.remove(tmp)
+            value = desc_clean.strip()
+            if value == '':
+                pywikibot.output('Empty description')
+                continue
+            pywikibot.output(value)
+            langs = self.detect_langs(value)
+            if langs:
+                pywikibot.output(color_format(
+                    '{lightblue}Hints from langdetect:{default}'))
+                for language in langs:
+                    pywikibot.output(color_format(
+                        '{{lightblue}}{obj.lang}: {obj.prob}{{default}}',
+                        obj=language))
+            lang = pywikibot.input(
+                'Enter the language of the displayed text:').strip()
+            if lang != '':
+                tmp_page = pywikibot.Page(page.site, lang, ns=10)
+                if tmp_page not in self.lang_tmps:
+                    # only a warning: the operator's choice is still used
+                    pywikibot.warning(
+                        '"{lang}" is not a valid language template on {site}'
+                        .format(lang=lang, site=page.site))
+                # The whole parameter value is replaced by the new
+                # template wrapping the template-free text.
+                new = mwparserfromhell.nodes.template.Template(lang, [value])
+                self.replace_value(desc, new)
+                edited = True
+        if edited:
+            text = str(code)
+            summary = i18n.translate(page.site.lang, self.comment,
+                                     fallback=True)
+            self.put_current(text, summary=summary)
+
+
+def main(*args: str) -> None:
+    """
+    Process command line arguments and invoke bot.
+
+    If args is an empty list, sys.argv is used.
+
+    Global options are consumed by pywikibot.handle_args; every remaining
+    argument is handed to the page generator factory.  Without a page
+    generator the bot is not started and a help hint is printed instead.
+
+    @param args: command line arguments
+    """
+    local_args = pywikibot.handle_args(args)
+    gen_factory = pagegenerators.GeneratorFactory()
+
+    for arg in local_args:
+        gen_factory.handle_arg(arg)
+
+    gen = gen_factory.getCombinedGenerator()
+    if gen:
+        bot = InformationBot(generator=gen)
+        bot.run()
+    else:
+        pywikibot.bot.suggest_help(missing_generator=True)
+
+
+# Start the bot only when the file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()

To view, visit change 183266. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ib8fde813a3da2e1cf7fc35c0a31a64c58ac210a6
Gerrit-Change-Number: 183266
Gerrit-PatchSet: 9
Gerrit-Owner: Ricordisamoa <ricordisamoa@disroot.org>
Gerrit-Reviewer: John Vandenberg <jayvdb@gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw@arctus.nl>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa@disroot.org>
Gerrit-Reviewer: XZise <CommodoreFabianus@gmx.de>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged