jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942811 )
Change subject: [IMPR] use welcome messages from WelcomeBot within CheckImagesBot
......................................................................
[IMPR] use welcome messages from WelcomeBot within CheckImagesBot
use welcome messages from WelcomeBot within CheckImagesBot to prevent
code duplication and different L10N entries.
welcome:
- move WelcomeBot.check_managed_sites method to get_welcome_text
function to be reused by checkimages.py
- use capital letters for localizing dictionaries
- rename netext dict to WELCOME
- update L10N for WELCOME from checkimages.py
checkimages.py:
- import get_welcome_text from welcome.py script
- remove EMPTY dict
- rewrite put_mex_in_talk() method to use welcome.get_welcome_text()
Change-Id: I374f125837be190a268ef090ffbdbfb19ee14a12
---
M scripts/checkimages.py
M scripts/welcome.py
2 files changed, 100 insertions(+), 90 deletions(-)
Approvals:
Meno25: Looks good to me, but someone else must approve
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 9cf45b3..a5fb81a 100755
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -1,9 +1,8 @@
#!/usr/bin/env python3
-"""
-Script to check recently uploaded files.
+"""Script to check recently uploaded files.
-This script checks if a file description is present and if there are other
-problems in the image's description.
+This script checks if a file description is present and if there are
+other problems in the image's description.
This script will have to be configured for each language. Please submit
translations as addition to the Pywikibot framework.
@@ -68,13 +67,14 @@
* Text= This is the template that the bot will use when it will report the
image's problem.
-Todo
-----
-* Clean the code, some passages are pretty difficult to understand.
-* Add the "catch the language" function for commons.
-* Fix and reorganise the new documentation
-* Add a report for the image tagged.
+.. todo::
+ * Clean the code, some passages are pretty difficult to understand.
+ * Add the "catch the language" function for commons.
+ * Fix and reorganise the new documentation
+ * Add a report for the image tagged.
+.. versionchanged:: 8.4
+ Welcome messages are imported from :mod:`scripts.welcome` script.
"""
#
# (C) Pywikibot team, 2006-2023
@@ -107,6 +107,7 @@
from pywikibot.family import Family
from pywikibot.site import Namespace
+from scripts.welcome import get_welcome_text
###############################################################################
# <--------------------------- Change only below! --------------------------->#
@@ -172,32 +173,6 @@
'zh': ['{{no source', '{{unknown', '{{No license'],
}
-# When the bot find that the usertalk is empty is not pretty to put only the
-# no source without the welcome, isn't it?
-EMPTY = {
- 'commons': '{{subst:welcome}}\n~~~~\n',
- 'meta': '{{subst:Welcome}}\n~~~~\n',
- 'ar': '{{subst:أهلا ومرحبا}}\n~~~~\n',
- 'arz': '{{subst:اهلا و سهلا}}\n~~~~\n',
- 'de': '{{subst:willkommen}} ~~~~',
- 'en': '{{subst:welcome}}\n~~~~\n',
- 'fa': '{{subst:خوشامدید|%s}}',
- 'fr': '{{Bienvenue nouveau\n~~~~\n',
- 'ga': '{{subst:Fáilte}} - ~~~~\n',
- 'hr': '{{subst:dd}}--~~~~\n',
- 'hu': '{{subst:Üdvözlet|~~~~}}\n',
- 'it': '<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n~~~~\n'
- '<!-- fine template di benvenuto -->',
- 'ja': '{{subst:Welcome/intro}}\n{{subst:welcome|--~~~~}}\n',
- 'ko': '{{환영}}--~~~~\n',
- 'ru': '{{subst:Приветствие}}\n~~~~\n',
- 'sd': '{{ڀليڪار}}\n~~~~\n',
- 'sr': '{{dd}}--~~~~\n',
- 'ta': '{{welcome}}\n~~~~\n',
- 'ur': '{{خوش آمدید}}\n~~~~\n',
- 'zh': '{{subst:welcome|sign=~~~~}}',
-}
-
# if the file has an unknown extension it will be tagged with this template.
# In reality, there aren't unknown extension, they are only not allowed...
DELETE_IMMEDIATELY = {
@@ -672,9 +647,11 @@
return True
def put_mex_in_talk(self) -> None:
- """Function to put the warning in talk page of the uploader."""
- commento2 = i18n.twtranslate(self.site.lang,
- 'checkimages-source-notice-comment')
+ """Function to put the warning in talk page of the uploader.
+
+ When the bot finds that the usertalk is empty, it adds the welcome
+ message first. The messages are imported from welcome.py script.
+ """
email_page_name = i18n.translate(self.site, EMAIL_PAGE_WITH_TEXT)
email_subj = i18n.translate(self.site, EMAIL_SUBJECT)
if self.notification2:
@@ -683,10 +660,11 @@
self.notification2 = self.notification
second_text = False
+ curr_text = None
# Getting the talk page's history, to check if there is another
# advise...
try:
- testoattuale = self.talk_page.get()
+ curr_text = self.talk_page.get()
history = list(self.talk_page.revisions(total=10))
latest_user = history[0]['user']
pywikibot.info(
@@ -699,21 +677,19 @@
except IsRedirectPageError:
pywikibot.info(
'The user talk is a redirect, trying to get the right talk...')
- try:
- self.talk_page = self.talk_page.getRedirectTarget()
- testoattuale = self.talk_page.get()
- except NoPageError:
- testoattuale = i18n.translate(self.site, EMPTY)
+ self.talk_page = self.talk_page.getRedirectTarget()
+ if self.talk_page.exists():
+ curr_text = self.talk_page.get()
except NoPageError:
pywikibot.info('The user page is blank')
- testoattuale = i18n.translate(self.site, EMPTY)
- if self.comm_talk:
- commentox = self.comm_talk
- else:
- commentox = commento2
+ if curr_text is None:
+ try:
+ curr_text = get_welcome_text(self.site) % '~~~~'
+ except KeyError:
+ curr_text = ''
- new_text = f'{testoattuale}\n\n'
+ new_text = f'{curr_text}\n\n'
if second_text:
new_text += f'{self.notification2}'
else:
@@ -725,8 +701,10 @@
pywikibot.info('Maximum notifications reached, skip.')
return
+ summary = self.comm_talk or i18n.twtranslate(
+ self.site.lang, 'checkimages-source-notice-comment')
try:
- self.talk_page.put(new_text, summary=commentox, minor=False)
+ self.talk_page.put(new_text, summary=summary, minor=False)
except PageSaveRelatedError as e:
if not self.ignore_save_related_errors:
raise
diff --git a/scripts/welcome.py b/scripts/welcome.py
index 917d5d7..80a6a67 100755
--- a/scripts/welcome.py
+++ b/scripts/welcome.py
@@ -185,8 +185,8 @@
# been eliminated.
# FIXME: Not all language/project combinations have been defined yet.
# Add the following strings to customise for a language:
-# logbook, netext, report_page, bad_pag, report_text, random_sign,
-# whitelist_pg, final_new_text_additions, logpage_header if
+# LOGBOOK, WELCOME, REPORT_PAGE, BAD_PAGE, REPORT_TEXT, RANDOM_SIGN,
+# WHITELIST, FINAL_NEW_TEXT_ADDITIONS, LOGPAGE_HEADER if
# different from Wikipedia entry
############################################################################
@@ -194,7 +194,7 @@
# The page where the bot will save the log (e.g. Wikipedia:Welcome log).
#
# ATTENTION: Projects not listed won't write a log to the wiki.
-logbook = {
+LOGBOOK = {
'ar': 'Project:سجل الترحيب',
'fr': ('Wikipedia:Prise de décision/'
'Accueil automatique des nouveaux par un robot/log'),
@@ -211,8 +211,9 @@
# The text for the welcome message (e.g. {{welcome}}) and %s at the end
# that is your signature (the bot has a random parameter to add different
# sign, so in this way it will change according to your parameters).
-netext = {
+WELCOME = {
'commons': '{{subst:welcome}} %s',
+ 'meta': '{{subst:Welcome}} %s',
'wikipedia': {
'am': '{{subst:Welcome}} %s',
'ar': '{{subst:أهلا ومرحبا}} %s',
@@ -223,19 +224,23 @@
'bn': '{{subst:স্বাগতম/বট}} %s',
'bs': '{{Dobrodošlica}} %s',
'da': '{{velkommen|%s}}',
+ 'de': '{{subst:willkommen}} %s',
'en': '{{subst:welcome}} %s',
- 'fa': '{{جا:خوشامد}} %s',
- 'fr': '{{subst:Discussion Projet:Aide/Bienvenue}} %s',
+ 'fa': '{{subst:خوشامدید|%s}}',
+ 'fr': '{{Bienvenue nouveau}} %s',
'ga': '{{subst:fáilte}} %s',
'gom': '{{subst:welcome}} %s',
'gor': '{{subst:Welcome}} %s',
'he': '{{ס:ברוך הבא}} %s',
'hr': '{{subst:dd}} %s',
+ 'hu': '{{subst:Üdvözlet|%s}}\n',
'id': '{{subst:sdbot2}}\n%s',
- 'it': '<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n%s',
+ 'it': '<!-- inizio template di benvenuto -->\n{{subst:Benvebot}}\n%s'
+ '<!-- fine template di benvenuto -->',
'ja': '{{subst:Welcome/intro}}\n{{subst:welcome|%s}}',
'ka': '{{ახალი მომხმარებელი}}--%s',
'kn': '{{subst:ಸುಸ್ವಾಗತ}} %s',
+ 'ko': '{{환영}}--%s\n',
'ml': '{{ബദൽ:സ്വാഗതം/bot}} %s',
'my': '{{subst:welcome}} %s',
'nap': '{{Bemmenuto}}%s',
@@ -244,11 +249,12 @@
'pdc': '{{subst:Wilkum}}%s',
'pt': '{{subst:bem vindo}} %s',
'roa-tara': '{{Bovègne}} %s',
- 'ru': '{{Hello}} %s',
- 'sd': '{{subst:ڀليڪار}} %s',
+ 'ru': '{{subst:Приветствие}}\n%s\n',
+ 'sd': '{{ڀليڪار}}\n%s\n',
'shn': '{{subst:ႁပ်ႉတွၼ်ႈၽူႈၸႂ်ႉတိုဝ်း}} %s',
'sq': '{{subst:tung}} %s',
- 'sr': '{{Добродошлица}} %s',
+ 'sr': '{{dd}}--%s\n',
+ 'ta': '{{welcome}}\n%s\n',
'ur': '{{نقل:خوش آمدید}}%s',
'vec': '{{subst:Benvegnù|%s}}',
'vo': '{{benokömö}} %s',
@@ -292,7 +298,7 @@
},
}
# The page where the bot will report users with a possibly bad username.
-report_page = {
+REPORT_PAGE = {
'commons': ("Project:Administrators'noticeboard/User problems/Usernames"
'to be checked'),
'wikipedia': {
@@ -318,7 +324,7 @@
}
# The page where the bot reads the real-time bad words page
# (this parameter is optional).
-bad_pag = {
+BAD_PAGE = {
'commons': 'Project:Welcome log/Bad_names',
'wikipedia': {
'am': 'User:Beria/Bad_names',
@@ -341,7 +347,7 @@
# The text for reporting a possibly bad username
# e.g. *[[Talk_page:Username|Username]]).
-report_text = {
+REPORT_TEXT = {
'commons': '\n*{{user3|%s}}' + timeselected,
'wikipedia': {
'am': '\n*[[User talk:%s]]' + timeselected,
@@ -365,7 +371,7 @@
}
# Set where you load your list of signatures that the bot will load if you use
# the random argument (this parameter is optional).
-random_sign = {
+RANDOM_SIGN = {
'am': 'User:Beria/Signatures',
'ar': 'Project:سجل الترحيب/توقيعات',
'ba': 'Ҡатнашыусы:Salamat bot/Ярҙам',
@@ -385,7 +391,7 @@
}
# The page where the bot reads the real-time whitelist page.
# (this parameter is optional).
-whitelist_pg = {
+WHITELIST = {
'ar': 'Project:سجل الترحيب/قائمة بيضاء',
'en': 'User:Filnik/whitelist',
'ga': 'Project:Log fáilte/Bánliosta',
@@ -395,14 +401,14 @@
# Text after the {{welcome}} template, if you want to add something
# Default (en): nothing.
-final_new_text_additions = {
+FINAL_NEW_TEXT_ADDITIONS = {
'it': '\n<!-- fine template di benvenuto -->',
'zh': '<small>(via ~~~)</small>',
}
#
#
-logpage_header = {
+LOGPAGE_HEADER = {
'_default': '{|border="2" cellpadding="4" cellspacing="0" style="margin: '
'0.5em 0.5em 0.5em 1em; padding: 0.5em; background: #bfcda5; '
'border: 1px #b6fd2c solid; border-collapse: collapse; '
@@ -459,6 +465,20 @@
quiet = False # Users without contributions aren't displayed
+def get_welcome_text(site: pywikibot.site.BaseSite) -> str:
+ """Check that site is managed by the script and return the message.
+
+ :raises KeyError: site is not in WELCOME dict
+ """
+ msg = i18n.translate(site, WELCOME)
+ if not msg:
+ script = pywikibot.calledModuleName()
+ welcome = 'welcome.' if script != 'welcome' else ''
+ raise KeyError(f'{script}.py is not localized for site {site} in '
+ f'{welcome}WELCOME dict.')
+ return msg
+
+
class WelcomeBot(SingleSiteBot):
"""Bot to add welcome messages on User pages."""
@@ -466,26 +486,17 @@
def __init__(self, **kwargs) -> None:
"""Initializer."""
super().__init__(**kwargs)
- self.check_managed_sites()
+ self.welcome_text = get_welcome_text(self.site)
self.bname: Dict[str, str] = {}
self.welcomed_users: List[str] = []
- self.log_name = i18n.translate(self.site, logbook)
+ self.log_name = i18n.translate(self.site, LOGBOOK)
if not self.log_name:
globalvar.make_welcome_log = False
if globalvar.random_sign:
self.define_sign(True)
- def check_managed_sites(self) -> None:
- """Check that site is managed by welcome.py."""
- # Raises KeyError if site is not in netext dict.
- site_netext = i18n.translate(self.site, netext)
- if site_netext is None:
- raise KeyError(f'welcome.py is not localized for site {self.site}'
- ' in netext dict.')
- self.welcome_text = site_netext
-
def bad_name_filer(self, name, force: bool = False) -> bool:
"""Check for bad names."""
if not globalvar.filt_bad_name:
@@ -532,8 +543,7 @@
# blacklist from wikipage
badword_page = pywikibot.Page(self.site,
- i18n.translate(self.site,
- bad_pag))
+ i18n.translate(self.site, BAD_PAGE))
list_loaded = []
if badword_page.exists():
pywikibot.info(
@@ -548,7 +558,7 @@
if not hasattr(self, '_whitelist') or force:
# initialize whitelist
whitelist_default = ['emiliano']
- wtlpg = i18n.translate(self.site, whitelist_pg)
+ wtlpg = i18n.translate(self.site, WHITELIST)
list_white = []
if wtlpg:
whitelist_page = pywikibot.Page(self.site, wtlpg)
@@ -612,8 +622,7 @@
# name in queue is max, put detail to report page
pywikibot.info('Updating badname accounts to report page...')
rep_page = pywikibot.Page(self.site,
- i18n.translate(self.site,
- report_page))
+ i18n.translate(self.site, REPORT_PAGE))
if rep_page.exists():
text_get = rep_page.get()
else:
@@ -630,8 +639,7 @@
pywikibot.info(f'{username} is already in the report page.')
else:
# Adding the log.
- rep_text += i18n.translate(self.site,
- report_text) % username
+ rep_text += i18n.translate(self.site, REPORT_TEXT) % username
if self.site.code == 'it':
rep_text = f'{rep_text}{self.bname[username]}}}}}'
@@ -663,7 +671,7 @@
self.show_status()
pywikibot.info(
'Log page is not exist, getting information for page creation')
- text = i18n.translate(self.site, logpage_header,
+ text = i18n.translate(self.site, LOGPAGE_HEADER,
fallback=i18n.DEFAULT_FALLBACK)
text += '\n!' + self.site.namespace(2)
text += '\n!' + str.capitalize(
@@ -729,7 +737,7 @@
sign_text = ''
creg = re.compile(r'^\* ?(.*?)$', re.M)
if not globalvar.sign_file_name:
- sign_page_name = i18n.translate(self.site, random_sign)
+ sign_page_name = i18n.translate(self.site, RANDOM_SIGN)
if not sign_page_name:
self.show_status(Msg.WARN)
pywikibot.info(f"{self.site} doesn't allow random signature,"
@@ -817,7 +825,7 @@
elif self.site.sitename != 'wikinews:it':
welcome_text = welcome_text % globalvar.default_sign
- final_text = i18n.translate(self.site, final_new_text_additions)
+ final_text = i18n.translate(self.site, FINAL_NEW_TEXT_ADDITIONS)
if final_text:
welcome_text += final_text
welcome_comment = i18n.twtranslate(self.site, 'welcome-welcome')
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942811
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I374f125837be190a268ef090ffbdbfb19ee14a12
Gerrit-Change-Number: 942811
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr(a)wikimedia.org>
Gerrit-Reviewer: Meno25 <meno25mail(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942668 )
Change subject: [IMPR] use inline re.IGNORECASE flag in textlib.case_escape function
......................................................................
[IMPR] use inline re.IGNORECASE flag in textlib.case_escape function
- use inline re.IGNORECASE flag for the first letter of string argument
- add underscore parameter to detect interchangeable and collapsible
spaces/underscores in string
- use underscore parameter within scripts
Bug: T308265
Change-Id: I58df8260db97c45cde6e959ada7e5a8acc959d79
---
M pywikibot/textlib.py
M scripts/image.py
M scripts/delinker.py
3 files changed, 31 insertions(+), 15 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 39f8d50..b8ac31f 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -163,19 +163,24 @@
return phrase
-def case_escape(case: str, string: str) -> str:
+def case_escape(case: str, string: str, *, underscore: bool = False) -> str:
"""Return an escaped regex pattern which depends on 'first-letter' case.
.. versionadded:: 7.0
+ .. versionchanged:: 8.4
+ Added the optional *underscore* parameter.
- :param case: if `case` is 'first-letter' the regex contains an
- upper/lower case set for the first letter
+ :param case: if `case` is 'first-letter', the regex contains an
+ inline re.IGNORECASE flag for the first letter
+ :param underscore: if True, expand the regex to detect spaces and
+ underscores which are interchangeable and collapsible
"""
- first = string[0]
- if first.isalpha() and case == 'first-letter':
- pattern = f'[{first.upper()}{first.lower()}]{re.escape(string[1:])}'
+ if case == 'first-letter':
+ pattern = f'(?i:{string[:1]}){re.escape(string[1:])}'
else:
pattern = re.escape(string)
+ if underscore:
+ pattern = re.sub(r'_|\\ ', '[_ ]+', pattern)
return pattern
@@ -1557,9 +1562,7 @@
return oldtext
# title might contain regex special characters
- title = case_escape(site.namespaces[14].case, title)
- # spaces and underscores in page titles are interchangeable and collapsible
- title = title.replace(r'\ ', '[ _]+').replace(r'\_', '[ _]+')
+ title = case_escape(site.namespaces[14].case, title, underscore=True)
categoryR = re.compile(r'\[\[\s*({})\s*:\s*{}[\s\u200e\u200f]*'
r'((?:\|[^]]+)?\]\])'
.format(catNamespace, title), re.I)
diff --git a/scripts/delinker.py b/scripts/delinker.py
index 6282cd6..4d3d0b4 100755
--- a/scripts/delinker.py
+++ b/scripts/delinker.py
@@ -100,9 +100,9 @@
"""Set page to current page and delink that page."""
# use image_regex from image.py
namespace = file_page.site.namespaces[6]
- escaped = case_escape(namespace.case, file_page.title(with_ns=False))
- # Be careful, spaces and _ have been converted to '\ ' and '\_'
- escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
+ escaped = case_escape(namespace.case,
+ file_page.title(with_ns=False),
+ underscore=True)
self.image_regex = re.compile(
r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
r'(?:[^\[\]]|\[\[[^\]]+\]\]|\[[^\]]+\])*|) *\]\]'
diff --git a/scripts/image.py b/scripts/image.py
index 1b5d05d..167fa78 100755
--- a/scripts/image.py
+++ b/scripts/image.py
@@ -85,10 +85,8 @@
param)
namespace = self.site.namespaces[6]
- escaped = case_escape(namespace.case, self.old_image)
+ escaped = case_escape(namespace.case, self.old_image, underscore=True)
- # Be careful, spaces and _ have been converted to '\ ' and '\_'
- escaped = re.sub('\\\\[_ ]', '[_ ]', escaped)
if not self.opt.loose or not self.new_image:
image_regex = re.compile(
r'\[\[ *(?:{})\s*:\s*{} *(?P<parameters>\|'
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942668
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I58df8260db97c45cde6e959ada7e5a8acc959d79
Gerrit-Change-Number: 942668
Gerrit-PatchSet: 2
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: D3r1ck01 <dalangi-ctr(a)wikimedia.org>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942624 )
Change subject: [IMPR] use urllib.parse.unquote() for tools.chars.url2string() function
......................................................................
[IMPR] use urllib.parse.unquote() for tools.chars.url2string() function
Simplify tools.chars.url2string() function by using
urllib.parse.unquote() instead of urllib.parse.unquote_to_bytes and
encoding/decoding strings for it.
Change-Id: I49bf4fec45f6f67ddab75f7248b8b1a9eadc6d8a
---
M pywikibot/tools/chars.py
1 file changed, 30 insertions(+), 9 deletions(-)
Approvals:
Matěj Suchánek: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/tools/chars.py b/pywikibot/tools/chars.py
index c64c84d..47bfb5a 100644
--- a/pywikibot/tools/chars.py
+++ b/pywikibot/tools/chars.py
@@ -8,7 +8,7 @@
import sys
from contextlib import suppress
from typing import Union
-from urllib.parse import unquote_to_bytes
+from urllib.parse import unquote
from pywikibot.backports import Iterable
from pywikibot.tools._unidata import _category_cf
@@ -98,10 +98,22 @@
encodings: Union[str, Iterable[str]] = 'utf-8') -> str:
"""Convert URL-encoded text to unicode using several encoding.
- Uses the first encoding that doesn't cause an error.
+ Uses the first encoding that doesn't cause an error. Raises the
+ first exception if all encodings fail.
+
+ For a single *encodings* string this function is equivalent to
+ :samp:`urllib.parse.unquote(title, encodings, errors='strict')`
+
+ .. versionchanged:: 8.4
+ Ignore *LookupError* and try other encodings.
+
+ .. seealso:: :python:`urllib.parse.unquote
+ <library/urllib.parse.html#urllib.parse.unquote>`
**Example:**
+ >>> url2string('abc%20def')
+ 'abc def'
>>> url2string('/El%20Ni%C3%B1o/')
'/El Niño/'
>>> url2string('/El%20Ni%C3%B1o/', 'ascii')
@@ -118,19 +130,15 @@
:raise LookupError: unknown encoding
"""
if isinstance(encodings, str):
- encodings = [encodings]
+ return unquote(title, encodings, errors='strict')
first_exception = None
for enc in encodings:
try:
- t = title.encode(enc)
- t = unquote_to_bytes(t)
- result = t.decode(enc)
- except UnicodeError as e:
+ return unquote(title, enc, errors='strict')
+ except (UnicodeError, LookupError) as e:
if not first_exception:
first_exception = e
- else:
- return result
# Couldn't convert, raise the first exception
raise first_exception
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942624
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I49bf4fec45f6f67ddab75f7248b8b1a9eadc6d8a
Gerrit-Change-Number: 942624
Gerrit-PatchSet: 4
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942603 )
Change subject: [IMPR] Convert URL-encoded characters also for links outside main namespace
......................................................................
[IMPR] Convert URL-encoded characters also for links outside main namespace
As reported in T342470, CosmeticChangesToolkit.cleanUpLinks() does not
convert URL-encoded characters outside the main namespace or for interwiki
links. This patch solves this issue.
Bug: T342470
Change-Id: Ie9f8fc503df842ad45fe44eefc57449c0473cd29
---
M pywikibot/cosmetic_changes.py
1 file changed, 28 insertions(+), 12 deletions(-)
Approvals:
Meno25: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/cosmetic_changes.py b/pywikibot/cosmetic_changes.py
index ffd43a5..bf3e112 100644
--- a/pywikibot/cosmetic_changes.py
+++ b/pywikibot/cosmetic_changes.py
@@ -501,32 +501,38 @@
"""Tidy up wikilinks found in a string.
This function will:
- * Replace underscores with spaces
+ * Replace underscores with spaces
* Move leading and trailing spaces out of the wikilink and into the
surrounding text
-
* Convert URL-encoded characters into Unicode-encoded characters
-
* Move trailing characters out of the link and make the link without
using a pipe, if possible
-
* Capitalize the article title of the link, if appropriate
+ .. versionchanged:: 8.4
+ Convert URL-encoded characters if a link is an interwiki link
+ or different from main namespace.
+
:param text: string to perform the clean-up on
:return: text with tidied wikilinks
"""
# helper function which works on one link and either returns it
# unmodified, or returns a replacement.
def handleOneLink(match: Match[str]) -> str:
- titleWithSection = match['titleWithSection']
+ # Convert URL-encoded characters to str
+ titleWithSection = url2string(match['titleWithSection'],
+ encodings=self.site.encodings())
label = match['label']
trailingChars = match['linktrail']
newline = match['newline']
+ # entire link but convert URL-encoded text
+ oldlink = url2string(match.group(),
+ encodings=self.site.encodings())
is_interwiki = self.site.isInterwikiLink(titleWithSection)
if is_interwiki:
- return match.group()
+ return oldlink
# The link looks like this:
# [[page_title|link_text]]trailing_chars
@@ -538,7 +544,7 @@
except InvalidTitleError:
in_main_namespace = False
if not in_main_namespace:
- return match.group()
+ return oldlink
# Replace underlines by spaces, also multiple underlines
titleWithSection = re.sub('_+', ' ', titleWithSection)
@@ -560,13 +566,9 @@
titleWithSection = titleWithSection.rstrip()
hadTrailingSpaces = len(titleWithSection) != titleLength
- # Convert URL-encoded characters to str
- titleWithSection = url2string(titleWithSection,
- encodings=self.site.encodings())
-
if not titleWithSection:
# just skip empty links.
return match.group()
# Remove unnecessary initial and final spaces from label.
# Please note that some editors prefer spaces around pipes.
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/core/+/942603
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie9f8fc503df842ad45fe44eefc57449c0473cd29
Gerrit-Change-Number: 942603
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Meno25 <meno25mail(a)gmail.com>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged
jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/i18n/+/942454 )
Change subject: [i18n] Additional translations for checkimages
......................................................................
[i18n] Additional translations for checkimages
Change-Id: Ib92dd114119efbb3f8dcac5abed6d125ddda840f
---
M checkimages/en.json
M checkimages/qqq.json
2 files changed, 13 insertions(+), 0 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/checkimages/en.json b/checkimages/en.json
index 421142b..3431526 100644
--- a/checkimages/en.json
+++ b/checkimages/en.json
@@ -11,6 +11,8 @@
"checkimages-doubles-file-comment": "Bot: File already on Commons, may be deleted",
"checkimages-doubles-head": "Duplicate file",
"checkimages-doubles-talk-comment": "Bot: Notify that the file already exists on Commons",
+ "checkimages-forced-mode": "('''forced mode''')",
+ "checkimages-has-duplicates": "has the following duplicates%(force)s:",
"checkimages-log-comment": "Bot: Updating the log",
"checkimages-no-license-head": "Image without license",
"checkimages-source-tag-comment": "Bot: Marking newly uploaded untagged file",
diff --git a/checkimages/qqq.json b/checkimages/qqq.json
index 33e9e57..d324fca 100644
--- a/checkimages/qqq.json
+++ b/checkimages/qqq.json
@@ -10,6 +10,8 @@
"checkimages-doubles-file-comment": "Edit summary used by the bot while it reports a problem in the file page",
"checkimages-doubles-head": "Head of the report given to the uploader",
"checkimages-doubles-talk-comment": "Edit summary used by the bot while it reports the problem in the uploader's talk page",
+ "checkimages-forced-mode": "Report is generated in force mode",
+ "checkimages-has-duplicates": "Report that an image has several duplicates",
"checkimages-log-comment": "Edit summary for the checkimages' report",
"checkimages-no-license-head": "The header of a report if an image has no license",
"checkimages-source-tag-comment": "Edit summary for untagged user talk notice",
--
To view, visit https://gerrit.wikimedia.org/r/c/pywikibot/i18n/+/942454
To unsubscribe, or for help writing mail filters, visit https://gerrit.wikimedia.org/r/settings
Gerrit-Project: pywikibot/i18n
Gerrit-Branch: master
Gerrit-Change-Id: Ib92dd114119efbb3f8dcac5abed6d125ddda840f
Gerrit-Change-Number: 942454
Gerrit-PatchSet: 1
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged