jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
[IMPR] use linktrail via siteinfo

- Create the linktrail regex from siteinfo['general']['linktrail']
- Use the tiny cache (@cached) to cache the result
- Previously Site.linktrail() was delegated to Family.linktrail()
  by the magic Site.__getattr__(). Deprecate the now-useless
  Family.linktrail() method.
- Also remove the obsolete Family.linktrails dict
- Add TestLinktrails to site_tests.py
- Update the DrySite class with a default linktrail
- Remove the update_linktrails.py maintenance script
- Update the documentation

Change-Id: Ie12ddb65f2ed9a9d520b39a4c372d3ee5d9f6309
---
M .codecov.yml
M docs/scripts/maintenance.rst
M docs/scripts_ref/scripts.maintenance.rst
M pywikibot/family.py
M pywikibot/scripts/generate_family_file.py
M pywikibot/site/_apisite.py
M scripts/README.rst
D scripts/maintenance/update_linktrails.py
M tests/site_tests.py
M tests/utils.py
10 files changed, 117 insertions(+), 404 deletions(-)

diff --git a/.codecov.yml b/.codecov.yml
index 2ba30a4..f1a70ae 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -37,7 +37,6 @@
- scripts/maintenance/make_i18n_dict.py
- scripts/maintenance/preload_sites.py
- scripts/maintenance/sorting_order.py
- - scripts/maintenance/update_linktrails.py
- scripts/maintenance/wikimedia_sites.py
- scripts/userscripts/
- tests/pwb/
diff --git a/docs/scripts/maintenance.rst b/docs/scripts/maintenance.rst
index d096563..6f3ed7d 100644
--- a/docs/scripts/maintenance.rst
+++ b/docs/scripts/maintenance.rst
@@ -25,12 +25,6 @@
.. automodule:: scripts.maintenance.sorting_order
:no-members:

-update\_linktrails script description
--------------------------------------
-
-.. automodule:: scripts.maintenance.update_linktrails
- :no-members:
-
wikimedia\_sites script description
-----------------------------------

diff --git a/docs/scripts_ref/scripts.maintenance.rst b/docs/scripts_ref/scripts.maintenance.rst
index e7bb9c4..c331693 100644
--- a/docs/scripts_ref/scripts.maintenance.rst
+++ b/docs/scripts_ref/scripts.maintenance.rst
@@ -32,11 +32,6 @@

.. automodule:: scripts.maintenance.sorting_order

-scripts.maintenance.update\_linktrails script
----------------------------------------------
-
-.. automodule:: scripts.maintenance.update_linktrails
-
scripts.maintenance.wikimedia\_sites script
-------------------------------------------

diff --git a/pywikibot/family.py b/pywikibot/family.py
index 871a36c..4f80f39 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -21,7 +21,7 @@
from pywikibot import config
from pywikibot.backports import Dict, List, Set, Tuple # skipcq: PY-W2000
from pywikibot.exceptions import FamilyMaintenanceWarning, UnknownFamilyError
-from pywikibot.tools import classproperty, deprecated
+from pywikibot.tools import classproperty, deprecated, remove_last_args


logger = logging.getLogger('pywiki.wiki.family')
@@ -29,7 +29,7 @@
# Legal characters for Family.name and Family.langs keys
NAME_CHARACTERS = string.ascii_letters + string.digits
# nds_nl code alias requires "_"
-# dash must be the last char to be reused as regex in update_linktrails
+# dash must be the last char to be reused as regex
CODE_CHARACTERS = string.ascii_lowercase + string.digits + '_-'


@@ -157,211 +157,6 @@
fyinterwiki.sort(key=lambda x:
x.replace('y', 'i') + x.count('y') * '!')

- # Letters that can follow a wikilink and are regarded as part of
- # this link. This depends on the linktrail setting in LanguageXx.php
- #
- # Do not use this dict directly but Site.linktrail or Family.linktrail
- # methods instead
- linktrails = {
- '_default': '[a-z]*',
- 'ab': '[a-zабвгӷҕдежзӡикқҟлмнопԥҧрстҭуфхҳцҵчҷҽҿшыҩџьә]*',
- 'ady': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
- 'als': '[äöüßa-z]*',
- 'alt': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяјҥӧӱ]*',
- 'ami': '',
- 'an': '[a-záéíóúñ]*',
- 'ar': '[a-zء-يؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ۭ]*',
- 'ary': '[a-zء-يؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ۭ]*',
- 'arz': '[a-zء-يؐ-ًؚ-ٰٟۖ-ۜ۟-۪ۤۧۨ-ۭ]*',
- 'ast': '[a-záéíóúñ]*',
- 'atj': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'av': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
- 'avk': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'awa': '[a-zऀ-ॣ०-꣠-ꣿ]*',
- 'ay': '[a-záéíóúñ]*',
- 'az': '[a-zçəğıöşü]*',
- 'azb': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
- 'ba': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяәөүғҡңҙҫһ“»]*',
- 'bar': '[äöüßa-z]*',
- 'bat-smg': '[a-ząčęėįšųūž]*',
- 'be': '[абвгґджзеёжзійклмнопрстуўфхцчшыьэюяćčłńśšŭźža-z]*',
- 'be-tarask': '[абвгґджзеёжзійклмнопрстуўфхцчшыьэюяćčłńśšŭźža-z]*',
- 'bg': '[a-zабвгдежзийклмнопрстуфхцчшщъыьэюя]*',
- 'bm': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'bn': '[ঀ-৿]*',
- 'bpy': '[ঀ-৿]*',
- 'br': "(?:[a-zA-ZàâçéèêîôûäëïöüùñÇÉÂÊÎÔÛÄËÏÖÜÀÈÙÑ]|[cC]['’]h|C['’]H)*",
- 'bs': '[a-zćčžšđž]*',
- 'bxr': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*",
- 'cbk-zam': '[a-záéíóúñ]*',
- 'ce': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
- 'ckb': '[ئابپتجچحخدرڕزژسشعغفڤقکگلڵمنوۆهھەیێ‌]*',
- 'co': '[a-zàéèíîìóòúù]*',
- 'crh': '[a-zâçğıñöşüа-яёʺʹ“»]*',
- 'cs': '[a-záčďéěíňóřšťúůýž]*',
- 'csb': '[a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]*',
- 'cu': '[a-zабвгдеєжѕзїіıићклмнопсстѹфхѡѿцчш'
- 'щъыьѣюѥѧѩѫѭѯѱѳѷѵґѓђёјйљњќуўџэ҄я“»]*',
- 'cv': '[a-zа-яĕçăӳ"»]*',
- 'cy': '[àáâèéêìíîïòóôûŵŷa-z]*',
- 'da': '[a-zæøå]*',
- 'dag': '[ɛɣŋɔʒƐƔŊƆƷa-z]*',
- 'de': '[äöüßa-z]*',
- 'din': '[äëɛɛ̈éɣïŋöɔɔ̈óa-z]*',
- 'dsb': '[äöüßa-z]*',
- 'el': '[a-zαβγδεζηθικλμνξοπρστυφχψωςΑΒΓΔΕΖΗΘ'
- 'ΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίόύώϊϋΐΰΆΈΉΊΌΎΏΪΫ]*',
- 'eml': '[a-zàéèíîìóòúù]*',
- 'es': '[a-záéíóúñ]*',
- 'et': '[äöõšüža-z]*',
- 'ext': '[a-záéíóúñ]*',
- 'fa': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
- 'ff': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'fi': '[a-zäö]*',
- 'fiu-vro': '[äöõšüža-z]*',
- 'fo': '[áðíóúýæøa-z]*',
- 'fr': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'frp': '[a-zàâçéèêîœôû·’æäåāăëēïīòöōùü‘]*',
- 'frr': '[a-zäöüßåāđē]*',
- 'fur': '[a-zàéèíîìóòúù]*',
- 'fy': '[a-zàáèéìíòóùúâêîôûäëïöü]*',
- 'gag': '[a-zÇĞçğİıÖöŞşÜüÂâÎîÛû]*',
- 'gan': '',
- 'gcr': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'gl': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
- 'glk': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
- 'gn': '[a-záéíóúñ]*',
- 'gu': '[઀-૿]*',
- 'guw': '[a-zàáǎèéěìíǐòóǒùúɛ̌ɔɖẹọ]*',
- 'he': '[a-zא-ת]*',
- 'hi': '[a-zऀ-ॣ०-꣠-ꣿ]*',
- 'hr': '[čšžćđßa-z]*',
- 'hsb': '[äöüßa-z]*',
- 'ht': '[a-zàèòÀÈÒ]*',
- 'hu': '[a-záéíóúöüőűÁÉÍÓÚÖÜŐŰ]*',
- 'hy': '[a-zաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև«»]*',
- 'hyw': '[a-zաբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև«»]*',
- 'ii': '',
- 'inh': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
- 'is': '[áðéíóúýþæöa-z-–]*',
- 'it': '[a-zàéèíîìóòúù]*',
- 'ka': '[a-zაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ“»]*',
- 'kaa': "(?:[a-zıʼ’“»]|'(?!'))*",
- 'kab': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'kbd': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
- 'kbp': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'kk': '[a-zäçéğıïñöşüýʺʹа-яёәғіқңөұүһٴ'
- 'ابپتجحدرزسشعفقكلمنڭەوۇۋۆىيچھ“»]*',
- 'kl': '[a-zæøå]*',
- 'koi': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'krc': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'ksh': '[äöüėëijßəğåůæœça-z]*',
- 'ku': '[a-zçêîşûẍḧÇÊÎŞÛẌḦ]*',
- 'kv': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'lad': '[a-záéíóúñ]*',
- 'lb': '[äöüßa-z]*',
- 'lbe': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ1“»]*',
- 'lez': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
- 'li': '[a-zäöüïëéèà]*',
- 'lij': '[a-zàéèíîìóòúù]*',
- 'lld': '[a-zàéèíîìóòúù]*',
- 'lmo': '[a-zàéèíîìóòúù]*',
- 'ln': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'lrc': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
- 'lt': '[a-ząčęėįšųūž]*',
- 'ltg': '[a-zA-ZĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž]*',
- 'lv': '[a-zA-ZĀāČčĒēĢģĪīĶķĻļŅņŠšŪūŽž]*',
- 'mai': '[a-zऀ-ॣ०-꣠-ꣿ]*',
- 'mdf': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'mg': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'mhr': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'mk': '[a-zабвгдѓежзѕијклљмнњопрстќуфхцчџш]*',
- 'ml': '[a-zം-ൿ]*',
- 'mn': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя“»]*',
- 'mr': '[ऀ-ॣॱ-ॿ‍]*',
- 'mrj': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'mwl': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
- 'myv': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'mzn': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
- 'nah': '[a-záéíóúñ]*',
- 'nap': '[a-zàéèíîìóòúù]*',
- 'nds': '[äöüßa-z]*',
- 'nds-nl': '[a-zäöüïëéèà]*',
- 'nl': '[a-zäöüïëéèà]*',
- 'nn': '[æøåa-z]*',
- 'no': '[æøåa-z]*',
- 'nrm': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'oc': '[a-zàâçéèêîôû]*',
- 'olo': '[a-zčČšŠžŽäÄöÖ]*',
- 'or': '[a-z଀-୿]*',
- 'os': '[a-zаæбвгдеёжзийклмнопрстуфхцчшщъыьэюя“»]*',
- 'pa': '[ਁਂਃਅਆਇਈਉਊਏਐਓਔਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮ'
- 'ਯਰਲਲ਼ਵਸ਼ਸਹ਼ਾਿੀੁੂੇੈੋੌ੍ਖ਼ਗ਼ਜ਼ੜਫ਼ੰੱੲੳa-z]*',
- 'pcd': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'pdc': '[äöüßa-z]*',
- 'pfl': '[äöüßa-z]*',
- 'pl': '[a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]*',
- 'pms': '[a-zàéèíîìóòúù]*',
- 'pnt': '[a-zαβγδεζηθικλμνξοπρστυφχψωςΑΒΓΔΕΖΗΘ'
- 'ΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίόύώϊϋΐΰΆΈΉΊΌΎΏΪΫ]*',
- 'pt': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
- 'pwn': '',
- 'qu': '[a-záéíóúñ]*',
- 'rmy': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
- 'ro': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
- 'roa-rup': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
- 'roa-tara': '[a-zàéèíîìóòúù]*',
- 'ru': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'rue': '[a-zабвгґдеєжзиіїйклмнопрстуфхцчшщьєюяёъы“»]*',
- 'sa': '[a-zऀ-ॣ०-꣠-ꣿ]*',
- 'sah': '[a-zабвгҕдеёжзийклмнҥоөпрсһтуүфхцчшщъыьэюя]*',
- 'scn': '[a-zàéèíîìóòúù]*',
- 'se': '[a-zàáâçčʒǯđðéèêëǧǥȟíìîïıǩŋñóòôõßšŧúùûýÿüžþæøåäö]*',
- 'sg': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'sh': '[a-zčćđžš]*',
- 'shi': '[ⴰ-ⵯa-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙḍḥɛṛɣṣṭẓḌḤƐṚƔṢṬẒʷ]*',
- 'sk': '[a-záäčďéíľĺňóôŕšťúýž]*',
- 'skr': '[آابٻپتٹثجچڄحخدڈݙذرڑزژسشصضطظعغفقکگڳلمنݨوہھیےئأءۃڋڰںؤ]*',
- 'sl': '[a-zčćđžš]*',
- 'smn': '[a-zâčđŋšžäá]*',
- 'sr': '[abvgdđežzijklljmnnjoprstćufhcčdž'
- 'šабвгдђежзијклљмнњопрстћуфхцчџш]*',
- 'srn': '[a-zäöüïëéèà]*',
- 'stq': '[äöüßa-z]*',
- 'sv': '[a-zåäöéÅÄÖÉ]*',
- 'szl': '[a-zęóąśłżźćńĘÓĄŚŁŻŹĆŃ]*',
- 'szy': '',
- 'ta': '[஀-௿]*',
- 'tay': '',
- 'te': '[ఁ-౯]*',
- 'tet': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
- 'tg': '[a-zабвгдеёжзийклмнопрстуфхчшъэюяғӣқўҳҷцщыь]*',
- 'tk': '[a-zÄäÇçĞğŇňÖöŞşÜüÝýŽž]*',
- 'tr': '[a-zÇĞçğİıÖöŞşÜüÂâÎîÛû]*',
- 'trv': '',
- 'tt': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӘәӨөҮүҖҗҢңҺһ]*',
- 'ty': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'tyv': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'udm': '[a-zа-яёӝӟӥӧӵ]*',
- 'uk': '[a-zабвгґдеєжзиіїйклмнопрстуфхцчшщьєюяёъы“»]*',
- 'ur': '[ابپتٹثجچحخدڈذر​ڑ​زژسشصضطظعغفقکگل​م​نںوؤہھیئےآأءۃ]*',
- 'uz': '[a-zʻʼ“»]*',
- 'vec': '[a-zàéèíîìóòúù]*',
- 'vep': '[äöõšüža-z]*',
- 'vi': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'vls': '[a-zäöüïëéèà]*',
- 'wa': '[a-zåâêîôûçéè]*',
- 'wo': '[a-zàâçéèêîôûäëïöüùÇÉÂÊÎÔÛÄËÏÖÜÀÈÙ]*',
- 'wuu': '',
- 'xal': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюя]*',
- 'xmf': '[a-zაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ“»]*',
- 'yi': '[a-zא-ת]*',
- 'za': '',
- 'zea': '[a-zäöüïëéèà]*',
- 'zh': '',
- }
-
# A list of category redirect template names in different languages
category_redirect_templates = {
'_default': []
@@ -617,20 +412,17 @@
Family._families[fam] = cls
return cls

- def linktrail(self, code, fallback: str = '_default'):
+ @deprecated('APISite.linktrail()', since='7.3.0')
+ @remove_last_args(['fallback'])
+ def linktrail(self, code: str) -> str:
"""Return regex for trailing chars displayed as part of a link.

Returns a string, not a compiled regular expression object.
+
+ .. deprecated:: 7.3
"""
- if code in self.linktrails:
- return self.linktrails[code]
-
- if fallback:
- return self.linktrails[fallback]
-
- raise KeyError(
- 'ERROR: linktrail in language {language_code} unknown'
- .format(language_code=code))
+ site = pywikibot.Site(code, 'wikipedia')
+ return site.linktrail()

def category_redirects(self, code, fallback: str = '_default'):
"""Return list of category redirect templates."""
diff --git a/pywikibot/scripts/generate_family_file.py b/pywikibot/scripts/generate_family_file.py
index 817409c..9b22ee3 100755
--- a/pywikibot/scripts/generate_family_file.py
+++ b/pywikibot/scripts/generate_family_file.py
@@ -43,7 +43,7 @@
# Legal characters for Family name and Family langs keys
NAME_CHARACTERS = string.ascii_letters + string.digits
# nds_nl code alias requires "_"
-# dash must be the last char to be reused as regex in update_linktrails
+# dash must be the last char to be reused as regex
CODE_CHARACTERS = string.ascii_lowercase + string.digits + '_-'


diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py
index 98c2c81..1d6e174 100644
--- a/pywikibot/site/_apisite.py
+++ b/pywikibot/site/_apisite.py
@@ -69,6 +69,7 @@
from pywikibot.site._tokenwallet import TokenWallet
from pywikibot.site._upload import Uploader
from pywikibot.tools import (
+ cached,
MediaWikiVersion,
deprecated,
merge_unique_dicts,
@@ -671,6 +672,47 @@
assert '$1' in path, 'articlepath must contain "$1" placeholder'
return path.replace('$1', '{}')

+ @cached
+ def linktrail(self) -> str:
+ """Build linktrail regex from siteinfo linktrail.
+
+ Letters that can follow a wikilink and are regarded as part of
+ this link. This depends on the linktrail setting in LanguageXx.php.
+
+ .. versionadded:: 7.3
+
+ :return: The linktrail regex.
+ """
+ unresolved_linktrails = {
+ 'br': '(?:[a-zA-ZàâçéèêîôûäëïöüùñÇÉÂÊÎÔÛÄËÏÖÜÀÈÙÑ]'
+ "|[cC]['’]h|C['’]H)*",
+ 'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*",
+ 'kaa': "(?:[a-zıʼ’“»]|'(?!'))*",
+ }
+ linktrail = self.siteinfo['general']['linktrail']
+ if linktrail == '/^()(.*)$/sD': # empty linktrail
+ return ''
+
+ match = re.search(r'\((?:\:\?|\?\:)?\[(?P<pattern>.+?)\]'
+ r'(?P<letters>(\|.)*)\)?\+\)', linktrail)
+ if not match:
+ with suppress(KeyError):
+ return unresolved_linktrails[self.code]
+ raise KeyError(
+ '"{}": No linktrail pattern extracted from "{}"'
+ .format(self.code, linktrail))
+
+ pattern = match.group('pattern')
+ letters = match.group('letters')
+
+ if r'x{' in pattern:
+ pattern = re.sub(r'\\x\{([A-F0-9]{4})\}',
+ lambda match: chr(int(match.group(1), 16)),
+ pattern)
+ if letters:
+ pattern += ''.join(letters.split('|'))
+ return '[{}]*'.format(pattern)
+
@staticmethod
def assert_valid_iter_params(
msg_prefix: str,
diff --git a/scripts/README.rst b/scripts/README.rst
index 7282cc6..519b755 100644
--- a/scripts/README.rst
+++ b/scripts/README.rst
@@ -172,8 +172,6 @@
+------------------------+---------------------------------------------------------+
| sorting_order.py | Updates interwiki sorting order in family.py file. |
+------------------------+---------------------------------------------------------+
- | update_linktrails.py | Script that updates the linktrails in family.py file. |
- +------------------------+---------------------------------------------------------+
| wikimedia_sites.py | Updates the language lists in Wikimedia family files. |
+------------------------+---------------------------------------------------------+

diff --git a/scripts/maintenance/update_linktrails.py b/scripts/maintenance/update_linktrails.py
deleted file mode 100755
index abfeddd..0000000
--- a/scripts/maintenance/update_linktrails.py
+++ /dev/null
@@ -1,172 +0,0 @@
-#!/usr/bin/python3
-"""Script that updates the linktrails in family.py file.
-
-linktrails contains a regex for each site code which holds letters that
-can follow a wikilink and are regarded as part of this link. This depends
-on the linktrail setting in LanguageXx.php. This maintenance script
-retrieves the site settings from wikipedia family and updates the Family
-linktrails dict.
-"""
-#
-# (C) Pywikibot team, 2017-2021
-#
-# Distributed under the terms of the MIT license.
-#
-
-import codecs
-import re
-from contextlib import closing
-from os.path import join
-
-import pywikibot
-from pywikibot.family import CODE_CHARACTERS
-from pywikibot.tools import suppress_warnings
-
-
-def format_string(code: str, pattern: str) -> str:
- """Format a single pattern line."""
- fmt = ' ' * 8 + "'{}': {!r}"
- code_len = len(code)
- pattern_len = len(pattern)
-
- if pattern_len > 64 - code_len:
- index = pattern_len // 2
- result = fmt.format(code, pattern[:index]) + '\n'
- result += ' ' * (code_len + 12) + repr(pattern[index:])
- else:
- result = fmt.format(code, pattern)
-
- result += ',\n'
- # convert escape sequences of unprintable characters to unicode
- result = re.sub(r'\\u([a-f0-9]{4})',
- lambda match: chr(int(match.group(1), 16)), result)
-
- return result
-
-
-def coroutine(func):
- """Decorator which starts coroutine."""
- def start(*args, **kwargs):
- cr = func(*args, **kwargs)
- cr.send(None)
- return cr
- return start
-
-
-@coroutine
-def update_sites(fam):
- """Process linktrail for a given site code."""
- formatter = update_line()
- while True:
- code = yield
-
- with suppress_warnings(
- 'Site wikipedia:[{}]+ instantiated using different code'
- .format(CODE_CHARACTERS),
- category=UserWarning,
- filename=r'.+update_linktrails\.py'):
- site = pywikibot.Site(code, 'wikipedia')
-
- if isinstance(site, pywikibot.site.RemovedSite):
- continue
-
- if site.code != code:
- pywikibot.output('"{}" is redirected to "{}"; skipping.'
- .format(code, site.code))
- continue
-
- linktrail = site.siteinfo.get('general', expiry=True)['linktrail']
- oldtrail = fam.linktrails.get(code)
- formatter.send((code, oldtrail, linktrail))
-
-
-@coroutine
-def update_line():
- """Format linktrail for family file."""
- writer = update_family_file()
- matcher = update_matched_line(writer)
- while True:
- code, old, linktrail = yield
- line = format_string(code, old) if old else ''
-
- if not linktrail:
- writer.send(line)
- continue
-
- if linktrail == '/^()(.*)$/sD': # empty linktrail
- line = format_string(code, '')
- writer.send(line)
- continue
-
- match = re.search(
- r'\((?:\:\?|\?\:)?\[(?P<pattern>.+?)\]'
- r'(?P<letters>(\|.)*)\)?\+\)',
- linktrail)
-
- if not match:
- pywikibot.output('"{}": No pattern found in "{}"'
- .format(code, linktrail))
- writer.send(line)
- continue
-
- matcher.send((code, old, match))
-
-
-@coroutine
-def update_matched_line(writer):
- """Update matched linktrail."""
- while True:
- code, old, match = yield
- pattern = match.group('pattern')
- letters = match.group('letters')
- if pattern == 'a-z' and not letters: # default
- if old:
- pywikibot.output('"{}" has default linktrail; '
- 'removing {}'.format(code, old))
- continue
-
- if r'x{' in pattern:
- # replace unicode escape string by corresponding char
- pattern = re.sub(
- r'\\x\{([A-F0-9]{4})\}',
- lambda match: chr(int(match.group(1), 16)),
- pattern)
-
- if letters:
- pattern += ''.join(letters.split('|'))
-
- new = '[{}]*'.format(pattern)
- line = format_string(code, new)
- writer.send(line)
-
-
-@coroutine
-def update_family_file():
- """Collect linktrails and write them to family.py."""
- text = " linktrails = {\n '_default': '[a-z]*',\n"
- try:
- while True:
- text += yield
- except GeneratorExit:
- text += ' }'
- # write linktrails to family file
- pywikibot.output('Writing family file...')
- family_file_name = join('pywikibot', 'family.py')
- with codecs.open(family_file_name, 'r', 'utf8') as family_file:
- family_text = family_file.read()
- family_text = re.sub(r'(?ms)^ {4}linktrails.+?\}',
- text, family_text, 1)
- with codecs.open(family_file_name, 'w', 'utf8') as family_file:
- family_file.write(family_text)
-
-
-def update_linktrails(family):
- """Update linktrails for given family."""
- with closing(update_sites(family)) as updater:
- for code in sorted(family.langs):
- updater.send(code)
-
-
-if __name__ == '__main__':
- site = pywikibot.Site('en', 'wikipedia')
- update_linktrails(site.family)
diff --git a/tests/site_tests.py b/tests/site_tests.py
index 2c0f0e3..4698341 100755
--- a/tests/site_tests.py
+++ b/tests/site_tests.py
@@ -3260,6 +3260,67 @@
self.assertFalse(site.sametitle('Invalid:Foo', 'Invalid:foo'))


+class TestLinktrails(TestCase):
+
+ """Test linktrail method."""
+
+ family = 'wikipedia'
+ code = 'test'
+
+ def test_has_linktrail(self):
+ """Verify that every code has a linktrail.
+
+ Test the smallest wikis and a random sample of the others.
+ """
+ size = 20
+ small_wikis = self.site.family.languages_by_size[-size:]
+ great_wikis = self.site.family.languages_by_size[:-size]
+ random.shuffle(great_wikis)
+ great_wikis = great_wikis[:size]
+ for code in sorted(small_wikis + great_wikis):
+ site = pywikibot.Site(code, self.family)
+ with self.subTest(site=site):
+ self.assertIsInstance(site.linktrail(), str)
+
+ def test_linktrails(self):
+ """Test special linktrails.
+
+ This is a subset of the old `family.linktrails` dict.
+ """
+ linktrails = {
+ 'ami': '',
+ 'bug': '[a-z]*',
+ 'ca': "(?:[a-zàèéíòóúç·ïü]|'(?!'))*",
+ 'da': '[a-zæøå]*',
+ 'ext': '[a-záéíóúñ]*',
+ 'fa': '[ابپتثجچحخدذرزژسشصضطظعغفقکگلمنوهیآأئؤة‌]*',
+ 'gu': '[઀-૿]*',
+ 'he': '[a-zא-ת]*',
+ 'ii': '',
+ 'jv': '[a-z]*',
+ 'kaa': "(?:[a-zıʼ’“»]|'(?!'))*",
+ 'lez': '[a-zабвгдеёжзийклмнопрстуфхцчшщъыьэюяӀ]*',
+ 'mai': '[a-zऀ-ॣ०-꣠-ꣿ]*',
+ 'nds-nl': '[a-zäöüïëéèà]*',
+ 'or': '[a-z଀-୿]*',
+ 'pt': '[áâãàéêẽçíòóôõq̃úüűũa-z]*',
+ 'qu': '[a-záéíóúñ]*',
+ 'roa-rup': '[a-zăâîşţșțĂÂÎŞŢȘȚ]*',
+ 'sa': '[a-zऀ-ॣ०-꣠-ꣿ]*',
+ 'te': '[ఁ-౯]*',
+ 'uz': '[a-zʻʼ“»]*',
+ 'vec': '[a-zàéèíîìóòúù]*',
+ 'wuu': '',
+ 'xmf': '[a-zაბგდევზთიკლმნოპჟრსტუფქღყშჩცძწჭხჯჰ“»]*',
+ 'yi': '[a-zא-ת]*',
+ 'zh-cn': ''
+ }
+ for code, linktrail in linktrails.items():
+ site = pywikibot.Site(code, self.family)
+ with self.subTest(site=site):
+ self.assertEqual(site.linktrail(), linktrail)
+
+
class TestObsoleteSite(DefaultSiteTestCase):

"""Test 'closed' and obsolete code sites."""
diff --git a/tests/utils.py b/tests/utils.py
index 35f3372..160d703 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -342,6 +342,10 @@
author_ns, 'Author', case=self.siteinfo['case'])
return ns_dict

+ def linktrail(self):
+ """Return default linktrail."""
+ return '[a-z]*'
+
@property
def userinfo(self):
"""Return dry data."""

To view, visit change 579877. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ie12ddb65f2ed9a9d520b39a4c372d3ee5d9f6309
Gerrit-Change-Number: 579877
Gerrit-PatchSet: 7
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-CC: Meno25 <meno25mail@gmail.com>
Gerrit-MessageType: merged