jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/995002?usp=email )
Change subject: [cleanup] Make Family.languages_by_size dynamic ......................................................................
[cleanup] Make Family.languages_by_size dynamic
Currently languages_by_size data is obtained from https://wikistats.wmcloud.org/ via wikimedia_sites script which re-writes the family files for families which have data in the wikistats database.
The languages_by_size sorting codes are only used by the interwiki.py script with the -hint option together with a number.
This change replaces the current system with on-demand extraction of the statistics for each site in the family from wikistats.
- change languages_by_size attributes to sorted codes - add languages_by_size class property to WikimediaFamily family - remove generic languages_by_size and codes - use Family.codes instead of Family.languages_by_size if appropriate - use a global dict on wikistats to hold the cache - remove wikimedia_sites maintenance script which is no longer needed - add family_list to unidata.py - update tests - update documentation
Bug: T78396 Change-Id: Idb7eb45ec2db002f13c62c02156dd3d7d97e1e04 --- M .codecov.yml M docs/scripts/maintenance.rst M docs/scripts_ref/scripts.maintenance.rst M pywikibot/data/wikistats.py M pywikibot/families/vikidia_family.py M pywikibot/families/wikibooks_family.py M pywikibot/families/wikihow_family.py M pywikibot/families/wikimania_family.py M pywikibot/families/wikimediachapter_family.py M pywikibot/families/wikinews_family.py M pywikibot/families/wikipedia_family.py M pywikibot/families/wikiquote_family.py M pywikibot/families/wikisource_family.py M pywikibot/families/wikiversity_family.py M pywikibot/families/wikivoyage_family.py M pywikibot/families/wiktionary_family.py M pywikibot/families/wowwiki_family.py M pywikibot/family.py M pywikibot/scripts/generate_user_files.py M pywikibot/scripts/preload_sites.py M pywikibot/titletranslate.py M scripts/README.rst M scripts/interwiki.py M scripts/maintenance/unidata.py D scripts/maintenance/wikimedia_sites.py M tests/family_tests.py M tests/l10n_tests.py 27 files changed, 250 insertions(+), 291 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/.codecov.yml b/.codecov.yml index 2096ac8..79e611a 100644 --- a/.codecov.yml +++ b/.codecov.yml @@ -26,7 +26,6 @@ - scripts/dataextend.py - scripts/maintenance/colors.py - scripts/maintenance/make_i18n_dict.py - - scripts/maintenance/wikimedia_sites.py - scripts/userscripts/ - tests/pwb/
diff --git a/docs/scripts/maintenance.rst b/docs/scripts/maintenance.rst index 5cf355a..2eefdcd 100644 --- a/docs/scripts/maintenance.rst +++ b/docs/scripts/maintenance.rst @@ -29,10 +29,3 @@ .. automodule:: scripts.maintenance.unidata :no-members: :noindex: - -wikimedia_sites script -======================= - -.. automodule:: scripts.maintenance.wikimedia_sites - :no-members: - :noindex: diff --git a/docs/scripts_ref/scripts.maintenance.rst b/docs/scripts_ref/scripts.maintenance.rst index 54a0e2d..af4359d 100644 --- a/docs/scripts_ref/scripts.maintenance.rst +++ b/docs/scripts_ref/scripts.maintenance.rst @@ -29,8 +29,3 @@ ---------------------------
.. automodule:: scripts.maintenance.unidata - -scripts.maintenance.wikimedia_sites ------------------------------------- - -.. automodule:: scripts.maintenance.wikimedia_sites diff --git a/pywikibot/data/wikistats.py b/pywikibot/data/wikistats.py index 5746952..bfb6892 100644 --- a/pywikibot/data/wikistats.py +++ b/pywikibot/data/wikistats.py @@ -1,6 +1,6 @@ """Objects representing WikiStats API.""" # -# (C) Pywikibot team, 2014-2022 +# (C) Pywikibot team, 2014-2024 # # Distributed under the terms of the MIT license. # @@ -8,11 +8,16 @@
from csv import DictReader from io import StringIO +from typing import Any
import pywikibot from pywikibot.comms import http
+# cache the data +_data: dict[str, list[Any]] = {} + + class WikiStats:
""" @@ -20,6 +25,9 @@
The methods accept a Pywikibot family name as the WikiStats table name, mapping the names before calling the WikiStats API. + + .. versionchanged:: 9.0 + tables are cached globally instead by instances. """
FAMILY_MAPPING = { @@ -75,15 +83,14 @@ def __init__(self, url: str = 'https://wikistats.wmcloud.org/') -> None: """Initializer.""" self.url = url - self._data = {}
def get(self, table: str) -> list: """Get a list of a table of data.
:param table: table of data to fetch """ - if table in self._data: - return self._data[table] + if table in _data: + return _data[table]
if table not in self.ALL_KEYS: pywikibot.warning('WikiStats unknown table ' + table) @@ -95,7 +102,7 @@ f = StringIO(r.text) reader = DictReader(f) data = list(reader) - self._data[table] = data + _data[table] = data return data
def get_dict(self, table: str) -> dict: diff --git a/pywikibot/families/vikidia_family.py b/pywikibot/families/vikidia_family.py index de74bfb..5673fd5 100644 --- a/pywikibot/families/vikidia_family.py +++ b/pywikibot/families/vikidia_family.py @@ -1,6 +1,6 @@ """Family module for Vikidia.""" # -# (C) Pywikibot team, 2010-2023 +# (C) Pywikibot team, 2010-2024 # # Distributed under the terms of the MIT license. # @@ -17,10 +17,10 @@ name = 'vikidia' domain = 'vikidia.org'
- codes = [ + codes = { 'ar', 'ca', 'de', 'el', 'en', 'es', 'eu', 'fr', 'hy', 'it', 'nl', 'oc', 'pt', 'ru', 'scn', - ] + }
# Sites we want to edit but not count as real languages test_codes = ['central', 'test'] diff --git a/pywikibot/families/wikibooks_family.py b/pywikibot/families/wikibooks_family.py index 3e4ba2c..7d200ba 100644 --- a/pywikibot/families/wikibooks_family.py +++ b/pywikibot/families/wikibooks_family.py @@ -30,15 +30,15 @@ 'als', 'dk', 'tokipona', ]
- languages_by_size = [ - 'en', 'vi', 'hu', 'de', 'fr', 'it', 'ja', 'pt', 'es', 'nl', 'pl', 'id', - 'he', 'fi', 'zh', 'fa', 'az', 'ru', 'sq', 'lt', 'ca', 'eu', 'th', 'cs', - 'da', 'ko', 'hi', 'ba', 'sv', 'gl', 'uk', 'sr', 'hr', 'no', 'tr', 'sa', - 'ar', 'ta', 'bn', 'si', 'eo', 'is', 'sk', 'ro', 'mk', 'bg', 'ms', 'ka', - 'tt', 'el', 'li', 'sl', 'tl', 'ur', 'km', 'la', 'mr', 'kk', 'te', - 'shn', 'et', 'be', 'ia', 'ml', 'oc', 'ne', 'pa', 'hy', 'cv', 'tg', - 'ku', 'fy', 'af', 'bs', 'cy', 'mg', 'ky', - ] + codes = { + 'af', 'ar', 'az', 'ba', 'be', 'bg', 'bn', 'bs', 'ca', 'cs', 'cv', 'cy', + 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'fy', + 'gl', 'he', 'hi', 'hr', 'hu', 'hy', 'ia', 'id', 'is', 'it', 'ja', 'ka', + 'kk', 'km', 'ko', 'ku', 'ky', 'la', 'li', 'lt', 'mg', 'mk', 'ml', 'mr', + 'ms', 'ne', 'nl', 'no', 'oc', 'pa', 'pl', 'pt', 'ro', 'ru', 'sa', + 'shn', 'si', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'tg', 'th', + 'tl', 'tr', 'tt', 'uk', 'ur', 'vi', 'zh', + }
category_redirect_templates = { '_default': (), diff --git a/pywikibot/families/wikihow_family.py b/pywikibot/families/wikihow_family.py index 3a183a7..6e4fae8 100644 --- a/pywikibot/families/wikihow_family.py +++ b/pywikibot/families/wikihow_family.py @@ -3,7 +3,7 @@ .. versionadded:: 3.0 """ # -# (C) Pywikibot team, 2020-2023 +# (C) Pywikibot team, 2020-2024 # # Distributed under the terms of the MIT license. # @@ -23,10 +23,10 @@ name = 'wikihow' domain = 'wikihow.com'
- codes = [ + codes = { 'ar', 'cs', 'de', 'en', 'es', 'fr', 'hi', 'id', 'it', 'ja', 'ko', 'nl', 'pt', 'ru', 'th', 'tr', 'vi', 'zh', - ] + }
removed_wikis = ['ca', 'cy', 'fa', 'he', 'pl', 'ur']
diff --git a/pywikibot/families/wikimania_family.py b/pywikibot/families/wikimania_family.py index d415b58..a43a36a 100644 --- a/pywikibot/families/wikimania_family.py +++ b/pywikibot/families/wikimania_family.py @@ -3,7 +3,7 @@ .. versionadded:: 3.0 """ # -# (C) Pywikibot team, 2017-2023 +# (C) Pywikibot team, 2017-2024 # # Distributed under the terms of the MIT license. # @@ -28,7 +28,7 @@ '2014', '2015', '2016', '2017', '2018' ]
- codes = ['wikimania', 'team'] + codes = {'wikimania', 'team'}
code_aliases = {'2019': 'wikimania'}
diff --git a/pywikibot/families/wikimediachapter_family.py b/pywikibot/families/wikimediachapter_family.py index b3aba7a..e217010 100644 --- a/pywikibot/families/wikimediachapter_family.py +++ b/pywikibot/families/wikimediachapter_family.py @@ -20,9 +20,9 @@
closed_wikis = ['cn', 'nz', 'pa-us']
- codes = [ + codes = { 'am', 'ar', 'az', 'bd', 'be', 'br', 'ca', 'co', 'dk', 'ec', 'ee', 'fi', 'ge', 'gr', 'hi', 'id', 'id-internal', 'il', 'mai', 'mk', 'mx', 'ng', 'nl', 'no', 'nyc', 'pl', 'pt', 'punjabi', 'romd', 'rs', 'ru', 'se', 'tr', 'ua', 'uk', 've', 'wb', - ] + } diff --git a/pywikibot/families/wikinews_family.py b/pywikibot/families/wikinews_family.py index d9be92b..cdac2bb 100644 --- a/pywikibot/families/wikinews_family.py +++ b/pywikibot/families/wikinews_family.py @@ -21,11 +21,11 @@ 'bg', 'hu', 'sd', 'th', 'tr', ]
- languages_by_size = [ - 'ru', 'sr', 'pt', 'fr', 'en', 'pl', 'zh', 'de', 'es', 'it', 'ar', 'cs', - 'ca', 'nl', 'el', 'ta', 'li', 'sv', 'uk', 'fa', 'fi', 'ja', 'ro', 'eo', - 'guw', 'sq', 'no', 'ko', 'bs', 'he', - ] + codes = { + 'ar', 'bs', 'ca', 'cs', 'de', 'el', 'en', 'eo', 'es', 'fa', 'fi', 'fr', + 'guw', 'he', 'it', 'ja', 'ko', 'li', 'nl', 'no', 'pl', 'pt', 'ro', + 'ru', 'sq', 'sr', 'sv', 'ta', 'uk', 'zh', + }
category_redirect_templates = { '_default': (), diff --git a/pywikibot/families/wikipedia_family.py b/pywikibot/families/wikipedia_family.py index 8777443..7c827c9 100644 --- a/pywikibot/families/wikipedia_family.py +++ b/pywikibot/families/wikipedia_family.py @@ -27,40 +27,40 @@ 'dk', 'mo', 'ru-sib', 'tlh', 'tokipona', 'zh_cn', 'zh_tw', ]
- languages_by_size = [ - 'en', 'ceb', 'de', 'fr', 'sv', 'nl', 'ru', 'es', 'it', 'arz', 'pl', - 'ja', 'zh', 'uk', 'vi', 'war', 'ar', 'pt', 'fa', 'ca', 'sr', 'id', - 'ko', 'no', 'ce', 'fi', 'tr', 'cs', 'hu', 'tt', 'sh', 'ro', - 'zh-min-nan', 'eu', 'ms', 'eo', 'he', 'hy', 'da', 'bg', 'cy', 'uz', - 'sk', 'simple', 'azb', 'et', 'be', 'kk', 'el', 'min', 'hr', 'lt', 'gl', - 'ur', 'az', 'sl', 'lld', 'ka', 'nn', 'ta', 'th', 'hi', 'bn', 'mk', - 'la', 'zh-yue', 'ast', 'lv', 'af', 'tg', 'my', 'mg', 'sq', 'mr', 'bs', - 'te', 'oc', 'br', 'be-tarask', 'ml', 'nds', 'sw', 'ky', 'ku', 'lmo', - 'jv', 'pnb', 'new', 'vec', 'ht', 'pms', 'ba', 'lb', 'su', 'ga', 'is', - 'szl', 'ckb', 'fy', 'cv', 'pa', 'tl', 'an', 'io', 'wuu', 'diq', 'ha', - 'vo', 'sco', 'yo', 'ne', 'kn', 'gu', 'als', 'ia', 'avk', 'crh', 'bar', - 'scn', 'bpy', 'qu', 'ig', 'mn', 'nv', 'ban', 'xmf', 'si', 'mzn', 'frr', - 'ps', 'tum', 'os', 'or', 'bat-smg', 'sah', 'cdo', 'bcl', 'gd', 'bug', - 'sd', 'yi', 'ilo', 'am', 'li', 'nap', 'gor', 'mai', 'fo', 'hsb', - 'map-bms', 'shn', 'eml', 'ace', 'zh-classical', 'as', 'ie', 'sa', 'wa', - 'hyw', 'sn', 'mhr', 'lij', 'zu', 'hif', 'bjn', 'mrj', 'km', 'sat', - 'mni', 'hak', 'ary', 'roa-tara', 'pam', 'dag', 'rue', 'bh', 'nso', - 'co', 'vls', 'so', 'mi', 'nds-nl', 'myv', 'se', 'sc', 'bo', 'kw', - 'vep', 'rw', 'glk', 'tk', 'kab', 'gan', 'fiu-vro', 'gv', 'zea', 'ab', - 'mt', 'skr', 'ug', 'tly', 'nah', 'frp', 'udm', 'pcd', 'gn', 'smn', - 'kv', 'csb', 'ay', 'nrm', 'ks', 'mdf', 'lez', 'olo', 'kaa', 'mwl', - 'lfn', 'ang', 'stq', 'lo', 'fur', 'rm', 'tw', 'ln', 'pap', 'lad', - 'ext', 'gom', 'tyv', 'koi', 'av', 'dty', 'dsb', 'cbk-zam', 'dv', 'ksh', - 'za', 'lg', 'gag', 'bxr', 'pfl', 'szy', 'blk', 'tay', 'pag', 'pi', - 'haw', 'awa', 'inh', 'krc', 'atj', 'to', 'pdc', 'tcy', 'mnw', 'arc', - 'xh', 'ff', 'shi', 'xal', 'jam', 'kbp', 'wo', 'om', 'ki', 'nia', 'anp', - 'kbd', 'zgh', 'nov', 'nqo', 'bi', 'tpi', 'tet', 'roa-rup', 'jbo', 'tn', - 'fj', 'kg', 'lbe', 'guw', 'ty', 'cu', 'rmy', 'mad', 'trv', 'ami', 
- 'srn', 'sm', 'alt', 'dga', 'ltg', 'gcr', 'pcm', 'chr', 'ny', 'kcg', - 'gpe', 'st', 'pih', 'got', 'ss', 'gur', 'ee', 'bm', 'ts', 've', 'bbc', - 'chy', 'fon', 'rn', 'ik', 'ady', 'ch', 'fat', 'guc', 'pnt', 'iu', - 'pwn', 'sg', 'din', 'ti', 'kl', 'dz', 'cr', - ] + codes = { + 'ab', 'ace', 'ady', 'af', 'als', 'alt', 'am', 'ami', 'an', 'ang', + 'anp', 'ar', 'arc', 'ary', 'arz', 'as', 'ast', 'atj', 'av', 'avk', + 'awa', 'ay', 'az', 'azb', 'ba', 'ban', 'bar', 'bat-smg', 'bbc', 'bcl', + 'be', 'be-tarask', 'bg', 'bh', 'bi', 'bjn', 'blk', 'bm', 'bn', 'bo', + 'bpy', 'br', 'bs', 'bug', 'bxr', 'ca', 'cbk-zam', 'cdo', 'ce', 'ceb', + 'ch', 'chr', 'chy', 'ckb', 'co', 'cr', 'crh', 'cs', 'csb', 'cu', 'cv', + 'cy', 'da', 'dag', 'de', 'dga', 'din', 'diq', 'dsb', 'dty', 'dv', 'dz', + 'ee', 'el', 'eml', 'en', 'eo', 'es', 'et', 'eu', 'ext', 'fa', 'fat', + 'ff', 'fi', 'fiu-vro', 'fj', 'fo', 'fon', 'fr', 'frp', 'frr', 'fur', + 'fy', 'ga', 'gag', 'gan', 'gcr', 'gd', 'gl', 'glk', 'gn', 'gom', 'gor', + 'got', 'gpe', 'gu', 'guc', 'gur', 'guw', 'gv', 'ha', 'hak', 'haw', + 'he', 'hi', 'hif', 'hr', 'hsb', 'ht', 'hu', 'hy', 'hyw', 'ia', 'id', + 'ie', 'ig', 'ik', 'ilo', 'inh', 'io', 'is', 'it', 'iu', 'ja', 'jam', + 'jbo', 'jv', 'ka', 'kaa', 'kab', 'kbd', 'kbp', 'kcg', 'kg', 'ki', 'kk', + 'kl', 'km', 'kn', 'ko', 'koi', 'krc', 'ks', 'ksh', 'ku', 'kv', 'kw', + 'ky', 'la', 'lad', 'lb', 'lbe', 'lez', 'lfn', 'lg', 'li', 'lij', 'lld', + 'lmo', 'ln', 'lo', 'lt', 'ltg', 'lv', 'mad', 'mai', 'map-bms', 'mdf', + 'mg', 'mhr', 'mi', 'min', 'mk', 'ml', 'mn', 'mni', 'mnw', 'mr', 'mrj', + 'ms', 'mt', 'mwl', 'my', 'myv', 'mzn', 'nah', 'nap', 'nds', 'nds-nl', + 'ne', 'new', 'nia', 'nl', 'nn', 'no', 'nov', 'nqo', 'nrm', 'nso', 'nv', + 'ny', 'oc', 'olo', 'om', 'or', 'os', 'pa', 'pag', 'pam', 'pap', 'pcd', + 'pcm', 'pdc', 'pfl', 'pi', 'pih', 'pl', 'pms', 'pnb', 'pnt', 'ps', + 'pt', 'pwn', 'qu', 'rm', 'rmy', 'rn', 'ro', 'roa-rup', 'roa-tara', + 'ru', 'rue', 'rw', 'sa', 'sah', 'sat', 'sc', 'scn', 'sco', 'sd', 'se', + 
'sg', 'sh', 'shi', 'shn', 'si', 'simple', 'sk', 'skr', 'sl', 'sm', + 'smn', 'sn', 'so', 'sq', 'sr', 'srn', 'ss', 'st', 'stq', 'su', 'sv', + 'sw', 'szl', 'szy', 'ta', 'tay', 'tcy', 'te', 'tet', 'tg', 'th', 'ti', + 'tk', 'tl', 'tly', 'tn', 'to', 'tpi', 'tr', 'trv', 'ts', 'tt', 'tum', + 'tw', 'ty', 'tyv', 'udm', 'ug', 'uk', 'ur', 'uz', 've', 'vec', 'vep', + 'vi', 'vls', 'vo', 'wa', 'war', 'wo', 'wuu', 'xal', 'xh', 'xmf', 'yi', + 'yo', 'za', 'zea', 'zgh', 'zh', 'zh-classical', 'zh-min-nan', 'zh-yue', + 'zu', + }
# Sites we want to edit but not count as real languages test_codes = ['test', 'test2'] diff --git a/pywikibot/families/wikiquote_family.py b/pywikibot/families/wikiquote_family.py index 98efb0b..1fe31f7 100644 --- a/pywikibot/families/wikiquote_family.py +++ b/pywikibot/families/wikiquote_family.py @@ -28,15 +28,15 @@ 'als', 'tokipona', ]
- languages_by_size = [ - 'en', 'it', 'pl', 'ru', 'cs', 'et', 'pt', 'uk', 'fa', 'he', 'fr', 'de', - 'es', 'tr', 'eo', 'sk', 'az', 'bs', 'zh', 'ca', 'sr', 'fi', 'ar', 'lt', - 'sl', 'id', 'su', 'bg', 'hy', 'hr', 'el', 'nn', 'sv', 'li', 'hu', 'ko', - 'nl', 'sah', 'ja', 'la', 'ta', 'ig', 'hi', 'as', 'te', 'bn', 'gl', - 'gu', 'guw', 'ur', 'bjn', 'tl', 'vi', 'be', 'cy', 'no', 'sq', 'ml', - 'kn', 'eu', 'ro', 'th', 'ku', 'uz', 'ka', 'da', 'sa', 'is', 'bcl', - 'br', 'mr', 'af', 'ky', - ] + codes = { + 'af', 'ar', 'as', 'az', 'bcl', 'be', 'bg', 'bjn', 'bn', 'br', 'bs', + 'ca', 'cs', 'cy', 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', + 'fi', 'fr', 'gl', 'gu', 'guw', 'he', 'hi', 'hr', 'hu', 'hy', 'id', + 'ig', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'ky', 'la', 'li', 'lt', + 'ml', 'mr', 'nl', 'nn', 'no', 'pl', 'pt', 'ro', 'ru', 'sa', 'sah', + 'sk', 'sl', 'sq', 'sr', 'su', 'sv', 'ta', 'te', 'th', 'tl', 'tr', 'uk', + 'ur', 'uz', 'vi', 'zh', + }
category_redirect_templates = { '_default': (), diff --git a/pywikibot/families/wikisource_family.py b/pywikibot/families/wikisource_family.py index 9761f3a..3988f9e 100644 --- a/pywikibot/families/wikisource_family.py +++ b/pywikibot/families/wikisource_family.py @@ -26,15 +26,15 @@ 'tokipona', ]
- languages_by_size = [ - 'pl', 'en', 'ru', 'de', 'fr', 'zh', 'he', 'uk', 'it', 'ar', 'es', - 'mul', 'gu', 'cs', 'sr', 'pt', 'bn', 'sv', 'fa', 'ko', 'ta', 'hu', - 'ml', 'be', 'sa', 'te', 'tr', 'sl', 'la', 'vi', 'hy', 'nl', 'ja', 'ro', - 'fi', 'nap', 'el', 'az', 'ca', 'br', 'hr', 'id', 'th', 'no', 'kn', - 'hi', 'cy', 'eo', 'vec', 'is', 'ban', 'mr', 'pms', 'lij', 'da', 'et', - 'as', 'mk', 'yi', 'bg', 'jv', 'wa', 'li', 'lt', 'pa', 'eu', 'or', 'gl', - 'bs', 'sah', 'sk', 'su', 'zh-min-nan', 'fo', - ] + codes = { + 'ar', 'as', 'az', 'ban', 'be', 'bg', 'bn', 'br', 'bs', 'ca', 'cs', + 'cy', 'da', 'de', 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fo', + 'fr', 'gl', 'gu', 'he', 'hi', 'hr', 'hu', 'hy', 'id', 'is', 'it', 'ja', + 'jv', 'kn', 'ko', 'la', 'li', 'lij', 'lt', 'mk', 'ml', 'mr', 'mul', + 'nap', 'nl', 'no', 'or', 'pa', 'pl', 'pms', 'pt', 'ro', 'ru', 'sa', + 'sah', 'sk', 'sl', 'sr', 'su', 'sv', 'ta', 'te', 'th', 'tr', 'uk', + 'vec', 'vi', 'wa', 'yi', 'zh', 'zh-min-nan', + }
# Sites we want to edit but not count as real languages test_codes = ['beta'] @@ -72,7 +72,7 @@ cls.code_aliases = super().code_aliases.copy() aliases = cls.known_codes + ['-', 'www'] for code in aliases: - if (code not in cls.languages_by_size + if (code not in cls.codes and code not in cls.closed_wikis and code not in cls.code_aliases): cls.code_aliases[code] = 'mul' diff --git a/pywikibot/families/wikiversity_family.py b/pywikibot/families/wikiversity_family.py index 9fc47e1..2bbe6e0 100644 --- a/pywikibot/families/wikiversity_family.py +++ b/pywikibot/families/wikiversity_family.py @@ -17,10 +17,10 @@
name = 'wikiversity'
- languages_by_size = [ - 'de', 'en', 'fr', 'zh', 'it', 'cs', 'ru', 'pt', 'es', 'sl', 'ar', 'sv', - 'fi', 'el', 'hi', 'ko', 'ja', - ] + codes = { + 'ar', 'cs', 'de', 'el', 'en', 'es', 'fi', 'fr', 'hi', 'it', 'ja', 'ko', + 'pt', 'ru', 'sl', 'sv', 'zh', + }
test_codes = ['beta']
diff --git a/pywikibot/families/wikivoyage_family.py b/pywikibot/families/wikivoyage_family.py index f342465..012bec3 100644 --- a/pywikibot/families/wikivoyage_family.py +++ b/pywikibot/families/wikivoyage_family.py @@ -16,11 +16,11 @@
name = 'wikivoyage'
- languages_by_size = [ - 'en', 'de', 'pl', 'it', 'fr', 'fa', 'ru', 'zh', 'nl', 'pt', 'es', 'he', - 'vi', 'fi', 'sv', 'el', 'ja', 'eo', 'uk', 'bn', 'ro', 'tr', 'ps', - 'shn', 'hi', - ] + codes = { + 'bn', 'de', 'el', 'en', 'eo', 'es', 'fa', 'fi', 'fr', 'he', 'hi', 'it', + 'ja', 'nl', 'pl', 'ps', 'pt', 'ro', 'ru', 'shn', 'sv', 'tr', 'uk', + 'vi', 'zh', + }
category_redirect_templates = { '_default': (), diff --git a/pywikibot/families/wiktionary_family.py b/pywikibot/families/wiktionary_family.py index 8487efe..ccaa1b7 100644 --- a/pywikibot/families/wiktionary_family.py +++ b/pywikibot/families/wiktionary_family.py @@ -33,24 +33,24 @@ 'als', 'ba', 'dk', 'mo', 'tlh', 'tokipona', ]
- languages_by_size = [ - 'en', 'fr', 'mg', 'zh', 'el', 'ru', 'de', 'ku', 'es', 'sv', 'sh', 'nl', - 'pl', 'lt', 'hu', 'ca', 'it', 'fi', 'pt', 'ta', 'ja', 'tr', 'io', 'hy', - 'ko', 'kn', 'vi', 'sr', 'th', 'hi', 'ro', 'id', 'no', 'et', 'skr', - 'cs', 'ml', 'my', 'li', 'uz', 'eo', 'or', 'te', 'fa', 'sg', 'gl', 'oc', - 'ar', 'is', 'jv', 'ast', 'az', 'uk', 'eu', 'bn', 'br', 'mnw', 'simple', - 'da', 'lo', 'la', 'shn', 'hr', 'sk', 'lmo', 'fj', 'wa', 'ky', 'bg', - 'kbd', 'ur', 'cy', 'ps', 'tg', 'he', 'vo', 'om', 'sl', 'af', 'ms', - 'zh-min-nan', 'scn', 'tl', 'pa', 'ka', 'fy', 'sw', 'kk', 'nn', 'min', - 'lv', 'nds', 'gor', 'sq', 'lb', 'bs', 'co', 'mn', 'pnb', 'yue', 'nah', - 'ckb', 'diq', 'sa', 'km', 'be', 'nia', 'vec', 'tk', 'mk', 'sm', 'hsb', - 'ks', 'shy', 'bcl', 'su', 'ga', 'btm', 'gd', 'an', 'ha', 'gom', 'mr', - 'wo', 'mni', 'blk', 'ia', 'bjn', 'so', 'si', 'ang', 'mt', 'sd', 'tt', - 'fo', 'gn', 'mi', 'ie', 'csb', 'ug', 'guw', 'st', 'jbo', 'hif', - 'roa-rup', 'kl', 'zu', 'ay', 'ln', 'yi', 'kcg', 'gu', 'na', 'gv', 'kw', - 'tpi', 'am', 'ne', 'rw', 'ts', 'ig', 'qu', 'ss', 'iu', 'chr', 'dv', - 'ti', 'tn', - ] + codes = { + 'af', 'am', 'an', 'ang', 'ar', 'ast', 'ay', 'az', 'bcl', 'be', 'bg', + 'bjn', 'blk', 'bn', 'br', 'bs', 'btm', 'ca', 'chr', 'ckb', 'co', 'cs', + 'csb', 'cy', 'da', 'de', 'diq', 'dv', 'el', 'en', 'eo', 'es', 'et', + 'eu', 'fa', 'fi', 'fj', 'fo', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', + 'gom', 'gor', 'gu', 'guw', 'gv', 'ha', 'he', 'hi', 'hif', 'hr', 'hsb', + 'hu', 'hy', 'ia', 'id', 'ie', 'ig', 'io', 'is', 'it', 'iu', 'ja', + 'jbo', 'jv', 'ka', 'kbd', 'kcg', 'kk', 'kl', 'km', 'kn', 'ko', 'ks', + 'ku', 'kw', 'ky', 'la', 'lb', 'li', 'lmo', 'ln', 'lo', 'lt', 'lv', + 'mg', 'mi', 'min', 'mk', 'ml', 'mn', 'mni', 'mnw', 'mr', 'ms', 'mt', + 'my', 'na', 'nah', 'nds', 'ne', 'nia', 'nl', 'nn', 'no', 'oc', 'om', + 'or', 'pa', 'pl', 'pnb', 'ps', 'pt', 'qu', 'ro', 'roa-rup', 'ru', 'rw', + 'sa', 'scn', 'sd', 'sg', 'sh', 'shn', 'shy', 'si', 'simple', 'sk', + 'skr', 'sl', 'sm', 
'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', + 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'tpi', 'tr', 'ts', + 'tt', 'ug', 'uk', 'ur', 'uz', 'vec', 'vi', 'vo', 'wa', 'wo', 'yi', + 'yue', 'zh', 'zh-min-nan', 'zu', + }
category_redirect_templates = { '_default': (), diff --git a/pywikibot/families/wowwiki_family.py b/pywikibot/families/wowwiki_family.py index 8c2756f..dcf9767 100644 --- a/pywikibot/families/wowwiki_family.py +++ b/pywikibot/families/wowwiki_family.py @@ -1,6 +1,6 @@ """Family module for WOW wiki.""" # -# (C) Pywikibot team, 2009-2023 +# (C) Pywikibot team, 2009-2024 # # Distributed under the terms of the MIT license. # @@ -17,11 +17,11 @@ name = 'wowwiki' domain = 'wowwiki.fandom.com'
- codes = [ + codes = { 'ar', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'fa', 'fi', 'fr', 'he', 'hu', 'it', 'ja', 'ko', 'nl', 'no', 'pl', 'pt', 'pt-br', 'ru', 'uk', 'zh', 'zh-tw', - ] + }
removed_wikis = ['is', 'hr', 'lt', 'lv', 'ro', 'sk', 'sr', 'sv', 'tr']
diff --git a/pywikibot/family.py b/pywikibot/family.py index 5c8dd28..08cecae 100644 --- a/pywikibot/family.py +++ b/pywikibot/family.py @@ -23,6 +23,7 @@ import pywikibot from pywikibot import config from pywikibot.backports import DefaultDict, Mapping, Sequence, removesuffix +from pywikibot.data import wikistats from pywikibot.exceptions import FamilyMaintenanceWarning, UnknownFamilyError from pywikibot.tools import classproperty, deprecated, remove_last_args
@@ -201,11 +202,6 @@ families can set this variable to the name of the target family. """
- languages_by_size: list[str] = [] - """Language codes of the largest wikis. They should be roughly - sorted by size. - """ - #: Some languages belong to a group where the possibility is high #: that equivalent articles have identical titles among the group. language_groups = { @@ -722,7 +718,7 @@ return putText
@property - def obsolete(self) -> dict[str, str | None]: + def obsolete(self) -> types.MappingProxyType[str, str | None]: """ Old codes that are not part of the family.
@@ -744,12 +740,8 @@ return set(cls.langs.values())
@classproperty - def codes(cls): - """ - Get list of codes used by this family. - - :rtype: set of str - """ + def codes(cls) -> set[str]: + """Get list of codes used by this family.""" return set(cls.langs.keys())
@classproperty @@ -814,9 +806,9 @@ return super().__new__(cls)
@classproperty - def langs(cls): + def langs(cls) -> dict[str, str]: """Property listing family languages.""" - codes = cls.codes[:] + codes = sorted(cls.codes)
if hasattr(cls, 'test_codes'): codes += cls.test_codes @@ -831,14 +823,6 @@ return cls.langs
@classproperty - def codes(cls): - """Property listing family codes.""" - if cls.languages_by_size: - return cls.languages_by_size - raise NotImplementedError( - f'Family {cls.name} needs property "languages_by_size" or "codes"') - - @classproperty def domains(cls): """Return the domain name of the sites in this family.""" return [cls.domain] @@ -855,10 +839,10 @@ @classproperty def langs(cls): """Property listing family languages.""" - codes = cls.codes + codes = sorted(cls.codes)
if hasattr(cls, 'code_aliases'): - codes += tuple(cls.code_aliases.keys()) + codes += cls.code_aliases
return {code: cls.domain for code in codes}
@@ -876,10 +860,14 @@ """
multi_language_content_families = [ - 'wikipedia', 'wiktionary', - 'wikisource', 'wikibooks', - 'wikinews', 'wikiquote', - 'wikiversity', 'wikivoyage', + 'wikibooks', + 'wikinews', + 'wikipedia', + 'wikiquote', + 'wikisource', + 'wikiversity', + 'wikivoyage', + 'wiktionary', ]
wikimedia_org_content_families = [ @@ -887,8 +875,11 @@ ]
wikimedia_org_meta_families = [ - 'meta', 'outreach', 'strategy', - 'wikimediachapter', 'wikimania', + 'meta', + 'outreach', + 'strategy', + 'wikimediachapter', + 'wikimania', ]
wikimedia_org_other_families = [ @@ -1019,6 +1010,43 @@ """Return path for EventStreams.""" return '/v2/stream'
+ @property + def languages_by_size(self) -> list[str]: + """Language codes of the largest wikis. + + They should be roughly sorted by size. + + .. versionchanged:: 9.0 + Sorting order is retrieved via :mod:`wikistats` for each call. + + :raises NotImplementedError: Family is not member of + :attr:`multi_language_content_families` + """ + if self.name not in self.multi_language_content_families: + raise NotImplementedError( + f'languages_by_size is not implemented for {self.name} family') + + exceptions = { + 'wikiversity': ['beta'] + } + + ws = wikistats.WikiStats() + table = ws.languages_by_size(self.name) + assert type(self.obsolete).__name__ == 'mappingproxy', ( + f'obsolete attribute is of type {type(self.obsolete).__name__} but' + ' mappingproxy was expected' + ) + + lbs = [ + code for code in table + if not (code in self.obsolete + or code in exceptions.get(self.name, [])) + ] + + # add codes missing by wikistats + missing = set(self.codes) - set(lbs) + return lbs + list(missing) +
class WikimediaOrgFamily(SingleSiteFamily, WikimediaFamily):
diff --git a/pywikibot/scripts/generate_user_files.py b/pywikibot/scripts/generate_user_files.py index 6e03aae..a58ebc3 100755 --- a/pywikibot/scripts/generate_user_files.py +++ b/pywikibot/scripts/generate_user_files.py @@ -8,7 +8,7 @@ Also EXTERNAL EDITOR SETTINGS section can be copied. """ # -# (C) Pywikibot team, 2010-2023 +# (C) Pywikibot team, 2010-2024 # # Distributed under the terms of the MIT license. # @@ -48,6 +48,7 @@ config, __url__ = pywikibot.config, pywikibot.__url__ base_dir = pywikibot.config.base_dir
+console_encoding: str | None try: console_encoding = sys.stdout.encoding # unittests fails with "StringIO instance has no attribute 'encoding'" @@ -136,13 +137,8 @@ default=default_family) fam = pywikibot.family.Family.load(fam) if hasattr(fam, 'langs'): - if hasattr(fam, 'languages_by_size'): - by_size = [code for code in fam.languages_by_size - if code in fam.langs] - else: - by_size = [] - known_langs = by_size + sorted( - set(fam.langs.keys()).difference(by_size)) + codes = [code for code in fam.codes if code in fam.langs] + known_langs = codes + sorted(set(fam.langs.keys()).difference(codes)) else: known_langs = []
@@ -153,14 +149,20 @@ pywikibot.info(f'The only known site code: {known_langs[0]}') default_lang = known_langs[0] else: - if not force: - pywikibot.info('This is the list of known site codes:') - pywikibot.info(', '.join(known_langs)) if default_lang not in known_langs: if default_lang != 'en' and 'en' in known_langs: default_lang = 'en' else: default_lang = None + if not force: + pywikibot.info('This is the list of known site codes:') + text = fill(', '.join(known_langs), width=79) + if default_lang: + text = text.replace( + f' {default_lang},', + f' <<lightblue>>{default_lang}<<default>>,', + ) + pywikibot.info(text)
message = "The site code of the site we're working on" mycode = None @@ -262,7 +264,7 @@ return data
-def copy_sections(force: bool = False, default: str = 'n') -> str: +def copy_sections(force: bool = False, default: str = 'n') -> str | None: """Take config sections and copy them to user-config.py.
.. versionchanged:: 8.0 @@ -296,7 +298,7 @@ sections: list[_ConfigSection], skip: Callable | None = None, force: bool = False, - default: str = 'n') -> None: + default: str = 'n') -> list[str]: """Ask for settings to copy.
.. versionadded:: 8.0 @@ -377,10 +379,11 @@
# For each different username entered, ask if user wants to save a # BotPassword (username, BotPassword name, BotPassword pass) - msg = fill('See {}/BotPasswords to know how to get codes.' - 'Please note that plain text in {} and anyone with read ' - 'access to that directory will be able read the file.' - .format(__url__, _fncpass)) + msg: str | None = fill( + f'See {__url__}/BotPasswords to know how to get codes. ' + f'Please note that plain text in {_fncpass} and anyone with read' + ' access to that directory will be able read the file.' + ) botpasswords = [] userset = {user.name for user in userlist} for username in userset: diff --git a/pywikibot/scripts/preload_sites.py b/pywikibot/scripts/preload_sites.py index e55474d..8ff7d57 100755 --- a/pywikibot/scripts/preload_sites.py +++ b/pywikibot/scripts/preload_sites.py @@ -18,7 +18,7 @@ script was moved to the framework scripts folder. """ # -# (C) Pywikibot team, 2021-2023 +# (C) Pywikibot team, 2021-2024 # # Distributed under the terms of the MIT license. # @@ -61,7 +61,7 @@ msg = 'Preloading sites of {} family{}' pywikibot.info(msg.format(family, '...'))
- codes = Family.load(family).languages_by_size + codes = Family.load(family).codes for code in exceptions.get(family, []): if code in codes: codes.remove(code) diff --git a/pywikibot/titletranslate.py b/pywikibot/titletranslate.py index 1bd07aa..ace5183 100644 --- a/pywikibot/titletranslate.py +++ b/pywikibot/titletranslate.py @@ -1,6 +1,6 @@ """Title translate module.""" # -# (C) Pywikibot team, 2003-2022 +# (C) Pywikibot team, 2003-2024 # # Distributed under the terms of the MIT license. # @@ -47,7 +47,7 @@ if codes.isdigit(): codes = site.family.languages_by_size[:int(codes)] elif codes == 'all': - codes = site.family.languages_by_size + codes = list(site.family.codes) else: codes = site.family.language_groups.get(codes, codes.split(','))
diff --git a/scripts/README.rst b/scripts/README.rst index e50079b..0b12f8d 100644 --- a/scripts/README.rst +++ b/scripts/README.rst @@ -174,8 +174,6 @@ +------------------------+---------------------------------------------------------+ | unidata.py | Updates _first_upper_exception_dict in tools.unidata | +------------------------+---------------------------------------------------------+ -| wikimedia_sites.py | Updates the language lists in Wikimedia family files. | -+------------------------+---------------------------------------------------------+
Others ====== diff --git a/scripts/interwiki.py b/scripts/interwiki.py index a85c35f..8551ddb 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -1463,7 +1463,7 @@ lclSiteDone = False frgnSiteDone = False
- for code in lclSite.family.languages_by_size: + for code in lclSite.family.codes: site = pywikibot.Site(code, lclSite.family) if not lclSiteDone and site == lclSite \ or (not frgnSiteDone and site != lclSite and site in new): diff --git a/scripts/maintenance/unidata.py b/scripts/maintenance/unidata.py index cb20643..815f48e 100755 --- a/scripts/maintenance/unidata.py +++ b/scripts/maintenance/unidata.py @@ -25,12 +25,23 @@ from pywikibot import Site from pywikibot.comms.http import session from pywikibot.family import Family -from scripts.maintenance.wikimedia_sites import families_list
NUMBER_OF_THREADS = 26 FILEPATH = '/data/firstup_excepts.json'
+# supported families by this script +families_list = [ + 'wikibooks', + 'wikinews', + 'wikipedia', + 'wikiquote', + 'wikisource', + 'wikiversity', + 'wikivoyage', + 'wiktionary', +] +
def chars_uppers_wikilinks(): """Retrieve upper chars from MediaWiki using page titles.""" @@ -109,7 +120,7 @@ for fam_name in families_list: family = Family.load(fam_name) families_excepts.setdefault(fam_name, {}) - for site_code in family.languages_by_size: + for site_code in family.codes: site = Site(site_code, family) if site.namespaces[8].case != 'first-letter': raise ValueError('MW namespace case is not first-letter') diff --git a/scripts/maintenance/wikimedia_sites.py b/scripts/maintenance/wikimedia_sites.py deleted file mode 100755 index dc256bb..0000000 --- a/scripts/maintenance/wikimedia_sites.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 -"""Script that updates the language lists in Wikimedia family files. - -Usage: - - python pwb.py wikimedia_sites [ {<family>} ] - -""" -# -# (C) Pywikibot team, 2008-2022 -# -# Distributed under the terms of the MIT license. -# -from __future__ import annotations - -import re -from pathlib import Path - -import pywikibot -from pywikibot.data import wikistats -from pywikibot.family import Family - - -# supported families by this script -families_list = [ - 'wikibooks', - 'wikinews', - 'wikipedia', - 'wikiquote', - 'wikisource', - 'wikiversity', - 'wikivoyage', - 'wiktionary', -] - -exceptions = { - 'wikiversity': ['beta'] -} - - -def update_family(families): - """Update family files.""" - ws = wikistats.WikiStats() - for family in families or families_list: - pywikibot.info(f'\nChecking family {family}:') - - original = Family.load(family).languages_by_size - for code in exceptions.get(family, []): - if code in original: - original.remove(code) - obsolete = Family.load(family).obsolete - - new = [] - table = ws.languages_by_size(family) - for code in table: - if not (code in obsolete or code in exceptions.get(family, [])): - new.append(code) - - # put the missing languages to the right place - missing = original != new and set(original) - set(new) - if missing: - pywikibot.warning("['{}'] not listed at 
wikistats." - .format("', '".join(missing))) - index = {} - for code in missing: - index[original.index(code)] = code - i = len(index) - 1 - for key in sorted(index.keys(), reverse=True): - new.insert(key - i, index[key]) - i -= 1 - - if original == new: - pywikibot.info('The lists match!') - continue - - pywikibot.info("The lists don't match, the new list is:") - text = ' languages_by_size = [\n' - line = ' ' * 7 - for code in new: - if len(line) + len(code) >= 76: - text += line + '\n' - line = ' ' * 7 - line += f" '{code}'," - text += line + '\n' - text += ' ]' - pywikibot.info(text) - - filepath = Path(f'pywikibot/families/{family}_family.py') - family_text = filepath.read_text(encoding='utf8') - family_text = re.sub(r'(?ms)^ {4}languages_by_size.+?]', - text, family_text, count=1) - filepath.write_text(family_text, encoding='utf8') - - -if __name__ == '__main__': - fam = set() - for arg in pywikibot.handle_args(): - if arg in families_list: - fam.add(arg) - update_family(fam) diff --git a/tests/family_tests.py b/tests/family_tests.py index 1c8f227..273d931 100755 --- a/tests/family_tests.py +++ b/tests/family_tests.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """Tests for the family module.""" # -# (C) Pywikibot team, 2014-2023 +# (C) Pywikibot team, 2014-2024 # # Distributed under the terms of the MIT license. # @@ -13,7 +13,6 @@ import pywikibot from pywikibot.exceptions import UnknownFamilyError from pywikibot.family import Family, SingleSiteFamily -from pywikibot.tools import suppress_warnings from tests.aspects import PatchingTestCase, TestCase, unittest from tests.utils import DrySite
@@ -42,13 +41,8 @@ self.assertIn('.', domain)
self.assertEqual(f.name, name) - - with suppress_warnings( - 'wowwiki_family.Family.languages_by_size ' - 'is deprecated'): - self.assertIsInstance(f.languages_by_size, list) - self.assertGreaterEqual(set(f.langs), - set(f.languages_by_size)) + self.assertIsInstance(f.codes, set) + self.assertGreaterEqual(set(f.langs), set(f.codes))
if isinstance(f, SingleSiteFamily): self.assertIsNotNone(f.code) diff --git a/tests/l10n_tests.py b/tests/l10n_tests.py index 0fb0327..b931024 100755 --- a/tests/l10n_tests.py +++ b/tests/l10n_tests.py @@ -70,7 +70,7 @@
# create test methods for package messages processed by unittest site = pywikibot.Site(dct['code'], dct['family']) - codes = site.family.languages_by_size + codes = site.family.codes del site for package in PACKAGES: keys = i18n.twget_keys(package)
pywikibot-commits@lists.wikimedia.org