jenkins-bot has submitted this change and it was merged.
Change subject: Use JSON for i18n files
......................................................................
Use JSON for i18n files
The JSON files were first distributed in October 2014.
The i18n module now uses those JSON files for i18n messages.
Bug: T65327
Change-Id: I6e2c63db039854dd8d009d837b2e6b9d11ba217a
---
M pywikibot/i18n.py
D tests/i18n/test.py
A tests/i18n/test/de.json
A tests/i18n/test/en.json
A tests/i18n/test/fr.json
A tests/i18n/test/fy.json
A tests/i18n/test/ja.json
A tests/i18n/test/nl.json
8 files changed, 103 insertions(+), 119 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
Ladsgroup: Looks good to me, but someone else must approve
jenkins-bot: Verified
diff --git a/pywikibot/i18n.py b/pywikibot/i18n.py
index 77e57c6..327abf5 100644
--- a/pywikibot/i18n.py
+++ b/pywikibot/i18n.py
@@ -29,7 +29,11 @@
import sys
import re
import locale
-import warnings
+import json
+import os
+import pkgutil
+
+from collections import defaultdict
from pywikibot import Error
from .plural import plural_rules
@@ -43,10 +47,16 @@
PLURAL_PATTERN = r'{{PLURAL:(?:%\()?([^\)]*?)(?:\)d)?\|(.*?)}}'
-# Package name for the translation messages
+# Package name for the translation messages. The messages data must be loaded
+# relative to that package name. At the top of this package there should be
+# directories named after each script/message bundle, and each directory
+# should contain JSON files called <lang>.json
_messages_package_name = 'scripts.i18n'
# Flag to indicate whether translation messages are available
_messages_available = None
+
+# Cache of translated messages
+_cache = defaultdict(dict)
def set_messages_package(package_name):
@@ -70,20 +80,9 @@
global _messages_available
if _messages_available is not None:
return _messages_available
- with warnings.catch_warnings():
- # Ignore 'missing __init__.py' as import looks at the JSON
- # directories before loading the python file.
- try:
- warnings.simplefilter("ignore", ImportWarning)
- # it's explicitly using str() to bypass unicode_literals in Python 2
- module = __import__(_messages_package_name,
fromlist=[str('pywikibot')])
- except ImportError:
- _messages_available = False
- return False
-
try:
- getattr(module, 'pywikibot').msg
- except AttributeError:
+ __import__(_messages_package_name)
+ except ImportError:
_messages_available = False
return False
@@ -301,29 +300,29 @@
pass
-def _get_messages_bundle(name):
- """Load all translation messages for a bundle name."""
- exception_message = 'Unknown problem'
- transdict = {}
+def _get_translation(lang, twtitle):
+ """
+ Return message of certain twtitle if exists.
- with warnings.catch_warnings():
- # Ignore 'missing __init__.py' as import looks at the JSON
- # directories before loading the python file.
- warnings.simplefilter("ignore", ImportWarning)
- try:
- # it's explicitly using str() to bypass unicode_literals in Python 2
- transdict = getattr(__import__(_messages_package_name,
- fromlist=[str(name)]),
- name).msg
- except ImportError as e:
- exception_message = str(e)
-
- if not transdict:
- raise TranslationError(
- 'Could not load bundle %s from message package %s: %s'
- % (name, _messages_package_name, exception_message))
-
- return transdict
+ For internal use, don't use it directly.
+ """
+ if twtitle in _cache[lang]:
+ return _cache[lang][twtitle]
+ message_bundle = twtitle.split('-')[0]
+ trans_text = None
+ filename = '%s/%s.json' % (message_bundle, lang)
+ try:
+ trans_text = pkgutil.get_data(
+ _messages_package_name, filename).decode('utf-8')
+ except (OSError, IOError): # file open can cause several exceptions
+ _cache[lang][twtitle] = None
+ return
+ transdict = json.loads(trans_text)
+ _cache[lang].update(transdict)
+ try:
+ return transdict[twtitle]
+ except KeyError:
+ return
def _extract_plural(code, message, parameters):
@@ -454,8 +453,7 @@
"""
Translate a message.
- The translations are retrieved from i18n.<package>, based on the callers
- import table.
+ The translations are retrieved from json files in messages_package_name.
fallback parameter must be True for i18n and False for L10N or testing
purposes.
@@ -469,10 +467,10 @@
if not messages_available():
raise TranslationError(
'Unable to load messages package %s for bundle %s'
+ '\nIt can happen due to lack of i18n submodule or files. '
+ 'Read https://mediawiki.org/wiki/PWB/i18n'
% (_messages_package_name, twtitle))
- package = twtitle.split("-")[0]
- transdict = _get_messages_bundle(package)
code_needed = False
# If a site is given instead of a code, use its language
if hasattr(code, 'lang'):
@@ -487,25 +485,18 @@
# There are two possible failure modes: the translation dict might not have
# the language altogether, or a specific key could be untranslated. Both
# modes are caught with the KeyError.
-
- trans = None
- try:
- trans = transdict[lang][twtitle]
- except KeyError:
- # try alternative languages and English
- if fallback:
- for alt in _altlang(lang) + ['en']:
- try:
- trans = transdict[alt][twtitle]
- if code_needed:
- lang = alt
- break
- except KeyError:
- continue
- if trans is None:
- raise TranslationError(
- "No English translation has been defined "
- "for TranslateWiki key %r" % twtitle)
+ langs = [lang]
+ if fallback:
+ langs += _altlang(lang) + ['en']
+ for alt in langs:
+ trans = _get_translation(alt, twtitle)
+ if trans:
+ break
+ else:
+ raise TranslationError(
+ 'No English translation has been defined for TranslateWiki key'
+ ' %r\nIt can happen due to lack of i18n submodule or files. '
+ 'Read https://mediawiki.org/wiki/PWB/i18n' % twtitle)
# send the language code back via the given list
if code_needed:
code.append(lang)
@@ -527,23 +518,16 @@
it takes that variant calculated by the plural_rules depending on the number
value. Multiple plurals are allowed.
- As an examples, if we had a test dictionary in test.py like::
-
- msg = {
- 'en': {
- # number value as format string is allowed
- 'test-plural': u'Bot: Changing %(num)s
{{PLURAL:%(num)d|page|pages}}.',
- },
- 'nl': {
- # format string inside PLURAL tag is allowed
- 'test-plural': u'Bot: Pas {{PLURAL:num|1 pagina|%(num)d
pagina\'s}} aan.',
- },
- 'fr': {
- # additional string inside or outside PLURAL tag is allowed
- 'test-plural': u'Robot: Changer %(descr)s {{PLURAL:num|une
page|quelques pages}}.',
- },
- }
-
+ As an example, if we had several JSON dictionaries in the test folder like:
+ en.json:
+ {
+ "test-plural": "Bot: Changing %(num)s {{PLURAL:%(num)d|page|pages}}."
+ }
+ fr.json:
+ {
+ "test-plural": "Robot: Changer %(descr)s {{PLURAL:num|une page|quelques pages}}."
+ }
+ and so on.
>> from pywikibot import i18n
>> i18n.set_messages_package('tests.i18n')
>> # use a number
@@ -602,17 +586,13 @@
@param code: The language code
@param twtitle: The TranslateWiki string title, in <package>-<key>
format
"""
- package = twtitle.split("-")[0]
- transdict = _get_messages_bundle(package)
- if not transdict:
- pywikibot.warning('twhas_key: Could not load message bundle %s.%s'
- % (_messages_package_name, package))
- return False
-
# If a site is given instead of a code, use its language
if hasattr(code, 'lang'):
code = code.lang
- return code in transdict and twtitle in transdict[code]
+ transdict = _get_translation(code, twtitle)
+ if transdict is None:
+ return False
+ return True
def twget_keys(twtitle):
@@ -621,9 +601,21 @@
@param twtitle: The TranslateWiki string title, in <package>-<key>
format
"""
+ # obtain the directory containing all the json files for this package
package = twtitle.split("-")[0]
- transdict = _get_messages_bundle(package)
- return (lang for lang in sorted(transdict.keys()) if lang != 'qqq')
+ mod = __import__(_messages_package_name, fromlist=[str('__file__')])
+ pathname = os.path.join(os.path.dirname(mod.__file__), package)
+
+ # build a list of languages in that directory
+ langs = [filename.partition('.')[0]
+ for filename in sorted(os.listdir(pathname))
+ if filename.endswith('.json')]
+
+ # exclude languages that do not have this specific message in that package
+ # i.e. an incomplete set of translated messages.
+ return [lang for lang in langs
+ if lang != 'qqq' and
+ _get_translation(lang, twtitle)]
def input(twtitle, parameters=None, password=False, fallback_prompt=None):
diff --git a/tests/i18n/test.py b/tests/i18n/test.py
deleted file mode 100644
index 315224a..0000000
--- a/tests/i18n/test.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Test i18n data."""
-from __future__ import unicode_literals
-
-msg = {
- 'de': {
- 'test-plural': u'Bot: Ändere %(num)d
{{PLURAL:num|Seite|Seiten}}.',
- 'test-multiple-plurals':
- u'Bot: %(action)s %(line)s {{PLURAL:line|Zeile|Zeilen}} von '
- u'{{PLURAL:%(page)d|einer|mehreren}} {{PLURAL:page|Seite|Seiten}}.',
- },
- 'en': {
- 'test-localized': u'test-localized EN',
- 'test-semi-localized': u'test-semi-localized EN',
- 'test-non-localized': u'test-non-localized EN',
- 'test-plural': u'Bot: Changing %(num)s
{{PLURAL:%(num)d|page|pages}}.',
- },
- 'nl': {
- 'test-localized': u'test-localized NL',
- 'test-semi-localized': u'test-semi-localized NL',
- 'test-plural': u'Bot: Pas {{PLURAL:num|1 pagina|%(num)d
pagina\'s}} aan.',
- },
- 'fr': {
- 'test-plural': u'Robot: Changer %(descr)s {{PLURAL:num|une
page|quelques pages}}.',
- },
- 'fy': {
- 'test-localized': u'test-localized FY'
- },
- 'ja': {
- 'test-no-english': u'test-no-english JA'
- }
-}
diff --git a/tests/i18n/test/de.json b/tests/i18n/test/de.json
new file mode 100644
index 0000000..5309641
--- /dev/null
+++ b/tests/i18n/test/de.json
@@ -0,0 +1,4 @@
+{
+ "test-multiple-plurals": "Bot: %(action)s %(line)s {{PLURAL:line|Zeile|Zeilen}} von {{PLURAL:%(page)d|einer|mehreren}} {{PLURAL:page|Seite|Seiten}}.",
+ "test-plural": "Bot: Ändere %(num)d {{PLURAL:num|Seite|Seiten}}."
+}
\ No newline at end of file
diff --git a/tests/i18n/test/en.json b/tests/i18n/test/en.json
new file mode 100644
index 0000000..1d53316
--- /dev/null
+++ b/tests/i18n/test/en.json
@@ -0,0 +1,6 @@
+{
+ "test-localized": "test-localized EN",
+ "test-non-localized": "test-non-localized EN",
+ "test-plural": "Bot: Changing %(num)s {{PLURAL:%(num)d|page|pages}}.",
+ "test-semi-localized": "test-semi-localized EN"
+}
\ No newline at end of file
diff --git a/tests/i18n/test/fr.json b/tests/i18n/test/fr.json
new file mode 100644
index 0000000..4066396
--- /dev/null
+++ b/tests/i18n/test/fr.json
@@ -0,0 +1,3 @@
+{
+ "test-plural": "Robot: Changer %(descr)s {{PLURAL:num|une page|quelques pages}}."
+}
\ No newline at end of file
diff --git a/tests/i18n/test/fy.json b/tests/i18n/test/fy.json
new file mode 100644
index 0000000..5e207fb
--- /dev/null
+++ b/tests/i18n/test/fy.json
@@ -0,0 +1,3 @@
+{
+ "test-localized": "test-localized FY"
+}
\ No newline at end of file
diff --git a/tests/i18n/test/ja.json b/tests/i18n/test/ja.json
new file mode 100644
index 0000000..d9a9084
--- /dev/null
+++ b/tests/i18n/test/ja.json
@@ -0,0 +1,3 @@
+{
+ "test-no-english": "test-no-english JA"
+}
\ No newline at end of file
diff --git a/tests/i18n/test/nl.json b/tests/i18n/test/nl.json
new file mode 100644
index 0000000..7f695c9
--- /dev/null
+++ b/tests/i18n/test/nl.json
@@ -0,0 +1,5 @@
+{
+ "test-localized": "test-localized NL",
+ "test-plural": "Bot: Pas {{PLURAL:num|1 pagina|%(num)d pagina's}} aan.",
+ "test-semi-localized": "test-semi-localized NL"
+}
\ No newline at end of file
--
To view, visit
https://gerrit.wikimedia.org/r/151114
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I6e2c63db039854dd8d009d837b2e6b9d11ba217a
Gerrit-PatchSet: 69
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Nikerabbit <niklas.laxstrom(a)gmail.com>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>