jenkins-bot submitted this change.

View Change

Approvals: Matěj Suchánek: Looks good to me, but someone else must approve; Xqt: Looks good to me, approved; jenkins-bot: Verified
[IMPR] Add to_latin_digits() function to textlib

- to_latin_digits() is the counterpart to textlib.to_local_digits()
- use the new function with TimeStripper. The new function is 70% faster
than the old method.
- deprecate TimeStripper.fix_digits() and make it a staticmethod
- add tests for to_latin_digits() function
- use str.translate() within the to_local_digits() function, which makes
this function 16-70% faster

Change-Id: Ief32146f53eb3d295aef1807352639f3d3c638d7
---
M pywikibot/textlib.py
M tests/textlib_tests.py
2 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index d8d1384..fedb60e 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -21,8 +21,10 @@
import pywikibot
from pywikibot.backports import Container, Iterable, List, Tuple
from pywikibot.backports import OrderedDict as OrderedDictType
+from pywikibot.backports import Sequence as SequenceType
from pywikibot.exceptions import InvalidTitleError, SiteDefinitionError
from pywikibot.family import Family
+from pywikibot.tools import deprecated


try:
@@ -143,8 +145,32 @@
digits = NON_LATIN_DIGITS.get(lang)
if digits:
phrase = str(phrase)
- for i, digit in enumerate(digits):
- phrase = phrase.replace(str(i), digit)
+ trans = str.maketrans('0123456789', digits)
+ phrase = phrase.translate(trans)
+ return phrase
+
+
+def to_latin_digits(phrase: str,
+ langs: Union[SequenceType[str], str, None] = None) -> str:
+ """Change non-latin digits to latin digits.
+
+ .. versionadded:: 7.0
+
+ :param phrase: The phrase to convert to latin numerical.
+ :param langs: Language codes. If langs parameter is None, use all
+ known languages to convert.
+ :return: The string with latin digits
+ """
+ if langs is None:
+ langs = NON_LATIN_DIGITS.keys()
+ elif isinstance(langs, str):
+ langs = [langs]
+
+ digits = [NON_LATIN_DIGITS[key] for key in langs
+ if key in NON_LATIN_DIGITS]
+ if digits:
+ trans = str.maketrans(''.join(digits), '0123456789' * len(digits))
+ phrase = phrase.translate(trans)
return phrase


@@ -1902,12 +1928,15 @@
self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'],
self.site.siteinfo['timezone'])

- def fix_digits(self, line):
- """Make non-latin digits like Persian to latin to parse."""
- for system in NON_LATIN_DIGITS.values():
- for i in range(10):
- line = line.replace(system[i], str(i))
- return line
+ @staticmethod
+ @deprecated('to_latin_digits() function', since='7.0.0')
+ def fix_digits(line):
+ """Make non-latin digits like Persian to latin to parse.
+
+ .. deprecated:: 7.0.0
+ Use :func:`to_latin_digits` instead.
+ """
+ return to_latin_digits(line)

def _last_match_and_replace(self, txt: str, pat):
"""
@@ -2019,7 +2048,7 @@
# to reduce false positives.
line = removeDisabledParts(line)

- line = self.fix_digits(line)
+ line = to_latin_digits(line)
for pat in self.patterns:
line, match_obj = self._last_match_and_replace(line, pat)
if match_obj:
diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py
index 1a5241b..e56ff46 100644
--- a/tests/textlib_tests.py
+++ b/tests/textlib_tests.py
@@ -993,7 +993,7 @@
link)


-class TestLocalDigits(TestCase):
+class TestDigitsConversion(TestCase):

"""Test to verify that local digits are correctly being handled."""

@@ -1011,6 +1011,22 @@
textlib.to_local_digits(
'299792458', 'km'), '២៩៩៧៩២៤៥៨')

+ def test_to_latin(self):
+ """Test converting local digits to Latin digits."""
+ self.assertEqual(textlib.to_latin_digits('299792458'), '299792458')
+ self.assertEqual(
+ textlib.to_latin_digits('۲۹۹۷۹۲۴۵۸', 'fa'), '299792458')
+ self.assertEqual(
+ textlib.to_latin_digits('۲۹۹۷۹۲۴۵۸ flash'), '299792458 flash')
+ self.assertEqual(
+ textlib.to_latin_digits('២៩៩៧៩២៤៥៨', 'km'), '299792458')
+ self.assertEqual(
+ textlib.to_latin_digits('២៩៩៧៩២៤៥៨'), '299792458')
+ self.assertEqual(
+ textlib.to_latin_digits('២៩៩៧៩២៤៥៨', ['km', 'en']), '299792458')
+ self.assertEqual(
+ textlib.to_latin_digits('២៩៩៧៩២៤៥៨', ['en']), '២៩៩៧៩២៤៥៨')
+

class TestReplaceExcept(DefaultDrySiteTestCase):


To view, visit change 759890. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: Ief32146f53eb3d295aef1807352639f3d3c638d7
Gerrit-Change-Number: 759890
Gerrit-PatchSet: 6
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Ladsgroup <Ladsgroup@gmail.com>
Gerrit-Reviewer: Matěj Suchánek <matejsuchanek97@gmail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged