jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/759890 )
Change subject: [IMPR] Add to_latin_digits() function to textlib
[IMPR] Add to_latin_digits() function to textlib
- to_latin_digits() is the counterpart to textlib.to_local_digits()
- use the new function with TimeStripper. The new function is 70% faster than the old method.
- deprecate TimeStripper.fix_digits() and make it a staticmethod
- add tests for to_latin_digits() function
- use str.translate() within to_local_digits() function, which makes this function 16-70% faster
Change-Id: Ief32146f53eb3d295aef1807352639f3d3c638d7 --- M pywikibot/textlib.py M tests/textlib_tests.py 2 files changed, 55 insertions(+), 10 deletions(-)
Approvals: Matěj Suchánek: Looks good to me, but someone else must approve Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index d8d1384..fedb60e 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -21,8 +21,10 @@ import pywikibot from pywikibot.backports import Container, Iterable, List, Tuple from pywikibot.backports import OrderedDict as OrderedDictType +from pywikibot.backports import Sequence as SequenceType from pywikibot.exceptions import InvalidTitleError, SiteDefinitionError from pywikibot.family import Family +from pywikibot.tools import deprecated
try: @@ -143,8 +145,32 @@ digits = NON_LATIN_DIGITS.get(lang) if digits: phrase = str(phrase) - for i, digit in enumerate(digits): - phrase = phrase.replace(str(i), digit) + trans = str.maketrans('0123456789', digits) + phrase = phrase.translate(trans) + return phrase + + +def to_latin_digits(phrase: str, + langs: Union[SequenceType[str], str, None] = None) -> str: + """Change non-latin digits to latin digits. + + .. versionadded:: 7.0 + + :param phrase: The phrase to convert to latin numerical. + :param langs: Language codes. If langs parameter is None, use all + known languages to convert. + :return: The string with latin digits + """ + if langs is None: + langs = NON_LATIN_DIGITS.keys() + elif isinstance(langs, str): + langs = [langs] + + digits = [NON_LATIN_DIGITS[key] for key in langs + if key in NON_LATIN_DIGITS] + if digits: + trans = str.maketrans(''.join(digits), '0123456789' * len(digits)) + phrase = phrase.translate(trans) return phrase
@@ -1902,12 +1928,15 @@ self.tzinfo = tzoneFixedOffset(self.site.siteinfo['timeoffset'], self.site.siteinfo['timezone'])
- def fix_digits(self, line): - """Make non-latin digits like Persian to latin to parse.""" - for system in NON_LATIN_DIGITS.values(): - for i in range(10): - line = line.replace(system[i], str(i)) - return line + @staticmethod + @deprecated('to_latin_digits() function', since='7.0.0') + def fix_digits(line): + """Make non-latin digits like Persian to latin to parse. + + .. deprecated:: 7.0.0 + Use :func:`to_latin_digits` instead. + """ + return to_latin_digits(line)
def _last_match_and_replace(self, txt: str, pat): """ @@ -2019,7 +2048,7 @@ # to reduce false positives. line = removeDisabledParts(line)
- line = self.fix_digits(line) + line = to_latin_digits(line) for pat in self.patterns: line, match_obj = self._last_match_and_replace(line, pat) if match_obj: diff --git a/tests/textlib_tests.py b/tests/textlib_tests.py index 1a5241b..e56ff46 100644 --- a/tests/textlib_tests.py +++ b/tests/textlib_tests.py @@ -993,7 +993,7 @@ link)
-class TestLocalDigits(TestCase): +class TestDigitsConversion(TestCase):
"""Test to verify that local digits are correctly being handled."""
@@ -1011,6 +1011,22 @@ textlib.to_local_digits( '299792458', 'km'), '២៩៩៧៩២៤៥៨')
+ def test_to_latin(self): + """Test converting local digits to Latin digits.""" + self.assertEqual(textlib.to_latin_digits('299792458'), '299792458') + self.assertEqual( + textlib.to_latin_digits('۲۹۹۷۹۲۴۵۸', 'fa'), '299792458') + self.assertEqual( + textlib.to_latin_digits('۲۹۹۷۹۲۴۵۸ flash'), '299792458 flash') + self.assertEqual( + textlib.to_latin_digits('២៩៩៧៩២៤៥៨', 'km'), '299792458') + self.assertEqual( + textlib.to_latin_digits('២៩៩៧៩២៤៥៨'), '299792458') + self.assertEqual( + textlib.to_latin_digits('២៩៩៧៩២៤៥៨', ['km', 'en']), '299792458') + self.assertEqual( + textlib.to_latin_digits('២៩៩៧៩២៤៥៨', ['en']), '២៩៩៧៩២៤៥៨') +
class TestReplaceExcept(DefaultDrySiteTestCase):