jenkins-bot has submitted this change and it was merged.
Change subject: Remove cosmetic changes dependency on isbn script
......................................................................
Remove cosmetic changes dependency on isbn script
Move ISBN regex into textlib for re-use. Use stdnum package as preferred provider of ISBN routines. Allow invalid ISBNs to be skipped and subsequent ISBNs reformatted. Fix bugs in isbn exception handling.
Bug: T89993
Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
---
M pywikibot/textlib.py
M scripts/cosmetic_changes.py
M scripts/isbn.py
M setup.py
M tests/isbn_tests.py
5 files changed, 136 insertions(+), 32 deletions(-)
Approvals:
  John Vandenberg: Looks good to me, but someone else must approve
  XZise: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py index 6a57668..73e58d9 100644 --- a/pywikibot/textlib.py +++ b/pywikibot/textlib.py @@ -1234,6 +1234,21 @@ return bool(m)
+def reformat_ISBNs(text, match_func): + """Reformat ISBNs. + + @param text: text containing ISBNs + @type text: str + @param match_func: function to reformat matched ISBNs + @type match_func: callable + @return: reformatted text + @rtype: str + """ + isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d-]+[\dXx])') + text = isbnR.sub(match_func, text) + return text + + # --------------------------------------- # Time parsing functionality (Archivebot) # --------------------------------------- diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py index bb99a30..f58985f 100755 --- a/scripts/cosmetic_changes.py +++ b/scripts/cosmetic_changes.py @@ -18,8 +18,9 @@ the predefined message texts with original and replacements inserted.
--ignore: Ignores if an error occured and either skips the page or - only that method. It can be set to 'page' or 'method'. +-ignore: Ignores if an error occured and skip either the page, or + only that method, or only an instance of the problem in the + page text. It can be set to 'page', 'method', or 'match'.
&warning;
@@ -66,7 +67,7 @@ """ # # (C) xqt, 2009-2013 -# (C) Pywikibot team, 2006-2014 +# (C) Pywikibot team, 2006-2015 # # Distributed under the terms of the MIT license. # @@ -76,13 +77,28 @@ #
import re -from pywikibot.tools import MediaWikiVersion + +from warnings import warn + +try: + import stdnum.isbn as stdnum_isbn + scripts_isbn = None +except ImportError: + stdnum_isbn = None + # Old dependency + try: + import scripts.isbn as scripts_isbn + except ImportError: + scripts_isbn = None + import pywikibot -import isbn + from pywikibot import config, i18n, textlib, pagegenerators from pywikibot.bot import ExistingPageBot, NoRedirectPageBot from pywikibot.page import url2unicode from pywikibot.tools import deprecate_arg, first_lower, first_upper +from pywikibot.tools import MediaWikiVersion +
warning = """ ATTENTION: You can run this script as a stand-alone for testing purposes. @@ -153,10 +169,54 @@ } }
- CANCEL_ALL = False CANCEL_PAGE = 1 CANCEL_METHOD = 2 +CANCEL_MATCH = 3 + + +def _format_isbn_match(match, strict=True): + """Helper function to validate and format a single matched ISBN.""" + isbn = match.group('code') + if stdnum_isbn: + try: + stdnum_isbn.validate(isbn) + except stdnum_isbn.ValidationError as e: + if strict: + raise + pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e)) + return isbn + + return stdnum_isbn.format(isbn) + else: + try: + scripts_isbn.is_valid(isbn) + except scripts_isbn.InvalidIsbnException as e: + if strict: + raise + pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e)) + return isbn + + isbn = scripts_isbn.getIsbn(isbn) + isbn.format() + return isbn.code + + +def _reformat_ISBNs(text, strict=True): + """Helper function to normalise ISBNs in text. + + @raises Exception: Invalid ISBN encountered when strict enabled + """ + if not stdnum_isbn: + if not scripts_isbn: + raise NotImplementedError( + 'ISBN functionality not available. Install stdnum package.') + + warn('package stdnum.isbn not found; using scripts.isbn', + ImportWarning) + + return textlib.reformat_ISBNs( + text, lambda match: _format_isbn_match(match, strict=strict))
class CosmeticChangesToolkit: @@ -196,6 +256,7 @@ self.fixTypo,
self.fixArabicLetters, + self.fix_ISBN, )
@classmethod @@ -218,20 +279,10 @@ raise return text if result is None else result
- @staticmethod - def isbn_execute(text): - """Hyphenate ISBN numbers and catch 'InvalidIsbnException'.""" - try: - return isbn.hyphenateIsbnNumbers(text) - except isbn.InvalidIsbnException as error: - pywikibot.log(u"ISBN error: %s" % error) - return None - def _change(self, text): """Execute all clean up methods.""" for method in self.common_methods: text = self.safe_execute(method, text) - text = self.safe_execute(CosmeticChangesToolkit.isbn_execute, text) return text
def change(self, text): @@ -898,6 +949,11 @@ r'\1== {{int:license-header}} ==', exceptions, True) return text
+ def fix_ISBN(self, text): + """Hyphenate ISBN numbers.""" + return _reformat_ISBNs( + text, strict=False if self.ignore == CANCEL_MATCH else True) +
class CosmeticChangesBot(ExistingPageBot, NoRedirectPageBot):
@@ -959,6 +1015,8 @@ options['ignore'] = CANCEL_METHOD elif ignore_mode == 'page': options['ignore'] = CANCEL_PAGE + elif ignore_mode == 'match': + options['ignore'] = CANCEL_MATCH else: raise ValueError('Unknown ignore mode "{0}"!'.format(ignore_mode)) else: diff --git a/scripts/isbn.py b/scripts/isbn.py index 769f303..721ed2e 100755 --- a/scripts/isbn.py +++ b/scripts/isbn.py @@ -45,8 +45,11 @@ #
import re + +from functools import partial + import pywikibot -from pywikibot import i18n, pagegenerators, Bot, WikidataBot +from pywikibot import i18n, pagegenerators, textlib, Bot, WikidataBot
try: import stdnum.isbn @@ -1354,11 +1357,11 @@ try: stdnum.isbn.validate(isbn) except stdnum.isbn.InvalidFormat as e: - raise InvalidIsbnException(e) + raise InvalidIsbnException(str(e)) except stdnum.isbn.InvalidChecksum as e: - raise InvalidIsbnException(e) + raise InvalidIsbnException(str(e)) except stdnum.isbn.InvalidLength as e: - raise InvalidIsbnException(e) + raise InvalidIsbnException(str(e)) return True
try: @@ -1370,10 +1373,7 @@ raise InvalidIsbnException('Invalid ISBN found') return True
- try: - getIsbn(isbn) - except InvalidIsbnException as e: - raise InvalidIsbnException(e) + getIsbn(isbn) return True
@@ -1411,11 +1411,8 @@ return i.code
-def hyphenateIsbnNumbers(text): - """Helper function to hyphenate an ISBN.""" - isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d-]+[\dXx])') - text = isbnR.sub(_hyphenateIsbnNumber, text) - return text +hyphenateIsbnNumbers = partial(textlib.reformat_ISBNs, + match_func=_hyphenateIsbnNumber)
def _isbn10toIsbn13(match): diff --git a/setup.py b/setup.py index a33928e..446808f 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@
extra_deps = { # Core library dependencies + 'isbn': ['python-stdnum'], 'daemonize': ['daemonize'], 'Graphviz': ['pydot'], 'MySQL': ['oursql'], diff --git a/tests/isbn_tests.py b/tests/isbn_tests.py index b871903..4850103 100644 --- a/tests/isbn_tests.py +++ b/tests/isbn_tests.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Tests for isbn script.""" # -# (C) Pywikibot team, 2014 +# (C) Pywikibot team, 2014-2015 # # Distributed under the terms of the MIT license. # @@ -11,15 +11,48 @@
__version__ = '$Id$'
+from pywikibot import Bot, Claim, ItemPage + +from scripts.cosmetic_changes import CosmeticChangesToolkit, CANCEL_MATCH + from scripts.isbn import ( ISBN10, ISBN13, InvalidIsbnException as IsbnExc, getIsbn, hyphenateIsbnNumbers, convertIsbn10toIsbn13, main ) from tests.aspects import ( - unittest, TestCase, WikibaseTestCase, ScriptMainTestCase + unittest, TestCase, DefaultDrySiteTestCase, + WikibaseTestCase, ScriptMainTestCase, ) -from pywikibot import Bot, Claim, ItemPage + + +class TestCosmeticChangesISBN(DefaultDrySiteTestCase): + + """Test CosmeticChanges ISBN fix.""" + + def test_valid_isbn(self): + """Test ISBN.""" + cc = CosmeticChangesToolkit(self.site, namespace=0) + + text = cc.fix_ISBN(' ISBN 097522980x ') + self.assertEqual(text, ' ISBN 0-9752298-0-X ') + + text = cc.fix_ISBN(' ISBN 9780975229804 ') + self.assertEqual(text, ' ISBN 978-0-9752298-0-4 ') + + def test_invalid_isbn(self): + cc = CosmeticChangesToolkit(self.site, namespace=0) + + self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 0975229LOL') # Invalid characters + self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 0975229801') # Invalid checksum + self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 09752298') # Invalid length + self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 09752X9801') # X in the middle + + def test_ignore_invalid_isbn(self): + cc = CosmeticChangesToolkit(self.site, namespace=0, ignore=CANCEL_MATCH) + + text = cc.fix_ISBN(' ISBN 0975229LOL ISBN 9780975229804 ') + self.assertEqual(text, ' ISBN 0975229LOL ISBN 978-0-9752298-0-4 ')
class TestIsbn(TestCase):
pywikibot-commits@lists.wikimedia.org