jenkins-bot has submitted this change and it was merged.
Change subject: Remove cosmetic changes dependency on isbn script
......................................................................
Remove cosmetic changes dependency on isbn script
Move ISBN regex into textlib for re-use.
Use stdnum package as preferred provider of ISBN routines.
Allow invalid ISBNs to be skipped and subsequent ISBNs reformatted.
Fix bugs in isbn exception handling.
Bug: T89993
Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
---
M pywikibot/textlib.py
M scripts/cosmetic_changes.py
M scripts/isbn.py
M setup.py
M tests/isbn_tests.py
5 files changed, 136 insertions(+), 32 deletions(-)
Approvals:
John Vandenberg: Looks good to me, but someone else must approve
XZise: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 6a57668..73e58d9 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -1234,6 +1234,21 @@
return bool(m)
+def reformat_ISBNs(text, match_func):
+ """Reformat ISBNs.
+
+ @param text: text containing ISBNs
+ @type text: str
+ @param match_func: function to reformat matched ISBNs
+ @type match_func: callable
+ @return: reformatted text
+ @rtype: str
+ """
+ isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d\-]+[\dXx])')
+ text = isbnR.sub(match_func, text)
+ return text
+
+
# ---------------------------------------
# Time parsing functionality (Archivebot)
# ---------------------------------------
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index bb99a30..f58985f 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -18,8 +18,9 @@
the predefined message texts with original and replacements
inserted.
--ignore: Ignores if an error occured and either skips the page or
- only that method. It can be set to 'page' or 'method'.
+-ignore: Ignores if an error occured and skip either the page, or
+ only that method, or only an instance of the problem in the
+ page text. It can be set to 'page', 'method', or 'match'.
&warning;
@@ -66,7 +67,7 @@
"""
#
# (C) xqt, 2009-2013
-# (C) Pywikibot team, 2006-2014
+# (C) Pywikibot team, 2006-2015
#
# Distributed under the terms of the MIT license.
#
@@ -76,13 +77,28 @@
#
import re
-from pywikibot.tools import MediaWikiVersion
+
+from warnings import warn
+
+try:
+ import stdnum.isbn as stdnum_isbn
+ scripts_isbn = None
+except ImportError:
+ stdnum_isbn = None
+ # Old dependency
+ try:
+ import scripts.isbn as scripts_isbn
+ except ImportError:
+ scripts_isbn = None
+
import pywikibot
-import isbn
+
from pywikibot import config, i18n, textlib, pagegenerators
from pywikibot.bot import ExistingPageBot, NoRedirectPageBot
from pywikibot.page import url2unicode
from pywikibot.tools import deprecate_arg, first_lower, first_upper
+from pywikibot.tools import MediaWikiVersion
+
warning = """
ATTENTION: You can run this script as a stand-alone for testing purposes.
@@ -153,10 +169,54 @@
}
}
-
CANCEL_ALL = False
CANCEL_PAGE = 1
CANCEL_METHOD = 2
+CANCEL_MATCH = 3
+
+
+def _format_isbn_match(match, strict=True):
+ """Helper function to validate and format a single matched ISBN."""
+ isbn = match.group('code')
+ if stdnum_isbn:
+ try:
+ stdnum_isbn.validate(isbn)
+ except stdnum_isbn.ValidationError as e:
+ if strict:
+ raise
+ pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e))
+ return isbn
+
+ return stdnum_isbn.format(isbn)
+ else:
+ try:
+ scripts_isbn.is_valid(isbn)
+ except scripts_isbn.InvalidIsbnException as e:
+ if strict:
+ raise
+ pywikibot.log('ISBN "%s" validation error: %s' % (isbn, e))
+ return isbn
+
+ isbn = scripts_isbn.getIsbn(isbn)
+ isbn.format()
+ return isbn.code
+
+
+def _reformat_ISBNs(text, strict=True):
+ """Helper function to normalise ISBNs in text.
+
+ @raises Exception: Invalid ISBN encountered when strict enabled
+ """
+ if not stdnum_isbn:
+ if not scripts_isbn:
+ raise NotImplementedError(
+ 'ISBN functionality not available. Install stdnum package.')
+
+ warn('package stdnum.isbn not found; using scripts.isbn',
+ ImportWarning)
+
+ return textlib.reformat_ISBNs(
+ text, lambda match: _format_isbn_match(match, strict=strict))
class CosmeticChangesToolkit:
@@ -196,6 +256,7 @@
self.fixTypo,
self.fixArabicLetters,
+ self.fix_ISBN,
)
@classmethod
@@ -218,20 +279,10 @@
raise
return text if result is None else result
- @staticmethod
- def isbn_execute(text):
- """Hyphenate ISBN numbers and catch 'InvalidIsbnException'."""
- try:
- return isbn.hyphenateIsbnNumbers(text)
- except isbn.InvalidIsbnException as error:
- pywikibot.log(u"ISBN error: %s" % error)
- return None
-
def _change(self, text):
"""Execute all clean up methods."""
for method in self.common_methods:
text = self.safe_execute(method, text)
- text = self.safe_execute(CosmeticChangesToolkit.isbn_execute, text)
return text
def change(self, text):
@@ -898,6 +949,11 @@
r'\1== {{int:license-header}} ==', exceptions, True)
return text
+ def fix_ISBN(self, text):
+ """Hyphenate ISBN numbers."""
+ return _reformat_ISBNs(
+ text, strict=False if self.ignore == CANCEL_MATCH else True)
+
class CosmeticChangesBot(ExistingPageBot, NoRedirectPageBot):
@@ -959,6 +1015,8 @@
options['ignore'] = CANCEL_METHOD
elif ignore_mode == 'page':
options['ignore'] = CANCEL_PAGE
+ elif ignore_mode == 'match':
+ options['ignore'] = CANCEL_MATCH
else:
raise ValueError('Unknown ignore mode "{0}"!'.format(ignore_mode))
else:
diff --git a/scripts/isbn.py b/scripts/isbn.py
index 769f303..721ed2e 100755
--- a/scripts/isbn.py
+++ b/scripts/isbn.py
@@ -45,8 +45,11 @@
#
import re
+
+from functools import partial
+
import pywikibot
-from pywikibot import i18n, pagegenerators, Bot, WikidataBot
+from pywikibot import i18n, pagegenerators, textlib, Bot, WikidataBot
try:
import stdnum.isbn
@@ -1354,11 +1357,11 @@
try:
stdnum.isbn.validate(isbn)
except stdnum.isbn.InvalidFormat as e:
- raise InvalidIsbnException(e)
+ raise InvalidIsbnException(str(e))
except stdnum.isbn.InvalidChecksum as e:
- raise InvalidIsbnException(e)
+ raise InvalidIsbnException(str(e))
except stdnum.isbn.InvalidLength as e:
- raise InvalidIsbnException(e)
+ raise InvalidIsbnException(str(e))
return True
try:
@@ -1370,10 +1373,7 @@
raise InvalidIsbnException('Invalid ISBN found')
return True
- try:
- getIsbn(isbn)
- except InvalidIsbnException as e:
- raise InvalidIsbnException(e)
+ getIsbn(isbn)
return True
@@ -1411,11 +1411,8 @@
return i.code
-def hyphenateIsbnNumbers(text):
- """Helper function to hyphenate an ISBN."""
- isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d\-]+[\dXx])')
- text = isbnR.sub(_hyphenateIsbnNumber, text)
- return text
+hyphenateIsbnNumbers = partial(textlib.reformat_ISBNs,
+ match_func=_hyphenateIsbnNumber)
def _isbn10toIsbn13(match):
diff --git a/setup.py b/setup.py
index a33928e..446808f 100644
--- a/setup.py
+++ b/setup.py
@@ -19,6 +19,7 @@
extra_deps = {
# Core library dependencies
+ 'isbn': ['python-stdnum'],
'daemonize': ['daemonize'],
'Graphviz': ['pydot'],
'MySQL': ['oursql'],
diff --git a/tests/isbn_tests.py b/tests/isbn_tests.py
index b871903..4850103 100644
--- a/tests/isbn_tests.py
+++ b/tests/isbn_tests.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""Tests for isbn script."""
#
-# (C) Pywikibot team, 2014
+# (C) Pywikibot team, 2014-2015
#
# Distributed under the terms of the MIT license.
#
@@ -11,15 +11,48 @@
__version__ = '$Id$'
+from pywikibot import Bot, Claim, ItemPage
+
+from scripts.cosmetic_changes import CosmeticChangesToolkit, CANCEL_MATCH
+
from scripts.isbn import (
ISBN10, ISBN13, InvalidIsbnException as IsbnExc,
getIsbn, hyphenateIsbnNumbers, convertIsbn10toIsbn13,
main
)
from tests.aspects import (
- unittest, TestCase, WikibaseTestCase, ScriptMainTestCase
+ unittest, TestCase, DefaultDrySiteTestCase,
+ WikibaseTestCase, ScriptMainTestCase,
)
-from pywikibot import Bot, Claim, ItemPage
+
+
+class TestCosmeticChangesISBN(DefaultDrySiteTestCase):
+
+ """Test CosmeticChanges ISBN fix."""
+
+ def test_valid_isbn(self):
+ """Test ISBN."""
+ cc = CosmeticChangesToolkit(self.site, namespace=0)
+
+ text = cc.fix_ISBN(' ISBN 097522980x ')
+ self.assertEqual(text, ' ISBN 0-9752298-0-X ')
+
+ text = cc.fix_ISBN(' ISBN 9780975229804 ')
+ self.assertEqual(text, ' ISBN 978-0-9752298-0-4 ')
+
+ def test_invalid_isbn(self):
+ cc = CosmeticChangesToolkit(self.site, namespace=0)
+
+ self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 0975229LOL') # Invalid characters
+ self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 0975229801') # Invalid checksum
+ self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 09752298') # Invalid length
+ self.assertRaises(Exception, cc.fix_ISBN, 'ISBN 09752X9801') # X in the middle
+
+ def test_ignore_invalid_isbn(self):
+ cc = CosmeticChangesToolkit(self.site, namespace=0, ignore=CANCEL_MATCH)
+
+ text = cc.fix_ISBN(' ISBN 0975229LOL ISBN 9780975229804 ')
+ self.assertEqual(text, ' ISBN 0975229LOL ISBN 978-0-9752298-0-4 ')
class TestIsbn(TestCase):
--
To view, visit
https://gerrit.wikimedia.org/r/205837
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I215466febf77fa0b95997f25c89e414bb4dfffcc
Gerrit-PatchSet: 7
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>