[Pywikipedia-l] SVN: [6305] trunk/pywikipedia/fixes.py
wikipedian at svn.wikimedia.org
wikipedian at svn.wikimedia.org
Tue Jan 27 19:58:05 UTC 2009
Revision: 6305
Author: wikipedian
Date: 2009-01-27 19:58:05 +0000 (Tue, 27 Jan 2009)
Log Message:
-----------
fix ISBNs which have minus signs, em dashes, etc. as separators.
Modified Paths:
--------------
trunk/pywikipedia/fixes.py
Modified: trunk/pywikipedia/fixes.py
===================================================================
--- trunk/pywikipedia/fixes.py 2009-01-27 13:57:16 UTC (rev 6304)
+++ trunk/pywikipedia/fixes.py 2009-01-27 19:58:05 UTC (rev 6305)
@@ -388,12 +388,12 @@
(r'ISBN: (\d+)', r'ISBN \1'),
# superfluous word "number"
(r'ISBN( number| no\.?| No\.?|-Nummer|-Nr\.):? (\d+)', r'ISBN \2'),
- # Spaces, dashes, or dots instead of hyphens as separators,
- # or spaces between digits and separators.
+ # Space, minus, dot, hyphen, en dash, em dash, etc. instead of
+ # hyphen-minus as separator, or spaces between digits and separators.
# Note that these regular expressions also match valid ISBNs, but
# these won't be changed.
- (r'ISBN (978|979) *[\- –\.] *(\d+) *[\- –\.] *(\d+) *[\- –\.] *(\d+) *[\- –\.] *(\d)(?!\d)', r'ISBN \1-\2-\3-\4-\5'), # ISBN-13
- (r'ISBN (\d+) *[\- –\.] *(\d+) *[\- –\.] *(\d+) *[\- –\.] *(\d|X|x)(?!\d)', r'ISBN \1-\2-\3-\4'), # ISBN-10
+ (ur'ISBN (978|979) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d)(?!\d)', r'ISBN \1-\2-\3-\4-\5'), # ISBN-13
+ (ur'ISBN (\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d+) *[\- −\.‐-―] *(\d|X|x)(?!\d)', r'ISBN \1-\2-\3-\4'), # ISBN-10
# missing space before ISBN-10 or before ISBN-13,
# or non-breaking space.
(r'ISBN(| | )((\d(-?)){12}\d|(\d(-?)){9}[\dXx])', r'ISBN \2'),
More information about the Pywikipedia-l mailing list