Revision: 6305 Author: wikipedian Date: 2009-01-27 19:58:05 +0000 (Tue, 27 Jan 2009)
Log Message: ----------- fix ISBNs which have minus signs, em dashes, etc. as separators.
Modified Paths: -------------- trunk/pywikipedia/fixes.py
Modified: trunk/pywikipedia/fixes.py
===================================================================
--- trunk/pywikipedia/fixes.py 2009-01-27 13:57:16 UTC (rev 6304)
+++ trunk/pywikipedia/fixes.py 2009-01-27 19:58:05 UTC (rev 6305)
@@ -388,12 +388,12 @@
  (r'ISBN: (\d+)', r'ISBN \1'),
  # superfluous word "number"
  (r'ISBN( number| no.?| No.?|-Nummer|-Nr.):? (\d+)', r'ISBN \2'),
- # Spaces, dashes, or dots instead of hyphens as separators,
- # or spaces between digits and separators.
+ # Space, minus, dot, hyphen, en dash, em dash, etc. instead of
+ # hyphen-minus as separator, or spaces between digits and separators.
  # Note that these regular expressions also match valid ISBNs, but
  # these won't be changed.
- (r'ISBN (978|979) *[- –.] *(\d+) *[- –.] *(\d+) *[- –.] *(\d+) *[- –.] *(\d)(?!\d)', r'ISBN \1-\2-\3-\4-\5'), # ISBN-13
- (r'ISBN (\d+) *[- –.] *(\d+) *[- –.] *(\d+) *[- –.] *(\d|X|x)(?!\d)', r'ISBN \1-\2-\3-\4'), # ISBN-10
+ (ur'ISBN (978|979) *[- −.‐-―] *(\d+) *[- −.‐-―] *(\d+) *[- −.‐-―] *(\d+) *[- −.‐-―] *(\d)(?!\d)', r'ISBN \1-\2-\3-\4-\5'), # ISBN-13
+ (ur'ISBN (\d+) *[- −.‐-―] *(\d+) *[- −.‐-―] *(\d+) *[- −.‐-―] *(\d|X|x)(?!\d)', r'ISBN \1-\2-\3-\4'), # ISBN-10
  # missing space before ISBN-10 or before ISBN-13,
  # or non-breaking space.
  (r'ISBN(| | )((\d(-?)){12}\d|(\d(-?)){9}[\dXx])', r'ISBN \2'),
pywikipedia-l@lists.wikimedia.org