Revision: 7463 Author: xqt Date: 2009-10-14 16:41:33 +0000 (Wed, 14 Oct 2009)
Log Message: ----------- copy from trunk
Added Paths: ----------- branches/rewrite/scripts/cosmetic_changes.py branches/rewrite/scripts/isbn.py
# -*- coding: utf-8 -*-
# Provenance: branches/rewrite/scripts/cosmetic_changes.py (rev 7463),
# copied from trunk/pywikipedia/cosmetic_changes.py (rev 7457).
"""
This module can do slight modifications to a wiki page source code such that
the code looks cleaner. The changes are not supposed to change the look of the
rendered wiki page.

The following parameters are supported:

&params;

-always           Don't prompt you for each replacement. The warning (see
                  below) does not have to be confirmed. ATTENTION: Use this
                  with care!

-summary:XYZ      Set the summary message text for the edit to XYZ, bypassing
                  the predefined message texts with original and replacements
                  inserted.

All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.

&warning;

For regular use, it is recommended to put this line into your user-config.py:

    cosmetic_changes = True

There is another config variable: You can set

    cosmetic_changes_mylang_only = False

if you're running a bot on multiple sites and want to do cosmetic changes on
all of them, but be careful if you do.
"""
__version__ = '$Id$'

import re
import sys

import pywikibot
import isbn
from pywikibot import pagegenerators

warning = """ATTENTION: You can run this script as a stand-alone for testing purposes.
However, the changes are that are made are only minor, and other users
might get angry if you fill the version histories and watchlists with such
irrelevant changes."""

# Placeholders in the module docstring and the text substituted for them.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp,
    '&warning;': warning,
}

# Summary message when using this module as a stand-alone script
msg_standalone = {
    'als': u'Bötli: chleineri Änderige',
    'ar': u'روبوت: تغييرات تجميلية',
    'be-x-old': u'Робат: касмэтычныя зьмены',
    'bg': u'Робот козметични промени',
    'ca': u'Robot: Canvis cosmètics',
    'cs': u'Robotické: kosmetické úpravy',
    'da': u'Bot: Kosmetiske ændringer',
    'de': u'Bot: Kosmetische Änderungen',
    'el': u'Ρομπότ: διακοσμητικές αλλαγές',
    'en': u'Robot: Cosmetic changes',
    'es': u'Robot: Cambios triviales',
    'et': u'robot: kosmeetilised muudatused',
    'fa': u'ربات: ویرایش جزئی',
    'fi': u'Botti kosmeettisia muutoksia',
    'fr': u'Robot : Changement de type cosmétique',
    'fy': u'bot tekstwiziging',
    'gl': u'bot Cambios estética',
    'he': u'בוט: שינויים קוסמטיים',
    'hi': u'Bot: अंगराग परिवर्तन',
    'hr': u'robot kozmetičke promjene',
    'hu': u'Bot: kozmetikai változtatások',
    'ia': u'Robot: Cambios cosmetic',
    'id': u'bot kosmetik perubahan',
    'it': u'Bot: Modifiche estetiche',
    'ja': u'ロボットによる: 細部の編集',
    'ko': u'로봇: 예쁘게 바꿈',
    'la': u'automaton: mutationes minores',
    'lt': u'robotas: smulkūs taisymai',
    'lv': u'robots kosmētiskās izmaiņas',
    'mk': u'Бот: козметички промени',
    'ms': u'Bot: perubahan kosmetik',
    'mt': u'Bot: kosmetiċi bidliet',
    'nl': u'Bot: cosmetische wijzigingen',
    'no': u'Bot: Kosmetiske endringer',
    'pdc': u'Waddefresser: gleene Enneringe',
    'pl': u'Robot dokonuje poprawek kosmetycznych',
    'pt': u'Bot: Mudanças triviais',
    'ro': u'robot modificări cosmetice',
    'ru': u'робот косметические изменения',
    'sk': u'robot kozmetické zmeny',
    'sl': u'robot kozmetične spremembe',
    'sr': u'Бот козметичке промене',
    'sv': u'Bot: Kosmetiska ändringar',
    'th': u'บอต ปรับแต่งให้อ่านง่าย',
    'tl': u'robot Kosmetiko pagbabago',
    'tr': u'Bot Kozmetik değişiklikler',
    'uk': u'робот косметичні зміни',
    'vec': u'Bot: Modifiche estetiche',
    'vi': u'robot: Sửa cách trình bày',
    'war': u'Robot: Kosmetiko nga mga pagbag-o',
    'zh': u'機器人: 細部更改',
}

# Summary message that will be appended to the normal message when
# cosmetic changes are made on the fly
msg_append = {
    'als': u'; chleineri Änderige',
    'ar': u'; تغييرات تجميلية',
    'be-x-old': u'; касмэтычныя зьмены',
    'bg': u'; козметични промени',
    'ca': u'; canvis cosmètics',
    'cs': u'; kosmetické úpravy',
    'da': u'; kosmetiske ændringer',
    'de': u'; kosmetische Änderungen',
    'el': u'; διακοσμητικές αλλαγές',
    'en': u'; cosmetic changes',
    'es': u'; cambios triviales',
    'et': u'; kosmeetilised muudatused',
    'fa': u'; ویرایش جزئی',
    'fi': u'; kosmeettisia muutoksia',
    'fr': u'; changement de type cosmétique',
    'fy': u'; tekstwiziging',
    'gl': u'; cambios estética',
    'he': u'; שינויים קוסמטיים',
    'hi': u'; अंगराग परिवर्तन',
    'hr': u'; kozmetičke promjene',
    'hu': u'; kozmetikai változtatások',
    'ia': u'; cambios cosmetic',
    'id': u'; kosmetik perubahan',
    'it': u'; modifiche estetiche',
    'ja': u'; 細部の編集',
    'ko': u'; 예쁘게 바꿈',
    'la': u'; mutationes minores',
    'lt': u'; smulkūs taisymai',
    'lv': u'; kosmētiskās izmaiņas',
    'mt': u'; kosmetiċi bidliet',
    'mk': u'; козметички промени',
    'ms': u'; perubahan kosmetik',
    'nl': u'; cosmetische veranderingen',
    'no': u'; kosmetiske endringer',
    'pdc': u', gleene Enneringe',
    'pl': u'; zmiany kosmetyczne',
    'pt': u'; mudanças triviais',
    'ro': u'; modificări cosmetice',
    'ru': u'; косметические изменения',
    'sk': u'; kozmetické zmeny',
    'sl': u'; kozmetične spremembe',
    'sr': u'; козметичке промене',
    'sv': u'; kosmetiska ändringar',
    'th': u'; ปรับแต่งให้อ่านง่าย',
    'tl': u'; Kosmetiko pagbabago',
    'tr': u'; Kozmetik değişiklikler',
    'uk': u'; косметичні зміни',
    'vec': u'; modifiche estetiche',
    'vi': u'; sửa cách trình bày',
    'war': u'; kosmetiko nga mga pagbag-o',
    'zh': u'; 細部更改',
}

# Templates to strip from pages, keyed by family name, then language code.
deprecatedTemplates = {
    'wikipedia': {
        'pdc': [u'Schkiss'],
    }
}


class CosmeticChangesToolkit:
    """Collection of text clean-ups applied to the wiki source of one page."""

    def __init__(self, site, debug=False, redirect=False, namespace=None):
        """
        site      -- the site the page text belongs to
        debug     -- if true, change() shows a diff of old and new text
        redirect  -- if true, putSpacesInLists() leaves the text alone
        namespace -- namespace number of the page; 10 marks a template
        """
        self.site = site
        self.debug = debug
        self.redirect = redirect
        self.namespace = namespace
        self.template = (self.namespace == 10)

    def change(self, text):
        """
        Given a wiki source code text, returns the cleaned up version.
        """
        oldText = text
        text = self.fixSelfInterwiki(text)
        text = self.standardizeInterwiki(text)
        text = self.standardizeCategories(text)
        text = self.cleanUpLinks(text)
        text = self.cleanUpSectionHeaders(text)
        text = self.putSpacesInLists(text)
        text = self.translateAndCapitalizeNamespaces(text)
        text = self.removeDeprecatedTemplates(text)
        text = self.resolveHtmlEntities(text)
        text = self.validXhtml(text)
        text = self.removeUselessSpaces(text)
        text = self.removeNonBreakingSpaceBeforePercent(text)
        text = self.fixSyntaxSave(text)
        text = self.fixHtml(text)
        try:
            text = isbn.hyphenateIsbnNumbers(text)
        except isbn.InvalidIsbnException:
            # Deliberate best effort: an invalid ISBN on the page must not
            # prevent the other cosmetic changes from being returned.
            pass
        if self.debug:
            pywikibot.showDiff(oldText, text)
        return text

    def fixSelfInterwiki(self, text):
        """
        Interwiki links to the site itself are displayed like local links.
        Remove their language code prefix.
        """
        interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]'
                                % self.site.lang)
        return interwikiR.sub(r'[[\1]]', text)

    def standardizeInterwiki(self, text):
        """
        Makes sure that interwiki links are put to the correct position and
        into the right order.
        """
        # Skipped when the calling script is named 'interwiki'.
        if pywikibot.calledModuleName() != 'interwiki':
            interwikiLinks = pywikibot.getLanguageLinks(text,
                                                        insite=self.site)
            text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
                                                  site=self.site,
                                                  template=self.template)
        return text

    def standardizeCategories(self, text):
        """
        Makes sure that categories are put to the correct position, but
        does not sort them.
        """
        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006#...
        if self.site != pywikibot.getSite('de', 'wikipedia') \
           and not self.template:
            categories = pywikibot.getCategoryLinks(text, site=self.site)
            text = pywikibot.replaceCategoryLinks(text, categories,
                                                  site=self.site)
        return text

    def translateAndCapitalizeNamespaces(self, text):
        """
        Makes sure that localized namespace names are used.
        """
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for nsNumber in family.namespaces:
            if not family.isDefinedNSLanguage(nsNumber, self.site.lang):
                # Skip undefined namespaces
                continue
            if nsNumber == 6 and family.name == 'wikipedia' and \
               self.site.lang in ('en', 'fr'):
                # skip processing file namespace on en-wiki and fr-wiki
                continue
            namespaces = list(family.namespace(self.site.lang, nsNumber,
                                               all=True))
            # The first entry is the name written into the link; the
            # remaining ones are the aliases that get replaced.
            thisNs = namespaces.pop(0)

            # skip main (article) namespace
            if thisNs and namespaces:
                text = pywikibot.replaceExcept(
                    text,
                    r'\[\[\s*(' + '|'.join(namespaces)
                    + r') *:(?P<nameAndLabel>.*?)\]\]',
                    r'[[' + thisNs + r':\g<nameAndLabel>]]',
                    exceptions)
        return text

    def cleanUpLinks(self, text):
        """
        Tidies wiki links that point to namespace 0: replaces underscores,
        trims superfluous spaces, decodes URL-encoded titles and drops the
        pipe where the label can be expressed through the title or a link
        trail. Other links are returned unmodified.
        """
        trailR = re.compile(self.site.linktrail())

        # helper function which works on one link and either returns it
        # unmodified, or returns a replacement.
        def handleOneLink(match):
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')

            if self.site.isInterwikiLink(titleWithSection):
                # don't change anything
                return match.group()

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            # We only work on namespace 0 because pipes and linktrails work
            # differently for images and categories.
            try:
                page = pywikibot.Page(pywikibot.Link(titleWithSection,
                                                     self.site))
            except pywikibot.InvalidTitle:
                return match.group()
            if page.namespace() != 0:
                # don't change anything
                return match.group()

            # Replace underlines by spaces, also multiple underlines
            titleWithSection = re.sub('_+', ' ', titleWithSection)
            # Remove double spaces
            titleWithSection = re.sub(' {2,}', ' ', titleWithSection)
            # Remove unnecessary leading spaces from title,
            # but remember if we did this because we eventually want
            # to re-add it outside of the link later.
            titleLength = len(titleWithSection)
            titleWithSection = titleWithSection.lstrip()
            hadLeadingSpaces = (len(titleWithSection) != titleLength)
            hadTrailingSpaces = False
            # Remove unnecessary trailing spaces from title,
            # but remember if we did this because it may affect
            # the linktrail and because we eventually want to
            # re-add it outside of the link later.
            if not trailingChars:
                titleLength = len(titleWithSection)
                titleWithSection = titleWithSection.rstrip()
                hadTrailingSpaces = (len(titleWithSection) != titleLength)

            # Convert URL-encoded characters to unicode
            titleWithSection = pywikibot.url2unicode(titleWithSection,
                                                     site=self.site)

            if titleWithSection == '':
                # just skip empty links.
                return match.group()

            # Remove unnecessary initial and final spaces from label.
            # Please note that some editors prefer spaces around pipes.
            # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
            if label is not None:
                # Remove unnecessary leading spaces from label,
                # but remember if we did this because we want
                # to re-add it outside of the link later.
                # NOTE: this intentionally overrides the flag computed from
                # the title (see the examples further down).
                labelLength = len(label)
                label = label.lstrip()
                hadLeadingSpaces = (len(label) != labelLength)
                # Remove unnecessary trailing spaces from label,
                # but remember if we did this because it affects
                # the linktrail.
                if not trailingChars:
                    labelLength = len(label)
                    label = label.rstrip()
                    hadTrailingSpaces = (len(label) != labelLength)
            else:
                label = titleWithSection
            if trailingChars:
                label += trailingChars

            titleEnd = len(titleWithSection)
            lowerFirst = titleWithSection[0].lower() + titleWithSection[1:]
            if titleWithSection == label or lowerFirst == label:
                newLink = "[[%s]]" % label
            # Check if we can create a link with trailing characters
            # instead of a pipelink
            elif titleEnd <= len(label) \
                    and label[:titleEnd] == titleWithSection \
                    and re.sub(trailR, '', label[titleEnd:]) == '':
                newLink = "[[%s]]%s" % (label[:titleEnd], label[titleEnd:])
            else:
                # Try to capitalize the first letter of the title.
                # Maybe this feature is not useful for languages that
                # don't capitalize nouns...
                #if not self.site.nocapitalize:
                if self.site.sitename() == 'wikipedia:de':
                    titleWithSection = titleWithSection[0].upper() \
                                       + titleWithSection[1:]
                newLink = "[[%s|%s]]" % (titleWithSection, label)
            # re-add spaces that were pulled out of the link.
            # Examples:
            #   text[[ title ]]text        -> text [[title]] text
            #   text[[ title | name ]]text -> text [[title|name]] text
            #   text[[ title |name]]text   -> text[[title|name]]text
            #   text[[title| name]]text    -> text [[title|name]]text
            if hadLeadingSpaces:
                newLink = ' ' + newLink
            if hadTrailingSpaces:
                newLink = newLink + ' '
            return newLink

        # The regular expression which finds links. Results consist of
        # three named groups:
        # group titleWithSection is the target page title including an
        #     optional #section, that is, everything before | or ].
        # group label is the alternative link title, that's everything
        #     between | and ].
        # group linktrail is the link trail, that's letters after ]] which
        #     are part of the word.
        # note that the definition of 'letter' varies from language to
        # language.
        linkR = re.compile(
            r'\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\]'
            r'(?P<linktrail>' + self.site.linktrail() + ')')

        text = pywikibot.replaceExcept(
            text, linkR, handleOneLink,
            ['comment', 'math', 'nowiki', 'pre', 'startspace'])
        return text

    def resolveHtmlEntities(self, text):
        """
        Replaces HTML entities by the characters they stand for, except for
        the code points listed in `ignore`.
        """
        ignore = [
            38,   # Ampersand (&amp;)
            60,   # Less than (&lt;)
            62,   # Greater than (&gt;)
            91,   # Opening bracket - sometimes used intentionally inside links
            93,   # Closing bracket - sometimes used intentionally inside links
            124,  # Vertical bar - used intentionally in navigation bar templates on de:
            160,  # Non-breaking space (&nbsp;) - not supported by Firefox textareas
        ]
        # ignore &#39; see http://eo.wikipedia.org/w/index.php?title=Liberec&diff=next&oldid=23...
        if self.site.lang == 'eo':
            ignore.append(39)
        return pywikibot.html2unicode(text, ignore=ignore)

    def validXhtml(self, text):
        """Rewrites <br> as the XHTML compliant <br />."""
        return pywikibot.replaceExcept(text, r'<br>', r'<br />',
                                       ['comment', 'math', 'nowiki', 'pre'])

    def removeUselessSpaces(self, text):
        """Collapses runs of spaces and strips a space at the end of text."""
        multipleSpacesR = re.compile(' {2,}')
        spaceAtLineEndR = re.compile(' $')

        exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace',
                      'table', 'template']
        text = pywikibot.replaceExcept(text, multipleSpacesR, ' ', exceptions)
        text = pywikibot.replaceExcept(text, spaceAtLineEndR, '', exceptions)
        return text

    def removeNonBreakingSpaceBeforePercent(self, text):
        '''
        Newer MediaWiki versions automatically place a non-breaking space in
        front of a percent sign, so it is no longer required to place it
        manually.
        '''
        return pywikibot.replaceExcept(text, r'(\d)&nbsp;%', r'\1 %',
                                       ['timeline'])

    def cleanUpSectionHeaders(self, text):
        """
        For better readability of section header source code, puts a space
        between the equal signs and the title.
        Example: ==Section title== becomes == Section title ==

        NOTE: This space is recommended in the syntax help on the English and
        German Wikipedia. It might be that it is not wanted on other wikis.
        If there are any complaints, please file a bug report.
        """
        for level in range(1, 7):
            equals = '=' * level
            text = pywikibot.replaceExcept(
                text,
                r'\n' + equals + ' *(?P<title>[^=]+?) *' + equals + ' *\r\n',
                '\n' + equals + r' \g<title> ' + equals + '\r\n',
                ['comment', 'math', 'nowiki', 'pre'])
        return text

    def putSpacesInLists(self, text):
        """
        For better readability of bullet list and enumeration wiki source code,
        puts a space between the * or # and the text.

        NOTE: This space is recommended in the syntax help on the English, German,
        and French Wikipedia. It might be that it is not wanted on other wikis.
        If there are any complaints, please file a bug report.
        """
        if not self.redirect:
            text = pywikibot.replaceExcept(
                text,
                r'(?m)^(?P<bullet>(\*+|#+):*)(?P<char>[^\s\*#:].+?)',
                r'\g<bullet> \g<char>',
                ['comment', 'math', 'nowiki', 'pre'])
        return text

    def removeDeprecatedTemplates(self, text):
        """
        Removes all uses of the templates listed in deprecatedTemplates for
        this site's family and language.
        """
        familyName = self.site.family.name
        lang = self.site.lang
        if familyName in deprecatedTemplates \
           and lang in deprecatedTemplates[familyName]:
            for template in deprecatedTemplates[familyName][lang]:
                # The remainder of the name is matched literally.
                rest = re.escape(template[1:])
                if not self.site.nocapitalize:
                    # Accept either case for the first letter.
                    pattern = '[' + template[0].upper() \
                              + template[0].lower() + ']' + rest
                else:
                    pattern = re.escape(template[0]) + rest
                text = pywikibot.replaceExcept(
                    text,
                    r'\{\{([mM][sS][gG]:)?' + pattern
                    + r'(?P<parameters>\|[^}]+|)}}',
                    '',
                    ['comment', 'math', 'nowiki', 'pre'])
        return text

    #from fixes.py
    def fixSyntaxSave(self, text):
        """Repairs external links written with wiki-link syntax."""
        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                      'startspace']
        # external link in double brackets
        text = pywikibot.replaceExcept(
            text, r'\[\[(?P<url>https?://[^\]]+?)\]\]',
            r'[\g<url>]', exceptions)
        # external link starting with double bracket
        text = pywikibot.replaceExcept(
            text, r'\[\[(?P<url>https?://.+?)\]',
            r'[\g<url>]', exceptions)
        # external link and description separated by a pipe, with
        # whitespace in front of the pipe, so that it is clear that
        # the pipe is not a legitimate part of the URL.
        text = pywikibot.replaceExcept(
            text,
            r'\[(?P<url>https?://[^\|\] \r\n]+?) +\| *(?P<label>[^\|\]]+?)\]',
            r'[\g<url> \g<label>]', exceptions)
        # pipe in external link, where the correct end of the URL can
        # be detected from the file extension. It is very unlikely that
        # this will cause mistakes.
        text = pywikibot.replaceExcept(
            text,
            r'\[(?P<url>https?://[^\|\] ]+?'
            r'(\.pdf|\.html|\.htm|\.php|\.asp|\.aspx|\.jsp))'
            r' *\| *(?P<label>[^\|\]]+?)\]',
            r'[\g<url> \g<label>]', exceptions)
        return text

    def fixHtml(self, text):
        """Replaces simple HTML markup by the equivalent wiki syntax."""
        # Everything case-insensitive (?i)
        # Keep in mind that MediaWiki automatically converts <br> to <br />
        exceptions = ['nowiki', 'comment', 'math', 'pre', 'source',
                      'startspace']
        text = pywikibot.replaceExcept(text, r'(?i)<b>(.*?)</b>',
                                       r"'''\1'''", exceptions)
        text = pywikibot.replaceExcept(text, r'(?i)<strong>(.*?)</strong>',
                                       r"'''\1'''", exceptions)
        text = pywikibot.replaceExcept(text, r'(?i)<i>(.*?)</i>',
                                       r"''\1''", exceptions)
        text = pywikibot.replaceExcept(text, r'(?i)<em>(.*?)</em>',
                                       r"''\1''", exceptions)
        # horizontal line without attributes in a single line
        text = pywikibot.replaceExcept(
            text, r'(?i)([\r\n])<hr[ /]*>([\r\n])', r'\1----\2', exceptions)
        # horizontal line with attributes; can't be done with wiki syntax
        # so we only make it XHTML compliant
        text = pywikibot.replaceExcept(text, r'(?i)<hr ([^>/]+?)>',
                                       r'<hr \1 />', exceptions)
        # TODO: maybe we can make the bot replace <p> tags with \r\n's.
        return text


class CosmeticChangesBot:
    """Applies CosmeticChangesToolkit to every page of a generator."""

    def __init__(self, generator, acceptall=False,
                 comment=u'Robot: Cosmetic changes'):
        """
        generator -- iterable yielding the pages to work on
        acceptall -- if true, save changes without asking
        comment   -- edit summary used when saving
        """
        self.generator = generator
        self.acceptall = acceptall
        self.comment = comment

    def treat(self, page):
        """Cleans up one page, asking for confirmation unless acceptall."""
        try:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            ccToolkit = CosmeticChangesToolkit(page.site(), debug=True,
                                               namespace=page.namespace())
            originalText = page.get()
            changedText = ccToolkit.change(originalText)
            if changedText != originalText:
                accepted = self.acceptall
                if not accepted:
                    choice = pywikibot.inputChoice(
                        u'Do you want to accept these changes?',
                        ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                    if choice == 'a':
                        # 'All' also accepts every following page.
                        self.acceptall = True
                    accepted = choice in ('y', 'a')
                if accepted:
                    page.put(changedText, comment=self.comment)
            else:
                pywikibot.output('No changes were necessary in %s'
                                 % page.title())
        except pywikibot.NoPage:
            pywikibot.output("Page %s does not exist?!" % page.aslink())
        except pywikibot.IsRedirectPage:
            pywikibot.output("Page %s is a redirect; skipping."
                             % page.aslink())
        except pywikibot.LockedPage:
            pywikibot.output("Page %s is locked?!" % page.aslink())

    def run(self):
        """Treats every page of the generator; Ctrl-C propagates."""
        for page in self.generator:
            self.treat(page)


def main():
    """Parses the command line and runs the bot on the selected pages."""
    #page generator
    gen = None
    pageTitle = []
    editSummary = ''
    answer = 'y'
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handleArgs():
        if arg.startswith('-summary:'):
            editSummary = arg[len('-summary:'):]
        elif arg == '-always':
            always = True
        elif not genFactory.handleArg(arg):
            pageTitle.append(arg)

    if editSummary == '':
        # Load default summary message.
        editSummary = pywikibot.translate(pywikibot.getSite(), msg_standalone)

    # A former check refused to run on the Dutch Wikipedia, where cosmetic
    # changes are welcome only when combined with other edits and not as a
    # stand-alone bot run. It is disabled: although the point is still
    # valid, there is now a warning and a prompt (see below).

    if pageTitle:
        site = pywikibot.getSite()
        gen = iter([pywikibot.Page(pywikibot.Link(t, site))
                    for t in pageTitle])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # Nothing to work on: show the help and stop instead of starting
        # the bot without a generator.
        pywikibot.showHelp()
        return
    if not always:
        answer = pywikibot.inputChoice(
            warning + '\nDo you really want to continue?',
            ['yes', 'no'], ['y', 'N'], 'N')

    if answer == 'y':
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = CosmeticChangesBot(preloadingGen, acceptall=always,
                                 comment=editSummary)
        bot.run()


if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
Copied: branches/rewrite/scripts/isbn.py (from rev 7457, trunk/pywikipedia/isbn.py) =================================================================== --- branches/rewrite/scripts/isbn.py (rev 0) +++ branches/rewrite/scripts/isbn.py 2009-10-14 16:41:33 UTC (rev 7463) @@ -0,0 +1,1498 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +""" +This script goes over multiple pages of the home wiki, and reports invalid +ISBN numbers. + +Additionally, it can convert all ISBN-10 codes to the ISBN-13 format, and +correct the ISBN format by placing hyphens. + +These command line parameters can be used to specify which pages to work on: + +¶ms; + +-namespace:n Number or name of namespace to process. The parameter can be + used multiple times. It works in combination with all other + parameters, except for the -start parameter. If you e.g. + want to iterate over all categories starting at M, use + -start:Category:M. + +Furthermore, the following command line parameters are supported: + +-to13 Converts all ISBN-10 codes to ISBN-13. + NOTE: This needn't be done, as MediaWiki still supports + (and will keep supporting) ISBN-10, and all libraries and + bookstores will most likely do so as well. + +-format Corrects the hyphenation. + NOTE: This is in here for testing purposes only. Usually + it's not worth to create an edit for such a minor issue. + The recommended way of doing this is enabling + cosmetic_changes, so that these changes are made on-the-fly + to all pages that are modified. + +-always Don't prompt you for each replacement. 
+ +""" + +__version__='$Id$' + +import pywikibot +from pywikibot import pagegenerators +import sys, re + +docuReplacements = { + '¶ms;': pagegenerators.parameterHelp, +} + +# Summary messages in different languages +msg = { + 'ar': u'روبوت: تهيئة ISBN', + 'de': 'Bot: Formatiere ISBN', + 'en': 'Robot: Formatting ISBN', + 'he': u'בוט: מעצב ISBN', + 'ja': u'ロボットによる ISBN の書式化', + 'nl': 'Bot: ISBN opgemaakt', + 'pt': u'Bot: Formatando ISBN', + 'zh': u'機器人:ISBN格式化', +} + +# Maps each group number to the list of its publisher number ranges. +# Taken from http://www.isbn-international.org/converter/ranges.htm +ranges = { + '0': [ # English speaking area + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '949999'), + ('9500000', '9999999'), + ], + '1': [ # English speaking area + ('00', '09'), + ('100', '399'), + ('4000', '5499'), + ('55000', '86979'), + ('869800', '998999'), + ], + '2': [ # French speaking area + ('00', '19'), + ('200', '349'), + ('35000', '39999'), + ('400', '699'), + ('7000', '8399'), + ('84000', '89999'), + ('900000', '949999'), + ('9500000', '9999999'), + ], + '3': [ # German speaking area + ('00', '02'), + ('030', '033'), + ('0340', '0369'), + ('03700', '03999'), + ('04', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '949999'), + ('9500000', '9999999'), + ], + '4': [ # Japan + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '949999'), + ('9500000', '9999999'), + ], + '5': [ # Russian Federation + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '909999'), + ('91000', '91999'), + ('9200', '9299'), + ('93000', '94999'), + ('9500', '9799'), + ('98000', '98999'), + ('9900000', '9909999'), + ('9910', '9999'), + ], + '600': [ # Iran + ('00', '09'), + ('100', '499'), + ('5000', '8999'), + ('90000', '99999'), + ], + '601': [ # Kazakhstan + ('00', '19'), + ('200', '699'), + ('7000', '7999'), + ('80000', '84999'), 
+ ('85', '99'), + ], + '602': [ # Indonesia + ('00', '19'), + ('200', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '603': [ # Saudi Arabia + ('00', '04'), + ('500', '799'), + ('8000', '8999'), + ('90000', '99999'), + ], + '604': [ # Vietnam + ('0', '4'), + ('50', '89'), + ('900', '979'), + ('9800', '9999'), + ], + '605': [ # Turkey + ('00', '09'), + ('100', '399'), + ('4000', '5999'), + ('60000', '89999'), + ], + '7': [ # China, People's Republic + ('00', '09'), + ('100', '499'), + ('5000', '7999'), + ('80000', '89999'), + ('900000', '999999'), + ], + '80': [ # Czech Republic; Slovakia + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '999999'), + ], + '81': [ # India + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '999999'), + ], + '82': [ # Norway + ('00', '19'), + ('200', '699'), + ('7000', '8999'), + ('90000', '98999'), + ('990000', '999999'), + ], + '83': [ # Poland + ('00', '19'), + ('200', '599'), + ('60000', '69999'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '999999'), + ], + '84': [ # Spain + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('9000', '9199'), + ('920000', '923999'), + ('92400', '92999'), + ('930000', '949999'), + ('95000', '96999'), + ('9700', '9999'), + ], + '85': [ # Brazil + ('00', '19'), + ('200', '599'), + ('60000', '69999'), + ('7000', '8499'), + ('85000', '89999'), + ('900000', '979999'), + ('98000', '99999'), + ], + '86': [ # Serbia and Montenegro + ('00', '29'), + ('300', '599'), + ('6000', '7999'), + ('80000', '89999'), + ('900000', '999999'), + ], + '87': [ # Denmark + ('00', '29'), + ('400', '649'), + ('7000', '7999'), + ('85000', '94999'), + ('970000', '999999'), + ], + '88': [ # Italian speaking area + ('00', '19'), + ('200', '599'), + ('6000', '8499'), + ('85000', '89999'), + ('900000', '949999'), + ('95000', '99999'), + ], + '89': [ # Korea + ('00', '24'), + ('250', '549'), + ('5500', '8499'), 
+ ('85000', '94999'), + ('950000', '999999'), + ], + '90': [ # Netherlands, Belgium (Flemish) + ('00', '19'), + ('200', '499'), + ('5000', '6999'), + ('70000', '79999'), + ('800000', '849999'), + ('8500', '8999'), + ('900000', '909999'), + ('940000', '949999'), + ], + '91': [ # Sweden + ('0', '1'), + ('20', '49'), + ('500', '649'), + ('7000', '7999'), + ('85000', '94999'), + ('970000', '999999'), + ], + '92': [ # International Publishers (Unesco, EU), European Community Organizations + ('0', '5'), + ('60', '79'), + ('800', '899'), + ('9000', '9499'), + ('95000', '98999'), + ('990000', '999999'), + ], + '93': [ # India - no ranges fixed yet + ], + '950': [ # Argentina + ('00', '49'), + ('500', '899'), + ('9000', '9899'), + ('99000', '99999'), + ], + '951': [ # Finland + ('0', '1'), + ('20', '54'), + ('550', '889'), + ('8900', '9499'), + ('95000', '99999'), + ], + '952': [ # Finland + ('00', '19'), + ('200', '499'), + ('5000', '5999'), + ('60', '65'), + ('6600', '6699'), + ('67000', '69999'), + ('7000', '7999'), + ('80', '94'), + ('9500', '9899'), + ('99000', '99999'), + ], + '953': [ # Croatia + ('0', '0'), + ('10', '14'), + ('150', '549'), + ('55000', '59999'), + ('6000', '9499'), + ('95000', '99999'), + ], + '954': [ # Bulgaria + ('00', '29'), + ('300', '799'), + ('8000', '8999'), + ('90000', '92999'), + ('9300', '9999'), + ], + '955': [ # Sri Lanka + ('0', '0'), + ('1000', '1999'), + ('20', '54'), + ('550', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '956': [ # Chile + ('00', '19'), + ('200', '699'), + ('7000', '9999'), + ], + '957': [ # Taiwan, China + ('00', '02'), + ('0300', '0499'), + ('05', '19'), + ('2000', '2099'), + ('21', '27'), + ('28000', '30999'), + ('31', '43'), + ('440', '819'), + ('8200', '9699'), + ('97000', '99999'), + ], + '958': [ # Colombia + ('00', '59'), + ('600', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '959': [ # Cuba + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ], + '960': [ # Greece + ('00', '19'), 
+ ('200', '659'), + ('6600', '6899'), + ('690', '699'), + ('7000', '8499'), + ('85000', '99999'), + ], + '961': [ # Slovenia + ('00', '19'), + ('200', '599'), + ('6000', '8999'), + ('90000', '94999'), + ], + '962': [ # Hong Kong + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '86999'), + ('8700', '8999'), + ('900', '999'), + ], + '963': [ # Hungary + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('9000', '9999'), + ], + '964': [ # Iran + ('00', '14'), + ('150', '249'), + ('2500', '2999'), + ('300', '549'), + ('5500', '8999'), + ('90000', '96999'), + ('970', '989'), + ('9900', '9999'), + ], + '965': [ # Israel + ('00', '19'), + ('200', '599'), + ('7000', '7999'), + ('90000', '99999'), + ], + '966': [ # Ukraine + ('00', '19'), + ('2000', '2999'), + ('300', '699'), + ('7000', '8999'), + ('90000', '99999'), + ], + '967': [ # Malaysia + ('00', '29'), + ('300', '499'), + ('5000', '5999'), + ('60', '89'), + ('900', '989'), + ('9900', '9989'), + ('99900', '99999'), + ], + '968': [ # Mexico + ('01', '39'), + ('400', '499'), + ('5000', '7999'), + ('800', '899'), + ('9000', '9999'), + ], + '969': [ # Pakistan + ('0', '1'), + ('20', '39'), + ('400', '799'), + ('8000', '9999'), + ], + '970': [ # Mexico + ('01', '59'), + ('600', '899'), + ('9000', '9099'), + ('91000', '96999'), + ('9700', '9999'), + ], + '971': [ #Philippines? 
+ ('000', '019'), + ('02', '02'), + ('0300', '0599'), + ('06', '09'), + ('10', '49'), + ('500', '849'), + ('8500', '9099'), + ('91000', '99999'), + ], + '972': [ # Portugal + ('0', '1'), + ('20', '54'), + ('550', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '973': [ # Romania + ('0', '0'), + ('100', '169'), + ('1700', '1999'), + ('20', '54'), + ('550', '759'), + ('7600', '8499'), + ('85000', '88999'), + ('8900', '9499'), + ('95000', '99999'), + ], + '974': [ # Thailand + ('00', '19'), + ('200', '699'), + ('7000', '8499'), + ('85000', '89999'), + ('90000', '94999'), + ('9500', '9999'), + ], + '975': [ # Turkey + ('00000', '00999'), + ('01', '24'), + ('250', '599'), + ('6000', '9199'), + ('92000', '98999'), + ('990', '999'), + ], + '976': [ # Caribbean Community + ('0', '3'), + ('40', '59'), + ('600', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '977': [ # Egypt + ('00', '19'), + ('200', '499'), + ('5000', '6999'), + ('700', '999'), + ], + '978': [ # Nigeria + ('000', '199'), + ('2000', '2999'), + ('30000', '79999'), + ('8000', '8999'), + ('900', '999'), + ], + '979': [ # Indonesia + ('000', '099'), + ('1000', '1499'), + ('15000', '19999'), + ('20', '29'), + ('3000', '3999'), + ('400', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '980': [ # Venezuela + ('00', '19'), + ('200', '599'), + ('6000', '9999'), + ], + '981': [ # Singapore + ('00', '19'), + ('200', '299'), + ('3000', '9999'), + ], + '982': [ # South Pacific + ('00', '09'), + ('100', '699'), + ('70', '89'), + ('9000', '9999'), + ], + '983': [ # Malaysia + ('00', '01'), + ('020', '199'), + ('2000', '3999'), + ('40000', '44999'), + ('45', '49'), + ('50', '79'), + ('800', '899'), + ('9000', '9899'), + ('99000', '99999'), + ], + '984': [ # Bangladesh + ('00', '39'), + ('400', '799'), + ('8000', '8999'), + ('90000', '99999'), + ], + '985': [ # Belarus + ('00', '39'), + ('400', '599'), + ('6000', '8999'), + ('90000', '99999'), + ], + '986': [ # Taiwan, China + ('00', '11'), + 
('120', '559'), + ('5600', '7999'), + ('80000', '99999'), + ], + '987': [ # Argentina + ('00', '09'), + ('1000', '1999'), + ('20000', '29999'), + ('30', '49'), + ('500', '899'), + ('9000', '9499'), + ('95000', '99999'), + ], + '988': [ # Hongkong + ('00', '16'), + ('17000', '19999'), + ('200', '799'), + ('8000', '9699'), + ('97000', '99999'), + ], + '989': [ # Portugal + ('0', '1'), + ('20', '54'), + ('550', '799'), + ('8000', '9499'), + ('95000', '99999'), + ], + '9937': [ # Nepal + ('0', '2'), + ('30', '49'), + ('500', '799'), + ('8000', '9999'), + ], + '9938': [ # Tunisia + ('00', '79'), + ('800', '949'), + ('9500', '9999'), + ], + '9939': [ # Armenia + ('0', '4'), + ('50', '79'), + ('800', '899'), + ('9000', '9999'), + ], + '9940': [ # Montenegro + ('0', '1'), + ('20', '49'), + ('500', '899'), + ('9000', '9999'), + ], + '9941': [ # Georgia + ('0', '0'), + ('10', '39'), + ('400', '899'), + ('9000', '9999'), + ], + '9942': [ # Ecuador + ('00', '89'), + ('900', '994'), + ('9950', '9999'), + ], + '9943': [ # Uzbekistan + ('00', '29'), + ('300', '399'), + ('4000', '9999'), + ], + '9944': [ # Turkey + ('0', '2'), + ('300', '499'), + ('5000', '5999'), + ('60', '89'), + ('900', '999'), + ], + '9945': [ # Dominican Republic + ('00', '00'), + ('010', '079'), + ('08', '39'), + ('400', '569'), + ('57', '57'), + ('580', '849'), + ('8500', '9999'), + ], + '9946': [ # Korea, P.D.R. 
+ ('0', '1'), + ('20', '39'), + ('400', '899'), + ('9000', '9999'), + ], + '9947': [ # Algeria + ('0', '1'), + ('20', '79'), + ('800', '999'), + ], + '9948': [ # United Arab Emirates + ('00', '39'), + ('400', '849'), + ('8500', '9999'), + ], + '9949': [ # Estonia + ('0', '0'), + ('10', '39'), + ('400', '899'), + ('9000', '9999'), + ], + '9950': [ # Palestine + ('00', '29'), + ('300', '840'), + ('8500', '9999'), + ], + '9951': [ # Kosova + ('00', '39'), + ('400', '849'), + ('8500', '9999'), + ], + '9952': [ # Azerbaijan + ('0', '1'), + ('20', '39'), + ('400', '799'), + ('8000', '9999'), + ], + '9953': [ # Lebanon + ('0', '0'), + ('10', '39'), + ('400', '599'), + ('60', '89'), + ('9000', '9999'), + ], + '9954': [ # Morocco + ('0', '1'), + ('20', '39'), + ('400', '799'), + ('8000', '9999'), + ], + '9955': [ # Lithuania + ('00', '39'), + ('400', '929'), + ('9300', '9999'), + ], + '9956': [ # Cameroon + ('0', '0'), + ('10', '39'), + ('400', '899'), + ('9000', '9999'), + ], + '9957': [ # Jordan + ('00', '39'), + ('400', '699'), + ('70', '84'), + ('8500', '9999'), + ], + '9958': [ # Bosnia and Herzegovina + ('0', '0'), + ('10', '49'), + ('500', '899'), + ('9000', '9999'), + ], + '9959': [ # Libya + ('0', '1'), + ('20', '79'), + ('800', '949'), + ('9500', '9999'), + ], + '9960': [ # Saudi Arabia + ('00', '59'), + ('600', '899'), + ('9000', '9999'), + ], + '9961': [ # Algeria + ('0', '2'), + ('30', '69'), + ('700', '949'), + ('9500', '9999'), + ], + '9962': [ # Panama + ('00', '54'), + ('5500', '5599'), + ('56', '59'), + ('600', '849'), + ('8500', '9999'), + ], + '9963': [ # Cyprus + ('0', '2'), + ('30', '54'), + ('550', '749'), + ('7500', '9999'), + ], + '9964': [ # Ghana + ('0', '6'), + ('70', '94'), + ('950', '999'), + ], + '9965': [ # Kazakhstan + ('00', '39'), + ('400', '899'), + ('9000', '9999'), + ], + '9966': [ # Kenya + ('00', '69'), + ('7000', '7499'), + ('750', '959'), + ('9600', '9999'), + ], + '9967': [ # Kyrgyzstan + ('00', '39'), + ('400', '899'), + ('9000', 
'9999'), + ], + '9968': [ # Costa Rica + ('00', '49'), + ('500', '939'), + ('9400', '9999'), + ], + '9970': [ # Uganda + ('00', '39'), + ('400', '899'), + ('9000', '9999'), + ], + '9971': [ # Singapore + ('0', '5'), + ('60', '89'), + ('900', '989'), + ('9900', '9999'), + ], + '9972': [ # Peru + ('00', '09'), + ('1', '1'), + ('200', '249'), + ('2500', '2999'), + ('30', '59'), + ('600', '899'), + ('9000', '9999'), + ], + '9973': [ # Tunisia + ('0', '05'), + ('060', '089'), + ('0900', '0999'), + ('10', '69'), + ('700', '969'), + ('9700', '9999'), + ], + '9974': [ # Uruguay + ('0', '2'), + ('30', '54'), + ('550', '749'), + ('7500', '9499'), + ('95', '99'), + ], + '9975': [ # Moldova + ('0', '0'), + ('100', '399'), + ('4000', '4499'), + ('45', '89'), + ('900', '949'), + ('9500', '9999'), + ], + '9976': [ # Tanzania + ('0', '5'), + ('60', '89'), + ('900', '989'), + ('9990', '9999'), + ], + '9977': [ # Costa Rica + ('00', '89'), + ('900', '989'), + ('9900', '9999'), + ], + '9978': [ # Ecuador + ('00', '29'), + ('300', '399'), + ('40', '94'), + ('950', '989'), + ('9900', '9999'), + ], + '9979': [ # Iceland + ('0', '4'), + ('50', '64'), + ('650', '659'), + ('66', '75'), + ('760', '899'), + ('9000', '9999'), + ], + '9980': [ # Papua New Guinea + ('0', '3'), + ('40', '89'), + ('900', '989'), + ('9900', '9999'), + ], + '9981': [ # Morocco + ('00', '09'), + ('100', '159'), + ('1600', '1999'), + ('20', '79'), + ('800', '949'), + ('9500', '9999'), + ], + '9982': [ # Zambia + ('00', '79'), + ('800', '989'), + ('9900', '9999'), + ], + '9983': [ # Gambia + ('80', '94'), + ('950', '989'), + ('9900', '9999'), + ], + '9984': [ # Latvia + ('00', '49'), + ('500', '899'), + ('9000', '9999'), + ], + '9985': [ # Estonia + ('0', '4'), + ('50', '79'), + ('800', '899'), + ('9000', '9999'), + ], + '9986': [ # Lithuania + ('00', '39'), + ('400', '899'), + ('9000', '9399'), + ('940', '969'), + ('97', '99'), + ], + '9987': [ # Tanzania + ('00', '39'), + ('400', '879'), + ('8800', '9999'), + ], + 
'9988': [ # Ghana + ('0', '2'), + ('30', '54'), + ('550', '749'), + ('7500', '9999'), + ], + '9989': [ # Macedonia + ('0', '0'), + ('100', '199'), + ('2000', '2999'), + ('30', '59'), + ('600', '949'), + ('9500', '9999'), + ], + '99901': [ # Bahrain + ('00', '49'), + ('500', '799'), + ('80', '99'), + ], + '99902': [ # Gabon - no ranges fixed yet + ], + '99903': [ # Mauritius + ('0', '1'), + ('20', '89'), + ('900', '999'), + ], + '99904': [ # Netherlands Antilles; Aruba, Neth. Ant + ('0', '5'), + ('60', '89'), + ('900', '999'), + ], + '99905': [ # Bolivia + ('0', '3'), + ('40', '79'), + ('800', '999'), + ], + '99906': [ # Kuwait + ('0', '2'), + ('30', '59'), + ('600', '699'), + ('70', '89'), + ('9', '9'), + ], + '99908': [ # Malawi + ('0', '0'), + ('10', '89'), + ('900', '999'), + ], + '99909': [ # Malta + ('0', '3'), + ('40', '94'), + ('950', '999'), + ], + '99910': [ # Sierra Leone + ('0', '2'), + ('30', '89'), + ('900', '999'), + ], + '99911': [ # Lesotho + ('00', '59'), + ('600', '999'), + ], + '99912': [ # Botswana + ('0', '3'), + ('400', '599'), + ('60', '89'), + ('900', '999'), + ], + '99913': [ # Andorra + ('0', '2'), + ('30', '35'), + ('600', '604'), + ], + '99914': [ # Suriname + ('0', '4'), + ('50', '89'), + ('900', '949'), + ], + '99915': [ # Maldives + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99916': [ # Namibia + ('0', '2'), + ('30', '69'), + ('700', '999'), + ], + '99917': [ # Brunei Darussalam + ('0', '2'), + ('30', '89'), + ('900', '999'), + ], + '99918': [ # Faroe Islands + ('0', '3'), + ('40', '79'), + ('800', '999'), + ], + '99919': [ # Benin + ('0', '2'), + ('40', '69'), + ('900', '999'), + ], + '99920': [ # Andorra + ('0', '4'), + ('50', '89'), + ('900', '999'), + ], + '99921': [ # Qatar + ('0', '1'), + ('20', '69'), + ('700', '799'), + ('8', '8'), + ('90', '99'), + ], + '99922': [ # Guatemala + ('0', '3'), + ('40', '69'), + ('700', '999'), + ], + '99923': [ # El Salvador + ('0', '1'), + ('20', '79'), + ('800', '999'), + ], + 
'99924': [ # Nicaragua + ('0', '2'), + ('30', '79'), + ('800', '999'), + ], + '99925': [ # Paraguay + ('0', '3'), + ('40', '79'), + ('800', '999'), + ], + '99926': [ # Honduras + ('0', '0'), + ('10', '59'), + ('600', '999'), + ], + '99927': [ # Albania + ('0', '2'), + ('30', '59'), + ('600', '999'), + ], + '99928': [ # Georgia + ('0', '0'), + ('10', '79'), + ('800', '999'), + ], + '99929': [ # Mongolia + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99930': [ # Armenia + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99931': [ # Seychelles + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99932': [ # Malta + ('0', '0'), + ('10', '59'), + ('600', '699'), + ('7', '7'), + ('80', '99'), + ], + '99933': [ # Nepal + ('0', '2'), + ('30', '59'), + ('600', '999'), + ], + '99934': [ # Dominican Republic + ('0', '1'), + ('20', '79'), + ('800', '999'), + ], + '99935': [ # Haiti + ('0', '2'), + ('7', '8'), + ('30', '59'), + ('600', '699'), + ('90', '99'), + ], + '99936': [ # Bhutan + ('0', '0'), + ('10', '59'), + ('600', '999'), + ], + '99937': [ # Macau + ('0', '1'), + ('20', '59'), + ('600', '999'), + ], + '99938': [ # Srpska + ('0', '1'), + ('20', '59'), + ('600', '899'), + ('90', '99'), + ], + '99939': [ # Guatemala + ('0', '5'), + ('60', '89'), + ('900', '999'), + ], + '99940': [ # Georgia + ('0', '0'), + ('10', '69'), + ('700', '999'), + ], + '99941': [ # Armenia + ('0', '2'), + ('30', '79'), + ('800', '999'), + ], + '99942': [ # Sudan + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99943': [ # Alsbania + ('0', '2'), + ('30', '59'), + ('600', '999'), + ], + '99944': [ # Ethiopia + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99945': [ # Namibia + ('0', '5'), + ('60', '89'), + ('900', '999'), + ], + '99946': [ # Nepal + ('0', '2'), + ('30', '59'), + ('600', '999'), + ], + '99947': [ # Tajikistan + ('0', '2'), + ('30', '69'), + ('700', '999'), + ], + '99948': [ # Eritrea + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99949': [ 
# Mauritius + ('0', '1'), + ('20', '89'), + ('900', '999'), + ], + '99950': [ # Cambodia + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99951': [ # Congo - no ranges fixed yet + ], + '99952': [ # Mali + ('0', '4'), + ('50', '79'), + ('800', '999'), + ], + '99953': [ # Paraguay + ('0', '2'), + ('30', '79'), + ('800', '999'), + ], + '99954': [ # Bolivia + ('0', '2'), + ('30', '69'), + ('700', '999'), + ], + '99955': [ # Srpska + ('0', '1'), + ('20', '59'), + ('600', '899'), + ('90', '99'), + ], + '99956': [ # Albania + ('00', '59'), + ('600', '999'), + ], +} + +class IsbnBot: + def __init__(self, generator): + self.generator = generator + + def run(self): + for page in self.generator: + try: + text = page.get(get_redirect = self.touch_redirects) + # convert ISBN numbers + page.put(text) + except pywikibot.NoPage: + print "Page %s does not exist?!" % page.title(asLink=True) + except pywikibot.IsRedirectPage: + print "Page %s is a redirect; skipping." % page.title(asLink=True) + except pywikibot.LockedPage: + print "Page %s is locked?!" % page.title(asLink=True) + + +class InvalidIsbnException(pywikibot.Error): + """Invalid ISBN""" + +class ISBN: + """ + Abstract superclass + """ + + def format(self): + """ + Puts hyphens into this ISBN number. + """ + result = '' + rest = '' + for digit in self.digits(): + rest += str(digit) + # Determine the prefix (if any) + for prefix in self.possiblePrefixes(): + if rest.startswith(prefix): + result += prefix + '-' + rest = rest[len(prefix):] + break + + # Determine the group + for groupNumber in ranges.iterkeys(): + if rest.startswith(groupNumber): + result += groupNumber + '-' + rest = rest[len(groupNumber):] + publisherRanges = ranges[groupNumber] + break + else: + raise InvalidIsbnException('ISBN %s: group number unknown.' 
% self.code) + + # Determine the publisher + for (start, end) in publisherRanges: + length = len(start) # NOTE: start and end always have equal length + if rest[:length] > start and rest[:length] <= end: + result += rest[:length] + '-' + rest = rest[length:] + break + else: + raise InvalidIsbnException('ISBN %s: publisher number unknown.' % self.code) + + # The rest is the item number and the 1-digit checksum. + result += rest[:-1] + '-' + rest[-1] + self.code = result + +class ISBN13(ISBN): + def __init__(self, code, checksumMissing = False): + self.code = code + if checksumMissing: + self.code += str(self.calculateChecksum()) + self.checkValidity() + + def possiblePrefixes(self): + return ['978', '979'] + + def digits(self): + """ + Returns a list of the digits in the ISBN code. + """ + result = [] + for c in self.code: + if c.isdigit(): + result.append(int(c)) + elif c != '-': + raise InvalidIsbnException('The ISBN %s contains invalid characters.' % self.code) + return result + + def checkValidity(self): + if len(self.digits()) != 13: + raise InvalidIsbnException('The ISBN %s is not 13 digits long.' % self.code) + if self.calculateChecksum() != self.digits()[-1]: + raise InvalidIsbnException('The ISBN checksum of %s is incorrect.' % self.code) + + def calculateChecksum(self): + # See http://en.wikipedia.org/wiki/ISBN#Check_digit_in_ISBN_13 + sum = 0 + for i in range(0, 13 - 1, 2): + sum += self.digits()[i] + for i in range(1, 13 - 1, 2): + sum += 3 * self.digits()[i] + return (10 - (sum % 10)) % 10 + +class ISBN10(ISBN): + def __init__(self, code): + self.code = code + self.checkValidity() + + def possiblePrefixes(self): + return [] + + def digits(self): + """ + Returns a list of the digits and Xs in the ISBN code. + """ + result = [] + for c in self.code: + if c.isdigit() or c in 'Xx': + result.append(c) + elif c != '-': + raise InvalidIsbnException('The ISBN %s contains invalid characters.' 
% self.code) + return result + + def checkChecksum(self): + """ + Raises an InvalidIsbnException if the checksum shows that the + ISBN is incorrect. + """ + # See http://en.wikipedia.org/wiki/ISBN#Check_digit_in_ISBN_10 + sum = 0 + for i in range(0, 9): + sum += (i + 1) * int(self.digits()[i]) + #print sum + checksum = sum % 11 + #print checksum + lastDigit = self.digits()[-1] + #print lastDigit + if not ((checksum == 10 and lastDigit in 'Xx') or (lastDigit.isdigit() and checksum == int(lastDigit))): + raise InvalidIsbnException('The ISBN checksum of %s is incorrect.' % self.code) + + def checkValidity(self): + if len(self.digits()) != 10: + raise InvalidIsbnException('The ISBN %s is not 10 digits long.' % self.code) + if 'X' in self.digits()[:-1] or 'x' in self.digits()[:-1]: + raise InvalidIsbnException('ISBN %s: X is only allowed at the end of the ISBN.' % self.code) + self.checkChecksum() + + def toISBN13(self): + """ + Creates a 13-digit ISBN from this 10-digit ISBN by prefixing the GS1 + prefix '978' and recalculating the checksum. + The hyphenation structure is taken from the format of the original + ISBN number. 
+ """ + code = '978-' + self.code[:-1] + + #cs = self.calculateChecksum() + #code += str(cs) + return ISBN13(code, checksumMissing = True) + + def format(self): + # load overridden superclass method + ISBN.format(self) + # capitalize checksum + if self.code[-1] == 'x': + self.code = self.code[:-1] + 'X' + +def getIsbn(code): + try: + i = ISBN13(code) + except InvalidIsbnException, e13: + try: + i = ISBN10(code) + except InvalidIsbnException, e10: + raise InvalidIsbnException(u'ISBN-13: %s / ISBN-10: %s' % (e13, e10)) + return i + +def _hyphenateIsbnNumber(match): + """ + Helper function to deal with a single ISBN + """ + code = match.group('code') + try: + i = getIsbn(code) + except InvalidIsbnException: + # don't change + return code + i.format() + return i.code + +def hyphenateIsbnNumbers(text): + isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d-]+[Xx]?)') + text = isbnR.sub(_hyphenateIsbnNumber, text) + return text + +def _isbn10toIsbn13(match): + """ + Helper function to deal with a single ISBN + """ + code = match.group('code') + try: + i = getIsbn(code) + except InvalidIsbnException: + # don't change + return code + i13 = i.toISBN13() + return i13.code + +def convertIsbn10toIsbn13(text): + isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d-]+[Xx]?)') + text = isbnR.sub(_isbn10toIsbn13, text) + return text + +class IsbnBot: + + def __init__(self, generator, to13 = False, format = False, always = False): + self.generator = generator + self.to13 = to13 + self.format = format + self.always = always + self.isbnR = re.compile(r'(?<=ISBN )(?P<code>[\d-]+[Xx]?)') + self.comment = pywikibot.translate(pywikibot.getSite(), msg) + + def treat(self, page): + try: + oldText = page.get() + for match in self.isbnR.finditer(oldText): + code = match.group('code') + try: + getIsbn(code) + except InvalidIsbnException, e: + pywikibot.output(e) + + newText = oldText + if self.to13: + newText = self.isbnR.sub(_isbn10toIsbn13, newText) + + if self.format: + newText = 
self.isbnR.sub(_hyphenateIsbnNumber, newText) + self.save(page, newText) + except pywikibot.NoPage: + pywikibot.output(u"Page %s does not exist?!" % page.title(asLink=True)) + except pywikibot.IsRedirectPage: + pywikibot.output(u"Page %s is a redirect; skipping." % page.title(asLink=True)) + except pywikibot.LockedPage: + pywikibot.output(u"Page %s is locked?!" % page.title(asLink=True)) + + def save(self, page, text): + if text != page.get(): + # Show the title of the page we're working on. + # Highlight the title in purple. + pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) + pywikibot.showDiff(page.get(), text) + if not self.always: + choice = pywikibot.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'Always yes'], ['y', 'N', 'a'], 'N') + if choice == 'n': + return + elif choice == 'a': + self.always = True + + if self.always: + try: + page.put(text, comment=self.comment) + except pywikibot.EditConflict: + pywikibot.output(u'Skipping %s because of edit conflict' % (page.title(),)) + except pywikibot.SpamfilterError, e: + pywikibot.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) + except pywikibot.LockedPage: + pywikibot.output(u'Skipping %s (locked page)' % (page.title(),)) + else: + # Save the page in the background. No need to catch exceptions. + page.put(text, comment=self.comment, async=True) + + + def run(self): + for page in self.generator: + self.treat(page) + + +def main(): + #page generator + gen = None + # This temporary array is used to read the page title if one single + # page to work on is specified by the arguments. + pageTitle = [] + # Which namespaces should be processed? + # default to [] which means all namespaces will be processed + namespaces = [] + # This factory is responsible for processing command line arguments + # that are also used by other scripts and that determine on which pages + # to work on. 
+ genFactory = pagegenerators.GeneratorFactory() + # Never ask before changing a page + always = False + to13 = False + format = False + + for arg in pywikibot.handleArgs(): + if arg.startswith('-namespace:'): + try: + namespaces.append(int(arg[11:])) + except ValueError: + namespaces.append(arg[11:]) + elif arg == '-always': + always = True + elif arg == '-to13': + to13 = True + elif arg == '-format': + format = True + else: + if not genFactory.handleArg(arg): + pageTitle.append(arg) + + site = pywikibot.getSite() + if pageTitle: + gen = iter([pywikibot.Page(pywikibot.Link(t, site)) for t in pageTitle]) + if not gen: + gen = genFactory.getCombinedGenerator() + if not gen: + pywikibot.showHelp('isbn') + else: + if namespaces != []: + gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces) + preloadingGen = pagegenerators.PreloadingGenerator(gen) + bot = IsbnBot(preloadingGen, to13 = to13, format = format, always = always) + bot.run() + +if __name__ == "__main__": + try: + main() + finally: + pywikibot.stopme()