Revision: 5139 Author: russblau Date: 2008-03-17 16:11:27 +0000 (Mon, 17 Mar 2008)
Log Message: ----------- code and docstring cleanup in replace.py; in wikipedia.py, test callable(new) instead of catching TypeError when applying a replacement
Modified Paths: -------------- trunk/pywikipedia/replace.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/replace.py =================================================================== --- trunk/pywikipedia/replace.py 2008-03-17 14:27:11 UTC (rev 5138) +++ trunk/pywikipedia/replace.py 2008-03-17 16:11:27 UTC (rev 5139) @@ -74,10 +74,10 @@ -allowoverlap When occurences of the pattern overlap, replace all of them. Be careful, this might lead to an infinite loop.
-other: First argument is the old text, second argument is the new text. - If the -regex argument is given, the first argument will be - regarded as a regular expression, and the second argument might - contain expressions like \1 or \g<name>. +other: First argument is the old text, second argument is the new + text. If the -regex argument is given, the first argument + will be regarded as a regular expression, and the second + argument might contain expressions like \1 or \g<name>.
Examples:
@@ -193,11 +193,7 @@
def next(self): try: - while True: - try: - entry = self.parser.next() - except StopIteration: - raise + for entry in self.parser: if self.skipping: if entry.title != self.xmlStart: continue @@ -206,7 +202,8 @@ and not self.isTextExcepted(entry.text): new_text = entry.text for old, new in self.replacements: - new_text = wikipedia.replaceExcept(new_text, old, new, self.excsInside) + new_text = wikipedia.replaceExcept( + new_text, old, new, self.excsInside) if new_text != entry.text: return wikipedia.Page(self.site, entry.title) except KeyboardInterrupt: @@ -237,20 +234,23 @@ """ A bot that can do text replacements. """ - def __init__(self, generator, replacements, exceptions = {}, acceptall = False, allowoverlap = False, - recursive = False, addedCat = None, sleep = None): + def __init__(self, generator, replacements, exceptions={}, + acceptall=False, allowoverlap=False, recursive=False, + addedCat=None, sleep=None): """ Arguments: * generator - A generator that yields Page objects. - * replacements - A list of 2-tuples of original text (as a compiled - regular expression) and replacement text (as a - string). + * replacements - A list of 2-tuples of original text (as a + compiled regular expression) and replacement + text (as a string). * exceptions - A dictionary which defines when not to change an occurence. See below. * acceptall - If True, the user won't be prompted before changes are made. - * allowoverlap - If True, when matches overlap, all of them are replaced. - * addedCat - If set to a value, add this category to every page touched. + * allowoverlap - If True, when matches overlap, all of them are + replaced. + * addedCat - If set to a value, add this category to every page + touched.
Structure of the exceptions dictionary: This dictionary can have these keys: @@ -269,6 +269,7 @@ inside-tags A list of strings. These strings must be keys from the exceptionRegexes dictionary in wikipedia.replaceExcept(). + """ self.generator = generator self.replacements = replacements @@ -291,7 +292,8 @@
def isTextExcepted(self, original_text): """ - Iff one of the exceptions applies for the given page contents, returns True. + Iff one of the exceptions applies for the given page contents, + returns True. """ if self.exceptions.has_key('text-contains'): for exc in self.exceptions['text-contains']: @@ -301,8 +303,8 @@
def doReplacements(self, original_text): """ - Returns the text which is generated by applying all replacements to the - given text. + Returns the text which is generated by applying all replacements to + the given text. """ new_text = original_text exceptions = [] @@ -313,7 +315,8 @@ for old, new in self.replacements: if self.sleep != None: time.sleep(self.sleep) - new_text = wikipedia.replaceExcept(new_text, old, new, exceptions, allowoverlap = self.allowoverlap) + new_text = wikipedia.replaceExcept(new_text, old, new, exceptions, + allowoverlap=self.allowoverlap) return new_text
def run(self): @@ -323,6 +326,11 @@ # Run the generator which will yield Pages which might need to be # changed. for page in self.generator: + if self.isTitleExcepted(page.title()): + wikipedia.output( + u'Skipping %s because the title is on the exceptions list.' + % page.aslink()) + continue try: # Load the page's text from the wiki original_text = page.get() @@ -334,48 +342,55 @@ continue except wikipedia.IsRedirectPage: original_text = page.get(get_redirect=True) - if self.isTitleExcepted(page.title()): - wikipedia.output(u'Skipping %s because the title is on the exceptions list.' % page.aslink()) - else: - if self.isTextExcepted(original_text): - wikipedia.output(u'Skipping %s because it contains text that is on the exceptions list.' % page.aslink()) - continue - new_text = self.doReplacements(original_text) - if new_text == original_text: - wikipedia.output('No changes were necessary in %s' % page.aslink()) - else: - if self.recursive: - newest_text = self.doReplacements(new_text) - while (newest_text!=new_text): - new_text = newest_text - newest_text = self.doReplacements(new_text) + if self.isTextExcepted(original_text): + wikipedia.output( +u'Skipping %s because it contains text that is on the exceptions list.' + % page.aslink()) + continue + new_text = self.doReplacements(original_text) + if new_text == original_text: + wikipedia.output('No changes were necessary in %s' + % page.aslink()) + continue + if self.recursive: + newest_text = self.doReplacements(new_text) + while (newest_text!=new_text): + new_text = newest_text + newest_text = self.doReplacements(new_text)
- if self.addedCat: - cats = page.categories() - if self.addedCat not in cats: - cats.append(self.addedCat) - new_text = wikipedia.replaceCategoryLinks(new_text, cats) - # Show the title of the page we're working on. - # Highlight the title in purple. - wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) - wikipedia.showDiff(original_text, new_text) - if not self.acceptall: - choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') - if choice in ['a', 'A']: - self.acceptall = True - if choice in ['y', 'Y']: - page.put_async(new_text) - if self.acceptall: - try: - page.put(new_text) - except wikipedia.EditConflict: - wikipedia.output(u'Skipping %s because of edit conflict' % (page.title(),)) - except wikipedia.SpamfilterError, e: - wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) - except wikipedia.PageNotSaved, error: - wikipedia.output(u'Error putting page: %s' % (error.args,)) - except wikipedia.LockedPage: - wikipedia.output(u'Skipping %s (locked page)' % (page.title(),)) + if self.addedCat: + cats = page.categories() + if self.addedCat not in cats: + cats.append(self.addedCat) + new_text = wikipedia.replaceCategoryLinks(new_text, cats) + # Show the title of the page we're working on. + # Highlight the title in purple. 
+ wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" + % page.title()) + wikipedia.showDiff(original_text, new_text) + if not self.acceptall: + choice = wikipedia.inputChoice( + u'Do you want to accept these changes?', + ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N') + if choice in ['a', 'A']: + self.acceptall = True + if choice in ['y', 'Y']: + page.put_async(new_text) + if self.acceptall: + try: + page.put(new_text) + except wikipedia.EditConflict: + wikipedia.output(u'Skipping %s because of edit conflict' + % (page.title(),)) + except wikipedia.SpamfilterError, e: + wikipedia.output( + u'Cannot change %s because of blacklist entry %s' + % (page.title(), e.url)) + except wikipedia.PageNotSaved, error: + wikipedia.output(u'Error putting page: %s' % (error.args,)) + except wikipedia.LockedPage: + wikipedia.output(u'Skipping %s (locked page)' + % (page.title(),))
def prepareRegexForMySQL(pattern): pattern = pattern.replace('\s', '[:space:]') @@ -410,13 +425,13 @@ regex = False # Predefined fixes from dictionary 'fixes' (see above). fix = None - # the dump's path, either absolute or relative, which will be used when source - # is 'xmldump'. + # the dump's path, either absolute or relative, which will be used + # if -xml flag is present xmlFilename = None useSql = False PageTitles = [] - # will become True when the user presses a ('yes to all') or uses the -always - # commandline paramater. + # will become True when the user presses a ('yes to all') or uses the + # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False @@ -434,7 +449,8 @@ # Load default summary message. # BUG WARNING: This is probably incompatible with the -lang parameter. wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg)) - # Between a regex and another (using -fix) sleep some time (not to waste too much CPU + # Between a regex and another (using -fix) sleep some time (not to waste + # too much CPU sleep = None
# Read commandline parameters. @@ -457,7 +473,8 @@ useSql = True elif arg.startswith('-page'): if len(arg) == 5: - PageTitles.append(wikipedia.input(u'Which page do you want to change?')) + PageTitles.append(wikipedia.input( + u'Which page do you want to change?')) else: PageTitles.append(arg[6:]) elif arg.startswith('-excepttitle:'): @@ -498,27 +515,37 @@ if (len(commandline_replacements) % 2): raise wikipedia.Error, 'require even number of replacements.' elif (len(commandline_replacements) == 2 and fix == None): - replacements.append((commandline_replacements[0], commandline_replacements[1])) + replacements.append((commandline_replacements[0], + commandline_replacements[1])) if summary_commandline == None: - wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) % ' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')') + wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) + % (' (-' + commandline_replacements[0] + ' +' + + commandline_replacements[1] + ')')) elif (len(commandline_replacements) > 1): if (fix == None): for i in xrange (0, len(commandline_replacements), 2): replacements.append((commandline_replacements[i], commandline_replacements[i + 1])) if summary_commandline == None: - pairs = [(commandline_replacements[i], commandline_replacements[i + 1]) for i in range(0, len(commandline_replacements), 2)] - replacementsDescription = '(' + ', '.join([('-' + pair[0] + ' +' + pair[1]) for pair in pairs]) + ')' - wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) % replacementsDescription) + pairs = [( commandline_replacements[i], + commandline_replacements[i + 1] ) + for i in range(0, len(commandline_replacements), 2)] + replacementsDescription = '(%s)' % ', '.join( + [('-' + pair[0] + ' +' + pair[1]) for pair in pairs]) + wikipedia.setAction( + wikipedia.translate(wikipedia.getSite(), msg ) + % replacementsDescription) else: - raise wikipedia.Error, 'Specifying -fix with replacements is 
undefined' + raise wikipedia.Error( + 'Specifying -fix with replacements is undefined') elif fix == None: old = wikipedia.input(u'Please enter the text that should be replaced:') new = wikipedia.input(u'Please enter the new text:') change = '(-' + old + ' +' + new replacements.append((old, new)) while True: - old = wikipedia.input(u'Please enter another text that should be replaced, or press Enter to start:') + old = wikipedia.input( +u'Please enter another text that should be replaced, or press Enter to start:') if old == '': change = change + ')' break @@ -527,8 +554,10 @@ replacements.append((old, new)) if not summary_commandline == True: default_summary_message = wikipedia.translate(wikipedia.getSite(), msg) % change - wikipedia.output(u'The summary message will default to: %s' % default_summary_message) - summary_message = wikipedia.input(u'Press Enter to use this default message, or enter a description of the changes your bot will make:') + wikipedia.output(u'The summary message will default to: %s' + % default_summary_message) + summary_message = wikipedia.input( +u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:') if summary_message == '': summary_message = default_summary_message wikipedia.setAction(summary_message) @@ -538,18 +567,19 @@ try: fix = fixes.fixes[fix] except KeyError: - wikipedia.output(u'Available predefined fixes are: %s' % fixes.fixes.keys()) + wikipedia.output(u'Available predefined fixes are: %s' + % fixes.fixes.keys()) wikipedia.stopme() sys.exit() if fix.has_key('regex'): regex = fix['regex'] if fix.has_key('msg'): - wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), fix['msg'])) + wikipedia.setAction( + wikipedia.translate(wikipedia.getSite(), fix['msg'])) if fix.has_key('exceptions'): exceptions = fix['exceptions'] replacements = fix['replacements']
- # already compile all regular expressions here to save time later for i in range(len(replacements)): old, new = replacements[i] @@ -560,15 +590,18 @@ else: oldR = re.compile(old, re.UNICODE) replacements[i] = oldR, new + for exceptionCategory in ['title', 'text-contains', 'inside']: if exceptions.has_key(exceptionCategory): patterns = exceptions[exceptionCategory] if not regex: patterns = [re.escape(pattern) for pattern in patterns] if caseInsensitive: - patterns = [re.compile(pattern, re.UNICODE | re.IGNORECASE) for pattern in patterns] + patterns = [re.compile(pattern, re.UNICODE | re.IGNORECASE) + for pattern in patterns] else: - patterns = [re.compile(pattern, re.UNICODE) for pattern in patterns] + patterns = [re.compile(pattern, re.UNICODE) + for pattern in patterns] exceptions[exceptionCategory] = patterns
if xmlFilename: @@ -594,7 +627,8 @@ gen = pagegenerators.MySQLPageGenerator(query)
elif PageTitles: - pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles] + pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) + for PageTitle in PageTitles] gen = iter(pages)
if not gen: @@ -609,7 +643,8 @@ preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20) else: preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 60) - bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, None, sleep) + bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, + allowoverlap, recursive, None, sleep) bot.run()
if __name__ == "__main__":
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-03-17 14:27:11 UTC (rev 5138) +++ trunk/pywikipedia/wikipedia.py 2008-03-17 16:11:27 UTC (rev 5139) @@ -2985,10 +2985,10 @@ index = nextExceptionMatch.end() else: # We found a valid match. Replace it. - try: + if callable(new): # the parameter new can be a function which takes the match as a parameter. replacement = new(match) - except TypeError: + else: # it is not a function, but a string.
# it is a little hack to make \n work. It would be better to fix it