Revision: 5139
Author: russblau
Date: 2008-03-17 16:11:27 +0000 (Mon, 17 Mar 2008)
Log Message:
-----------
code and docstring cleanup in replace.py
Modified Paths:
--------------
trunk/pywikipedia/replace.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2008-03-17 14:27:11 UTC (rev 5138)
+++ trunk/pywikipedia/replace.py 2008-03-17 16:11:27 UTC (rev 5139)
@@ -74,10 +74,10 @@
-allowoverlap When occurrences of the pattern overlap, replace all of them.
Be careful, this might lead to an infinite loop.
-other: First argument is the old text, second argument is the new text.
- If the -regex argument is given, the first argument will be
- regarded as a regular expression, and the second argument might
- contain expressions like \\1 or \g<name>.
+other: First argument is the old text, second argument is the new
+ text. If the -regex argument is given, the first argument
+ will be regarded as a regular expression, and the second
+ argument might contain expressions like \\1 or \g<name>.
Examples:
@@ -193,11 +193,7 @@
def next(self):
try:
- while True:
- try:
- entry = self.parser.next()
- except StopIteration:
- raise
+ for entry in self.parser:
if self.skipping:
if entry.title != self.xmlStart:
continue
@@ -206,7 +202,8 @@
and not self.isTextExcepted(entry.text):
new_text = entry.text
for old, new in self.replacements:
- new_text = wikipedia.replaceExcept(new_text, old, new, self.excsInside)
+ new_text = wikipedia.replaceExcept(
+ new_text, old, new, self.excsInside)
if new_text != entry.text:
return wikipedia.Page(self.site, entry.title)
except KeyboardInterrupt:
@@ -237,20 +234,23 @@
"""
A bot that can do text replacements.
"""
- def __init__(self, generator, replacements, exceptions = {}, acceptall = False, allowoverlap = False,
- recursive = False, addedCat = None, sleep = None):
+ def __init__(self, generator, replacements, exceptions={},
+ acceptall=False, allowoverlap=False, recursive=False,
+ addedCat=None, sleep=None):
"""
Arguments:
* generator - A generator that yields Page objects.
- * replacements - A list of 2-tuples of original text (as a compiled
- regular expression) and replacement text (as a
- string).
+ * replacements - A list of 2-tuples of original text (as a
+ compiled regular expression) and replacement
+ text (as a string).
* exceptions - A dictionary which defines when not to change an
occurrence. See below.
* acceptall - If True, the user won't be prompted before changes
are made.
- * allowoverlap - If True, when matches overlap, all of them are replaced.
- * addedCat - If set to a value, add this category to every page touched.
+ * allowoverlap - If True, when matches overlap, all of them are
+ replaced.
+ * addedCat - If set to a value, add this category to every page
+ touched.
Structure of the exceptions dictionary:
This dictionary can have these keys:
@@ -269,6 +269,7 @@
inside-tags
A list of strings. These strings must be keys from the
exceptionRegexes dictionary in wikipedia.replaceExcept().
+
"""
self.generator = generator
self.replacements = replacements
@@ -291,7 +292,8 @@
def isTextExcepted(self, original_text):
"""
- Iff one of the exceptions applies for the given page contents, returns True.
+ Iff one of the exceptions applies for the given page contents,
+ returns True.
"""
if self.exceptions.has_key('text-contains'):
for exc in self.exceptions['text-contains']:
@@ -301,8 +303,8 @@
def doReplacements(self, original_text):
"""
- Returns the text which is generated by applying all replacements to the
- given text.
+ Returns the text which is generated by applying all replacements to
+ the given text.
"""
new_text = original_text
exceptions = []
@@ -313,7 +315,8 @@
for old, new in self.replacements:
if self.sleep != None:
time.sleep(self.sleep)
- new_text = wikipedia.replaceExcept(new_text, old, new, exceptions, allowoverlap = self.allowoverlap)
+ new_text = wikipedia.replaceExcept(new_text, old, new, exceptions,
+ allowoverlap=self.allowoverlap)
return new_text
def run(self):
@@ -323,6 +326,11 @@
# Run the generator which will yield Pages which might need to be
# changed.
for page in self.generator:
+ if self.isTitleExcepted(page.title()):
+ wikipedia.output(
+ u'Skipping %s because the title is on the exceptions list.'
+ % page.aslink())
+ continue
try:
# Load the page's text from the wiki
original_text = page.get()
@@ -334,48 +342,55 @@
continue
except wikipedia.IsRedirectPage:
original_text = page.get(get_redirect=True)
- if self.isTitleExcepted(page.title()):
- wikipedia.output(u'Skipping %s because the title is on the exceptions list.' % page.aslink())
- else:
- if self.isTextExcepted(original_text):
- wikipedia.output(u'Skipping %s because it contains text that is on the exceptions list.' % page.aslink())
- continue
- new_text = self.doReplacements(original_text)
- if new_text == original_text:
- wikipedia.output('No changes were necessary in %s' % page.aslink())
- else:
- if self.recursive:
- newest_text = self.doReplacements(new_text)
- while (newest_text!=new_text):
- new_text = newest_text
- newest_text = self.doReplacements(new_text)
+ if self.isTextExcepted(original_text):
+ wikipedia.output(
+u'Skipping %s because it contains text that is on the exceptions list.'
+ % page.aslink())
+ continue
+ new_text = self.doReplacements(original_text)
+ if new_text == original_text:
+ wikipedia.output('No changes were necessary in %s'
+ % page.aslink())
+ continue
+ if self.recursive:
+ newest_text = self.doReplacements(new_text)
+ while (newest_text!=new_text):
+ new_text = newest_text
+ newest_text = self.doReplacements(new_text)
- if self.addedCat:
- cats = page.categories()
- if self.addedCat not in cats:
- cats.append(self.addedCat)
- new_text = wikipedia.replaceCategoryLinks(new_text, cats)
- # Show the title of the page we're working on.
- # Highlight the title in purple.
- wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
- wikipedia.showDiff(original_text, new_text)
- if not self.acceptall:
- choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
- if choice in ['a', 'A']:
- self.acceptall = True
- if choice in ['y', 'Y']:
- page.put_async(new_text)
- if self.acceptall:
- try:
- page.put(new_text)
- except wikipedia.EditConflict:
- wikipedia.output(u'Skipping %s because of edit conflict' % (page.title(),))
- except wikipedia.SpamfilterError, e:
- wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
- except wikipedia.PageNotSaved, error:
- wikipedia.output(u'Error putting page: %s' % (error.args,))
- except wikipedia.LockedPage:
- wikipedia.output(u'Skipping %s (locked page)' % (page.title(),))
+ if self.addedCat:
+ cats = page.categories()
+ if self.addedCat not in cats:
+ cats.append(self.addedCat)
+ new_text = wikipedia.replaceCategoryLinks(new_text, cats)
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
+ % page.title())
+ wikipedia.showDiff(original_text, new_text)
+ if not self.acceptall:
+ choice = wikipedia.inputChoice(
+ u'Do you want to accept these changes?',
+ ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+ if choice in ['a', 'A']:
+ self.acceptall = True
+ if choice in ['y', 'Y']:
+ page.put_async(new_text)
+ if self.acceptall:
+ try:
+ page.put(new_text)
+ except wikipedia.EditConflict:
+ wikipedia.output(u'Skipping %s because of edit conflict'
+ % (page.title(),))
+ except wikipedia.SpamfilterError, e:
+ wikipedia.output(
+ u'Cannot change %s because of blacklist entry %s'
+ % (page.title(), e.url))
+ except wikipedia.PageNotSaved, error:
+ wikipedia.output(u'Error putting page: %s' % (error.args,))
+ except wikipedia.LockedPage:
+ wikipedia.output(u'Skipping %s (locked page)'
+ % (page.title(),))
def prepareRegexForMySQL(pattern):
pattern = pattern.replace('\s', '[:space:]')
@@ -410,13 +425,13 @@
regex = False
# Predefined fixes from dictionary 'fixes' (see above).
fix = None
- # the dump's path, either absolute or relative, which will be used when source
- # is 'xmldump'.
+ # the dump's path, either absolute or relative, which will be used
+ # if -xml flag is present
xmlFilename = None
useSql = False
PageTitles = []
- # will become True when the user presses a ('yes to all') or uses the -always
- # commandline paramater.
+ # will become True when the user presses a ('yes to all') or uses the
+ # -always flag.
acceptall = False
# Will become True if the user inputs the commandline parameter -nocase
caseInsensitive = False
@@ -434,7 +449,8 @@
# Load default summary message.
# BUG WARNING: This is probably incompatible with the -lang parameter.
wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))
- # Between a regex and another (using -fix) sleep some time (not to waste too much CPU
+ # Between a regex and another (using -fix) sleep some time (not to waste
+ # too much CPU)
sleep = None
# Read commandline parameters.
@@ -457,7 +473,8 @@
useSql = True
elif arg.startswith('-page'):
if len(arg) == 5:
- PageTitles.append(wikipedia.input(u'Which page do you want to change?'))
+ PageTitles.append(wikipedia.input(
+ u'Which page do you want to change?'))
else:
PageTitles.append(arg[6:])
elif arg.startswith('-excepttitle:'):
@@ -498,27 +515,37 @@
if (len(commandline_replacements) % 2):
raise wikipedia.Error, 'require even number of replacements.'
elif (len(commandline_replacements) == 2 and fix == None):
- replacements.append((commandline_replacements[0], commandline_replacements[1]))
+ replacements.append((commandline_replacements[0],
+ commandline_replacements[1]))
if summary_commandline == None:
- wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) % ' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')')
+ wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg )
+ % (' (-' + commandline_replacements[0] + ' +'
+ + commandline_replacements[1] + ')'))
elif (len(commandline_replacements) > 1):
if (fix == None):
for i in xrange (0, len(commandline_replacements), 2):
replacements.append((commandline_replacements[i],
commandline_replacements[i + 1]))
if summary_commandline == None:
- pairs = [(commandline_replacements[i], commandline_replacements[i + 1]) for i in range(0, len(commandline_replacements), 2)]
- replacementsDescription = '(' + ', '.join([('-' + pair[0] + ' +' + pair[1]) for pair in pairs]) + ')'
- wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) % replacementsDescription)
+ pairs = [( commandline_replacements[i],
+ commandline_replacements[i + 1] )
+ for i in range(0, len(commandline_replacements), 2)]
+ replacementsDescription = '(%s)' % ', '.join(
+ [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
+ wikipedia.setAction(
+ wikipedia.translate(wikipedia.getSite(), msg )
+ % replacementsDescription)
else:
- raise wikipedia.Error, 'Specifying -fix with replacements is undefined'
+ raise wikipedia.Error(
+ 'Specifying -fix with replacements is undefined')
elif fix == None:
old = wikipedia.input(u'Please enter the text that should be replaced:')
new = wikipedia.input(u'Please enter the new text:')
change = '(-' + old + ' +' + new
replacements.append((old, new))
while True:
- old = wikipedia.input(u'Please enter another text that should be replaced, or press Enter to start:')
+ old = wikipedia.input(
+u'Please enter another text that should be replaced, or press Enter to start:')
if old == '':
change = change + ')'
break
@@ -527,8 +554,10 @@
replacements.append((old, new))
if not summary_commandline == True:
default_summary_message = wikipedia.translate(wikipedia.getSite(), msg) % change
- wikipedia.output(u'The summary message will default to: %s' % default_summary_message)
- summary_message = wikipedia.input(u'Press Enter to use this default message, or enter a description of the changes your bot will make:')
+ wikipedia.output(u'The summary message will default to: %s'
+ % default_summary_message)
+ summary_message = wikipedia.input(
+u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:')
if summary_message == '':
summary_message = default_summary_message
wikipedia.setAction(summary_message)
@@ -538,18 +567,19 @@
try:
fix = fixes.fixes[fix]
except KeyError:
- wikipedia.output(u'Available predefined fixes are: %s' % fixes.fixes.keys())
+ wikipedia.output(u'Available predefined fixes are: %s'
+ % fixes.fixes.keys())
wikipedia.stopme()
sys.exit()
if fix.has_key('regex'):
regex = fix['regex']
if fix.has_key('msg'):
- wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), fix['msg']))
+ wikipedia.setAction(
+ wikipedia.translate(wikipedia.getSite(), fix['msg']))
if fix.has_key('exceptions'):
exceptions = fix['exceptions']
replacements = fix['replacements']
-
# already compile all regular expressions here to save time later
for i in range(len(replacements)):
old, new = replacements[i]
@@ -560,15 +590,18 @@
else:
oldR = re.compile(old, re.UNICODE)
replacements[i] = oldR, new
+
for exceptionCategory in ['title', 'text-contains', 'inside']:
if exceptions.has_key(exceptionCategory):
patterns = exceptions[exceptionCategory]
if not regex:
patterns = [re.escape(pattern) for pattern in patterns]
if caseInsensitive:
- patterns = [re.compile(pattern, re.UNICODE | re.IGNORECASE) for pattern in patterns]
+ patterns = [re.compile(pattern, re.UNICODE | re.IGNORECASE)
+ for pattern in patterns]
else:
- patterns = [re.compile(pattern, re.UNICODE) for pattern in patterns]
+ patterns = [re.compile(pattern, re.UNICODE)
+ for pattern in patterns]
exceptions[exceptionCategory] = patterns
if xmlFilename:
@@ -594,7 +627,8 @@
gen = pagegenerators.MySQLPageGenerator(query)
elif PageTitles:
- pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles]
+ pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
+ for PageTitle in PageTitles]
gen = iter(pages)
if not gen:
@@ -609,7 +643,8 @@
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
else:
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 60)
- bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, None, sleep)
+ bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
+ allowoverlap, recursive, None, sleep)
bot.run()
if __name__ == "__main__":
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-03-17 14:27:11 UTC (rev 5138)
+++ trunk/pywikipedia/wikipedia.py 2008-03-17 16:11:27 UTC (rev 5139)
@@ -2985,10 +2985,10 @@
index = nextExceptionMatch.end()
else:
# We found a valid match. Replace it.
- try:
+ if callable(new):
# the parameter new can be a function which takes the match as a parameter.
replacement = new(match)
- except TypeError:
+ else:
# it is not a function, but a string.
# it is a little hack to make \n work. It would be better to fix it