Revision: 5094
Author: russblau
Date: 2008-02-28 22:58:33 +0000 (Thu, 28 Feb 2008)

Log Message:
-----------
Improve screening for malformed redirect targets, and don't use "dict" as a local variable name.

Modified Paths:
--------------
trunk/pywikipedia/redirect.py

Modified: trunk/pywikipedia/redirect.py =================================================================== --- trunk/pywikipedia/redirect.py 2008-02-28 18:56:51 UTC (rev 5093) +++ trunk/pywikipedia/redirect.py 2008-02-28 22:58:33 UTC (rev 5094) @@ -114,7 +114,7 @@ targets are the values. ''' xmlFilename = self.xmlFilename - dict = {} + redict = {} # open xml dump and read page titles out of it dump = xmlreader.XmlDump(xmlFilename) site = wikipedia.getSite() @@ -151,23 +151,24 @@ source = entry.title.replace(' ', '_') target = target.replace(' ', '_') # remove leading and trailing whitespace - target = target.strip() + target = target.strip('_') # capitalize the first letter if not wikipedia.getSite().nocapitalize: - source = source[0].upper() + source[1:] - target = target[0].upper() + target[1:] + source = source[:1].upper() + source[1:] + target = target[:1].upper() + target[1:] if '#' in target: - target = target[:target.index('#')] + target = target[:target.index('#')].rstrip("_") if '|' in target: wikipedia.output( u'HINT: %s is a redirect with a pipelink.' % entry.title) - target = target[:target.index('|')] - dict[source] = target + target = target[:target.index('|')].rstrip("_") + if target: # in case preceding steps left nothing + redict[source] = target if alsoGetPageTitles: - return dict, pageTitles + return redict, pageTitles else: - return dict + return redict
def retrieve_broken_redirects(self): if self.xmlFilename == None: @@ -216,16 +217,16 @@ for redir_name in redir_names: yield redir_name else: - dict = self.get_redirects_from_dump() + redict = self.get_redirects_from_dump() num = 0 - for (key, value) in dict.iteritems(): + for (key, value) in redict.iteritems(): num += 1 # check if the value - that is, the redirect target - is a # redirect as well - if num > self.offset and dict.has_key(value): + if num > self.offset and redict.has_key(value): yield key wikipedia.output(u'\nChecking redirect %i of %i...' - % (num + 1, len(dict))) + % (num + 1, len(redict)))
class RedirectRobot: def __init__(self, action, generator, always = False):