Revision: 4278
Author: wikipedian
Date: 2007-09-13 19:07:07 +0000 (Thu, 13 Sep 2007)

Log Message:
-----------
bugfixes

Modified Paths:
--------------
trunk/pywikipedia/redirect.py

Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2007-09-13 18:50:34 UTC (rev 4277)
+++ trunk/pywikipedia/redirect.py 2007-09-13 19:07:07 UTC (rev 4278)
@@ -88,6 +88,7 @@
         self.namespace = namespace
         self.restart = restart
 
+    # TODO: I think namespaces are ignored here.
     def get_redirects_from_dump(self, alsoGetPageTitles = False):
         '''
         Loads a local XML dump file, looks at all pages which have the redirect flag
@@ -138,6 +139,8 @@
                         wikipedia.output(u'HINT: %s is a redirect with a pipelink.' % entry.title)
                         target = target[:target.index('|')]
                     dict[source] = target
+        print len(dict)
+        print len(pageTitles)
         if alsoGetPageTitles:
             return dict, pageTitles
         else:
@@ -164,7 +167,7 @@
             wikipedia.output(u'Getting a list of all redirects and of all page titles...')
             redirs, pageTitles = self.get_redirects_from_dump(alsoGetPageTitles = True)
             for (key, value) in redirs.iteritems():
-                if not pagetitles.has_key(value):
+                if value not in pagetitles:
                     yield key
 
     def retrieve_double_redirects(self):
@@ -242,7 +245,7 @@
                 else:
                     try:
                         secondTargetPage = secondRedir.getRedirectTarget()
-                        anchorMatch = re.search(u'#(?P<section>.*)$', target)
+                        anchorMatch = re.search(u'#(?P<section>.*)$', secondRedir.title())
                         if anchorMatch and not u'#' in secondTargetPage.title():
                             secondTarget = wikipedia.Page(mysite, '%s#%s' % (secondTargetPage.sectionFreeTitle(), anchorMatch.group('section')))
                     except wikipedia.SectionError:
@@ -254,7 +257,6 @@
                     else:
                         wikipedia.output(u'%s is a redirect to %s, which is a redirect to %s. Fixing...' % (redir.aslink(), secondRedir.aslink(), secondTargetPage.aslink()))
                         txt = mysite.redirectRegex().sub('#REDIRECT [[%s]]')
-                        txt = redir.get(get_redirect=True).replace('[['+target,'[['+secondTargetPage.title())
                         wikipedia.showDiff(redir.get(get_redirect=True), txt)
                         try:
                             redir.put(txt)