Revision: 3932 Author: wikipedian Date: 2007-08-01 16:24:02 +0000 (Wed, 01 Aug 2007)
Log Message: ----------- don't crash when the Internet Archive gives a 403, e.g. on http://web.archive.org/web/*/http://highmarkfunds.stockpoint.com/highmarkfun...
Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2007-08-01 16:17:20 UTC (rev 3931)
+++ trunk/pywikipedia/weblinkchecker.py 2007-08-01 16:24:02 UTC (rev 3932)
@@ -147,7 +147,12 @@
     def getArchiveURL(self):
         wikipedia.output(u'Consulting the Internet Archive for %s' % self.url)
         archiveURL = 'http://web.archive.org/web/*/%s' % self.url
-        f = urllib2.urlopen(archiveURL)
+        try:
+            f = urllib2.urlopen(archiveURL)
+        except urllib2.HTTPError:
+            # The Internet Archive yields a 403 error when the site was not
+            # archived due to robots.txt restrictions.
+            return None
         text = f.read()
         if text.find("Search Results for ") != -1:
             return archiveURL