Revision: 3956 Author: wikipedian Date: 2007-08-02 23:11:32 +0000 (Thu, 02 Aug 2007)
Log Message: ----------- Bugfix: only the BadStatusLine exception has a 'line' attribute; the other httplib errors don't, so the HTTP error message now reports only the exception class name.
Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2007-08-02 16:47:03 UTC (rev 3955) +++ trunk/pywikipedia/weblinkchecker.py 2007-08-02 23:11:32 UTC (rev 3956) @@ -230,6 +230,7 @@ pass
def changeUrl(self, url): + print url self.url = url # we ignore the fragment self.scheme, self.host, self.path, self.query, self.fragment = urlparse.urlsplit(self.url) @@ -248,7 +249,7 @@ self.path = unicode(urllib.quote(self.path.encode(encoding))) self.query = unicode(urllib.quote(self.query.encode(encoding), '=&'))
- def resolveRedirect(self, useHEAD = True): + def resolveRedirect(self, useHEAD = False): ''' Requests the header from the server. If the page is an HTTP redirect, returns the redirect target URL as a string. Otherwise returns None. @@ -305,24 +306,22 @@ else: return False # not a redirect
- def check(self, useHEAD = True): + def check(self, useHEAD = False): """ Returns True and the server status message if the page is alive. Otherwise returns false """ try: wasRedirected = self.resolveRedirect(useHEAD = useHEAD) - except UnicodeError, arg: - return False, u'Encoding Error: %s (%s)' % (arg.__class__.__name__, unicode(arg)) - except httplib.error, arg: - return False, u'HTTP Error: %s (%s)' % (arg.__class__.__name__, arg.line) - except socket.error, arg: - # TODO: decode arg[1]. On Linux, it's encoded in UTF-8. + except UnicodeError, error: + return False, u'Encoding Error: %s (%s)' % (error.__class__.__name__, unicode(error)) + except httplib.error, error: + return False, u'HTTP Error: %s' % error.__class__.__name__ + except socket.error, error: + # TODO: decode error[1]. On Linux, it's encoded in UTF-8. # How is it encoded in Windows? Or can we somehow just # get the English message? - return False, u'Socket Error: %s' % repr(arg[1]) - #except UnicodeEncodeError, arg: - # return False, u'Non-ASCII Characters in URL: %s' % arg + return False, u'Socket Error: %s' % repr(error[1]) if wasRedirected: if self.url in self.redirectChain: if useHEAD: @@ -352,18 +351,16 @@ else: try: conn = self.getConnection() - except httplib.error, arg: - return False, u'HTTP Error: %s (%s)' % (arg.__class__.__name__, arg.line) + except httplib.error, error: + return False, u'HTTP Error: %s' % error.__class__.__name__ try: conn.request('GET', '%s%s' % (self.path, self.query), None, self.header) - except socket.error, arg: - return False, u'Socket Error: %s' % repr(arg[1]) - #except UnicodeEncodeError, arg: - # return False, u'Non-ASCII Characters in URL: %s' % arg + except socket.error, error: + return False, u'Socket Error: %s' % repr(error[1]) try: response = conn.getresponse() - except Exception, arg: - return False, u'Error: %s' % arg + except Exception, error: + return False, u'Error: %s' % error # read the server's encoding, in case we need it later 
self.readEncodingFromResponse(response) # site down if the server status is between 400 and 499