http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9422
Revision: 9422 Author: saper Date: 2011-07-29 20:25:35 +0000 (Fri, 29 Jul 2011) Log Message: ----------- Better diagnostic of HTTP issues
Store the response in a LinkChecker instance variable so that it can be referred to later when troubleshooting.
Add check(url) module function for fast one-off checking from the Python interpreter:
Python 2.7.1 (r271:86832, Apr 19 2011, 02:35:09)
[GCC 4.3.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import weblinkchecker
>>> weblinkchecker.check("http://www.czeslawmroczek.pl/")
(False, '403 Forbidden')
Modified Paths: -------------- trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py =================================================================== --- trunk/pywikipedia/weblinkchecker.py 2011-07-24 17:02:43 UTC (rev 9421) +++ trunk/pywikipedia/weblinkchecker.py 2011-07-29 20:25:35 UTC (rev 9422) @@ -318,7 +318,7 @@ % self.host) conn = self.getConnection() conn.request('HEAD', '/', None, self.header) - response = conn.getresponse() + self.response = conn.getresponse()
self.readEncodingFromResponse(response) except: @@ -377,9 +377,9 @@ else: conn.request('GET', '%s%s' % (self.path, self.query), None, self.header) - response = conn.getresponse() + self.response = conn.getresponse() # read the server's encoding, in case we need it later - self.readEncodingFromResponse(response) + self.readEncodingFromResponse(self.response) except httplib.BadStatusLine: # Some servers don't seem to handle HEAD requests properly, # e.g. http://www.radiorus.ru/ which is running on a very old @@ -389,9 +389,9 @@ return self.resolveRedirect(useHEAD = False) else: raise - if response.status >= 300 and response.status <= 399: + if self.response.status >= 300 and self.response.status <= 399: #print response.getheaders() - redirTarget = response.getheader('Location') + redirTarget = self.response.getheader('Location') if redirTarget: try: redirTarget.encode('ascii') @@ -499,16 +499,16 @@ except socket.error, error: return False, u'Socket Error: %s' % repr(error[1]) try: - response = conn.getresponse() + self.response = conn.getresponse() except Exception, error: return False, u'Error: %s' % error # read the server's encoding, in case we need it later - self.readEncodingFromResponse(response) + self.readEncodingFromResponse(self.response) # site down if the server status is between 400 and 499 - alive = response.status not in range(400, 500) - if response.status in self.HTTPignore: + alive = self.response.status not in range(400, 500) + if self.response.status in self.HTTPignore: alive = False - return alive, '%s %s' % (response.status, response.reason) + return alive, '%s %s' % (self.response.status, self.response.reason)
class LinkCheckThread(threading.Thread): ''' @@ -816,6 +816,11 @@ i += 1 return i
+def check(url): + """Peform a check on URL""" + c = LinkChecker(url) + return c.check() + def main(): gen = None singlePageTitle = []
pywikipedia-svn@lists.wikimedia.org