Revision: 5681 Author: nicdumz Date: 2008-07-06 13:20:50 +0000 (Sun, 06 Jul 2008)
Log Message: ----------- * Overriding urllib.FancyURLopener.http_error_default to catch 403 and 404 errors: without this, trying to access a nonexistent path led to strange behavior. In particular, 404 pages using a different encoding than the site's encoding were raising "code2encodings has wrong charset"... * Removing the useless and misleading Site.charset
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-07-06 10:31:36 UTC (rev 5680) +++ trunk/pywikipedia/wikipedia.py 2008-07-06 13:20:50 UTC (rev 5681) @@ -5574,12 +5574,11 @@
def checkCharset(self, charset): """Warn if charset returned by wiki doesn't match family file.""" - if not hasattr(self,'charset'): - self.charset = charset - assert self.charset.lower() == charset.lower(), \ + fromFamily = self.encoding() + assert fromFamily.lower() == charset.lower(), \ "charset for %s changed from %s to %s" \ - % (repr(self), self.charset, charset) - if self.encoding().lower() != charset.lower(): + % (repr(self), fromFamily, charset) + if fromFamily.lower() != charset.lower(): raise ValueError( "code2encodings has wrong charset for %s. It should be %s, but is %s" % (repr(self), charset, self.encoding())) @@ -6414,6 +6413,14 @@
class MyURLopener(urllib.FancyURLopener): version="PythonWikipediaBot/1.0" + + def http_error_default(self, url, fp, errcode, errmsg, headers): + if errcode == 401 or errcode == 404: + raise PageNotFound(u'Page %s could not be retrieved. Check your family file ?' % url) + else: + return urllib.FancyURLopener(self, url, fp, errcode, errmsg, headers) + +
# Special opener in case we are using a site with authentication if config.authenticate: