Revision: 7424
Author:   alexsh
Date:     2009-10-11 06:31:54 +0000 (Sun, 11 Oct 2009)

Log Message:
-----------
comment out self.persistent_http in postData and getUrl

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py
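The diff below disables the (already dead) persistent-connection branches: postData() now always opens a fresh httplib connection chosen by protocol, and getUrl() always goes through the URL-opener / urllib2 path. A minimal sketch of the per-request connection pattern that the new postData() code follows — the host, path, and payload here are placeholders, not values from the codebase:

    import httplib

    def open_connection(protocol, hostname):
        # One fresh connection per request; no Keep-Alive is requested.
        # Other protocols are unsupported, as the comment in
        # wikipedia.py notes.
        if protocol == 'http':
            return httplib.HTTPConnection(hostname)
        elif protocol == 'https':
            return httplib.HTTPSConnection(hostname)
        raise ValueError('unsupported protocol: %r' % protocol)

    conn = open_connection('http', 'example.org')    # placeholder host
    data = 'action=query&format=json'                # placeholder payload
    conn.putrequest('POST', '/w/api.php')            # placeholder path
    conn.putheader('Content-Length', str(len(data)))
    conn.putheader('Content-type', 'application/x-www-form-urlencoded')
    conn.endheaders()
    conn.send(data)
    response = conn.getresponse()
    print response.status, response.reason
    conn.close()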
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-10-10 16:46:59 UTC (rev 7423)
+++ trunk/pywikipedia/wikipedia.py	2009-10-11 06:31:54 UTC (rev 7424)
@@ -123,7 +123,7 @@
 __version__ = '$Id$'

 import os, sys
-import httplib, socket, urllib
+import httplib, socket, urllib, urllib2
 import traceback
 import time, threading, Queue
 import math
@@ -5052,15 +5052,15 @@

         # TODO: add the authenticate stuff here

-        if False: #self.persistent_http:
-            conn = self.conn
-        else:
-            # Encode all of this into a HTTP request
-            if self.protocol() == 'http':
-                conn = httplib.HTTPConnection(self.hostname())
-            elif self.protocol() == 'https':
-                conn = httplib.HTTPSConnection(self.hostname())
-            # otherwise, it will crash, as other protocols are not supported
+        #if False: #self.persistent_http:
+        #    conn = self.conn
+        #else:
+        # Encode all of this into a HTTP request
+        if self.protocol() == 'http':
+            conn = httplib.HTTPConnection(self.hostname())
+        elif self.protocol() == 'https':
+            conn = httplib.HTTPSConnection(self.hostname())
+        # otherwise, it will crash, as other protocols are not supported

         conn.putrequest('POST', address)
         conn.putheader('Content-Length', str(len(data)))
@@ -5068,8 +5068,8 @@
         conn.putheader('User-agent', useragent)
         if cookies:
             conn.putheader('Cookie', cookies)
-        if False: #self.persistent_http:
-            conn.putheader('Connection', 'Keep-Alive')
+        #if False: #self.persistent_http:
+        #    conn.putheader('Connection', 'Keep-Alive')
         if compress:
             conn.putheader('Accept-encoding', 'gzip')
         conn.endheaders()
@@ -5121,82 +5121,82 @@
         if retry is None:
             retry=config.retry_on_fail

-        if False: #self.persistent_http and not data:
-            self.conn.putrequest('GET', path)
-            self.conn.putheader('User-agent', useragent)
-            self.conn.putheader('Cookie', self.cookies(sysop = sysop))
-            self.conn.putheader('Connection', 'Keep-Alive')
-            if compress:
-                self.conn.putheader('Accept-encoding', 'gzip')
-            self.conn.endheaders()
+        #if False: #self.persistent_http and not data:
+        #    self.conn.putrequest('GET', path)
+        #    self.conn.putheader('User-agent', useragent)
+        #    self.conn.putheader('Cookie', self.cookies(sysop = sysop))
+        #    self.conn.putheader('Connection', 'Keep-Alive')
+        #    if compress:
+        #        self.conn.putheader('Accept-encoding', 'gzip')
+        #    self.conn.endheaders()

-            # Prepare the return values
-            # Note that this can raise network exceptions which are not
-            # caught here.
-            try:
-                response = self.conn.getresponse()
-            except httplib.BadStatusLine:
-                # Blub.
-                self.conn.close()
-                self.conn.connect()
-                return self.getUrl(path, retry, sysop, data, compress, back_response=back_response)
+        #    # Prepare the return values
+        #    # Note that this can raise network exceptions which are not
+        #    # caught here.
+        #    try:
+        #        response = self.conn.getresponse()
+        #    except httplib.BadStatusLine:
+        #        # Blub.
+        #        self.conn.close()
+        #        self.conn.connect()
+        #        return self.getUrl(path, retry, sysop, data, compress, back_response=back_response)

-            text = response.read()
-            headers = dict(response.getheaders())
+        #    text = response.read()
+        #    headers = dict(response.getheaders())

+        #else:
+        if self.hostname() in config.authenticate.keys():
+            uo = authenticateURLopener
         else:
-            if self.hostname() in config.authenticate.keys():
-                uo = authenticateURLopener
-            else:
-                uo = MyURLopener()
-            if self.cookies(sysop = sysop):
-                uo.addheader('Cookie', self.cookies(sysop = sysop))
-            if compress:
-                uo.addheader('Accept-encoding', 'gzip')
-            if no_hostname == True: # This allow users to parse also toolserver's script
-                url = path # and other useful pages without using some other functions.
-            else:
-                url = '%s://%s%s' % (self.protocol(), self.hostname(), path)
-            data = self.urlEncode(data)
+            uo = MyURLopener()
+        if self.cookies(sysop = sysop):
+            uo.addheader('Cookie', self.cookies(sysop = sysop))
+        if compress:
+            uo.addheader('Accept-encoding', 'gzip')
+        if no_hostname == True: # This allow users to parse also toolserver's script
+            url = path # and other useful pages without using some other functions.
+        else:
+            url = '%s://%s%s' % (self.protocol(), self.hostname(), path)
+        data = self.urlEncode(data)

-            # Try to retrieve the page until it was successfully loaded (just in
-            # case the server is down or overloaded).
-            # Wait for retry_idle_time minutes (growing!) between retries.
-            retry_idle_time = 1
-            retrieved = False
-            while not retrieved:
-                try:
-                    if self.hostname() in config.authenticate.keys():
-                        request = urllib2.Request(url, data)
-                        request.add_header('User-agent', useragent)
-                        opener = urllib2.build_opener()
-                        f = opener.open(request)
-                    else:
-                        f = uo.open(url, data)
+        # Try to retrieve the page until it was successfully loaded (just in
+        # case the server is down or overloaded).
+        # Wait for retry_idle_time minutes (growing!) between retries.
+        retry_idle_time = 1
+        retrieved = False
+        while not retrieved:
+            try:
+                if self.hostname() in config.authenticate.keys():
+                    request = urllib2.Request(url, data)
+                    request.add_header('User-agent', useragent)
+                    opener = urllib2.build_opener()
+                    f = opener.open(request)
+                else:
+                    f = uo.open(url, data)

-                    # read & info can raise socket.error
-                    text = f.read()
-                    headers = f.info()
+                # read & info can raise socket.error
+                text = f.read()
+                headers = f.info()

-                    retrieved = True
-                except KeyboardInterrupt:
-                    raise
-                except Exception, e:
-                    if retry:
-                        # We assume that the server is down. Wait some time, then try again.
-                        output(u"%s" % e)
-                        output(u"""\
+                retrieved = True
+            except KeyboardInterrupt:
+                raise
+            except Exception, e:
+                if retry:
+                    # We assume that the server is down. Wait some time, then try again.
+                    output(u"%s" % e)
+                    output(u"""\
 WARNING: Could not open '%s'. Maybe the server or
 your connection is down. Retrying in %i minutes..."""
-                               % (url,
-                                  retry_idle_time))
-                        time.sleep(retry_idle_time * 60)
-                        # Next time wait longer, but not longer than half an hour
-                        retry_idle_time *= 2
-                        if retry_idle_time > 30:
-                            retry_idle_time = 30
-                    else:
-                        raise
+                           % (url,
+                              retry_idle_time))
+                    time.sleep(retry_idle_time * 60)
+                    # Next time wait longer, but not longer than half an hour
+                    retry_idle_time *= 2
+                    if retry_idle_time > 30:
+                        retry_idle_time = 30
+                else:
+                    raise

         if cookie_only:
             return headers.get('set-cookie', '')
@@ -5207,10 +5207,10 @@
         if int(headers.get('content-length', '0')) != len(text) and 'content-length' in headers:
             output(u'Warning! len(text) does not match content-length: %s != %s' % \
                 (len(text), headers.get('content-length')))
-            if False: #self.persistent_http
-                self.conn.close()
-                self.conn.connect()
-                return self.getUrl(path, retry, sysop, data, compress, back_response=back_response)
+            #if False: #self.persistent_http
+            #    self.conn.close()
+            #    self.conn.connect()
+            return self.getUrl(path, retry, sysop, data, compress, no_hostname, cookie_only, back_response)

         if compress and contentEncoding == 'gzip':
             text = decompress_gzip(text)
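In the rewritten getUrl(), hosts listed in config.authenticate are fetched with the newly imported urllib2 instead of the shared URL opener: a fresh urllib2.Request is built, given the User-agent header, and opened per request. A standalone sketch of that branch — the URL and user agent here are placeholders:

    import urllib2

    url = 'http://example.org/w/index.php?title=Foo'  # placeholder URL
    useragent = 'PythonWikipediaBot/1.0'              # placeholder agent

    request = urllib2.Request(url)       # pass data=... to make it a POST
    request.add_header('User-agent', useragent)
    opener = urllib2.build_opener()
    f = opener.open(request)
    text = f.read()      # read & info can raise socket.error
    headers = f.info()   # response headers, a mimetools.Message-like object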
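The retry loop that this revision keeps waits retry_idle_time minutes between attempts, doubling the delay after each failure but capping it at half an hour. The same backoff pattern in isolation — the fetch function and names are illustrative, not taken from wikipedia.py:

    import time, urllib

    def fetch_with_retry(url, retry=True):
        # Doubling backoff capped at 30 minutes, mirroring getUrl().
        retry_idle_time = 1
        while True:
            try:
                f = urllib.urlopen(url)
                return f.read(), f.info()
            except KeyboardInterrupt:
                raise
            except Exception, e:
                if not retry:
                    raise
                print 'WARNING: could not open %r (%s), retrying in %i minutes' \
                      % (url, e, retry_idle_time)
                time.sleep(retry_idle_time * 60)
                # Next time wait longer, but not longer than half an hour.
                retry_idle_time = min(retry_idle_time * 2, 30)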
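Finally, when compress is set and the server answers with Content-encoding: gzip, the body is inflated by the module's decompress_gzip() helper before use. A rough standalone equivalent, assuming the raw response bytes are in hand (the helper's actual body in wikipedia.py may differ):

    import gzip, StringIO

    def decompress_gzip(data):
        # gzip.GzipFile wants a file-like object, so wrap the raw
        # HTTP body (a byte string) in StringIO before inflating.
        return gzip.GzipFile(fileobj=StringIO.StringIO(data)).read()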