Revision: 7521
Author: alexsh
Date: 2009-10-21 02:12:58 +0000 (Wed, 21 Oct 2009)
Log Message:
-----------
wikipedia.py: Site().postData(): replace httplib with MyURLopener; drop all uses of response.reason; rename response.status to response.code; remove all httplib.BadStatusLine handling.
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
trunk/pywikipedia/login.py
trunk/pywikipedia/query.py
trunk/pywikipedia/upload.py
trunk/pywikipedia/userlib.py
trunk/pywikipedia/weblinkchecker.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/interwiki.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -1596,7 +1596,7 @@
timeout=60
while 1:
try:
- status, reason, data = page.put(newtext, comment = mcomment)
+ status, data = page.put(newtext, comment = mcomment)
except wikipedia.LockedPage:
wikipedia.output(u'Page %s is locked. Skipping.' % (page.title(),))
raise SaveError
@@ -1628,7 +1628,7 @@
if str(status) == '302':
return True
else:
- wikipedia.output(u'%s %s' % (status, reason))
+ wikipedia.output(u'%s' % status)
return False
elif answer == 'g':
raise GiveUpOnPage
Modified: trunk/pywikipedia/login.py
===================================================================
--- trunk/pywikipedia/login.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/login.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -182,7 +182,7 @@
fakepredata['wpPassword'] = fakepredata['lgpassword'] = u'XXXXX'
wikipedia.output(u"self.site.postData(%s, %s)" % (address, self.site.urlEncode(fakepredata)))
fakeresponsemsg = re.sub(r"(session|Token)=..........", r"session=XXXXXXXXXX", response.msg.__str__())
- wikipedia.output(u"%s/%s\n%s" % (response.status, response.reason, fakeresponsemsg))
+ wikipedia.output(u"%s\n%s" % (response.code, fakeresponsemsg))
wikipedia.output(u"%s" % data)
Reat=re.compile(': (.*?);')
L = []
Modified: trunk/pywikipedia/query.py
===================================================================
--- trunk/pywikipedia/query.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/query.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -115,11 +115,6 @@
(('file', params['filename'].encode(site.encoding()), cont),),
site.cookies(sysop=sysop)
)
- elif site.hostname() in wikipedia.config.authenticate.keys():
- params["Content-type"] = "application/x-www-form-urlencoded"
- params["User-agent"] = useragent
- res = urllib2.urlopen(urllib2.Request(site.protocol() + '://' + site.hostname() + address, site.urlEncode(params)))
- jsontext = res.read()
elif params['action'] in postAC:
res, jsontext = site.postForm(path, params, sysop, site.cookies(sysop = sysop) )
else:
Modified: trunk/pywikipedia/upload.py
===================================================================
--- trunk/pywikipedia/upload.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/upload.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -347,14 +347,14 @@
# ATTENTION: if you changed your Wikimedia Commons account not to show
# an English interface, this detection will fail!
success_msg = self.targetSite.mediawiki_message('successfulupload')
- if success_msg in returned_html or response.status == 302:
+ if success_msg in returned_html or response.code == 302:
wikipedia.output(u"Upload successful.")
# The following is not a good idea, because the server also gives a 200 when
# something went wrong.
- #if response.status in [200, 302]:
+ #if response.code in [200, 302]:
# wikipedia.output(u"Upload successful.")
- elif response.status == 301:
+ elif response.code == 301:
wikipedia.output(u"Following redirect...")
address = response.getheader('Location')
wikipedia.output(u"Changed upload address to %s. Please update %s.py" % (address, self.targetSite.family.__module__))
@@ -368,7 +368,7 @@
except:
pass
wikipedia.output(u'%s\n\n' % returned_html)
- wikipedia.output(u'%i %s' % (response.status, response.reason))
+ wikipedia.output(u'%i' % response.code)
if self.targetSite.mediawiki_message('uploadwarning') in returned_html:
answer = wikipedia.inputChoice(u"You have recevied an upload warning message. Ignore?", ['Yes', 'No'], ['y', 'N'], 'N')
Modified: trunk/pywikipedia/userlib.py
===================================================================
--- trunk/pywikipedia/userlib.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/userlib.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -554,7 +554,7 @@
address = self.site().unblock_address()
response, data = self.site().postForm(address, predata, sysop = True)
- if response.status != 302:
+ if response.code != 302:
if self.site().mediawiki_message('ipb_cant_unblock').replace('$1',blockID) in data:
raise AlreadyUnblockedError
raise UnblockError, data
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/weblinkchecker.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -394,7 +394,7 @@
return self.resolveRedirect(useHEAD = False)
else:
raise
- if response.status >= 300 and response.status <= 399:
+ if response.code >= 300 and response.code <= 399:
#print response.getheaders()
redirTarget = response.getheader('Location')
if redirTarget:
@@ -493,10 +493,10 @@
# read the server's encoding, in case we need it later
self.readEncodingFromResponse(response)
# site down if the server status is between 400 and 499
- alive = response.status not in range(400, 500)
- if response.status in self.HTTPignore:
+ alive = response.code not in range(400, 500)
+ if response.code in self.HTTPignore:
alive = False
- return alive, '%s %s' % (response.status, response.reason)
+ return alive, '%s' % response.code
class LinkCheckThread(threading.Thread):
'''
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-10-20 23:28:49 UTC (rev 7520)
+++ trunk/pywikipedia/wikipedia.py 2009-10-21 02:12:58 UTC (rev 7521)
@@ -123,7 +123,7 @@
__version__ = '$Id$'
import os, sys
-import httplib, socket, urllib, urllib2
+import socket, urllib, urllib2
import traceback
import time, threading, Queue
import math
@@ -1647,8 +1647,6 @@
response, data = query.GetData(params, self.site(), sysop=sysop, back_response = True)
if query.IsString(data):
raise KeyError
- except httplib.BadStatusLine, line:
- raise PageNotSaved('Bad status line: %s' % line.line)
except ServerError:
output(u''.join(traceback.format_exception(*sys.exc_info())))
retry_attempt += 1
@@ -1673,7 +1671,7 @@
# Check blocks
self.site().checkBlocks(sysop = sysop)
# A second text area means that an edit conflict has occured.
- if response.status == 500:
+ if response.code == 500:
output(u"Server error encountered; will retry in %i minute%s."
% (retry_delay, retry_delay != 1 and "s" or ""))
time.sleep(60 * retry_delay)
@@ -1698,8 +1696,8 @@
#for debug only
#------------------------
if verbose:
- output("error occured, code:%s\ninfo:%s\nstatus:%s\nresponse:%s" % (
- data['error']['code'], data['error']['info'], response.status, response.reason))
+ output("error occured, code:%s\ninfo:%s\nstatus:%s" % (
+ data['error']['code'], data['error']['info'], response.code))
faked = params
if 'text' in faked:
del faked['text']
@@ -1708,7 +1706,7 @@
#------------------------
errorCode = data['error']['code']
#cannot handle longpageerror and PageNoSave yet
- if errorCode == 'maxlag' or response.status == 503:
+ if errorCode == 'maxlag' or response.code == 503:
# server lag; wait for the lag time and retry
m = re.search('Waiting for (.+?): (.+?) seconds lagged', data['error']['info'])
timelag = int(m.group(2))
@@ -1793,13 +1791,13 @@
# if the page update is successed, we need to return code 302 for cheat script who
# using status code
#
- return 302, response.reason, data['edit']
+ return 302, data['edit']
solve = self.site().solveCaptcha(data)
if solve:
return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
- return response.status, response.reason, data
+ return response.code, data
def _putPageOld(self, text, comment=None, watchArticle=False, minorEdit=True,
@@ -1886,7 +1884,7 @@
return None
try:
response, data = self.site().postForm(address, predata, sysop)
- if response.status == 503:
+ if response.code == 503:
if 'x-database-lag' in response.msg.keys():
# server lag; Mediawiki recommends waiting 5 seconds
# and retrying
@@ -1898,9 +1896,7 @@
wait = min(wait*2, 300)
continue
# Squid error 503
- raise ServerError(response.status)
- except httplib.BadStatusLine, line:
- raise PageNotSaved('Bad status line: %s' % line.line)
+ raise ServerError(response.code)
except ServerError:
output(u''.join(traceback.format_exception(*sys.exc_info())))
retry_attempt += 1
@@ -1992,7 +1988,7 @@
# to "Wikipedia has a problem", but I'm not sure. Maybe we could
# just check for HTTP Status 500 (Internal Server Error)?
if ("<title>Wikimedia Error</title>" in data or "has a problem</title>" in data) \
- or response.status == 500:
+ or response.code == 500:
output(u"Server error encountered; will retry in %i minute%s."
% (retry_delay, retry_delay != 1 and "s" or ""))
time.sleep(60 * retry_delay)
@@ -2053,7 +2049,7 @@
# Something went wrong, and we don't know what. Show the
# HTML code that hopefully includes some error message.
output(u"ERROR: Unexpected response from wiki server.")
- output(u" %s (%s) " % (response.status, response.reason))
+ output(u" %s" % response.code)
output(data)
# Unexpected responses should raise an error and not pass,
# be it silently or loudly. This should raise an error
@@ -2062,7 +2058,7 @@
# We are on the preview page, so the page was not saved
raise PageNotSaved
- return response.status, response.reason, data
+ return response.code, data
def canBeEdited(self):
"""Return bool indicating whether this page can be edited.
@@ -3285,7 +3281,7 @@
else:
response, data = self.site().postForm(address, predata, sysop=True)
- if response.status == 302 and not data:
+ if response.code == 302 and not data:
output(u'Changed protection level of page %s.' % self.aslink())
return True
else:
@@ -3293,7 +3289,7 @@
self.site().checkBlocks(sysop = True)
output(u'Failed to change protection level of page %s:'
% self.aslink())
- output(u"HTTP response code %s" % response.status)
+ output(u"HTTP response code %s" % response.code)
output(data)
return False
@@ -3691,7 +3687,7 @@
while True:
try:
data = self.getData()
- except (socket.error, httplib.BadStatusLine, ServerError):
+ except (socket.error, ServerError):
# Print the traceback of the caught exception
output(u''.join(traceback.format_exception(*sys.exc_info())))
output(u'DBG> got network error in _GetAll.run. ' \
@@ -5138,14 +5134,6 @@
if not language[0].upper() + language[1:] in self.namespaces():
self._validlanguages.append(language)
- #if persistent_http is None:
- # persistent_http = config.persistent_http
- #self.persistent_http = persistent_http and self.protocol() in ('http', 'https')
- #if persistent_http:
- # if self.protocol() == 'http':
- # self.conn = httplib.HTTPConnection(self.hostname())
- # elif self.protocol() == 'https':
- # self.conn = httplib.HTTPSConnection(self.hostname())
self.persistent_http = False
def _userIndex(self, sysop = False):
@@ -5389,75 +5377,94 @@
"""
# TODO: add the authenticate stuff here
-
- #if False: #self.persistent_http:
- # conn = self.conn
- #else:
- if config.proxy['host']:
- conn = httplib.HTTPConnection(config.proxy['host'])
- proxyPutAddr = '%s://%s%s' % (self.protocol(), self.hostname(), address)
- conn.putrequest('POST', proxyPutAddr)
- if type(config.proxy['auth']) == tuple:
- import base64
- authcode = base64.b64encode("%s:%s" % (config.proxy['auth'][0], config.proxy['auth'][1]) )
- conn.putheader('Proxy-Authorization', "Basic %s" % authcode )
-
+ if self.hostname() in config.authenticate.keys():
+ uo = authenticateURLopener
else:
- if self.protocol() == 'http':
- conn = httplib.HTTPConnection(self.hostname())
- elif self.protocol() == 'https':
- conn = httplib.HTTPSConnection(self.hostname())
+ if config.proxy['host'] and type(config.proxy['auth']) == tuple:
+ proxyHandle = {'http':'http://%s:%s@%s' % (config.proxy['auth'][0], config.proxy['auth'][1], config.proxy['host'] )}
+ elif config.proxy['host']:
+ proxyHandle = {'http':'http://%s' % config.proxy['host'] }
+ else:
+ proxyHandle = None
- conn.putrequest('POST', address)
-
- # Encode all of this into a HTTP request
- # otherwise, it will crash, as other protocols are not supported
-
+ uo = MyURLopener(proxies = proxyHandle)
+ uo.addheader('Cookie', cookies)
+ if compress:
+ uo.addheader('Accept-encoding', 'gzip')
if address[-1] == "?":
address = address[:-1]
- if self.hostname() in config.authenticate.keys():
- import base64
- authcode = base64.b64encode("%s:%s" % (config.authenticate[self.hostname()][0], config.authenticate[self.hostname()][1]) )
- conn.putheader("Authorization", "Basic %s" % authcode )
-
- conn.putheader('Content-Length', str(len(data)))
- conn.putheader('Content-type', contentType)
- conn.putheader('User-agent', useragent)
- if cookies:
- conn.putheader('Cookie', cookies)
- #if False: #self.persistent_http:
- # conn.putheader('Connection', 'Keep-Alive')
- if compress:
- conn.putheader('Accept-encoding', 'gzip')
- conn.endheaders()
- conn.send(data)
+ url = '%s://%s%s' % (self.protocol(), self.hostname(), address)
- # Prepare the return values
- # Note that this can raise network exceptions which are not
- # caught here.
- try:
- response = conn.getresponse()
- except httplib.BadStatusLine:
- # Blub.
- conn.close()
- conn.connect()
- return self.postData(address, data, contentType, sysop, compress, cookies)
+ # Try to retrieve the page until it was successfully loaded (just in
+ # case the server is down or overloaded).
+ # Wait for retry_idle_time minutes (growing!) between retries.
+ retry_idle_time = 1
+ while True:
+ try:
+ if self.hostname() in config.authenticate.keys():
+ request = urllib2.Request(url, data)
+ request.add_header('User-agent', useragent)
+ opener = urllib2.build_opener()
+ f = opener.open(request)
+ else:
+ f = uo.open(url, data)
- data = response.read()
+ # read & info can raise socket.error
+ text = f.read()
+ headers = f.info()
+ break
+ except KeyboardInterrupt:
+ raise
+ except Exception, e:
+ if retry:
+ # We assume that the server is down. Wait some time, then try again.
+ output(u"%s" % e)
+ output(u"WARNING: Could not open '%s'. Maybe the server or" % url)
+ output(u"your connection is down. Retrying in %i minutes..." % retry_idle_time)
+ time.sleep(retry_idle_time * 60)
+ # Next time wait longer, but not longer than half an hour
+ retry_idle_time *= 2
+ if retry_idle_time > 30:
+ retry_idle_time = 30
+ else:
+ raise
- if compress and response.getheader('Content-Encoding') == 'gzip':
- data = decompress_gzip(data)
+ contentType = headers.get('content-type', '')
+ contentEncoding = headers.get('content-encoding', '')
- data = data.decode(self.encoding())
- response.close()
+ # Ensure that all sent data is received
+ if int(headers.get('content-length', '0')) != len(text) and 'content-length' in headers:
+ output(u'Warning! len(text) does not match content-length: %s != %s' % \
+ (len(text), headers.get('content-length')))
+ return self.postData(path, address, data, contentType, sysop, compress, cookie)
- if True: #not self.persistent_http:
- conn.close()
+ if compress and contentEncoding == 'gzip':
+ text = decompress_gzip(text)
+ R = re.compile('charset=([^\'\";]+)')
+ m = R.search(contentType)
+ if m:
+ charset = m.group(1)
+ else:
+ if verbose:
+ output(u"WARNING: No character set found.")
+ # UTF-8 as default
+ charset = 'utf-8'
+ # Check if this is the charset we expected
+ self.checkCharset(charset)
+ # Convert HTML to Unicode
+ try:
+ text = unicode(text, charset, errors = 'strict')
+ except UnicodeDecodeError, e:
+ print e
+ output(u'ERROR: Invalid characters found on %s://%s%s, replaced by \\ufffd.' % (self.protocol(), self.hostname(), path))
+ # We use error='replace' in case of bad encoding.
+ text = unicode(text, charset, errors = 'replace')
+
# If a wiki page, get user data
- self._getUserDataOld(data, sysop = sysop)
+ self._getUserDataOld(text, sysop = sysop)
- return response, data
+ return f, text
def getUrl(self, path, retry = None, sysop = False, data = None,
compress = True, no_hostname = False, cookie_only=False, back_response=False):
@@ -5524,10 +5531,8 @@
if retry:
# We assume that the server is down. Wait some time, then try again.
output(u"%s" % e)
- output(u"""\
-WARNING: Could not open '%s'. Maybe the server or
-your connection is down. Retrying in %i minutes..."""
- % (url, retry_idle_time))
+ output(u"WARNING: Could not open '%s'. Maybe the server or" % url)
+ output("your connection is down. Retrying in %i minutes..." % retry_idle_time)
time.sleep(retry_idle_time * 60)
# Next time wait longer, but not longer than half an hour
retry_idle_time *= 2
@@ -5545,9 +5550,6 @@
if int(headers.get('content-length', '0')) != len(text) and 'content-length' in headers:
output(u'Warning! len(text) does not match content-length: %s != %s' % \
(len(text), headers.get('content-length')))
- #if False: #self.persistent_http
- # self.conn.close()
- # self.conn.connect()
return self.getUrl(path, retry, sysop, data, compress, no_hostname, cookie_only, back_response)
if compress and contentEncoding == 'gzip':