Revision: 5490 Author: nicdumz Date: 2008-06-01 09:39:27 +0000 (Sun, 01 Jun 2008)
Log Message: ----------- Fix the broken login on WM sites: the login code now fetches Special:Userlogin first to obtain a session cookie, and then sends that cookie along with the login data.
I wanted to wait a few days to find out whether wpSkipCookieCheck would be allowed on WM sites, but I have changed my mind: I am committing this fix now, even though it fetches one more page, to make sure that WM bots can run without their operators having to come to #pywikipediabot to ask for the magic use_api_login. I will revert it if wpSkipCookieCheck goes live.
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/login.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2008-06-01 09:32:11 UTC (rev 5489) +++ trunk/pywikipedia/family.py 2008-06-01 09:39:27 UTC (rev 5490) @@ -2979,7 +2979,7 @@ return "%s?useskin=monobook&title=%s:Allmessages&ot=html" % (self.path(code), self.special_namespace_url(code))
def login_address(self, code): - return '%s?useskin=monobook&title=%s:Userlogin&action=submit' % (self.path(code), self.special_namespace_url(code)) + return '%s?useskin=monobook&title=%s:Userlogin' % (self.path(code), self.special_namespace_url(code))
def captcha_image_address(self, code, id): return '%s?useskin=monobook&title=%s:Captcha/image&wpCaptchaId=%s' % (self.path(code), self.special_namespace_url(code), id)
Modified: trunk/pywikipedia/login.py =================================================================== --- trunk/pywikipedia/login.py 2008-06-01 09:32:11 UTC (rev 5489) +++ trunk/pywikipedia/login.py 2008-06-01 09:39:27 UTC (rev 5490) @@ -126,7 +126,8 @@ if captchaId: predata["wpCaptchaId"] = captchaId predata["wpCaptchaWord"] = captchaAnswer - address = self.site.login_address() + login_address = self.site.login_address() + address = login_address + '&action=submit'
if self.site.hostname() in config.authenticate.keys(): headers = { @@ -139,23 +140,23 @@ wikipedia.cj.save(wikipedia.COOKIEFILE) return "Ok" else: - response, data = self.site.postForm(address, predata, useCookie=False) - n = 0 + #Retrieve a session cookie + session = self.site.getUrl(login_address, cookie_only=True) + + response, data = self.site.postForm(address, predata, cookies=session) Reat=re.compile(': (.*?);') L = []
for eat in response.msg.getallmatchingheaders('set-cookie'): m = Reat.search(eat) if m: - n += 1 L.append(m.group(1))
got_token = False - log_data = [] for Ldata in L: if 'Token=' in Ldata: got_token = True - + if got_token: return "\n".join(L) elif not captchaAnswer: @@ -168,9 +169,9 @@ url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id) answer = wikipedia.ui.askForCaptcha(url) return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer) - else: - return None
+ return None + def storecookiedata(self, data): """ Stores cookie data.
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-06-01 09:32:11 UTC (rev 5489) +++ trunk/pywikipedia/wikipedia.py 2008-06-01 09:39:27 UTC (rev 5490) @@ -4143,12 +4143,13 @@ l.append('wpEditToken=' + wpEditToken) return '&'.join(l)
- def postForm(self, address, predata, sysop=False, useCookie=True): + def postForm(self, address, predata, sysop=False, cookies = None): """Post http form data to the given address at this site.
- address is the absolute path without hostname. - predata is a dict or any iterable that can be converted to a dict, + address - the absolute path without hostname. + predata - a dict or any iterable that can be converted to a dict, containing keys and values for the http form. + cookies - the cookies to send with the form. If None, send self.cookies
Return a (response, data) tuple, where response is the HTTP response object and data is a Unicode string containing the @@ -4157,14 +4158,18 @@ """ data = self.urlEncode(predata) try: - return self.postData(address, data, sysop=sysop, - useCookie=useCookie) + if cookies: + return self.postData(address, data, sysop=sysop, + cookies=cookies) + else: + return self.postData(address, data, sysop=sysop, + cookies=self.cookies(sysop = sysop)) except socket.error, e: raise ServerError(e)
def postData(self, address, data, contentType='application/x-www-form-urlencoded', - sysop=False, useCookie=True, compress=True): + sysop=False, compress=True, cookies=None): """Post encoded data to the given http address at this site.
address is the absolute path without hostname. @@ -4191,8 +4196,8 @@ conn.putheader('Content-Length', str(len(data))) conn.putheader('Content-type', contentType) conn.putheader('User-agent', useragent) - if useCookie and self.cookies(sysop = sysop): - conn.putheader('Cookie', self.cookies(sysop = sysop)) + if cookies: + conn.putheader('Cookie', cookies) if False: #self.persistent_http: conn.putheader('Connection', 'Keep-Alive') if compress: @@ -4209,7 +4214,7 @@ # Blub. conn.close() conn.connect() - return self.postData(address, data, contentType, sysop, useCookie) + return self.postData(address, data, contentType, sysop, useCookie, compress, cookie)
data = response.read()
@@ -4227,7 +4232,8 @@
return response, data
- def getUrl(self, path, retry = True, sysop = False, data = None, compress = True, no_hostname = False): + def getUrl(self, path, retry = True, sysop = False, data = None, + compress = True, no_hostname = False, cookie_only=False): """ Low-level routine to get a URL from the wiki.
@@ -4237,6 +4243,7 @@ occurs. sysop - If True, the sysop account's cookie will be used. data - An optional dict providing extra post request parameters. + cookie_only - Only return the cookie the server sent us back no_hostname - Open the URL given, don't add the hostname before.
Returns the HTML text of the page converted to unicode. @@ -4319,6 +4326,8 @@
headers = f.info()
+ if cookie_only: + return headers.get('set-cookie', '') contentType = headers.get('content-type', '') contentEncoding = headers.get('content-encoding', '')
pywikipedia-l@lists.wikimedia.org