[Pywikipedia-l] SVN: [5992] trunk/pywikipedia
nicdumz at svn.wikimedia.org
nicdumz at svn.wikimedia.org
Sat Oct 18 15:26:39 UTC 2008
Revision: 5992
Author: nicdumz
Date: 2008-10-18 15:26:39 +0000 (Sat, 18 Oct 2008)
Log Message:
-----------
adding support for wpCaptchaWord. Oh and... removing that %&%$ captcha code duplication
Modified Paths:
--------------
trunk/pywikipedia/login.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/login.py
===================================================================
--- trunk/pywikipedia/login.py 2008-10-18 14:05:27 UTC (rev 5991)
+++ trunk/pywikipedia/login.py 2008-10-18 15:26:39 UTC (rev 5992)
@@ -96,13 +96,12 @@
# No bot policies on other
return True
- def getCookie(self, remember=True, captchaId = None, captchaAnswer = None):
+ def getCookie(self, remember=True, captcha = None):
"""
Login to the site.
remember Remember login (default: True)
- captchaId The id number of the captcha, if any
- captcha The word displayed in the captcha, if any
+ captcha A dictionary containing the captcha id and answer, if any
Returns cookie data if succesful, None otherwise.
"""
@@ -123,9 +122,9 @@
"wpRemember": str(int(bool(remember))),
"wpSkipCookieCheck": '1'
}
- if captchaId:
- predata["wpCaptchaId"] = captchaId
- predata["wpCaptchaWord"] = captchaAnswer
+ if captcha:
+ predata["wpCaptchaId"] = captcha['id']
+ predata["wpCaptchaWord"] = captcha['answer']
login_address = self.site.login_address()
address = login_address + '&action=submit'
@@ -158,17 +157,10 @@
if got_token and got_user:
return "\n".join(L)
- elif not captchaAnswer:
- captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />')
- match = captchaR.search(data)
- if match:
- id = match.group('id')
- if not config.solve_captcha:
- raise wikipedia.CaptchaError(id)
- url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id)
- answer = wikipedia.ui.askForCaptcha(url)
- return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer)
-
+ elif not captcha:
+ solve = self.site.solveCaptcha(data)
+ if solve:
+ return self.getCookie(remember = remember, captcha = solve)
return None
def storecookiedata(self, data):
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-10-18 14:05:27 UTC (rev 5991)
+++ trunk/pywikipedia/wikipedia.py 2008-10-18 15:26:39 UTC (rev 5992)
@@ -1352,7 +1352,7 @@
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
newPage=False, token=None, newToken=False, sysop=False,
- captchaId=None, captchaAnswer=None ):
+ captcha=None):
"""Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead.
@@ -1365,10 +1365,13 @@
'wpSave': '1',
'wpSummary': self._encodeArg(comment, 'edit summary'),
'wpTextbox1': self._encodeArg(text, 'wikitext'),
+ # As of October 2008, MW HEAD requires wpSection to be set.
+ # We will need to fill this more smartly if we ever decide to edit by section
+ 'wpSection': '',
}
- if captchaId:
- predata["wpCaptchaId"] = captchaId
- predata["wpCaptchaWord"] = captchaAnswer
+ if captcha:
+ predata["wpCaptchaId"] = captcha['id']
+ predata["wpCaptchaWord"] = captcha['answer']
# Add server lag parameter (see config.py for details)
if config.maxlag:
predata['maxlag'] = str(config.maxlag)
@@ -1560,15 +1563,9 @@
# We might have been prompted for a captcha if the
# account is not autoconfirmed, checking....
- captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />')
- match = captchaR.search(data)
- if match:
- id = match.group('id')
- if not config.solve_captcha:
- raise wikipedia.CaptchaError(id)
- url = self.site().protocol() + '://' + self.site().hostname() + self.site().captcha_image_address(id)
- answer = ui.askForCaptcha(url)
- return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captchaId=id, captchaAnswer = answer)
+ solve = self.site().solveCaptcha(data)
+ if solve:
+ return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
# We are expecting a 302 to the action=view page. I'm not sure why this was removed in r5019
if data.strip() != u"":
@@ -4338,6 +4335,23 @@
l.append('wpEditToken=' + wpEditToken)
return '&'.join(l)
+ def solveCaptcha(self, data):
+ captchaW = re.compile('<label for="wpCaptchaWord">(?P<question>[^<]*)</label>')
+ captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />')
+ match = captchaR.search(data)
+ if match:
+ id = match.group('id')
+ match = captchaW.search(data)
+ if match:
+ answer = input('What is the answer to the captcha "%s" ?' % match.group('question'))
+ else:
+ if not config.solve_captcha:
+ raise wikipedia.CaptchaError(id)
+ url = self.protocol() + '://' + self.hostname() + self.captcha_image_address(id)
+ answer = ui.askForCaptcha(url)
+ return {'id':id, 'answer':answer}
+ return None
+
def postForm(self, address, predata, sysop=False, cookies = None):
"""Post http form data to the given address at this site.
More information about the Pywikipedia-l
mailing list