Revision: 5992 Author: nicdumz Date: 2008-10-18 15:26:39 +0000 (Sat, 18 Oct 2008)
Log Message: ----------- adding support for wpCaptchaWord. Oh and... removing that %&%$ captcha code duplication
Modified Paths: -------------- trunk/pywikipedia/login.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/login.py =================================================================== --- trunk/pywikipedia/login.py 2008-10-18 14:05:27 UTC (rev 5991) +++ trunk/pywikipedia/login.py 2008-10-18 15:26:39 UTC (rev 5992) @@ -96,13 +96,12 @@ # No bot policies on other return True
- def getCookie(self, remember=True, captchaId = None, captchaAnswer = None): + def getCookie(self, remember=True, captcha = None): """ Login to the site.
remember Remember login (default: True) - captchaId The id number of the captcha, if any - captcha The word displayed in the captcha, if any + captchaId A dictionary containing the captcha id and answer, if any
Returns cookie data if succesful, None otherwise. """ @@ -123,9 +122,9 @@ "wpRemember": str(int(bool(remember))), "wpSkipCookieCheck": '1' } - if captchaId: - predata["wpCaptchaId"] = captchaId - predata["wpCaptchaWord"] = captchaAnswer + if captcha: + predata["wpCaptchaId"] = captcha['id'] + predata["wpCaptchaWord"] = captcha['answer'] login_address = self.site.login_address() address = login_address + '&action=submit'
@@ -158,17 +157,10 @@
if got_token and got_user: return "\n".join(L) - elif not captchaAnswer: - captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />') - match = captchaR.search(data) - if match: - id = match.group('id') - if not config.solve_captcha: - raise wikipedia.CaptchaError(id) - url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id) - answer = wikipedia.ui.askForCaptcha(url) - return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer) - + elif not captcha: + solve = self.site.solveCaptcha(data) + if solve: + return self.getCookie(remember = remember, captcha = solve) return None
def storecookiedata(self, data):
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-10-18 14:05:27 UTC (rev 5991) +++ trunk/pywikipedia/wikipedia.py 2008-10-18 15:26:39 UTC (rev 5992) @@ -1352,7 +1352,7 @@
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True, newPage=False, token=None, newToken=False, sysop=False, - captchaId=None, captchaAnswer=None ): + captcha=None): """Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead. @@ -1365,10 +1365,13 @@ 'wpSave': '1', 'wpSummary': self._encodeArg(comment, 'edit summary'), 'wpTextbox1': self._encodeArg(text, 'wikitext'), + # As of October 2008, MW HEAD requires wpSection to be set. + # We will need to fill this more smartly if we ever decide to edit by section + 'wpSection': '', } - if captchaId: - predata["wpCaptchaId"] = captchaId - predata["wpCaptchaWord"] = captchaAnswer + if captcha: + predata["wpCaptchaId"] = captcha['id'] + predata["wpCaptchaWord"] = captcha['answer'] # Add server lag parameter (see config.py for details) if config.maxlag: predata['maxlag'] = str(config.maxlag) @@ -1560,15 +1563,9 @@
# We might have been prompted for a captcha if the # account is not autoconfirmed, checking.... - captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />') - match = captchaR.search(data) - if match: - id = match.group('id') - if not config.solve_captcha: - raise wikipedia.CaptchaError(id) - url = self.site().protocol() + '://' + self.site().hostname() + self.site().captcha_image_address(id) - answer = ui.askForCaptcha(url) - return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captchaId=id, captchaAnswer = answer) + solve = self.site().solveCaptcha(data) + if solve: + return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
# We are expecting a 302 to the action=view page. I'm not sure why this was removed in r5019 if data.strip() != u"": @@ -4338,6 +4335,23 @@ l.append('wpEditToken=' + wpEditToken) return '&'.join(l)
+ def solveCaptcha(self, data): + captchaW = re.compile('<label for="wpCaptchaWord">(?P<question>[^<]*)</label>') + captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />') + match = captchaR.search(data) + if match: + id = match.group('id') + match = captchaW.search(data) + if match: + answer = input('What is the answer to the captcha "%s" ?' % match.group('question')) + else: + if not config.solve_captcha: + raise wikipedia.CaptchaError(id) + url = self.protocol() + '://' + self.hostname() + self.captcha_image_address(id) + answer = ui.askForCaptcha(url) + return {'id':id, 'answer':answer} + return None + def postForm(self, address, predata, sysop=False, cookies = None): """Post http form data to the given address at this site.
pywikipedia-l@lists.wikimedia.org