[Pywikipedia-l] SVN: [5992] trunk/pywikipedia

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Sat Oct 18 15:26:39 UTC 2008


Revision: 5992
Author:   nicdumz
Date:     2008-10-18 15:26:39 +0000 (Sat, 18 Oct 2008)

Log Message:
-----------
adding support for wpCaptchaWord. Oh and... removing that %&%$ captcha code duplication

Modified Paths:
--------------
    trunk/pywikipedia/login.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/login.py
===================================================================
--- trunk/pywikipedia/login.py	2008-10-18 14:05:27 UTC (rev 5991)
+++ trunk/pywikipedia/login.py	2008-10-18 15:26:39 UTC (rev 5992)
@@ -96,13 +96,12 @@
             # No bot policies on other
             return True
 
-    def getCookie(self, remember=True, captchaId = None, captchaAnswer = None):
+    def getCookie(self, remember=True, captcha = None):
         """
         Login to the site.
 
         remember    Remember login (default: True)
-        captchaId   The id number of the captcha, if any
-        captcha     The word displayed in the captcha, if any
+        captcha     A dictionary containing the captcha id and answer, if any
 
         Returns cookie data if succesful, None otherwise.
         """
@@ -123,9 +122,9 @@
                 "wpRemember": str(int(bool(remember))),
                 "wpSkipCookieCheck": '1'
             }
-            if captchaId:
-                predata["wpCaptchaId"] = captchaId
-                predata["wpCaptchaWord"] = captchaAnswer
+            if captcha:
+                predata["wpCaptchaId"] = captcha['id']
+                predata["wpCaptchaWord"] = captcha['answer']
             login_address = self.site.login_address()
             address = login_address + '&action=submit'
 
@@ -158,17 +157,10 @@
 
             if got_token and got_user:
                 return "\n".join(L)
-            elif not captchaAnswer:
-                captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />')
-                match = captchaR.search(data)
-                if match:
-                    id = match.group('id')
-                    if not config.solve_captcha:
-                        raise wikipedia.CaptchaError(id)
-                    url = self.site.protocol() + '://' + self.site.hostname() + self.site.captcha_image_address(id)
-                    answer = wikipedia.ui.askForCaptcha(url)
-                    return self.getCookie(remember = remember, captchaId = id, captchaAnswer = answer)
-
+            elif not captcha:
+                solve = self.site.solveCaptcha(data)
+                if solve:
+                    return self.getCookie(remember = remember, captcha = solve)
             return None
 
     def storecookiedata(self, data):

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2008-10-18 14:05:27 UTC (rev 5991)
+++ trunk/pywikipedia/wikipedia.py	2008-10-18 15:26:39 UTC (rev 5992)
@@ -1352,7 +1352,7 @@
 
     def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
                 newPage=False, token=None, newToken=False, sysop=False,
-                captchaId=None, captchaAnswer=None ):
+                captcha=None):
         """Upload 'text' as new content of Page by filling out the edit form.
 
         Don't use this directly, use put() instead.
@@ -1365,10 +1365,13 @@
             'wpSave': '1',
             'wpSummary': self._encodeArg(comment, 'edit summary'),
             'wpTextbox1': self._encodeArg(text, 'wikitext'),
+            # As of October 2008, MW HEAD requires wpSection to be set.
+            # We will need to fill this more smartly if we ever decide to edit by section
+            'wpSection': '', 
         }
-        if captchaId:
-            predata["wpCaptchaId"] = captchaId
-            predata["wpCaptchaWord"] = captchaAnswer
+        if captcha:
+            predata["wpCaptchaId"] = captcha['id']
+            predata["wpCaptchaWord"] = captcha['answer']
         # Add server lag parameter (see config.py for details)
         if config.maxlag:
             predata['maxlag'] = str(config.maxlag)
@@ -1560,15 +1563,9 @@
 
             # We might have been prompted for a captcha if the
             # account is not autoconfirmed, checking....
-            captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />')
-            match = captchaR.search(data)
-            if match:
-                id = match.group('id')
-                if not config.solve_captcha:
-                    raise wikipedia.CaptchaError(id)
-                url = self.site().protocol() + '://' + self.site().hostname() + self.site().captcha_image_address(id)
-                answer = ui.askForCaptcha(url)
-                return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captchaId=id, captchaAnswer = answer)
+            solve = self.site().solveCaptcha(data)
+            if solve:
+                return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
 
             # We are expecting a 302 to the action=view page. I'm not sure why this was removed in r5019
             if data.strip() != u"":
@@ -4338,6 +4335,23 @@
             l.append('wpEditToken=' + wpEditToken)
         return '&'.join(l)
 
+    def solveCaptcha(self, data):
+        captchaW = re.compile('<label for="wpCaptchaWord">(?P<question>[^<]*)</label>')
+        captchaR = re.compile('<input type="hidden" name="wpCaptchaId" id="wpCaptchaId" value="(?P<id>\d+)" />')
+        match = captchaR.search(data)
+        if match:
+            id = match.group('id')
+            match = captchaW.search(data)
+            if match:
+                answer = input('What is the answer to the captcha "%s" ?' % match.group('question'))
+            else:
+                if not config.solve_captcha:
+                    raise wikipedia.CaptchaError(id)
+                url = self.protocol() + '://' + self.hostname() + self.captcha_image_address(id)
+                answer = ui.askForCaptcha(url)
+            return {'id':id, 'answer':answer}
+        return None
+
     def postForm(self, address, predata, sysop=False, cookies = None):
         """Post http form data to the given address at this site.
 





More information about the Pywikipedia-l mailing list