Revision: 7141 Author: alexsh Date: 2009-08-11 10:53:06 +0000 (Tue, 11 Aug 2009)
Log Message: ----------- *Page()._putPage(): change to use API, old move to _putPageOld() --All possible error in API mode should be handled --need to set config.use_api to enable this, otherwise it will use _putPageOld()
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-08-10 19:21:56 UTC (rev 7140) +++ trunk/pywikipedia/wikipedia.py 2009-08-11 10:53:06 UTC (rev 7141) @@ -1453,6 +1453,233 @@ def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True, newPage=False, token=None, newToken=False, sysop=False, captcha=None, botflag=True, maxTries=-1): + """Upload 'text' as new content of Page by API + + Don't use this directly, use put() instead. + + """ + try: + if config.use_api and self.site().versionnumber() >= 13: + apitest = self.site().api_address() + del apitest + else: + raise NotImplementedError #No enable api or version not support + except NotImplementedError: + return self._putPageOld(text, comment, watchArticle, minorEdit, + newPage, token, newToken, sysop, captcha, botflag, maxTries) + + retry_attempt = 1 + retry_delay = 1 + dblagged = False + params = { + 'action': 'edit', + 'title': self.title(), + 'text': self._encodeArg(text, 'text'), + 'summary': self._encodeArg(comment, 'summary'), + } + + if token: + params['token'] = token + else: + params['token'] = self.site().getToken(sysop = sysop) + + # Add server lag parameter (see config.py for details) + if config.maxlag: + params['maxlag'] = str(config.maxlag) + + if self._editTime: + params['basetimestamp'] = self._editTime + else: + params['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + + if self._startTime: + params['starttimestamp'] = self._startTime + else: + params['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + + if botflag: + params['bot'] = 1 + + if minorEdit: + params['minor'] = 1 + else: + params['notminor'] = 1 + + if watchArticle: + params['watch'] = 1 + #else: + # params['unwatch'] = 1 + + if captcha: + params['captchaid'] = captcha['id'] + params['captchaword'] = captcha['answer'] + + while True: + if (maxTries == 0): + raise MaxTriesExceededError() + maxTries -= 1 + # Check whether we are not too quickly after the previous + # putPage, and wait a bit until the interval is acceptable + if not dblagged: + put_throttle() + # Which web-site host are we submitting to? + if newPage: + output(u'Creating page %s' % self.aslink()) + else: + output(u'Updating page %s' % self.aslink()) + # Submit the prepared information + try: + response, data = query.GetData(params, self.site(), back_response = True) + if query.IsString(data): + raise KeyError + except httplib.BadStatusLine, line: + raise PageNotSaved('Bad status line: %s' % line.line) + except ServerError: + output(u''.join(traceback.format_exception(*sys.exc_info()))) + retry_attempt += 1 + if retry_attempt > config.maxretries: + raise + output(u'Got a server error when putting %s; will retry in %i minute%s.' % (self.aslink(), retry_delay, retry_delay != 1 and "s" or "")) + time.sleep(60 * retry_delay) + retry_delay *= 2 + if retry_delay > 30: + retry_delay = 30 + continue + # If it has gotten this far then we should reset dblagged + dblagged = False + # Check blocks + self.site().checkBlocks(sysop = sysop) + # A second text area means that an edit conflict has occured. + if response.status == 500: + output(u"Server error encountered; will retry in %i minute%s." + % (retry_delay, retry_delay != 1 and "s" or "")) + time.sleep(60 * retry_delay) + retry_delay *= 2 + if retry_delay > 30: + retry_delay = 30 + continue + if data.has_key('error'): + #All available error key in edit mode: (from ApiBase.php) + # 'noimageredirect-anon':"Anonymous users can't create image redirects", + # 'noimageredirect':"You don't have permission to create image redirects", + # 'filtered':"The filter callback function refused your edit", + # 'noedit-anon':"Anonymous users can't edit pages", + # 'noedit':"You don't have permission to edit pages", + # 'emptypage':"Creating new, empty pages is not allowed", + # 'badmd5':"The supplied MD5 hash was incorrect", + # 'notext':"One of the text, appendtext, prependtext and undo parameters must be set", + # 'emptynewsection':'Creating empty new sections is not possible.', + # 'revwrongpage':"r$1 is not a revision of ``$2''", + # 'undofailure':'Undo failed due to conflicting intermediate edits', + + #for debug only + #------------------------ + if verbose: + output("error occured, result:%s\nstatus:%s\nresponse:%s" % (data, response.status, response.reason)) + faked = params + del faked['text'], faked['format'] + output("OriginalData:%s" % faked) + del faked + #------------------------ + errorCode = data['error']['code'] + #cannot handle longpageerror and PageNoSave yet + if errorCode == 'maxlag' or response.status == 503: + # server lag; Mediawiki recommends waiting 5 seconds + # and retrying + if verbose: + output(data, newline=False) + output(u"Pausing 5 seconds due to database server lag.") + dblagged = True + time.sleep(5) + continue + elif errorCode == 'editconflict': + # 'editconflict':"Edit conflict detected", + raise EditConflict(u'An edit conflict has occured.') + elif errorCode == 'spamdetected': + # 'spamdetected':"Your edit was refused because it contained a spam fragment: ``$1''", + raise SpamfilterError(data['error']['info'][62:-2]) + elif errorCode == 'pagedeleted': + # 'pagedeleted':"The page has been deleted since you fetched its timestamp", + # Make sure your system clock is correct if this error occurs + # without any reason! + # raise EditConflict(u'Someone deleted the page.') + # No raise, simply define these variables and retry: + if self._editTime: + params['basetimestamp'] = self._editTime + else: + params['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + + if self._startTime: + params['starttimestamp'] = self._startTime + else: + params['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + continue + elif errorCode == 'readonly': + # 'readonly':"The wiki is currently in read-only mode" + output(u"The database is currently locked for write access; will retry in %i minute%s." + % (retry_delay, retry_delay != 1 and "s" or "")) + time.sleep(60 * retry_delay) + retry_delay *= 2 + if retry_delay > 30: + retry_delay = 30 + continue + elif errorCode == 'contenttoobig': + # 'contenttoobig':"The content you supplied exceeds the article size limit of $1 kilobytes", + raise LongPageError(len(params['text']), int(data['error']['indo'][59:-10])) + elif errorCode in ['protectedpage', 'customcssjsprotected', 'cascadeprotected', 'protectednamespace', 'protectednamespace-interface']: + # 'protectedpage':"The ``$1'' right is required to edit this page" + # 'cascadeprotected':"The page you're trying to edit is protected because it's included in a cascade-protected page" + # 'customcssjsprotected': "You're not allowed to edit custom CSS and JavaScript pages" + # 'protectednamespace': "You're not allowed to edit pages in the ``$1'' namespace" + # 'protectednamespace-interface':"You're not allowed to edit interface messages" + # + # The page is locked. This should have already been + # detected when getting the page, but there are some + # reasons why this didn't work, e.g. the page might be + # locked via a cascade lock. + try: + # Page is locked - try using the sysop account, unless we're using one already + if sysop:# Unknown permissions error + raise LockedPage() + else: + self.site().forceLogin(sysop = True) + output(u'Page is locked, retrying using sysop account.') + return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = True), sysop = True) + except NoUsername: + raise LockedPage() + elif errorCode == 'badtoken': + if newToken: + output(u"Edit token has failed. Giving up.") + else: + # We might have been using an outdated token + output(u"Edit token has failed. Retrying.") + return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop), newToken = True, sysop = sysop) + # I think the error message title was changed from "Wikimedia Error" + # to "Wikipedia has a problem", but I'm not sure. Maybe we could + # just check for HTTP Status 500 (Internal Server Error)? + else: + output("Unknown Error. API Error code:%s" % data['error']['code'] ) + output("Information:%s" %data['error']['info']) + else: + if data['edit']['result'] == u"Success": + # + # The status code for update page completed in ordinary mode is 302 - Found + # But API is always 200 - OK because it only send "success" back in string. + # if the page update is successed, we need to return code 302 for cheat script who + # using status code + # + return 302, response.reason, data + + solve = self.site().solveCaptcha(data) + if solve: + return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve) + + return response.status, response.reason, data + + + def _putPageOld(self, text, comment=None, watchArticle=False, minorEdit=True, + newPage=False, token=None, newToken=False, sysop=False, + captcha=None, botflag=True, maxTries=-1): """Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead. @@ -1625,14 +1852,14 @@ else: self.site().forceLogin(sysop = True) output(u'Page is locked, retrying using sysop account.') - return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = True), sysop = True) + return self._putPageOld(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = True), sysop = True) except NoUsername: raise LockedPage() if not newToken and "<textarea" in data: ##if "<textarea" in data: # for debug use only, if badtoken still happen # We might have been using an outdated token output(u"Changing page has failed. Retrying.") - return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = True, sysop = sysop) + return self._putPageOld(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = True, sysop = sysop) # I think the error message title was changed from "Wikimedia Error" # to "Wikipedia has a problem", but I'm not sure. Maybe we could # just check for HTTP Status 500 (Internal Server Error)? @@ -1683,7 +1910,7 @@ ## output('%s' % data) # WHY? solve = self.site().solveCaptcha(data) if solve: - return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve) + return self._putPageOld(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
# We are expecting a 302 to the action=view page. I'm not sure why this was removed in r5019 if data.strip() != u"": @@ -2795,9 +3022,7 @@ raise NoPage(u'API Error, nothing found in the APIs')
# We don't know the page's id, if any other better idea please change it - pageid = data.keys()[0] - nickdata = data[pageid][u'revisions'] - return nickdata + return data[data.keys()[0]][u'revisions']
class ImagePage(Page): """A subclass of Page representing an image descriptor wiki page. @@ -2856,7 +3081,7 @@ } imagedata = query.GetData(params, self.site(), encodeTitle = False) try: - url=imagedata['query']['pages'].values()[0]['imageinfo'][0]['url'] + return imagedata['query']['pages'].values()[0]['imageinfo'][0]['url'] # urlR = re.compile(r'<div class="fullImageLink" id="file">.*?<a href="(?P<url>[^ ]+?)"(?! class="image")|<span class="dangerousLink"><a href="(?P<url2>.+?)"', re.DOTALL) # m = urlR.search(self.getImagePageHtml())
@@ -5152,7 +5377,8 @@ self._mediawiki_messages = _dict([(tag['name'].lower(), tag['*']) for tag in datas]) except KeyError: - output('API get messages had some error, retrying by ordinary.') + if verbose: + output('API get messages had some error, retrying by ordinary.') api = False continue except NotImplementedError: @@ -5207,8 +5433,7 @@
key = key.lower() try: - value = self._mediawiki_messages[key] - return value + return self._mediawiki_messages[key] except KeyError: raise KeyError("MediaWiki key '%s' does not exist on %s" % (key, self))