Revision: 7308 Author: siebrand Date: 2009-09-25 00:10:15 +0000 (Fri, 25 Sep 2009)
Log Message: ----------- * [#2807441] correcting issue with get references. Patch by Nakor Wikipedia. If you look for references to a page that has a lot of them, you may enter an infinite loop. See e.g. [[:fr:Modèle:Admissibilité]]. Attached is a patch that tries to prevent that by filtering the initial display of reference pages.
* remove trailing whitespace
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-09-25 00:01:59 UTC (rev 7307) +++ trunk/pywikipedia/wikipedia.py 2009-09-25 00:10:15 UTC (rev 7308) @@ -921,7 +921,7 @@ } data = query.GetData(params, self.site(), encodeTitle = False)['query']['pages'].values()[0] if data.has_key('redirect'): - raise IsRedirectPage + raise IsRedirectPage elif data.has_key('missing'): raise NoPage elif data.has_key('lastrevid'): @@ -929,7 +929,7 @@ else: # should not exists, OR we have problems. # better double check in this situations - x = self.get() + x = self.get() return True # if we reach this point, we had no problems.
def getTemplates(self, tllimit = 5000): @@ -950,7 +950,7 @@ params['tllimit'] = config.special_page_limit if tllimit > 5000 and self.site.isAllowed('apihighlimits'): params['tllimit'] = 5000 - + tmpsFound = [] while True: data = query.GetData(params, self.site(), encodeTitle = False) @@ -960,7 +960,7 @@ params["tlcontinue"] = data["query-continue"]["templates"]["tlcontinue"] else: break - + return tmpsFound
def isRedirectPage(self): @@ -1169,6 +1169,12 @@ config.special_page_limit = 999 site = self.site() path = self.site().references_address(self.urlname()) + if withTemplateInclusion: + path+=u'&hidetrans=0' + if onlyTemplateInclusion: + path+=u'&hidetrans=0&hidelinks=1&hideredirs=1&hideimages=1' + if redirectsOnly: + path+=u'&hideredirs=0&hidetrans=1&hidelinks=1&hideimages=1' content = SoupStrainer("div", id=self.site().family.content_id) try: next_msg = self.site().mediawiki_message('whatlinkshere-next') @@ -1313,7 +1319,7 @@ api_url = self.site().api_address() except NotImplementedError: return restrictions - + predata = { 'action': 'query', 'prop': 'info', @@ -1322,9 +1328,9 @@ } #if titles: # predata['titles'] = query.ListToParam(titles) - + text = query.GetData(predata, self.site())['query']['pages'] - + for pageid in text: if text[pageid].has_key('missing'): self._getexception = NoPage @@ -1333,7 +1339,7 @@ # Don't know what may happen here. # We may want to have better error handling raise Error("BUG> API problem.") - if text[pageid]['protection'] != []: + if text[pageid]['protection'] != []: #if titles: # restrictions = dict([ detail['type'], [ detail['level'], detail['expiry'] ] ] # for detail in text[pageid]['protection']) @@ -1468,7 +1474,7 @@ except NotImplementedError: return self._putPageOld(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha, botflag, maxTries) - + retry_attempt = 1 retry_delay = 1 dblagged = False @@ -1478,12 +1484,12 @@ 'text': self._encodeArg(text, 'text'), 'summary': self._encodeArg(comment, 'summary'), } - + if token: params['token'] = token else: params['token'] = self.site().getToken(sysop = sysop) - + # Add server lag parameter (see config.py for details) if config.maxlag: params['maxlag'] = str(config.maxlag) @@ -1492,29 +1498,29 @@ params['basetimestamp'] = self._editTime else: params['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) - + if self._startTime: params['starttimestamp'] 
= self._startTime else: params['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) - + if botflag: params['bot'] = 1 - + if minorEdit: params['minor'] = 1 else: params['notminor'] = 1 - + if watchArticle: params['watch'] = 1 #else: # params['unwatch'] = 1 - + if captcha: params['captchaid'] = captcha['id'] params['captchaword'] = captcha['answer'] - + while True: if (maxTries == 0): raise MaxTriesExceededError() @@ -1621,7 +1627,7 @@ params['basetimestamp'] = self._editTime else: params['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) - + if self._startTime: params['starttimestamp'] = self._startTime else: @@ -1645,7 +1651,7 @@ # 'customcssjsprotected': "You're not allowed to edit custom CSS and JavaScript pages" # 'protectednamespace': "You're not allowed to edit pages in the ``$1'' namespace" # 'protectednamespace-interface':"You're not allowed to edit interface messages" - # + # # The page is locked. This should have already been # detected when getting the page, but there are some # reasons why this didn't work, e.g. the page might be @@ -1669,7 +1675,7 @@ return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = True, sysop = sysop) # I think the error message title was changed from "Wikimedia Error" # to "Wikipedia has a problem", but I'm not sure. Maybe we could - # just check for HTTP Status 500 (Internal Server Error)? + # just check for HTTP Status 500 (Internal Server Error)? else: output("Unknown Error. API Error code:%s" % data['error']['code'] ) output("Information:%s" %data['error']['info']) @@ -1678,18 +1684,18 @@ # # The status code for update page completed in ordinary mode is 302 - Found # But API is always 200 - OK because it only send "success" back in string. 
- # if the page update is successed, we need to return code 302 for cheat script who + # if the page update is successed, we need to return code 302 for cheat script who # using status code # return 302, response.reason, data - + solve = self.site().solveCaptcha(data) if solve: return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve) - + return response.status, response.reason, data -
+ def _putPageOld(self, text, comment=None, watchArticle=False, minorEdit=True, newPage=False, token=None, newToken=False, sysop=False, captcha=None, botflag=True, maxTries=-1): @@ -1707,7 +1713,7 @@ 'wpTextbox1': self._encodeArg(text, 'wikitext'), # As of October 2008, MW HEAD requires wpSection to be set. # We will need to fill this more smartly if we ever decide to edit by section - 'wpSection': '', + 'wpSection': '', } if not botflag: predata['bot']='0' @@ -1725,9 +1731,9 @@ else: predata['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if self._startTime: - predata['wpStarttime'] = self._startTime + predata['wpStarttime'] = self._startTime else: - predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if self._revisionId: predata['baseRevId'] = self._revisionId # Pass the minorEdit and watchArticle arguments to the Wiki. @@ -1850,9 +1856,9 @@ else: predata['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) if self._startTime: - predata['wpStarttime'] = self._startTime + predata['wpStarttime'] = self._startTime else: - predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) + predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime()) continue if self.site().has_mediawiki_message("viewsource")\ and self.site().mediawiki_message('viewsource') in data: @@ -1990,10 +1996,10 @@ ns -= 1 else: ns += 1 - + if ns == 6: return ImagePage(self.site(), self.titleWithoutNamespace()) - + return Page(self.site(), self.titleWithoutNamespace(), defaultNamespace=ns)
def interwiki(self): @@ -2361,7 +2367,7 @@
# If we are getting all of the page history... if getAll: - #Find the nextPage link, if not exist, the page is last history page + #Find the nextPage link, if not exist, the page is last history page matchObj = RLinkToNextPage.search(self_txt) if matchObj: startFromPage = matchObj.group(1) @@ -2599,7 +2605,7 @@ answer = 'y' self.site()._noDeletePrompt = True if answer == 'y': - + token = self.site().getToken(self, sysop = True) reason = reason.encode(self.site().encoding()) try: @@ -2607,7 +2613,7 @@ del d except NotImplementedError: config.use_api = False - + if config.use_api and self.site().versionnumber() >= 12: #API Mode params = { @@ -2626,7 +2632,7 @@ else: output(u'Deletion of %s failed for an unknown reason. The response text is:' % self.aslink(forceInterwiki = True)) output('%s' % datas) - + return False else: #Ordinary mode from webpage. @@ -2792,7 +2798,7 @@ output(u'Page %s undeleted' % self.aslink()) return result
- def protect(self, editcreate = 'sysop', move = 'sysop', unprotect = False, reason = None, editcreate_duration = 'infinite', + def protect(self, editcreate = 'sysop', move = 'sysop', unprotect = False, reason = None, editcreate_duration = 'infinite', move_duration = 'infinite', cascading = False, prompt = True, throttle = True): """(Un)protect a wiki title. Requires administrator status.
@@ -2815,7 +2821,7 @@ #if self.exists() and editcreate != move: # check protect level if edit/move not same # if editcreate == 'sysop' and move != 'sysop': # raise Error("The level configuration is not safe") - + if unprotect: address = self.site().unprotect_address(self.urlname()) # unprotect_address is actually an alias for protect_address... @@ -2865,9 +2871,9 @@ predata = {} if self.site().versionnumber >= 10: predata['mwProtect-cascade'] = cascading - + predata['mwProtect-reason'] = reason - + if not self.exists(): #and self.site().versionnumber() >= : #create protect predata['mwProtect-level-create'] = editcreate @@ -2876,14 +2882,14 @@ #edit/move Protect predata['mwProtect-level-edit'] = editcreate predata['mwProtect-level-move'] = move - + if self.site().versionnumber() >= 14: predata['wpProtectExpirySelection-edit'] = editcreate_duration predata['wpProtectExpirySelection-move'] = move_duration else: predata['mwProtect-expiry'] = editcreate_duration - - + + if token: predata['wpEditToken'] = token if self.site().hostname() in config.authenticate.keys(): @@ -3157,7 +3163,7 @@ return [nick, timestamp] except KeyError: raise NoPage(u'API Error, nothing found in the APIs') - + def getHash(self): """ Function that return the Hash of an file in oder to understand if two Files are the same or not. @@ -5169,7 +5175,7 @@ # Get username. # The data in anonymous mode had key 'anon' # if 'anon' exist, username is IP address, not to collect it right now - if not text.has_key('anon'): + if not text.has_key('anon'): self._isLoggedIn[index] = True self._userName[index] = text['name'] else: @@ -5233,7 +5239,7 @@ output(u'WARNING: Token not found on %s. You will not be able to edit any page.' % self) else: #ordinary mode to get data from edit page HTMLs and JavaScripts - + if '<div id="globalWrapper">' not in text: # Not a wiki page return @@ -5485,13 +5491,13 @@
if verbose: output(u'Getting information for site %s' % self) - + try: api_url = self.api_address() del api_url except NotImplementedError: config.use_api = False - + # Get data # API Userinfo is available from version 1.11 # preferencetoken available from 1.14 @@ -5504,7 +5510,7 @@ } if self.versionnumber() >= 14: params['uiprop'] += '|preferencestoken' - + text = query.GetData(params, self, sysop=sysop)['query']['userinfo'] ##output('%s' % text) # for debug use only else: @@ -5586,7 +5592,7 @@ #'': '', } data = query.GetData(params, self)['query']['recentchanges'] - + for np in data: date = np['timestamp'] title = np['title'] @@ -6072,7 +6078,7 @@ if namespace is None: page = Page(self, start) namespace = page.namespace() - start = page.titleWithoutNamespace() + start = page.titleWithoutNamespace() try: api_url = self.api_address() del api_url @@ -6095,16 +6101,16 @@ params['apfilterredir'] = 'redirects'
while True: - + if throttle: get_throttle() data = query.GetData(params, self) - + #count = 0 for p in data['query']['allpages']: #count += 1 yield Page(self, p['title']) - + if data.has_key('query-continue'): params['apfrom'] = data['query-continue']['allpages']['apfrom'] else: @@ -6434,7 +6440,7 @@ defaults = [] for namespace in self.family.namespaces.itervalues(): value = namespace.get('_default', None) - if value: + if value: if isinstance(value, list): defaults += value else: @@ -6451,7 +6457,7 @@
def replacenumbered(match): return self.namespace(int(match.group(1))) - + return numbered.sub(replacenumbered, wikitext)
# The following methods are for convenience, so that you can access @@ -6898,7 +6904,7 @@ """ if self.versionnumber() < 12: return None - + if hash_found is None: # If the hash is none return None and not continue return None # Now get all the images with the same hash @@ -7227,7 +7233,7 @@ xdict = xdict[default_family] else: xdict = xdict['wikipedia'] - + if type(xdict) != dict: return xdict