[Pywikipedia-svn] SVN: [7308] trunk/pywikipedia/wikipedia.py

siebrand at svn.wikimedia.org siebrand at svn.wikimedia.org
Fri Sep 25 00:10:15 UTC 2009


Revision: 7308
Author:   siebrand
Date:     2009-09-25 00:10:15 +0000 (Fri, 25 Sep 2009)

Log Message:
-----------
* [#2807441] correcting issue with get references. Patch by Nakor Wikipedia
If you look for references to a page that has a lot of them you may enter
an infinite loop. See e.g. [[:fr:Modèle:Admissibilité]]. Attached is a
patch to try and prevent that by filtering the initial display of reference
pages.

* remove trailing whitespace

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-09-25 00:01:59 UTC (rev 7307)
+++ trunk/pywikipedia/wikipedia.py	2009-09-25 00:10:15 UTC (rev 7308)
@@ -921,7 +921,7 @@
             }
         data = query.GetData(params, self.site(), encodeTitle = False)['query']['pages'].values()[0]
         if data.has_key('redirect'):
-            raise IsRedirectPage        
+            raise IsRedirectPage
         elif data.has_key('missing'):
             raise NoPage
         elif data.has_key('lastrevid'):
@@ -929,7 +929,7 @@
         else:
             # should not exists, OR we have problems.
             # better double check in this situations
-            x = self.get()                
+            x = self.get()
             return True # if we reach this point, we had no problems.
 
     def getTemplates(self, tllimit = 5000):
@@ -950,7 +950,7 @@
             params['tllimit'] = config.special_page_limit
             if tllimit > 5000 and self.site.isAllowed('apihighlimits'):
                 params['tllimit'] = 5000
-        
+
         tmpsFound = []
         while True:
             data = query.GetData(params, self.site(), encodeTitle = False)
@@ -960,7 +960,7 @@
                 params["tlcontinue"] = data["query-continue"]["templates"]["tlcontinue"]
             else:
                 break
-        
+
         return tmpsFound
 
     def isRedirectPage(self):
@@ -1169,6 +1169,12 @@
             config.special_page_limit = 999
         site = self.site()
         path = self.site().references_address(self.urlname())
+        if withTemplateInclusion:
+            path+=u'&hidetrans=0'
+        if onlyTemplateInclusion:
+            path+=u'&hidetrans=0&hidelinks=1&hideredirs=1&hideimages=1'
+        if redirectsOnly:
+            path+=u'&hideredirs=0&hidetrans=1&hidelinks=1&hideimages=1'
         content = SoupStrainer("div", id=self.site().family.content_id)
         try:
             next_msg = self.site().mediawiki_message('whatlinkshere-next')
@@ -1313,7 +1319,7 @@
             api_url = self.site().api_address()
         except NotImplementedError:
             return restrictions
-        
+
         predata = {
             'action': 'query',
             'prop': 'info',
@@ -1322,9 +1328,9 @@
         }
         #if titles:
         #    predata['titles'] = query.ListToParam(titles)
-        
+
         text = query.GetData(predata, self.site())['query']['pages']
-        
+
         for pageid in text:
             if text[pageid].has_key('missing'):
                 self._getexception = NoPage
@@ -1333,7 +1339,7 @@
                 # Don't know what may happen here.
                 # We may want to have better error handling
                 raise Error("BUG> API problem.")
-            if text[pageid]['protection'] != []: 
+            if text[pageid]['protection'] != []:
                 #if titles:
                 #    restrictions = dict([ detail['type'], [ detail['level'], detail['expiry'] ] ]
                 #        for detail in text[pageid]['protection'])
@@ -1468,7 +1474,7 @@
         except NotImplementedError:
             return self._putPageOld(text, comment, watchArticle, minorEdit,
                 newPage, token, newToken, sysop, captcha, botflag, maxTries)
-        
+
         retry_attempt = 1
         retry_delay = 1
         dblagged = False
@@ -1478,12 +1484,12 @@
             'text': self._encodeArg(text, 'text'),
             'summary': self._encodeArg(comment, 'summary'),
         }
-        
+
         if token:
             params['token'] = token
         else:
             params['token'] = self.site().getToken(sysop = sysop)
-        
+
         # Add server lag parameter (see config.py for details)
         if config.maxlag:
             params['maxlag'] = str(config.maxlag)
@@ -1492,29 +1498,29 @@
             params['basetimestamp'] = self._editTime
         else:
             params['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
-        
+
         if self._startTime:
             params['starttimestamp'] = self._startTime
         else:
             params['starttimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
-        
+
         if botflag:
             params['bot'] = 1
-        
+
         if minorEdit:
             params['minor'] = 1
         else:
             params['notminor'] = 1
-        
+
         if watchArticle:
             params['watch'] = 1
         #else:
         #    params['unwatch'] = 1
-        
+
         if captcha:
             params['captchaid'] = captcha['id']
             params['captchaword'] = captcha['answer']
-        
+
         while True:
             if (maxTries == 0):
                 raise MaxTriesExceededError()
@@ -1621,7 +1627,7 @@
                         params['basetimestamp'] = self._editTime
                     else:
                         params['basetimestamp'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
-                    
+
                     if self._startTime:
                         params['starttimestamp'] = self._startTime
                     else:
@@ -1645,7 +1651,7 @@
                     # 'customcssjsprotected': "You're not allowed to edit custom CSS and JavaScript pages"
                     # 'protectednamespace': "You're not allowed to edit pages in the ``\$1'' namespace"
                     # 'protectednamespace-interface':"You're not allowed to edit interface messages"
-                    # 
+                    #
                     # The page is locked. This should have already been
                     # detected when getting the page, but there are some
                     # reasons why this didn't work, e.g. the page might be
@@ -1669,7 +1675,7 @@
                         return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = True, sysop = sysop)
                 # I think the error message title was changed from "Wikimedia Error"
                 # to "Wikipedia has a problem", but I'm not sure. Maybe we could
-                # just check for HTTP Status 500 (Internal Server Error)?                    
+                # just check for HTTP Status 500 (Internal Server Error)?
                 else:
                     output("Unknown Error. API Error code:%s" % data['error']['code'] )
                     output("Information:%s" %data['error']['info'])
@@ -1678,18 +1684,18 @@
                     #
                     # The status code for update page completed in ordinary mode is 302 - Found
                     # But API is always 200 - OK because it only send "success" back in string.
-                    # if the page update is successed, we need to return code 302 for cheat script who 
+                    # if the page update is successed, we need to return code 302 for cheat script who
                     # using status code
                     #
                     return 302, response.reason, data
-            
+
             solve = self.site().solveCaptcha(data)
             if solve:
                 return self._putPage(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
-            
+
             return response.status, response.reason, data
-        
 
+
     def _putPageOld(self, text, comment=None, watchArticle=False, minorEdit=True,
                 newPage=False, token=None, newToken=False, sysop=False,
                 captcha=None, botflag=True, maxTries=-1):
@@ -1707,7 +1713,7 @@
             'wpTextbox1': self._encodeArg(text, 'wikitext'),
             # As of October 2008, MW HEAD requires wpSection to be set.
             # We will need to fill this more smartly if we ever decide to edit by section
-            'wpSection': '', 
+            'wpSection': '',
         }
         if not botflag:
             predata['bot']='0'
@@ -1725,9 +1731,9 @@
         else:
             predata['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
         if self._startTime:
-            predata['wpStarttime'] = self._startTime  
+            predata['wpStarttime'] = self._startTime
         else:
-            predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())     
+            predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
         if self._revisionId:
             predata['baseRevId'] = self._revisionId
         # Pass the minorEdit and watchArticle arguments to the Wiki.
@@ -1850,9 +1856,9 @@
                 else:
                     predata['wpEdittime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
                 if self._startTime:
-                    predata['wpStarttime'] = self._startTime  
+                    predata['wpStarttime'] = self._startTime
                 else:
-                    predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())     
+                    predata['wpStarttime'] = time.strftime('%Y%m%d%H%M%S', time.gmtime())
                 continue
             if self.site().has_mediawiki_message("viewsource")\
                     and self.site().mediawiki_message('viewsource') in data:
@@ -1990,10 +1996,10 @@
             ns -= 1
         else:
             ns += 1
-        
+
         if ns == 6:
             return ImagePage(self.site(), self.titleWithoutNamespace())
-        
+
         return Page(self.site(), self.titleWithoutNamespace(), defaultNamespace=ns)
 
     def interwiki(self):
@@ -2361,7 +2367,7 @@
 
             # If we are getting all of the page history...
             if getAll:
-                #Find the nextPage link, if not exist, the page is last history page 
+                #Find the nextPage link, if not exist, the page is last history page
                 matchObj = RLinkToNextPage.search(self_txt)
                 if matchObj:
                     startFromPage = matchObj.group(1)
@@ -2599,7 +2605,7 @@
                 answer = 'y'
                 self.site()._noDeletePrompt = True
         if answer == 'y':
-            
+
             token = self.site().getToken(self, sysop = True)
             reason = reason.encode(self.site().encoding())
             try:
@@ -2607,7 +2613,7 @@
                 del d
             except NotImplementedError:
                 config.use_api = False
-            
+
             if config.use_api and self.site().versionnumber() >= 12:
                 #API Mode
                 params = {
@@ -2626,7 +2632,7 @@
                     else:
                         output(u'Deletion of %s failed for an unknown reason. The response text is:' % self.aslink(forceInterwiki = True))
                         output('%s' % datas)
-                    
+
                     return False
             else:
                 #Ordinary mode from webpage.
@@ -2792,7 +2798,7 @@
         output(u'Page %s undeleted' % self.aslink())
         return result
 
-    def protect(self, editcreate = 'sysop', move = 'sysop', unprotect = False, reason = None, editcreate_duration = 'infinite', 
+    def protect(self, editcreate = 'sysop', move = 'sysop', unprotect = False, reason = None, editcreate_duration = 'infinite',
                 move_duration = 'infinite', cascading = False, prompt = True, throttle = True):
         """(Un)protect a wiki title. Requires administrator status.
 
@@ -2815,7 +2821,7 @@
         #if self.exists() and editcreate != move: # check protect level if edit/move not same
         #    if editcreate == 'sysop' and move != 'sysop':
         #        raise Error("The level configuration is not safe")
-        
+
         if unprotect:
             address = self.site().unprotect_address(self.urlname())
             # unprotect_address is actually an alias for protect_address...
@@ -2865,9 +2871,9 @@
             predata = {}
             if self.site().versionnumber >= 10:
                 predata['mwProtect-cascade'] = cascading
-            
+
             predata['mwProtect-reason'] = reason
-            
+
             if not self.exists(): #and self.site().versionnumber() >= :
                 #create protect
                 predata['mwProtect-level-create'] = editcreate
@@ -2876,14 +2882,14 @@
                 #edit/move Protect
                 predata['mwProtect-level-edit'] = editcreate
                 predata['mwProtect-level-move'] = move
-                
+
                 if self.site().versionnumber() >= 14:
                     predata['wpProtectExpirySelection-edit'] = editcreate_duration
                     predata['wpProtectExpirySelection-move'] = move_duration
                 else:
                     predata['mwProtect-expiry'] = editcreate_duration
-                    
-            
+
+
             if token:
                 predata['wpEditToken'] = token
             if self.site().hostname() in config.authenticate.keys():
@@ -3157,7 +3163,7 @@
             return [nick, timestamp]
         except KeyError:
             raise NoPage(u'API Error, nothing found in the APIs')
-        
+
     def getHash(self):
         """ Function that return the Hash of an file in oder to understand if two
             Files are the same or not.
@@ -5169,7 +5175,7 @@
             # Get username.
             # The data in anonymous mode had key 'anon'
             # if 'anon' exist, username is IP address, not to collect it right now
-            if not text.has_key('anon'): 
+            if not text.has_key('anon'):
                 self._isLoggedIn[index] = True
                 self._userName[index] = text['name']
             else:
@@ -5233,7 +5239,7 @@
                     output(u'WARNING: Token not found on %s. You will not be able to edit any page.' % self)
         else:
             #ordinary mode to get data from edit page HTMLs and JavaScripts
-            
+
             if '<div id="globalWrapper">' not in text:
                 # Not a wiki page
                 return
@@ -5485,13 +5491,13 @@
 
         if verbose:
             output(u'Getting information for site %s' % self)
-        
+
         try:
             api_url = self.api_address()
             del api_url
         except NotImplementedError:
             config.use_api = False
-        
+
         # Get data
         # API Userinfo is available from version 1.11
         # preferencetoken available from 1.14
@@ -5504,7 +5510,7 @@
             }
             if self.versionnumber() >= 14:
                 params['uiprop'] += '|preferencestoken'
-            
+
             text = query.GetData(params, self, sysop=sysop)['query']['userinfo']
             ##output('%s' % text) # for debug use only
         else:
@@ -5586,7 +5592,7 @@
                     #'': '',
                 }
                 data = query.GetData(params, self)['query']['recentchanges']
-                
+
                 for np in data:
                     date = np['timestamp']
                     title = np['title']
@@ -6072,7 +6078,7 @@
         if namespace is None:
             page = Page(self, start)
             namespace = page.namespace()
-            start = page.titleWithoutNamespace()        
+            start = page.titleWithoutNamespace()
         try:
             api_url = self.api_address()
             del api_url
@@ -6095,16 +6101,16 @@
             params['apfilterredir'] = 'redirects'
 
         while True:
-            
+
             if throttle:
                 get_throttle()
             data = query.GetData(params, self)
-            
+
             #count = 0
             for p in data['query']['allpages']:
                 #count += 1
                 yield Page(self, p['title'])
-            
+
             if data.has_key('query-continue'):
                 params['apfrom'] = data['query-continue']['allpages']['apfrom']
             else:
@@ -6434,7 +6440,7 @@
         defaults = []
         for namespace in self.family.namespaces.itervalues():
             value = namespace.get('_default', None)
-            if value:    
+            if value:
                 if isinstance(value, list):
                     defaults += value
                 else:
@@ -6451,7 +6457,7 @@
 
         def replacenumbered(match):
             return self.namespace(int(match.group(1)))
-        
+
         return numbered.sub(replacenumbered, wikitext)
 
     # The following methods are for convenience, so that you can access
@@ -6898,7 +6904,7 @@
         """
         if self.versionnumber() < 12:
             return None
-        
+
         if hash_found is None: # If the hash is none return None and not continue
             return None
         # Now get all the images with the same hash
@@ -7227,7 +7233,7 @@
             xdict = xdict[default_family]
         else:
             xdict = xdict['wikipedia']
-        
+
         if type(xdict) != dict:
             return xdict
 





More information about the Pywikipedia-svn mailing list