[Pywikipedia-l] SVN: [6309] trunk/pywikipedia/wikipedia.py

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Wed Jan 28 19:41:32 UTC 2009


Revision: 6309
Author:   russblau
Date:     2009-01-28 19:41:32 +0000 (Wed, 28 Jan 2009)

Log Message:
-----------
Enable category redirect checking.

Usage (where c is a Page in the category namespace):

c.isCategoryRedirect(): returns True if the page contains a category redirect template listed in the family file; False otherwise

c.getCategoryRedirectTarget(): returns the redirect target category as a catlib.Category object; raises IsNotRedirectPage if c is not a category redirect

Note that isRedirect() and getRedirectTarget() are not affected; if you want a script to check for category redirects, you have to modify it to do so explicitly (this is to preserve backwards-compatibility).

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-01-28 18:28:30 UTC (rev 6308)
+++ trunk/pywikipedia/wikipedia.py	2009-01-28 19:41:32 UTC (rev 6309)
@@ -834,10 +834,6 @@
                 self._redirarg = redirtarget
             else:
                 raise IsRedirectPage(redirtarget)
-##        elif self.isCategoryRedirect(): # sets _redirarg
-##            if not get_redirect:
-##                self._getexception = IsRedirectPage
-##                raise IsRedirectPage, self._redirarg
         if self.section():
             # TODO: What the hell is this? Docu please.
             m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(self.section()), sectionencode(text,self.site().encoding()))
@@ -1086,26 +1082,33 @@
         """Return True if this is an image description page, False otherwise."""
         return self.namespace() == 6
 
-    def isCategoryRedirect(self):
-        """Return True if this is a category redirect page."""
+    def isCategoryRedirect(self, text=None):
+        """Return True if this is a category redirect page, False otherwise."""
 
         if not self.isCategory():
             return False
         if not hasattr(self, "_catredirect"):
-            if not hasattr( self.site(), "category_redirects"):
+            if not text:
+                text = self.get(get_redirect=True)
+            catredirs = self.site().category_redirects()
+            for (t, args) in self.templatesWithParams(thistxt=text):
+                template = Page(self.site(), t, defaultNamespace=10
+                                ).titleWithoutNamespace() # normalize title
+                if template in catredirs:
+                    # Get target (first template argument)
+                    self._catredirect = self.site().namespace(14) + ":" + args[0]
+                    break
+            else:
                 self._catredirect = False
-            else:
-                catredirs = self.site().category_redirects()
-                for (t, args) in self.templatesWithParams():
-                    if t in catredirs:
-                        self._catredirect = True
-                        # Get target (first template argument)
-                        self._redirarg = self.site().namespace(14) + ":" + args[0]
-                        break
-                else:
-                    self._catredirect = False
-        return self._catredirect
+        return bool(self._catredirect)
 
+    def getCategoryRedirectTarget(self):
+        """If this is a category redirect, return the target category title."""
+        if self.isCategoryRedirect():
+            import catlib
+            return catlib.Category(self.site(), self._catredirect)
+        raise IsNotRedirectPage
+
     def isDisambig(self):
         """Return True if this is a disambiguation page, False otherwise.
 
@@ -2986,6 +2989,8 @@
                     page2._revisionId = revisionId
                     page2._editTime = timestamp
                     section = page2.section()
+                    # Store the content
+                    page2._contents = text
                     m = self.site.redirectRegex().match(text)
                     if m:
                         ## output(u"%s is a redirect" % page2.aslink())
@@ -2994,6 +2999,7 @@
                             redirectto = redirectto+"#"+section
                         page2._getexception = IsRedirectPage
                         page2._redirarg = redirectto
+
                     # This is used for checking deletion conflict.
                     # Use the data loading time.
                     page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime())
@@ -3006,8 +3012,6 @@
                             except AttributeError:
                                 # There is no exception yet
                                 page2._getexception = SectionError
-                    # Store the content
-                    page2._contents = text
                 successful = True
                 # Note that there is no break here. The reason is that there
                 # might be duplicates in the pages list.
@@ -5244,7 +5248,7 @@
         if leend != None: params['leend'] = leend
         if leend != None: params['leuser'] = leuser
         if leend != None: params['letitle'] = letitle
-        
+
         data = query.GetData(params,
                         useAPI = True, encodeTitle = False)
         try:





More information about the Pywikipedia-l mailing list