Revision: 6309 Author: russblau Date: 2009-01-28 19:41:32 +0000 (Wed, 28 Jan 2009)
Log Message: ----------- Enable category redirect checking.
Usage (where c is a Page in the category namespace):
c.isCategoryRedirect(): returns True if the page contains a category redirect template listed in the family file; False otherwise
c.getCategoryRedirectTarget(): returns the redirect target category as a catlib.Category object; raises IsNotRedirectPage if c is not a category redirect
Note that isRedirect() and getRedirectTarget() are not affected by this change; a script that needs to check for category redirects must be modified to do so explicitly, using the methods above (this preserves backward compatibility).
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-01-28 18:28:30 UTC (rev 6308) +++ trunk/pywikipedia/wikipedia.py 2009-01-28 19:41:32 UTC (rev 6309) @@ -834,10 +834,6 @@ self._redirarg = redirtarget else: raise IsRedirectPage(redirtarget) -## elif self.isCategoryRedirect(): # sets _redirarg -## if not get_redirect: -## self._getexception = IsRedirectPage -## raise IsRedirectPage, self._redirarg if self.section(): # TODO: What the hell is this? Docu please. m = re.search(".3D_*(.27.27+)?(.5B.5B)?_*%s_*(.5B.5B)?(.27.27+)?_*.3D" % re.escape(self.section()), sectionencode(text,self.site().encoding())) @@ -1086,26 +1082,33 @@ """Return True if this is an image description page, False otherwise.""" return self.namespace() == 6
- def isCategoryRedirect(self): - """Return True if this is a category redirect page.""" + def isCategoryRedirect(self, text=None): + """Return True if this is a category redirect page, False otherwise."""
if not self.isCategory(): return False if not hasattr(self, "_catredirect"): - if not hasattr( self.site(), "category_redirects"): + if not text: + text = self.get(get_redirect=True) + catredirs = self.site().category_redirects() + for (t, args) in self.templatesWithParams(thistxt=text): + template = Page(self.site(), t, defaultNamespace=10 + ).titleWithoutNamespace() # normalize title + if template in catredirs: + # Get target (first template argument) + self._catredirect = self.site().namespace(14) + ":" + args[0] + break + else: self._catredirect = False - else: - catredirs = self.site().category_redirects() - for (t, args) in self.templatesWithParams(): - if t in catredirs: - self._catredirect = True - # Get target (first template argument) - self._redirarg = self.site().namespace(14) + ":" + args[0] - break - else: - self._catredirect = False - return self._catredirect + return bool(self._catredirect)
+ def getCategoryRedirectTarget(self): + """If this is a category redirect, return the target category title.""" + if self.isCategoryRedirect(): + import catlib + return catlib.Category(self.site(), self._catredirect) + raise IsNotRedirectPage + def isDisambig(self): """Return True if this is a disambiguation page, False otherwise.
@@ -2986,6 +2989,8 @@ page2._revisionId = revisionId page2._editTime = timestamp section = page2.section() + # Store the content + page2._contents = text m = self.site.redirectRegex().match(text) if m: ## output(u"%s is a redirect" % page2.aslink()) @@ -2994,6 +2999,7 @@ redirectto = redirectto+"#"+section page2._getexception = IsRedirectPage page2._redirarg = redirectto + # This is used for checking deletion conflict. # Use the data loading time. page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime()) @@ -3006,8 +3012,6 @@ except AttributeError: # There is no exception yet page2._getexception = SectionError - # Store the content - page2._contents = text successful = True # Note that there is no break here. The reason is that there # might be duplicates in the pages list. @@ -5244,7 +5248,7 @@ if leend != None: params['leend'] = leend if leend != None: params['leuser'] = leuser if leend != None: params['letitle'] = letitle - + data = query.GetData(params, useAPI = True, encodeTitle = False) try: