Revision: 4749 Author: rotem Date: 2007-12-22 13:28:10 +0000 (Sat, 22 Dec 2007)
Log Message: ----------- Stop retrieving supercategories from the category page — not needed; retrieve them in the usual way for pages.
Modified Paths: -------------- trunk/pywikipedia/catlib.py
Modified: trunk/pywikipedia/catlib.py =================================================================== --- trunk/pywikipedia/catlib.py 2007-12-22 13:06:46 UTC (rev 4748) +++ trunk/pywikipedia/catlib.py 2007-12-22 13:28:10 UTC (rev 4749) @@ -39,7 +39,6 @@ # some constants that are used internally ARTICLE = 0 SUBCATEGORY = 1 -SUPERCATEGORY = 2
def isCatTitle(title, site): return ':' in title and title[:title.index(':')] in site.category_namespaces() @@ -64,7 +63,6 @@ self.completelyCached = False self.articleCache = [] self.subcatCache = [] - self.supercatCache = []
def aslink(self, forceInterwiki = False): """ @@ -83,8 +81,7 @@ else: return '[[%s]]' % titleWithSortKey
- def _getContentsAndSupercats(self, recurse=False, purge=False, - startFrom=None, cache=None): + def _getContents(self, recurse=False, purge=False, startFrom=None, cache=None): """ Cache results of _parseCategory for a second call.
@@ -120,12 +117,8 @@ # contents of subcategory are cached by calling # this method recursively; therefore, do not cache # them again - for item in subcat._getContentsAndSupercats(newrecurse, - purge, cache=cache): - if item[0] != SUPERCATEGORY: - yield item - for supercat in self.supercatCache: - yield SUPERCATEGORY, supercat + for item in subcat._getContents(newrecurse, purge, cache=cache): + yield item else: for tag, page in self._parseCategory(purge, startFrom): if tag == ARTICLE: @@ -142,28 +135,22 @@ # contents of subcategory are cached by calling # this method recursively; therefore, do not cache # them again - for item in page._getContentsAndSupercats( - newrecurse, purge, cache=cache): - if item[0] != SUPERCATEGORY: - yield item - elif tag == SUPERCATEGORY: - self.supercatCache.append(page) - yield SUPERCATEGORY, page + for item in page._getContents(newrecurse, purge, cache=cache): + yield item if not startFrom: self.completelyCached = True
def _parseCategory(self, purge=False, startFrom=None): """ - Yields all articles and subcategories that are in this category, - as well as its supercategories. + Yields all articles and subcategories that are in this category.
Set purge to True to instruct MediaWiki not to serve a cached version.
Set startFrom to a string which is the title of the page to start from.
Yielded results are tuples in the form (tag, page) where tag is one - of the constants ARTICLE, SUBCATEGORY and SUPERCATEGORY, and title is - the Page or Category object. + of the constants ARTICLE and SUBCATEGORY, and title is the Page or Category + object.
Note that results of this method need not be unique.
@@ -201,8 +188,6 @@ wikipedia.output('Getting [[%s]]...' % self.title()) wikipedia.get_throttle() txt = self.site().getUrl(path) - # save a copy of this text to find out self's supercategory. - self_txt = txt # index where subcategory listing begins try: ibegin = txt.index('<div id="mw-subcategories">') @@ -264,25 +249,6 @@ break else: break - - # get supercategories - try: - ibegin = self_txt.index('<div id="catlinks">') - iend = self_txt.index('<!-- end content -->') - except ValueError: - # no supercategories found - pass - else: - self_txt = self_txt[ibegin:iend] - if self.site().versionnumber() < 5: - # MediaWiki 1.4 has an unneeded space here - Rsupercat = re.compile('title ="([^"]*)"') - else: - Rsupercat = re.compile('title="([^"]*)"') - for title in Rsupercat.findall(self_txt): - # There might be a link to Special:Categories we don't want - if isCatTitle(title, self.site()): - yield SUPERCATEGORY, title
def subcategories(self, recurse=False): """ @@ -296,7 +262,7 @@
Results a sorted (as sorted by MediaWiki), but need not be unique. """ - for tag, subcat in self._getContentsAndSupercats(recurse): + for tag, subcat in self._getContents(recurse): if tag == SUBCATEGORY: yield subcat
@@ -325,7 +291,7 @@ Results are unsorted (except as sorted by MediaWiki), and need not be unique. """ - for tag, page in self._getContentsAndSupercats(recurse, startFrom=startFrom): + for tag, page in self._getContents(recurse, startFrom=startFrom): if tag == ARTICLE: yield page
@@ -351,9 +317,8 @@ Results are stored in the order in which they were entered, and need not be unique. """ - for tag, supercat in self._getContentsAndSupercats(): - if tag == SUPERCATEGORY: - yield supercat + for supercat in self.categories(): + yield supercat
def supercategoriesList(self): """ @@ -368,7 +333,7 @@
def isEmpty(self): # TODO: rename; naming conflict with Page.isEmpty - for tag, title in self._getContentsAndSupercats(purge = True): + for tag, title in self._getContents(purge = True): if tag in (ARTICLE, SUBCATEGORY): return False return True