[Pywikipedia-l] SVN: [4656] trunk/pywikipedia/catlib.py
rotem at svn.wikimedia.org
rotem at svn.wikimedia.org
Sun Dec 9 13:28:23 UTC 2007
Revision: 4656
Author: rotem
Date: 2007-12-09 13:28:18 +0000 (Sun, 09 Dec 2007)
Log Message:
-----------
(patch 1843787) Fix the catlib _getContentsAndSupercats performance issue (the patch was modified to avoid possible caching problems)
Modified Paths:
--------------
trunk/pywikipedia/catlib.py
Modified: trunk/pywikipedia/catlib.py
===================================================================
--- trunk/pywikipedia/catlib.py 2007-12-09 13:25:52 UTC (rev 4655)
+++ trunk/pywikipedia/catlib.py 2007-12-09 13:28:18 UTC (rev 4656)
@@ -83,7 +83,7 @@
return '[[%s]]' % titleWithSortKey
def _getContentsAndSupercats(self, recurse=False, purge=False,
- startFrom=None):
+ startFrom=None, cache=[]):
"""
Cache results of _parseCategory for a second call.
@@ -107,36 +107,46 @@
newrecurse = recurse
if self.completelyCached:
for article in self.articleCache:
- yield ARTICLE, article
+ if article not in cache:
+ cache.append(article)
+ yield ARTICLE, article
for subcat in self.subcatCache:
- yield SUBCATEGORY, subcat
- if recurse:
- # contents of subcategory are cached by calling
- # this method recursively; therefore, do not cache
- # them again
- for item in subcat._getContentsAndSupercats(newrecurse,
+ if subcat not in cache:
+ cache.append(subcat)
+ yield SUBCATEGORY, subcat
+ if recurse:
+ # contents of subcategory are cached by calling
+ # this method recursively; therefore, do not cache
+ # them again
+ for item in subcat._getContentsAndSupercats(newrecurse,
purge):
- if item[0] != SUPERCATEGORY:
- yield item
+ if item[0] != SUPERCATEGORY:
+ yield item
for supercat in self.supercatCache:
yield SUPERCATEGORY, supercat
else:
for tag, page in self._parseCategory(purge, startFrom):
if tag == ARTICLE:
self.articleCache.append(page)
+ if not page in cache:
+ cache.append(page)
+ yield ARTICLE, page
elif tag == SUBCATEGORY:
self.subcatCache.append(page)
- if recurse:
- # contents of subcategory are cached by calling
- # this method recursively; therefore, do not cache
- # them again
- for item in page._getContentsAndSupercats(newrecurse,
+ if not page in cache:
+ cache.append(page)
+ yield SUBCATEGORY, page
+ if recurse:
+ # contents of subcategory are cached by calling
+ # this method recursively; therefore, do not cache
+ # them again
+ for item in page._getContentsAndSupercats(newrecurse,
purge):
- if item[0] != SUPERCATEGORY:
- yield item
+ if item[0] != SUPERCATEGORY:
+ yield item
elif tag == SUPERCATEGORY:
self.supercatCache.append(page)
- yield tag, page
+ yield SUPERCATEGORY, page
if not startFrom:
self.completelyCached = True
More information about the Pywikipedia-l mailing list