[Pywikipedia-l] SVN: [4656] trunk/pywikipedia/catlib.py

rotem at svn.wikimedia.org rotem at svn.wikimedia.org
Sun Dec 9 13:28:23 UTC 2007


Revision: 4656
Author:   rotem
Date:     2007-12-09 13:28:18 +0000 (Sun, 09 Dec 2007)

Log Message:
-----------
(patch 1843787) catlib: fix the performance issue in _getContentsAndSupercats (the submitted patch was modified to avoid possible problems with caching)

Modified Paths:
--------------
    trunk/pywikipedia/catlib.py

Modified: trunk/pywikipedia/catlib.py
===================================================================
--- trunk/pywikipedia/catlib.py	2007-12-09 13:25:52 UTC (rev 4655)
+++ trunk/pywikipedia/catlib.py	2007-12-09 13:28:18 UTC (rev 4656)
@@ -83,7 +83,7 @@
             return '[[%s]]' % titleWithSortKey
 
     def _getContentsAndSupercats(self, recurse=False, purge=False,
-                                 startFrom=None):
+                                 startFrom=None, cache=[]):
         """
         Cache results of _parseCategory for a second call.
 
@@ -107,36 +107,46 @@
                 newrecurse = recurse
         if self.completelyCached:
             for article in self.articleCache:
-                yield ARTICLE, article
+                if article not in cache:
+                    cache.append(article)
+                    yield ARTICLE, article
             for subcat in self.subcatCache:
-                yield SUBCATEGORY, subcat
-                if recurse:
-                    # contents of subcategory are cached by calling
-                    # this method recursively; therefore, do not cache
-                    # them again
-                    for item in subcat._getContentsAndSupercats(newrecurse,
+                if subcat not in cache:
+                    cache.append(subcat)
+                    yield SUBCATEGORY, subcat
+                    if recurse:
+                        # contents of subcategory are cached by calling
+                        # this method recursively; therefore, do not cache
+                        # them again
+                        for item in subcat._getContentsAndSupercats(newrecurse,
                                                                 purge):
-                        if item[0] != SUPERCATEGORY:
-                            yield item
+                            if item[0] != SUPERCATEGORY:
+                                yield item
             for supercat in self.supercatCache:
                 yield SUPERCATEGORY, supercat
         else:
             for tag, page in self._parseCategory(purge, startFrom):
                 if tag == ARTICLE:
                     self.articleCache.append(page)
+                    if not page in cache:
+                        cache.append(page)
+                        yield ARTICLE, page
                 elif tag == SUBCATEGORY:
                     self.subcatCache.append(page)
-                    if recurse:
-                        # contents of subcategory are cached by calling
-                        # this method recursively; therefore, do not cache
-                        # them again
-                        for item in page._getContentsAndSupercats(newrecurse,
+                    if not page in cache:
+                        cache.append(page)
+                        yield SUBCATEGORY, page
+                        if recurse:
+                            # contents of subcategory are cached by calling
+                            # this method recursively; therefore, do not cache
+                            # them again
+                            for item in page._getContentsAndSupercats(newrecurse,
                                                                   purge):
-                            if item[0] != SUPERCATEGORY:
-                                yield item
+                                if item[0] != SUPERCATEGORY:
+                                    yield item
                 elif tag == SUPERCATEGORY:
                     self.supercatCache.append(page)
-                yield tag, page
+                    yield SUPERCATEGORY, page
             if not startFrom:
                 self.completelyCached = True
 





More information about the Pywikipedia-l mailing list