Revision: 6569
Author: a_engels
Date: 2009-04-03 16:17:41 +0000 (Fri, 03 Apr 2009)

Log Message:
-----------
Download more pages at a time

Modified Paths:
--------------
trunk/pywikipedia/disambredir.py

Modified: trunk/pywikipedia/disambredir.py
===================================================================
--- trunk/pywikipedia/disambredir.py 2009-04-01 11:18:59 UTC (rev 6568)
+++ trunk/pywikipedia/disambredir.py 2009-04-03 16:17:41 UTC (rev 6569)
@@ -5,7 +5,7 @@
 each link that goes to a redirect page whether it should be replaced.
 """
 #
-# (C) André Engels, 2006
+# (C) André Engels and others, 2006-2009
 #
 # Distributed under the terms of the MIT license.
 #
@@ -108,16 +108,11 @@
             continue
     return text
 
-def workon(page):
-    try:
-        text = page.get()
-    except wikipedia.IsRedirectPage:
-        return
+def workon(page, links):
+    text = page.get()
     # Show the title of the page we're working on.
     # Highlight the title in purple.
     wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
-    links = page.linkedPages()
-    wikipedia.getall(mysite,links)
     for page2 in links:
         try:
             target = page2.getRedirectTarget()
@@ -150,8 +145,20 @@
     # only work on articles
     generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
     generator = pagegenerators.PreloadingGenerator(generator)
+    pagestodo = []
+    pagestoload = []
     for page in generator:
-        workon(page)
+        if page.isRedirectPage():
+            continue
+        linked = page.linkedPages()
+        pagestodo.append((page,linked))
+        pagestoload += linked
+        if len(pagestoload) > 49:
+            wikipedia.getall(mysite,pagestoload)
+            for page, links in pagestodo:
+                workon(page,links)
+            pagestoload = []
+            pagestodo = []
 
 finally:
     wikipedia.stopme()

pywikipedia-l@lists.wikimedia.org