[Pywikipedia-l] SVN: [6569] trunk/pywikipedia/disambredir.py
a_engels at svn.wikimedia.org
Fri Apr 3 16:17:42 UTC 2009
Revision: 6569
Author: a_engels
Date: 2009-04-03 16:17:41 +0000 (Fri, 03 Apr 2009)
Log Message:
-----------
Download more pages at a time
Modified Paths:
--------------
trunk/pywikipedia/disambredir.py
Modified: trunk/pywikipedia/disambredir.py
===================================================================
--- trunk/pywikipedia/disambredir.py 2009-04-01 11:18:59 UTC (rev 6568)
+++ trunk/pywikipedia/disambredir.py 2009-04-03 16:17:41 UTC (rev 6569)
@@ -5,7 +5,7 @@
each link that goes to a redirect page whether it should be replaced.
"""
#
-# (C) André Engels, 2006
+# (C) André Engels and others, 2006-2009
#
# Distributed under the terms of the MIT license.
#
@@ -108,16 +108,11 @@
continue
return text
-def workon(page):
- try:
- text = page.get()
- except wikipedia.IsRedirectPage:
- return
+def workon(page, links):
+ text = page.get()
# Show the title of the page we're working on.
# Highlight the title in purple.
wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
- links = page.linkedPages()
- wikipedia.getall(mysite,links)
for page2 in links:
try:
target = page2.getRedirectTarget()
@@ -150,8 +145,20 @@
# only work on articles
generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
generator = pagegenerators.PreloadingGenerator(generator)
+ pagestodo = []
+ pagestoload = []
for page in generator:
- workon(page)
+ if page.isRedirectPage():
+ continue
+ linked = page.linkedPages()
+ pagestodo.append((page,linked))
+ pagestoload += linked
+ if len(pagestoload) > 49:
+ wikipedia.getall(mysite,pagestoload)
+ for page, links in pagestodo:
+ workon(page,links)
+ pagestoload = []
+ pagestodo = []
finally:
wikipedia.stopme()
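
For reference, the idea behind the change is to stop calling wikipedia.getall() once per article inside workon() and instead collect linked pages until a batch of roughly 50 has accumulated, fetch them in one request, and only then process the queued articles. Below is a minimal, self-contained sketch of that preloading pattern; bulk_fetch(), process(), linked() and BATCH_SIZE are hypothetical stand-ins for wikipedia.getall(), workon(), page.linkedPages() and the "> 49" threshold, not pywikipedia API:

# Sketch of the batch-preloading pattern (assumed names, not pywikipedia API).
BATCH_SIZE = 50

def bulk_fetch(pages):
    # One round trip for the whole batch instead of one request per page.
    print("fetching %d pages in one request" % len(pages))

def process(page, links):
    # Stand-in for workon(page, links): the links are already downloaded.
    print("working on %s with %d preloaded links" % (page, len(links)))

def run(generator, linked):
    pagestodo = []    # (page, its linked pages) waiting to be processed
    pagestoload = []  # every linked page that still needs to be fetched
    for page in generator:
        links = linked(page)
        pagestodo.append((page, links))
        pagestoload += links
        if len(pagestoload) >= BATCH_SIZE:
            bulk_fetch(pagestoload)          # single bulk download
            for page, links in pagestodo:
                process(page, links)
            pagestodo, pagestoload = [], []
    if pagestodo:                            # flush whatever is left over
        bulk_fetch(pagestoload)
        for page, links in pagestodo:
            process(page, links)

if __name__ == "__main__":
    pages = ["A", "B", "C"]
    run(pages, linked=lambda p: ["%s-%d" % (p, i) for i in range(30)])

The trade-off is the same as with pagegenerators.PreloadingGenerator: a little extra memory for the queued pages in exchange for far fewer HTTP requests, since every linked page in the batch is retrieved in one call.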