[Pywikipedia-l] SVN: [6569] trunk/pywikipedia/disambredir.py

a_engels at svn.wikimedia.org a_engels at svn.wikimedia.org
Fri Apr 3 16:17:42 UTC 2009


Revision: 6569
Author:   a_engels
Date:     2009-04-03 16:17:41 +0000 (Fri, 03 Apr 2009)

Log Message:
-----------
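Download more pages at a time: collect the linked pages of several articles and fetch them with a single getall() call once at least 50 have accumulated, instead of calling getall() once per article.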

Modified Paths:
--------------
    trunk/pywikipedia/disambredir.py

Modified: trunk/pywikipedia/disambredir.py
===================================================================
--- trunk/pywikipedia/disambredir.py	2009-04-01 11:18:59 UTC (rev 6568)
+++ trunk/pywikipedia/disambredir.py	2009-04-03 16:17:41 UTC (rev 6569)
@@ -5,7 +5,7 @@
 each link that goes to a redirect page whether it should be replaced.
 """
 #
-# (C) André Engels, 2006
+# (C) André Engels and others, 2006-2009
 #
 # Distributed under the terms of the MIT license.
 #
@@ -108,16 +108,11 @@
         continue
     return text
 
-def workon(page):
-    try:
-        text = page.get()
-    except wikipedia.IsRedirectPage:
-        return
+def workon(page, links):
+    text = page.get()
     # Show the title of the page we're working on.
     # Highlight the title in purple.
     wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
-    links = page.linkedPages()
-    wikipedia.getall(mysite,links)
     for page2 in links:
         try:
             target = page2.getRedirectTarget()
@@ -150,8 +145,20 @@
     # only work on articles
     generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
     generator = pagegenerators.PreloadingGenerator(generator)
+    pagestodo = []
+    pagestoload = []
     for page in generator:
-        workon(page)
+        if page.isRedirectPage():
+            continue
+        linked = page.linkedPages()
+        pagestodo.append((page,linked))
+        pagestoload += linked
+        if len(pagestoload) > 49:
+            wikipedia.getall(mysite,pagestoload)
+            for page, links in pagestodo:
+                workon(page,links)
+            pagestoload = []
+            pagestodo = []
 
 finally:
     wikipedia.stopme()





More information about the Pywikipedia-l mailing list