[Pywikipedia-l] SVN: [5377] trunk/pywikipedia/redirect.py

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Wed May 14 20:44:31 UTC 2008


Revision: 5377
Author:   russblau
Date:     2008-05-14 20:44:31 +0000 (Wed, 14 May 2008)

Log Message:
-----------
Newer MediaWiki versions use a timestamp, rather than an entry count, for the Special:Log offset parameter.

Modified Paths:
--------------
    trunk/pywikipedia/redirect.py

Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py	2008-05-14 20:37:13 UTC (rev 5376)
+++ trunk/pywikipedia/redirect.py	2008-05-14 20:44:31 UTC (rev 5377)
@@ -263,15 +263,22 @@
 
     def get_moved_pages_redirects(self):
         '''generate redirects to recently-moved pages'''
-        offset = max(0, self.offset)
+        # runs until the log has no "older 500" link left, or the user interrupts it
+        import datetime
+
+        offsetpattern = re.compile(
+r"""\(<a href="/w/index\.php\?title=Special:Log&amp;offset=(\d+)&amp;limit=500&amp;type=move" title="Special:Log" rel="next">older 500</a>\)""")
+        start = datetime.datetime.utcnow() - datetime.timedelta(0, 3600)
+        # one hour ago
+        offset = start.strftime("%Y%m%d%H%M%S")
         site = wikipedia.getSite()
-        while offset <= 10000: # MW won't accept offset value > 10000
+        while True:
             move_url = \
-                site.path() + "?title=Special:Log&limit=500&offset=%i&type=move"\
+                site.path() + "?title=Special:Log&limit=500&offset=%s&type=move"\
                        % offset
             try:
                 move_list = site.getUrl(move_url)
-#                wikipedia.output(u"[%i]" % offset)
+#                wikipedia.output(u"[%s]" % offset)
             except:
                 import traceback
                 traceback.print_exc()
@@ -281,13 +288,16 @@
                 # to it need to be changed
                 try:
                     for page in wikipedia.Page(site, moved_page
-                                          ).getReferences(follow_redirects=True,
-                                                          redirectsOnly=True):
+                                ).getReferences(follow_redirects=True,
+                                                redirectsOnly=True):
                         yield page
                 except wikipedia.NoPage:
                     # original title must have been deleted after move
                     continue
-            offset += 500
+            m = offsetpattern.search(move_list)
+            if not m:
+                break
+            offset = m.group(1)
 
 
 class RedirectRobot:





More information about the Pywikipedia-l mailing list