[Pywikipedia-l] SVN: [6395] trunk/pywikipedia/wikipedia.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Sat Feb 21 02:59:41 UTC 2009


Revision: 6395
Author:   nicdumz
Date:     2009-02-21 02:59:41 +0000 (Sat, 21 Feb 2009)

Log Message:
-----------
More getVersionHistory regex fixes. Did I mention lately that I HATE screen-scraping?

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-02-21 02:06:58 UTC (rev 6394)
+++ trunk/pywikipedia/wikipedia.py	2009-02-21 02:59:41 UTC (rev 6395)
@@ -2026,7 +2026,7 @@
         elif self.site().versionnumber() < 15:
             editR = re.compile('<li>\(.*?\)\s+\(.*\).*?<a href=".*?oldid=([0-9]*)" title=".*?">([^<]*)</a> (?:<span class=\'history-user\'>|)<a href=".*?" title=".*?">([^<]*?)</a>.*?(?:</span>|).*?(?:<span class=[\'"]comment[\'"]>(.*?)</span>)?</li>')
         else:
-            editR = re.compile(r'<li[^>]*>\(cur|<a[^<]*</a>\) \(prev|<a[^<]*</a>\) <[^>]*><[^>]*> <a href="[^\'"]*oldid=(\d*)"[^>]*>([^<]*)</a> <span class=[\'"]history-user[\'"]><a [^>]*>([^<]*)</a>(?:[^<]|<(?!span class="comment">)(?!/li>))*(?:<span class="comment">\((.*)\)</span>)?.*</li>', re.UNICODE)
+            editR = re.compile(r'<li[^>]*>\((?:cur|<a[^<]*</a>)\) \((?:prev|<a[^<]*</a>)\) <[^>]*><[^>]*> <a href="[^>\'"]*oldid=(\d*)"[^>]*>([^<]*)</a> <span class=[\'"]history-user[\'"]><a [^>]*>([^<]*)</a>(?:[^<]|<(?!span class="comment">)(?!/li>))*(?:<span class="comment">\((.*)\)</span>)?.*</li>', re.UNICODE)
 
         startFromPage = None
         thisHistoryDone = False





More information about the Pywikipedia-l mailing list