[Pywikipedia-l] SVN: [6417] trunk/pywikipedia/wikipedia.py

nicdumz at svn.wikimedia.org nicdumz at svn.wikimedia.org
Mon Feb 23 03:32:20 UTC 2009


Revision: 6417
Author:   nicdumz
Date:     2009-02-23 03:32:20 +0000 (Mon, 23 Feb 2009)

Log Message:
-----------
Change the version-history regex to exclude "autocomment" spans from the captured edit comments. Suggested by [[it:Utente:Mauro742]]!

Modified Paths:
--------------
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-02-23 03:12:34 UTC (rev 6416)
+++ trunk/pywikipedia/wikipedia.py	2009-02-23 03:32:20 UTC (rev 6417)
@@ -2026,8 +2026,7 @@
         elif self.site().versionnumber() < 15:
             editR = re.compile('<li>\(.*?\)\s+\(.*\).*?<a href=".*?oldid=([0-9]*)" title=".*?">([^<]*)</a> (?:<span class=\'history-user\'>|)<a href=".*?" title=".*?">([^<]*?)</a>.*?(?:</span>|).*?(?:<span class=[\'"]comment[\'"]>(.*?)</span>)?</li>')
         else:
-            editR = re.compile(r'<li[^>]*>\((?:\w*|<a[^<]*</a>)\) \((?:\w*|<a[^<]*</a>)\) <[^>]*><[^>]*> <a href="[^>\'"]*oldid=(\d*)"[^>]*>([^<]*)</a> <span class=[\'"]history-user[\'"]><a [^>]*>([^<]*)</a>(?:[^<]|<(?!span class="comment">)(?!/li>))*(?:<span class="comment">\((.*)\)</span>)?.*</li>', re.UNICODE)
-
+            editR = re.compile(r'<li class=".*?">\((?:\w*|<a[^<]*</a>)\)\s\((?:\w*|<a[^<]*</a>)\).*?<a href=".*?([0-9]*)" title=".*?">([^<]*)</a> <span class=\'history-user\'><a [^>]*?>([^<]*?)</a>.*?</span></span>(?: <span class="minor">m</span>|)(?: <span class="history-size">|).*?</span>(?: <span class=[\'"]comment[\'"]>\((?:<span class="autocomment">|)(.*?)(?:</span>|)\)</span>)?(?: \(<span class="mw-history-undo">.*?</span>\)|) </li>', re.UNICODE)
         startFromPage = None
         thisHistoryDone = False
         skip = False # Used in determining whether we need to skip the first page





More information about the Pywikipedia-l mailing list