Revision: 6417 Author: nicdumz Date: 2009-02-23 03:32:20 +0000 (Mon, 23 Feb 2009)
Log Message: ----------- Changing regex to exclude "autocomment" spans from comments. Suggested by [[it:Utente:Mauro742]]!
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-02-23 03:12:34 UTC (rev 6416) +++ trunk/pywikipedia/wikipedia.py 2009-02-23 03:32:20 UTC (rev 6417) @@ -2026,8 +2026,7 @@ elif self.site().versionnumber() < 15: editR = re.compile('<li>(.*?)\s+(.*).*?<a href=".*?oldid=([0-9]*)" title=".*?">([^<]*)</a> (?:<span class='history-user'>|)<a href=".*?" title=".*?">([^<]*?)</a>.*?(?:</span>|).*?(?:<span class=['"]comment['"]>(.*?)</span>)?</li>') else: - editR = re.compile(r'<li[^>]*>((?:\w*|<a[^<]*</a>)) ((?:\w*|<a[^<]*</a>)) <[^>]*><[^>]*> <a href="[^>'"]*oldid=(\d*)"[^>]*>([^<]*)</a> <span class=['"]history-user['"]><a [^>]*>([^<]*)</a>(?:[^<]|<(?!span class="comment">)(?!/li>))*(?:<span class="comment">((.*))</span>)?.*</li>', re.UNICODE) - + editR = re.compile(r'<li class=".*?">((?:\w*|<a[^<]*</a>))\s((?:\w*|<a[^<]*</a>)).*?<a href=".*?([0-9]*)" title=".*?">([^<]*)</a> <span class='history-user'><a [^>]*?>([^<]*?)</a>.*?</span></span>(?: <span class="minor">m</span>|)(?: <span class="history-size">|).*?</span>(?: <span class=['"]comment['"]>((?:<span class="autocomment">|)(.*?)(?:</span>|))</span>)?(?: (<span class="mw-history-undo">.*?</span>)|) </li>', re.UNICODE) startFromPage = None thisHistoryDone = False skip = False # Used in determining whether we need to skip the first page
pywikipedia-l@lists.wikimedia.org