Revision: 6513 Author: jayvdb Date: 2009-03-17 13:39:49 +0000 (Tue, 17 Mar 2009)
Log Message: ----------- Revising contribs parsing regex to prevent dropping rows
Modified Paths: -------------- trunk/pywikipedia/userlib.py
Modified: trunk/pywikipedia/userlib.py =================================================================== --- trunk/pywikipedia/userlib.py 2009-03-17 13:05:06 UTC (rev 6512) +++ trunk/pywikipedia/userlib.py 2009-03-17 13:39:49 UTC (rev 6513) @@ -92,12 +92,11 @@ older_str = older_str.replace('$1',str(step))
address = self.site.contribs_address(self.name,limit=step) + contribRX = re.compile('<li[^>]*>.*>diff</a>\) *(<span class="[^"]+">[A-Za-z]</span>)* *<a href="[^"]+" (class="[^"]+" )?title="[^"]+">(?P<title>[^<]+)</a>') while offset < limit: - wikipedia.output(u'Querying [[Special:Contributions/%s]]...' % self.name) data = self.site.getUrl(address) - contribRX = re.compile('<li[^>]*>.*?<a href=".*?" title="(?P<target>.*?)">(?P=target)</a>') for pg in contribRX.finditer(data): - yield wikipedia.Page(self.site,pg.group('target')) + yield wikipedia.Page(self.site,pg.group('title')) offset += 1 if offset == limit: break