Revision: 5196
Author: filnik
Date: 2008-04-08 17:40:08 +0000 (Tue, 08 Apr 2008)
Log Message:
-----------
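Another fix to the regex used to parse the upload log: the <div class="mw-log-entry"> alternative no longer accepts extra attributes, and the parentheses surrounding the log comment are now matched outside the "comment" group so they are not captured. (This revision also adds a debugging print of regexp.findall(html).)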
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-08 16:56:41 UTC (rev 5195)
+++ trunk/pywikipedia/wikipedia.py 2008-04-08 17:40:08 UTC (rev 5196)
@@ -4641,13 +4641,12 @@
"""Yield ImagePages from
Special:Log&type=upload"""
seen = set()
- regexp = re.compile(r'(?:<li[^>]*>|<div
class="mw-log-entry"[^>]*>)(?P<date>.+?)\s+<a
href=.*?>(?P<user>.+?)</a>\s+\(.+?</a>\).*?<a
href=".*?"(?P<new> class="new")?
title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span
class="comment">(?P<comment>.*?)</span>)?', re.UNICODE)
-
+ regexp = re.compile(r'(?:<li[^>]*>|<div
class="mw-log-entry">)(?P<date>.+?)\s+<a
href=.*?>(?P<user>.+?)</a>\s+\(.+?</a>\).*?<a
href=".*?"(?P<new> class="new")?
title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span
class="comment">\((?P<comment>.*?)\)</span>)?', re.UNICODE)
while True:
path = self.log_address(number, mode = 'upload')
get_throttle()
html = self.getUrl(path)
-
+ print regexp.findall(html)
for m in regexp.finditer(html):
image = m.group('image')
@@ -4661,7 +4660,6 @@
date = m.group('date')
user = m.group('user')
comment = m.group('comment') or ''
-
yield ImagePage(self, image), date, user, comment
if not repeat:
break
Show replies by date