Revision: 5122
Author: filnik
Date: 2008-03-11 17:26:37 +0000 (Tue, 11 Mar 2008)
Log Message:
-----------
Uhm, I've not completely fixed the latest regex. Fixing it.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-03-11 17:16:43 UTC (rev 5121)
+++ trunk/pywikipedia/wikipedia.py 2008-03-11 17:26:37 UTC (rev 5122)
@@ -633,7 +633,7 @@
except SectionError:
self._getexception = SectionError
raise
- return self._contents
+ return self._contents.replace('<div id="wikia-credits"><br
/><br /><small>From [
http://nonciclopedia.wikia.com Nonciclopedia], a
[
http://www.wikia.com Wikia] wiki.</small></div>', '')
def _getEditPage(self, get_redirect=False, throttle=True, sysop=False,
oldid=None, nofollow_redirects=False,
@@ -2442,9 +2442,8 @@
"""
result = []
history = re.search('(?s)<table
class="filehistory">.+?</table>', self.getImagePageHtml())
-
if history:
- lineR = re.compile('<tr><td>.*?</td><td><a
href=".+?">(?P<datetime>.+?)</a></td><td><a
href=".+?"(?: class="new"|)
title=".+?">(?P<username>.+?)</a>.*?</td><td>(?P<resolution>.*?)</td><td
class=".+?">(?P<filesize>.+?)</td><td>(?P<comment>.*?)</td></tr>')
+ lineR =
re.compile(r'<tr>(?:<td>.*?</td>){1,2}<td><a
href=".+?">(?P<datetime>.+?)</a></td><td><a
href=".+?"(?: class="new"|)
title=".+?">(?P<username>.+?)</a>.*?</td><td>(?P<resolution>.*?)</td><td
class=".+?">(?P<filesize>.+?)</td><td>(?P<comment>.*?)</td></tr>')
else:
# backward compatible code
history = re.search('(?s)<ul
class="special">.+?</ul>', self.getImagePageHtml())
@@ -4605,7 +4604,7 @@
"""Yield ImagePages from
Special:Log&type=upload"""
seen = set()
- regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a
href=.*?>(?P<user>.+?)</a>\s+\(.+?</a>\).*?<a
href=".*?"(?P<new> class="new")?
title="(?P<image>.+?)"\s*>(?:.*?<span
class="comment">(?P<comment>.*?)</span>)?', re.UNICODE)
+ regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a
href=.*?>(?P<user>.+?)</a>\s+\(.+?</a>\).*?<a
href=".*?"(?P<new> class="new")?
title=".*?"\s*>(?P<image>.+?)</a>(?:.*?<span
class="comment">(?P<comment>.*?)</span>)?', re.UNICODE)
while True:
path = self.log_address(number, mode = 'upload')