Revision: 4375
Author: btongminh
Date: 2007-09-28 20:20:59 +0000 (Fri, 28 Sep 2007)
Log Message:
-----------
Page.fullVersionHistory now uses Special:Export again; this change is basically a modified revert to r3659. The API is too unstable for revision fetching.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-09-28 18:51:41 UTC (rev 4374)
+++ trunk/pywikipedia/wikipedia.py 2007-09-28 20:20:59 UTC (rev 4375)
@@ -1694,64 +1694,33 @@
result += '|}\n'
return result
- def fullVersionHistory(self, max = 50, comment = False, since = None):
+ def fullVersionHistory(self):
"""
Returns all previous versions. Gives a list of tuples consisting of
edit date/time, user name and content
"""
- RV_LIMIT = 50
-
- address = self.site().api_address()
+ address = self.site().export_address()
predata = {
- 'action': 'query',
- 'prop': 'revisions',
- 'titles': self.title(),
- 'rvprop': 'timestamp|user|comment|content',
- 'rvlimit': str(RV_LIMIT),
- 'format': 'json'
+ 'action': 'submit',
+ 'pages': self.title()
}
- if max < RV_LIMIT: predata['rvlimit'] = str(max)
- if since: predata['rvend'] = since
-
get_throttle(requestsize = 10)
now = time.time()
-
- count = 0
+ if self.site().hostname() in config.authenticate.keys():
+ predata["Content-type"] = "application/x-www-form-urlencoded"
+ predata["User-agent"] = useragent
+ data = self.site.urlEncode(predata)
+ response = urllib2.urlopen(urllib2.Request('http://' + self.site.hostname() + address, data))
+ data = response.read()
+ else:
+ response, data = self.site().postForm(address, predata)
+ data = data.encode(self.site().encoding())
+ get_throttle.setDelay(time.time() - now)
output = []
+ r = re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\</(?:ip|username)\>.*?\<text.*?\>(.*?)\<\/text\>",re.DOTALL)
+ #r = re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\<",re.DOTALL)
+ return [(match.group(1), unescape(match.group(2)), unescape(match.group(3))) for match in r.finditer(data)]
- while count < max and max != -1:
- if self.site().hostname() in config.authenticate.keys():
- predata["Content-type"] = "application/x-www-form-urlencoded"
- predata["User-agent"] = useragent
- data = self.site.urlEncode(predata)
- response = urllib2.urlopen(urllib2.Request(self.site.protocol() + '://' + self.site.hostname() + address, data))
- data = response.read().decode(self.site().encoding())
- else:
- response, data = self.site().postForm(address, predata)
-
- get_throttle.setDelay(time.time() - now)
- data = simplejson.loads(data)
- page = data['query']['pages'].values()[0]
- if 'missing' in page:
- raise NoPage, 'Page %s not found' % self
- revisions = page.get('revisions', ())
- for revision in revisions:
- if not comment:
- output.append((revision['timestamp'],
- revision['user'], revision.get('*', u'')))
- else:
- output.append((revision['timestamp'], revision['user'],
- revision.get('*', u''), revision.get('comment', u'')))
- count += len(revisions)
- if max - count < RV_LIMIT:
- predata['rvlimit'] = str(max - count)
- if 'query-continue' in data:
- predata['rvstartid'] = str(data['query-continue']['revisions']['rvstartid'])
- else:
- break
- return output
- fullRevisionHistory = fullVersionHistory
-
def contributingUsers(self):
"""
Returns a set of all user names (including anonymous IPs) of those who