Am I correct that the new method uses regex to parse XML??? Somebody,
please tell me it's not true!!! We have JSON that works perfectly for
all sorts of parsing in Python. We have XML libraries. Regex is just
plain silly - we risk incorrect XML parsing behavior.
What is unstable about the API? Revisions query has been available for
half a year without changes.
--Yurik
On 9/28/07, btongminh(a)svn.wikimedia.org <btongminh(a)svn.wikimedia.org> wrote:
Revision: 4375
Author: btongminh
Date: 2007-09-28 20:20:59 +0000 (Fri, 28 Sep 2007)
Log Message:
-----------
Page.fullVersionHistory uses Special:Export again, which is basically a modified revert
to r3659. The API is too unstable for revision fetching.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-09-28 18:51:41 UTC (rev 4374)
+++ trunk/pywikipedia/wikipedia.py 2007-09-28 20:20:59 UTC (rev 4375)
@@ -1694,64 +1694,33 @@
result += '|}\n'
return result
- def fullVersionHistory(self, max = 50, comment = False, since = None):
+ def fullVersionHistory(self):
"""
Returns all previous versions. Gives a list of tuples consisting of
edit date/time, user name and content
"""
- RV_LIMIT = 50
-
- address = self.site().api_address()
+ address = self.site().export_address()
predata = {
- 'action': 'query',
- 'prop': 'revisions',
- 'titles': self.title(),
- 'rvprop': 'timestamp|user|comment|content',
- 'rvlimit': str(RV_LIMIT),
- 'format': 'json'
+ 'action': 'submit',
+ 'pages': self.title()
}
- if max < RV_LIMIT: predata['rvlimit'] = str(max)
- if since: predata['rvend'] = since
-
get_throttle(requestsize = 10)
now = time.time()
-
- count = 0
+ if self.site().hostname() in config.authenticate.keys():
+ predata["Content-type"] =
"application/x-www-form-urlencoded"
+ predata["User-agent"] = useragent
+ data = self.site.urlEncode(predata)
+ response = urllib2.urlopen(urllib2.Request('http://' +
self.site.hostname() + address, data))
+ data = response.read()
+ else:
+ response, data = self.site().postForm(address, predata)
+ data = data.encode(self.site().encoding())
+ get_throttle.setDelay(time.time() - now)
output = []
+ r =
re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\</(?:ip|username)\>.*?\<text.*?\>(.*?)\<\/text\>",re.DOTALL)
+ #r =
re.compile("\<revision\>.*?\<timestamp\>(.*?)\<\/timestamp\>.*?\<(?:ip|username)\>(.*?)\<",re.DOTALL)
+ return [(match.group(1), unescape(match.group(2)), unescape(match.group(3))) for
match in r.finditer(data)]
- while count < max and max != -1:
- if self.site().hostname() in config.authenticate.keys():
- predata["Content-type"] =
"application/x-www-form-urlencoded"
- predata["User-agent"] = useragent
- data = self.site.urlEncode(predata)
- response = urllib2.urlopen(urllib2.Request(self.site.protocol() +
'://' + self.site.hostname() + address, data))
- data = response.read().decode(self.site().encoding())
- else:
- response, data = self.site().postForm(address, predata)
-
- get_throttle.setDelay(time.time() - now)
- data = simplejson.loads(data)
- page = data['query']['pages'].values()[0]
- if 'missing' in page:
- raise NoPage, 'Page %s not found' % self
- revisions = page.get('revisions', ())
- for revision in revisions:
- if not comment:
- output.append((revision['timestamp'],
- revision['user'], revision.get('*',
u'')))
- else:
- output.append((revision['timestamp'],
revision['user'],
- revision.get('*', u''),
revision.get('comment', u'')))
- count += len(revisions)
- if max - count < RV_LIMIT:
- predata['rvlimit'] = str(max - count)
- if 'query-continue' in data:
- predata['rvstartid'] =
str(data['query-continue']['revisions']['rvstartid'])
- else:
- break
- return output
- fullRevisionHistory = fullVersionHistory
-
def contributingUsers(self):
"""
Returns a set of all user names (including anonymous IPs) of those who
_______________________________________________
Pywikipedia-l mailing list
Pywikipedia-l(a)lists.wikimedia.org
http://lists.wikimedia.org/mailman/listinfo/pywikipedia-l