Revision: 4379
Author: btongminh
Date: 2007-09-29 10:33:52 +0000 (Sat, 29 Sep 2007)

Log Message:
-----------
Reworked the history fetching method

Modified Paths:
--------------
trunk/pywikipedia/image_replacer.py
Modified: trunk/pywikipedia/image_replacer.py =================================================================== --- trunk/pywikipedia/image_replacer.py 2007-09-29 09:41:08 UTC (rev 4378) +++ trunk/pywikipedia/image_replacer.py 2007-09-29 10:33:52 UTC (rev 4379) @@ -10,7 +10,7 @@ # Distributed under the terms of the MIT license. # __version__ = '$Id$' -import config, wikipedia +import config, wikipedia, simplejson import re, time import threadpool import sys, os, signal, traceback @@ -74,8 +74,13 @@ else: since = None + if self.config.get('clean_list', False): + username = config.sysopnames[self.site.family.name][self.site.lang] + else: + username = None + try: - revisions = page.fullVersionHistory(max = 500, since = since) + revisions = self.get_history(page.title(), since, username) # Fetch the page any way, to prevent editconflicts old_text = text = page.get() except StandardError, e: @@ -84,18 +89,13 @@ output('%s: %s' % (e.__class__.__name__, str(e)), False) return time.sleep(self.config['timeout']) - if text.lower().find('{{stop}}') != -1: + if '{{stop}}' in text.lower(): output(u'Found {{stop}} on command page. 
Not replacing anything.') return time.sleep(self.config['timeout']) - revisions.sort(key = lambda rev: rev[0]) + revisions.sort(key = lambda rev: rev['timestamp']) replacements = self.template.finditer(text) - if self.config.get('clean_list', False): - username = config.sysopnames[self.site.family.name][self.site.lang] - else: - username = None - remove_from_list = [] for replacement in replacements: res = self.examine_revision_history( @@ -116,23 +116,38 @@ return except wikipedia.EditConflict: text = page.get() + + def get_history(self, title, since, username): + address = self.site.api_address() + predata = [ + ('action', 'query'), + ('prop', 'revisions'), + ('titles', title.encode('utf-8')), + ('rvprop', 'timestamp|user|comment|content'), + ('rvlimit', '50'), + ('format', 'json'), + ('rvend', since), + ('rvexcludeuser', username.encode('utf-8')) + ] + response, data = self.site.postForm(address, predata) + data = simplejson.loads(data) + if 'error' in data: + raise RuntimeError(data['error']) + + page = data['query']['pages'].values()[0] + if 'missing' in page: + raise Exception('Missing page!') + return page.get('revisions', ()) - def examine_revision_history(self, revisions, replacement, username): - #if replacement.group(0) in revisions[0][2]: - # return (db_timestamp(revisions[0][0]), - # strip_image(replacement.group(1)), - # strip_image(replacement.group(2)), - # '<Unknown>', replacement.group(3)) - - for timestamp, user, text in revisions: - if replacement.group(0) in text and user != username: + for revision in revisions: + if replacement.group(0) in revision['content']: db_time = db_timestamp(timestamp) if db_time < self.first_revision or not self.first_revision: self.first_revision = int(db_time) return (db_time, strip_image(replacement.group(1)), strip_image(replacement.group(2)), - user, replacement.group(3)) + revision['user'], replacement.group(3)) output('Warning! Could not find out who did %s' % \ repr(replacement.group(0)), False)
pywikipedia-l@lists.wikimedia.org