Revision: 6000 Author: filnik Date: 2008-10-19 13:59:03 +0000 (Sun, 19 Oct 2008)
Log Message: ----------- Rewrite of the newimages() function, to get the data from the APIs
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2008-10-19 13:58:24 UTC (rev 5999) +++ trunk/pywikipedia/wikipedia.py 2008-10-19 13:59:03 UTC (rev 6000) @@ -5048,29 +5048,51 @@ if not repeat: break
def newimages(self, number = 100, lestart = None, leend = None, leuser = None, letitle = None, repeat = False):
    """
    Yield (ImagePage, timestamp, user, comment) tuples for upload log events.

    The data is read from the API with the call:
        action=query&list=logevents&letype=upload&lelimit=<number>

    Parameters (each optional one is sent to the API only when it is
    not None):
        number  - Sent as lelimit: how many event entries to request per
                  call. This method defaults to 100; the API's own help
                  states a default of 10 and a maximum of 500 (5000 for
                  bots).
        lestart - The timestamp to start enumerating from.
        leend   - The timestamp to end enumerating.
        leuser  - Filter entries to those made by the given user.
        letitle - Filter entries to those related to a page.
        repeat  - If False (default), query once and stop. If True, keep
                  re-querying forever and yield only log events that
                  have not been yielded before.

    comment is '' when the log entry carries no 'comment' key.
    """
    params = {
        'action'    :'query',
        'list'      :'logevents',
        'letype'    :'upload',
        'lelimit'   :int(number),
    }
    # Each optional filter is guarded by its OWN value. The previous code
    # tested `leend` for leuser and letitle as well, so those filters were
    # dropped (or sent as None) depending on an unrelated argument.
    optional = (
        ('lestart', lestart),
        ('leend', leend),
        ('leuser', leuser),
        ('letitle', letitle),
    )
    for key, value in optional:
        if value is not None:
            params[key] = value

    # Log ids already yielded; only matters when repeat is True, where the
    # same events come back on every poll.
    seen = set()
    while True:
        # Fetch inside the loop so that repeat=True actually polls for new
        # uploads instead of re-yielding one cached result forever.
        # NOTE(review): get_throttle() is what the earlier HTML-scraping
        # version of this method called before each fetch; confirm that
        # query.GetData does not already throttle on its own.
        get_throttle()
        data = query.GetData(params,
                             useAPI = True, encodeTitle = False)
        for imageData in data['query']['logevents']:
            logid = imageData['logid']
            if logid in seen:
                continue
            seen.add(logid)

            comment = imageData.get('comment', '')
            title = imageData['title']
            timestamp = imageData['timestamp']
            user = imageData['user']
            yield ImagePage(self, title), timestamp, user, comment
        if not repeat:
            break