Revision: 4650 Author: filnik Date: 2007-12-09 12:09:34 +0000 (Sun, 09 Dec 2007)
Log Message: ----------- Adding a new generator, newImages()
Modified Paths: -------------- trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-12-09 11:49:32 UTC (rev 4649) +++ trunk/pywikipedia/wikipedia.py 2007-12-09 12:09:34 UTC (rev 4650) @@ -3468,6 +3468,7 @@ search(query): query results from Special:Search allpages(): Special:Allpages newpages(): Special:Newpages + newImages(): Special:Log&type=upload longpages(): Special:Longpages shortpages(): Special:Shortpages categories(): Special:Categories (yields Category objects) @@ -4214,6 +4215,37 @@ if not repeat: break
def newImages(self, limit = 50, repeat = False):
    """Yield a Page for each image listed in Special:Log&type=upload.

    The upload log is fetched as HTML and scraped with a regular
    expression, so the result depends on the log page's markup.

    Arguments:
      limit  - number of log entries requested per fetch (coerced to int).
      repeat - if False, fetch the log once and stop; if True, keep
               re-fetching the log and yield only images not seen before.

    Log entries whose link carries class="new" are reported as deleted
    and skipped. Every image name is yielded at most once per generator.
    """
    # Url of the new images
    url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % int(limit)
    # The namespace name is localized and may contain regex metacharacters,
    # so it is escaped. The dot before the extension is matched literally
    # and the extension may have any length: the previous pattern used an
    # unescaped '.' with a three-letter extension, which turned a name
    # like "Foo.tiff" into "Foo..iff".
    regexp = re.compile(
        r'(?P<new>class="new" |)title="%s:(?P<image>.*?)\.(?P<ext>\w+)">.*?</a>".*?<span class="comment">'
        % re.escape(self.image_namespace()),
        re.UNICODE)
    # A set gives O(1) membership tests (same choice as uncategorizedimages).
    seen = set()
    while True:
        # Fetch inside the loop so that repeat=True actually sees new uploads.
        # NOTE(review): no explicit throttling is done here -- confirm that
        # getUrl() rate-limits before relying on repeat=True.
        html = self.getUrl(url)
        for m in regexp.finditer(html):
            # Rebuild the full file name from its two captured halves.
            image = "%s.%s" % (m.group('image'), m.group('ext'))
            if m.group('new') != '':
                # class="new" marks a link to a page that does not exist.
                output(u"Skipping %s because it has been deleted." % image)
                seen.add(image)
                continue
            if image in seen:
                continue
            seen.add(image)
            yield Page(self, 'Image:%s' % image)
        if not repeat:
            output(u"\t\t>> All images checked. <<")
            break

# Trailing diff context, unchanged: the beginning of the next method.
# Its body continues beyond this excerpt.
def uncategorizedimages(self, number = 10, repeat = False):
    """Yield ImagePages from Special:Uncategorizedimages."""
    seen = set()
pywikipedia-l@lists.wikimedia.org