Revision: 4684 Author: cosoleto Date: 2007-12-10 07:27:15 +0000 (Mon, 10 Dec 2007)
Log Message: ----------- Fixed Site.newimages() by reusing my old code. It now also yields date, author and summary data (like the newpages function) and reports .JPEG and .djvu files.
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2007-12-10 03:07:37 UTC (rev 4683) +++ trunk/pywikipedia/family.py 2007-12-10 07:27:15 UTC (rev 4684) @@ -2767,6 +2767,9 @@ return '%s?title=%s:Allpages&from=%s&namespace=%s' % ( self.path(code), self.special_namespace_url(code), start, namespace)
+ def log_address(self, code, limit=50, mode = ''): + return "%s?title=Special:Log&type=%s&user=&page=&limit=%d" % (self.path(code), mode, limit) + def newpages_address(self, code, limit=50): return "%s?title=%s:Newpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2007-12-10 03:07:37 UTC (rev 4683) +++ trunk/pywikipedia/pagegenerators.py 2007-12-10 07:27:15 UTC (rev 4684) @@ -199,7 +199,7 @@ if site is None: site = wikipedia.getSite() for page in site.newimages(number, repeat=repeat): - yield page + yield page[0]
def UnCategorizedPageGenerator(number = 100, repeat = False, site = None): if site is None: @@ -772,14 +772,14 @@ if namespace: prefix = prefix[colon+1:] gen = PrefixingPageGenerator(prefix = prefix, namespace = namespace) + elif arg.startswith('-newimages'): + limit = arg[11:] or wikipedia.input(u'How many images do you want to load?') + gen = NewimagesPageGenerator(number = int(limit)) elif arg.startswith('-new'): if len(arg) >=5: gen = NewpagesPageGenerator(number = int(arg[5:])) else: gen = NewpagesPageGenerator(number = 60) - elif arg.startswith('-newimages'): - limit = arg[11:] or wikipedia.input(u'How many images do you want to check?') - gen = NewimagesPageGenerator(number = limit) elif arg.startswith('-search'): mediawikiQuery = arg[8:] if not mediawikiQuery:
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-12-10 03:07:37 UTC (rev 4683) +++ trunk/pywikipedia/wikipedia.py 2007-12-10 07:27:15 UTC (rev 4684) @@ -4019,7 +4019,7 @@ The objects yielded are tuples composed of the Page object, timestamp (unicode), length (int), an empty unicode string, username or IP address (str), comment (unicode). - + """ # TODO: in recent MW versions Special:Newpages takes a namespace parameter, # and defaults to 0 if not specified. @@ -4216,30 +4216,30 @@
def newimages(self, number = 10, repeat = False): """Yield ImagePages from Special:Log&type=upload""" - # Url of the new images - url = "/w/index.php?title=Special:Log&type=upload&user=&page=&pattern=&limit=%d&offset=0" % number - # Get the HTML text - html = self.getUrl(url) - image_namespace = self.image_namespace() - regexp = re.compile( - r'(?P<new>class="new" |)title="%s:(?P<image>.*?).(?P<ext>\w\w\w|jpeg)">.*?</a>".*?(?:<span class="comment">.*?|)</li>' % image_namespace, - re.UNICODE) + seen = set() + regexp = re.compile('<li[^>]*>(?P<date>.+?)\s+<a href=.*?>(?P<user>.+?)</a>\s+(.+?</a>).*?<a href=".*?"(?P<new> class="new")? title="(?P<image>.+?)"\s*>(?:.*?<span class="comment">(?P<comment>.*?)</span>)?')
while True: + path = self.log_address(number, mode = 'upload') + get_throttle() + html = self.getUrl(path) + for m in regexp.finditer(html): - new = m.group('new') - im = m.group('image') - ext = m.group('ext') - # This prevent pages with strange characters. They will be loaded without problem. - image = "%s.%s" % (im, ext) + image = m.group('image') + if image not in seen: seen.add(image) - if new != '': + + if m.group('new'): output(u"Image '%s' has been deleted." % image) continue - page = ImagePage(self, image) - yield page + + date = m.group('date') + user = m.group('user') + comment = m.group('comment') or '' + + yield ImagePage(self, image), date, user, comment if not repeat: break
@@ -4657,6 +4657,10 @@ """Return path to Special:Allpages.""" return self.family.allpages_address(self.lang, start=s, namespace = ns)
+ def log_address(self, n=50, mode = ''): + """Return path to Special:Log.""" + return self.family.log_address(self.lang, n, mode) + def newpages_address(self, n=50): """Return path to Special:Newpages.""" return self.family.newpages_address(self.lang, n)
pywikipedia-l@lists.wikimedia.org