Revision: 4266
Author:   wikipedian
Date:     2007-09-13 10:17:57 +0000 (Thu, 13 Sep 2007)

Log Message:
-----------
applied patch by John Vandenberg - zeroj
[ 1789089 ] UnusedFilesGenerator not working

Modified Paths:
--------------
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-09-13 09:34:39 UTC (rev 4265)
+++ trunk/pywikipedia/pagegenerators.py 2007-09-13 10:17:57 UTC (rev 4266)
@@ -125,10 +125,10 @@
     for page in pageWithImages.imagelinks(followRedirects = False, loose = True):
         yield page
 
-def UnusedFilesGenerator(number = 100, repeat = False, site = None):
+def UnusedFilesGenerator(number = 100, repeat = False, site = None, extension = None):
     if site is None:
         site = wikipedia.getSite()
-    for page in site.unusedfiles(number=number, repeat=repeat):
+    for page in site.unusedfiles(number=number, repeat=repeat, extension=extension):
         yield wikipedia.ImagePage(page.site(), page.title())
 
 def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-09-13 09:34:39 UTC (rev 4265)
+++ trunk/pywikipedia/wikipedia.py 2007-09-13 10:17:57 UTC (rev 4266)
@@ -3797,21 +3797,30 @@
             if not repeat:
                 break
 
-    def unusedfiles(self, number = 10, repeat = False):
+    def unusedfiles(self, number = 10, repeat = False, extension = None):
         throttle = True
         seen = set()
+        ns = self.image_namespace()
+        entryR = re.compile('<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
         while True:
             path = self.unusedfiles_address(n=number)
             get_throttle()
             html = self.getUrl(path)
-            entryR = re.compile('<li>(<a href=".+?" title="(?P<title>.+?)">.+?</a>) ')
             for m in entryR.finditer(html):
+                fileext = None
                 title = m.group('title')
+                if extension:
+                    fileext = title[len(title)-3:]
 
-                if title not in seen:
-                    seen.add(title)
-                    page = ImagePage(self, title)
-                    yield page
+                if title not in seen and fileext == extension:
+                    # Check whether the media is used in a Proofread page
+                    basename = title[6:]
+                    page = Page(self, 'Page:' + basename)
+
+                    if not page.exists():
+                        seen.add(title)
+                        image = ImagePage(self, title)
+                        yield image
             if not repeat:
                 break
 