Revision: 4266
Author: wikipedian
Date: 2007-09-13 10:17:57 +0000 (Thu, 13 Sep 2007)
Log Message:
-----------
Applied patch by John Vandenberg - zeroj
[ 1789089 ] UnusedFilesGenerator not working
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-09-13 09:34:39 UTC (rev 4265)
+++ trunk/pywikipedia/pagegenerators.py 2007-09-13 10:17:57 UTC (rev 4266)
@@ -125,10 +125,10 @@
for page in pageWithImages.imagelinks(followRedirects = False, loose = True):
yield page
-def UnusedFilesGenerator(number = 100, repeat = False, site = None):
+def UnusedFilesGenerator(number = 100, repeat = False, site = None, extension = None):
if site is None:
site = wikipedia.getSite()
- for page in site.unusedfiles(number=number, repeat=repeat):
+ for page in site.unusedfiles(number=number, repeat=repeat, extension=extension):
yield wikipedia.ImagePage(page.site(), page.title())
def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-09-13 09:34:39 UTC (rev 4265)
+++ trunk/pywikipedia/wikipedia.py 2007-09-13 10:17:57 UTC (rev 4266)
@@ -3797,21 +3797,30 @@
if not repeat:
break
- def unusedfiles(self, number = 10, repeat = False):
+ def unusedfiles(self, number = 10, repeat = False, extension = None):
throttle = True
seen = set()
+ ns = self.image_namespace()
+ entryR = re.compile('<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
while True:
path = self.unusedfiles_address(n=number)
get_throttle()
html = self.getUrl(path)
- entryR = re.compile('<li>\(<a href=".+?" title="(?P<title>.+?)">.+?</a>\) ')
for m in entryR.finditer(html):
+ fileext = None
title = m.group('title')
+ if extension:
+ fileext = title[len(title)-3:]
- if title not in seen:
- seen.add(title)
- page = ImagePage(self, title)
- yield page
+ if title not in seen and fileext == extension:
+ # Check whether the media is used in a Proofread page
+ basename = title[6:]
+ page = Page(self, 'Page:' + basename)
+
+ if not page.exists():
+ seen.add(title)
+ image = ImagePage(self, title)
+ yield image
if not repeat:
break