Revision: 4347
Author: wikipedian
Date: 2007-09-24 08:21:58 +0000 (Mon, 24 Sep 2007)
Log Message:
-----------
applied patch [ 1800492 ] uncategorised page generators by John
Vandenberg - zeroj
"The current page generators are not exposed as command line options, and
there isn't a page generator for [[Special:Uncategorizedimages]].
This patch provides both."
Modified Paths:
--------------
trunk/pywikipedia/family.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2007-09-24 08:19:23 UTC (rev 4346)
+++ trunk/pywikipedia/family.py 2007-09-24 08:21:58 UTC (rev 4347)
@@ -2617,6 +2617,9 @@
def uncategorizedcategories_address(self, code, limit=500):
return "%s?title=%s:Uncategorizedcategories&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+ def uncategorizedimages_address(self, code, limit=500):
+ return "%s?title=%s:Uncategorizedimages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+
def uncategorizedpages_address(self, code, limit=500):
return "%s?title=%s:Uncategorizedpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-09-24 08:19:23 UTC (rev 4346)
+++ trunk/pywikipedia/pagegenerators.py 2007-09-24 08:21:58 UTC (rev 4347)
@@ -19,6 +19,12 @@
-cat Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
+-uncat Work on all pages which are not categorised.
+
+-uncatcat Work on all categories which are not categorised.
+
+-uncatfiles Work on all files which are not categorised.
+
-file Read a list of pages to treat from the named text file.
Page titles in the file must be enclosed with [[brackets]].
Argument can also be given as "-file:filename".
@@ -166,6 +172,18 @@
if page.title() >= start:
yield page
+def UnCategorizedCategoryGenerator(number = 100, repeat = False, site = None):
+ if site is None:
+ site = wikipedia.getSite()
+ for page in site.uncategorizedcategories(number=number, repeat=repeat):
+ yield page
+
+def UnCategorizedImageGenerator(number = 100, repeat = False, site = None):
+ if site is None:
+ site = wikipedia.getSite()
+ for page in site.uncategorizedimages(number=number, repeat=repeat):
+ yield page
+
def UnCategorizedPageGenerator(number = 100, repeat = False, site = None):
if site is None:
site = wikipedia.getSite()
@@ -635,6 +653,12 @@
gen = TextfilePageGenerator(textfilename)
elif arg.startswith('-cat'):
gen = self.setCategoryGen(arg, 4)
+ elif arg.startswith('-uncatfiles'):
+ gen = UnCategorizedImageGenerator()
+ elif arg.startswith('-uncatcat'):
+ gen = UnCategorizedCategoryGenerator()
+ elif arg.startswith('-uncat'):
+ gen = UnCategorizedPageGenerator()
elif arg.startswith('-subcat'):
gen = self.setCategoryGen(arg, 7, recurse = True)
elif arg.startswith('-ref'):
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-09-24 08:19:23 UTC (rev 4346)
+++ trunk/pywikipedia/wikipedia.py 2007-09-24 08:21:58 UTC (rev 4347)
@@ -69,6 +69,7 @@
lonelypages(): Special:Lonelypages
uncategorizedcategories(): Special:Uncategorizedcategories
uncategorizedpages(): Special:Uncategorizedpages
+ uncategorizedimages(): Special:Uncategorizedimages
unusedcategories(): Special:Unusuedcategories
Other functions:
@@ -3795,6 +3796,26 @@
if not repeat:
break
+ def uncategorizedimages(self, number = 10, repeat = False):
+ throttle = True
+ seen = set()
+ ns = self.image_namespace()
+ entryR = re.compile('<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns)
+ while True:
+ path = self.uncategorizedimages_address(n=number)
+ get_throttle()
+ html = self.getUrl(path)
+ for m in entryR.finditer(html):
+ title = m.group('title')
+
+ if title not in seen:
+ seen.add(title)
+ page = Page(self, title)
+ yield page
+ if not repeat:
+ break
+
+
def uncategorizedpages(self, number = 10, repeat = False):
throttle = True
seen = set()
@@ -4166,6 +4187,9 @@
def uncategorizedcategories_address(self, n=500):
return self.family.uncategorizedcategories_address(self.lang, n)
+ def uncategorizedimages_address(self, n=500):
+ return self.family.uncategorizedimages_address(self.lang, n)
+
def uncategorizedpages_address(self, n=500):
return self.family.uncategorizedpages_address(self.lang, n)