Revision: 4347 Author: wikipedian Date: 2007-09-24 08:21:58 +0000 (Mon, 24 Sep 2007)
Log Message: ----------- applied patch [ 1800492 ] uncategorised page generators by John Vandenberg - zeroj "The current page generators are not exposed as command line options, and there isn't a page generator for [[Special:Uncategorizedimages]].
This patch provides both."
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2007-09-24 08:19:23 UTC (rev 4346) +++ trunk/pywikipedia/family.py 2007-09-24 08:21:58 UTC (rev 4347) @@ -2617,6 +2617,9 @@ def uncategorizedcategories_address(self, code, limit=500): return "%s?title=%s:Uncategorizedcategories&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+ def uncategorizedimages_address(self, code, limit=500): + return "%s?title=%s:Uncategorizedimages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit) + def uncategorizedpages_address(self, code, limit=500): return "%s?title=%s:Uncategorizedpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2007-09-24 08:19:23 UTC (rev 4346) +++ trunk/pywikipedia/pagegenerators.py 2007-09-24 08:21:58 UTC (rev 4347) @@ -19,6 +19,12 @@ -cat Work on all pages which are in a specific category. Argument can also be given as "-cat:categoryname".
+-uncat Work on all pages which are not categorised. + +-uncatcat Work on all categories which are not categorised. + +-uncatfiles Work on all files which are not categorised. + -file Read a list of pages to treat from the named text file. Page titles in the file must be enclosed with [[brackets]]. Argument can also be given as "-file:filename". @@ -166,6 +172,18 @@ if page.title() >= start: yield page
+def UnCategorizedCategoryGenerator(number = 100, repeat = False, site = None): + if site is None: + site = wikipedia.getSite() + for page in site.uncategorizedcategories(number=number, repeat=repeat): + yield page + +def UnCategorizedImageGenerator(number = 100, repeat = False, site = None): + if site is None: + site = wikipedia.getSite() + for page in site.uncategorizedimages(number=number, repeat=repeat): + yield page + def UnCategorizedPageGenerator(number = 100, repeat = False, site = None): if site is None: site = wikipedia.getSite() @@ -635,6 +653,12 @@ gen = TextfilePageGenerator(textfilename) elif arg.startswith('-cat'): gen = self.setCategoryGen(arg, 4) + elif arg.startswith('-uncatfiles'): + gen = UnCategorizedImageGenerator() + elif arg.startswith('-uncatcat'): + gen = UnCategorizedCategoryGenerator() + elif arg.startswith('-uncat'): + gen = UnCategorizedPageGenerator() elif arg.startswith('-subcat'): gen = self.setCategoryGen(arg, 7, recurse = True) elif arg.startswith('-ref'):
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2007-09-24 08:19:23 UTC (rev 4346) +++ trunk/pywikipedia/wikipedia.py 2007-09-24 08:21:58 UTC (rev 4347) @@ -69,6 +69,7 @@ lonelypages(): Special:Lonelypages uncategorizedcategories(): Special:Uncategorizedcategories uncategorizedpages(): Special:Uncategorizedpages + uncategorizedimages(): Special:Uncategorizedimages unusedcategories(): Special:Unusuedcategories
Other functions: @@ -3795,6 +3796,26 @@ if not repeat: break
+ def uncategorizedimages(self, number = 10, repeat = False): + throttle = True + seen = set() + ns = self.image_namespace() + entryR = re.compile('<a href=".+?" title="(?P<title>%s:.+?)">.+?</a>' % ns) + while True: + path = self.uncategorizedimages_address(n=number) + get_throttle() + html = self.getUrl(path) + for m in entryR.finditer(html): + title = m.group('title') + + if title not in seen: + seen.add(title) + page = Page(self, title) + yield page + if not repeat: + break + + def uncategorizedpages(self, number = 10, repeat = False): throttle = True seen = set() @@ -4166,6 +4187,9 @@ def uncategorizedcategories_address(self, n=500): return self.family.uncategorizedcategories_address(self.lang, n)
+ def uncategorizedimages_address(self, n=500): + return self.family.uncategorizedimages_address(self.lang, n) + def uncategorizedpages_address(self, n=500): return self.family.uncategorizedpages_address(self.lang, n)