Revision: 6312 Author: purodha Date: 2009-01-29 22:47:54 +0000 (Thu, 29 Jan 2009)
Log Message: ----------- Add honoring -namespace parameters to -new processing. (Multiple -namespace parameters still have a glitch with page count) This solves the request at: https://sourceforge.net/tracker2/index.php?func=detail&aid=2531112&g...
Modified Paths: -------------- trunk/pywikipedia/family.py trunk/pywikipedia/interwiki.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/family.py =================================================================== --- trunk/pywikipedia/family.py 2009-01-29 20:08:52 UTC (rev 6311) +++ trunk/pywikipedia/family.py 2009-01-29 22:47:54 UTC (rev 6312) @@ -3465,8 +3465,8 @@ def log_address(self, code, limit=50, mode = ''): return "%s?useskin=monobook&title=Special:Log&type=%s&user=&page=&limit=%d" % (self.path(code), mode, limit)
- def newpages_address(self, code, limit=50): - return "%s?useskin=monobook&title=%s:Newpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit) + def newpages_address(self, code, limit=50, namespace=0): + return "%s?useskin=monobook&title=%s:Newpages&limit=%d&namespace=%s" % (self.path(code), self.special_namespace_url(code), limit, namespace)
def longpages_address(self, code, limit=500): return "%s?useskin=monobook&title=%s:Longpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
Modified: trunk/pywikipedia/interwiki.py =================================================================== --- trunk/pywikipedia/interwiki.py 2009-01-29 20:08:52 UTC (rev 6311) +++ trunk/pywikipedia/interwiki.py 2009-01-29 22:47:54 UTC (rev 6312) @@ -31,6 +31,10 @@
-new: Work on the 100 newest pages. If given as -new:x, will work on the x newest pages. + When multiple -namespace parameters are given, x pages are + inspected, and only the ones in the selected name spaces are + processed. Use -namespace:all for all namespaces. Without + -namespace, only article pages are processed.
This implies -noredirect.
@@ -1600,6 +1604,7 @@ hintlessPageGen = None optContinue = False optRestore = False + newPages = None # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. @@ -1694,7 +1699,6 @@ newPages = int(arg[5:]) else: newPages = 100 - hintlessPageGen = pagegenerators.NewpagesPageGenerator(newPages) elif arg.startswith('-skipfile:'): skipfile = arg[10:] skipPageGen = pagegenerators.TextfilePageGenerator(skipfile) @@ -1753,6 +1757,22 @@ except: wikipedia.output(u'Missing main page name')
+ if newPages != None: + if len(namespaces) == 0: + ns = 0 + if len(namespaces) == 1: + ns = namespaces[0] + if ns != 'all': + if isinstance(ns, unicode) or isinstance(ns, str): + index = site.getNamespaceIndex(ns) + if index is None: + raise ValueError(u'Unknown namespace: %s' % ns) + ns = index + namespaces = [] + else: + ns = 'all' + hintlessPageGen = pagegenerators.NewpagesPageGenerator(newPages, namespace=ns) + if optRestore or optContinue: site = wikipedia.getSite() dumpFileName = wikipedia.config.datafilepath(
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-01-29 20:08:52 UTC (rev 6311) +++ trunk/pywikipedia/pagegenerators.py 2009-01-29 22:47:54 UTC (rev 6312) @@ -249,10 +249,10 @@ for page in site.prefixindex(prefix = title, namespace = namespace, includeredirects = includeredirects): yield page
-def NewpagesPageGenerator(number = 100, get_redirect = False, repeat = False, site = None): +def NewpagesPageGenerator(number = 100, get_redirect = False, repeat = False, site = None, namespace = 0): if site is None: site = wikipedia.getSite() - for page in site.newpages(number=number, get_redirect=get_redirect, repeat=repeat): + for page in site.newpages(number=number, get_redirect=get_redirect, repeat=repeat, namespace=namespace): yield page[0]
def FileLinksGenerator(referredImagePage):
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-01-29 20:08:52 UTC (rev 6311) +++ trunk/pywikipedia/wikipedia.py 2009-01-29 22:47:54 UTC (rev 6312) @@ -5010,7 +5010,7 @@ yield page, match, relevance, '', '', ''
# TODO: avoid code duplication for the following methods - def newpages(self, number = 10, get_redirect = False, repeat = False): + def newpages(self, number = 10, get_redirect = False, repeat = False, namespace = 0): """Yield new articles (as Page objects) from Special:Newpages.
Starts with the newest article and fetches the number of articles @@ -5029,9 +5029,10 @@ # TODO: Repeat mechanism doesn't make much sense as implemented; # should use both offset and limit parameters, and have an # option to fetch older rather than newer pages + # TODO: extract and return edit comment. seen = set() while True: - path = self.newpages_address(n=number) + path = self.newpages_address(n=number, namespace=namespace) # The throttling is important here, so always enabled. get_throttle() html = self.getUrl(path) @@ -5856,9 +5857,9 @@ """Return path to Special:Log.""" return self.family.log_address(self.lang, n, mode)
- def newpages_address(self, n=50): + def newpages_address(self, n=50, namespace=0): """Return path to Special:Newpages.""" - return self.family.newpages_address(self.lang, n) + return self.family.newpages_address(self.lang, n, namespace)
def longpages_address(self, n=500): """Return path to Special:Longpages."""
pywikipedia-l@lists.wikimedia.org