Revision: 6344
Author: purodha
Date: 2009-02-12 16:23:44 +0000 (Thu, 12 Feb 2009)
Log Message:
-----------
Add -until:pagetitle command line parameter so as to complement -start:pagetitle
in interwiki.py, thereby solving tracker item 1911836, see:
https://sourceforge.net/tracker2/index.php?func=detail&aid=1911836&…
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2009-02-12 16:20:23 UTC (rev 6343)
+++ trunk/pywikipedia/interwiki.py 2009-02-12 16:23:44 UTC (rev 6344)
@@ -64,6 +64,12 @@
-number: used as -number:#, specifies that the robot should process
that amount of pages and then stop. This is only useful in
combination with -start. The default is not to stop.
+
+ -until: used as -until:title, specifies that the robot should process
+ pages in wiki default sort order up to, and including, "title"
+ and then stop. This is only useful in combination with -start.
+ The default is not to stop.
+ Note: do not specify a namespace, even if -start has one.
-bracket only work on pages that have (in the home language) parenthesis
in their title. All other pages are skipped.
@@ -1378,11 +1384,13 @@
# Keep correct counters
self.plus(site)
- def setPageGenerator(self, pageGenerator, number = None):
+# def setPageGenerator(self, pageGenerator, number = None):
+ def setPageGenerator(self, pageGenerator, number = None, until = None):
"""Add a generator of subjects. Once the list of subjects gets
too small, this generator is called to produce more Pages"""
self.pageGenerator = pageGenerator
self.generateNumber = number
+ self.generateUntil = until
def dump(self):
site = wikipedia.getSite()
@@ -1420,12 +1428,14 @@
continue
break
+ if len(self.generateUntil) > 0:
+ if page.titleWithoutNamespace() > self.generateUntil:
+ raise StopIteration
self.add(page, hints = hints)
self.generated += 1
if self.generateNumber:
- if self.generated == self.generateNumber:
- self.pageGenerator = None
- break
+ if self.generated >= self.generateNumber:
+ raise StopIteration
except StopIteration:
self.pageGenerator = None
break
@@ -1620,6 +1630,7 @@
# default to [] which means all namespaces will be processed
namespaces = []
number = None
+ until = None
warnfile = None
# a normal PageGenerator (which doesn't give hints, only Pages)
hintlessPageGen = None
@@ -1740,6 +1751,8 @@
# deprecated for consistency with other scripts
elif arg.startswith('-number:'):
number = int(arg[8:])
+ elif arg.startswith('-until:'):
+ until = arg[7:]
elif arg.startswith('-neverlink:'):
globalvar.neverlink += arg[11:].split(",")
elif arg.startswith('-ignore:'):
@@ -1826,7 +1839,7 @@
if len(namespaces) > 0:
hintlessPageGen = pagegenerators.NamespaceFilterPageGenerator(hintlessPageGen, namespaces)
# we'll use iter() to create make a next() function available.
- bot.setPageGenerator(iter(hintlessPageGen), number = number)
+ bot.setPageGenerator(iter(hintlessPageGen), number = number, until=until)
elif warnfile:
# TODO: filter namespaces if -namespace parameter was used
readWarnfile(warnfile, bot)