[Pywikipedia-l] SVN: [6271] trunk/pywikipedia/pagegenerators.py
russblau at svn.wikimedia.org
russblau at svn.wikimedia.org
Mon Jan 19 17:58:54 UTC 2009
Revision: 6271
Author: russblau
Date: 2009-01-19 17:58:54 +0000 (Mon, 19 Jan 2009)
Log Message:
-----------
Use defaultNamespace when parsing an argument that implies a particular namespace; break long lines and make other formatting changes
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-01-19 15:50:18 UTC (rev 6270)
+++ trunk/pywikipedia/pagegenerators.py 2009-01-19 17:58:54 UTC (rev 6271)
@@ -763,6 +763,8 @@
# preload remaining pages
for loaded_page in self.preload(somePages):
yield loaded_page
+ except GeneratorExit:
+ pass
except Exception, e:
traceback.print_exc()
wikipedia.output(unicode(e))
@@ -823,8 +825,10 @@
startfrom = categoryname[ind + 1:]
categoryname = categoryname[:ind]
- cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % categoryname)
- return CategorizedPageGenerator(cat, start = startfrom, recurse = recurse)
+ cat = catlib.Category(wikipedia.getSite(), categoryname)
+ # Category constructor automatically prepends localized namespace
+ # if not included in user's input
+ return CategorizedPageGenerator(cat, start=startfrom, recurse=recurse)
def setSubCategoriesGen(self, arg, length, recurse = False):
if len(arg) == length:
@@ -839,24 +843,27 @@
else:
startfrom = None
- cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % categoryname)
- return SubCategoriesPageGenerator(cat, start = startfrom, recurse = recurse)
+ cat = catlib.Category(wikipedia.getSite(), categoryname)
+ return SubCategoriesPageGenerator(cat, start=startfrom, recurse=recurse)
- """
- This function parses one argument at a time. If it is recognized as an
- argument that specifies a generator, a generator is created and added
- to the accumulation list, and the function returns true. Otherwise, it
- returns false, so that callee can try parsing the argument.
- Call getCombinedGenerator() after all arguments have been parsed to get
- the final output generator.
- """
def handleArg(self, arg):
+ """Parse one argument at a time.
+
+ If it is recognized as an argument that specifies a generator, a
+ generator is created and added to the accumulation list, and the
+ function returns true. Otherwise, it returns false, so that caller
+ can try parsing the argument. Call getCombinedGenerator() after all
+ arguments have been parsed to get the final output generator.
+
+ """
gen = None
if arg.startswith('-filelinks'):
fileLinksPageTitle = arg[11:]
if not fileLinksPageTitle:
- fileLinksPageTitle = wikipedia.input(u'Links to which image page should be processed?')
- if fileLinksPageTitle.startswith(wikipedia.getSite().namespace(6) + ":"):
+ fileLinksPageTitle = wikipedia.input(
+ u'Links to which image page should be processed?')
+ if fileLinksPageTitle.startswith(wikipedia.getSite().namespace(6)
+ + ":"):
fileLinksPage = wikipedia.ImagePage(wikipedia.getSite(),
fileLinksPageTitle)
else:
@@ -889,10 +896,11 @@
elif arg.startswith('-file'):
textfilename = arg[6:]
if not textfilename:
- textfilename = wikipedia.input(u'Please enter the local file name:')
+ textfilename = wikipedia.input(
+ u'Please enter the local file name:')
gen = TextfilePageGenerator(textfilename)
elif arg.startswith('-catr'):
- gen = self.getCategoryGen(arg, 5, recurse = True)
+ gen = self.getCategoryGen(arg, len('-catr'), recurse = True)
elif arg.startswith('-cat'):
gen = self.getCategoryGen(arg, len('-cat'))
elif arg.startswith('-category'):
@@ -906,7 +914,9 @@
gen = self.getCategoryGen(arg, 7, recurse = True)
elif arg.startswith('-page'):
if len(arg) == len('-page'):
- gen = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ gen = [wikipedia.Page(wikipedia.getSite(),
+ wikipedia.input(
+ u'What page do you want to use?'))]
else:
gen = [wikipedia.Page(wikipedia.getSite(), arg[len('-page:'):])]
elif arg.startswith('-uncatfiles'):
@@ -918,44 +928,58 @@
elif arg.startswith('-ref'):
referredPageTitle = arg[5:]
if not referredPageTitle:
- referredPageTitle = wikipedia.input(u'Links to which page should be processed?')
- referredPage = wikipedia.Page(wikipedia.getSite(), referredPageTitle)
+ referredPageTitle = wikipedia.input(
+ u'Links to which page should be processed?')
+ referredPage = wikipedia.Page(wikipedia.getSite(),
+ referredPageTitle)
gen = ReferringPageGenerator(referredPage)
elif arg.startswith('-links'):
linkingPageTitle = arg[7:]
if not linkingPageTitle:
- linkingPageTitle = wikipedia.input(u'Links from which page should be processed?')
+ linkingPageTitle = wikipedia.input(
+ u'Links from which page should be processed?')
linkingPage = wikipedia.Page(wikipedia.getSite(), linkingPageTitle)
gen = LinkedPageGenerator(linkingPage)
elif arg.startswith('-weblink'):
url = arg[9:]
if not url:
- url = wikipedia.input(u'Pages with which weblink should be processed?')
+ url = wikipedia.input(
+ u'Pages with which weblink should be processed?')
gen = LinksearchPageGenerator(url)
elif arg.startswith('-transcludes'):
transclusionPageTitle = arg[len('-transcludes:'):]
if not transclusionPageTitle:
- transclusionPageTitle = wikipedia.input(u'Pages that transclude which page should be processed?')
- transclusionPage = wikipedia.Page(wikipedia.getSite(), 'Template:%s' % transclusionPageTitle)
- gen = ReferringPageGenerator(transclusionPage, onlyTemplateInclusion = True)
+ transclusionPageTitle = wikipedia.input(
+ u'Pages that transclude which page should be processed?')
+ transclusionPage = wikipedia.Page(wikipedia.getSite(),
+ transclusionPageTitle,
+ defaultNamespace=10)
+ gen = ReferringPageGenerator(transclusionPage,
+ onlyTemplateInclusion=True)
elif arg.startswith('-start'):
if arg.startswith('-startxml'):
wikipedia.output(u'-startxml : wrong parameter')
sys.exit()
firstPageTitle = arg[7:]
if not firstPageTitle:
- firstPageTitle = wikipedia.input(u'At which page do you want to start?')
- namespace = wikipedia.Page(wikipedia.getSite(), firstPageTitle).namespace()
- firstPageTitle = wikipedia.Page(wikipedia.getSite(), firstPageTitle).titleWithoutNamespace()
- gen = AllpagesPageGenerator(firstPageTitle, namespace, includeredirects = False)
+ firstPageTitle = wikipedia.input(
+ u'At which page do you want to start?')
+ namespace = wikipedia.Page(wikipedia.getSite(),
+ firstPageTitle).namespace()
+ firstPageTitle = wikipedia.Page(wikipedia.getSite(),
+ firstPageTitle).titleWithoutNamespace()
+ gen = AllpagesPageGenerator(firstPageTitle, namespace,
+ includeredirects=False)
elif arg.startswith('-prefixindex'):
prefix = arg[13:]
namespace = None
if not prefix:
- prefix = wikipedia.input(u'What page names are you looking for?')
+ prefix = wikipedia.input(
+ u'What page names are you looking for?')
gen = PrefixingPageGenerator(prefix = prefix)
elif arg.startswith('-newimages'):
- limit = arg[11:] or wikipedia.input(u'How many images do you want to load?')
+ limit = arg[11:] or wikipedia.input(
+ u'How many images do you want to load?')
gen = NewimagesPageGenerator(number = int(limit))
elif arg.startswith('-new'):
if len(arg) >=5:
@@ -965,13 +989,16 @@
elif arg.startswith('-imagelinks'):
imagelinkstitle = arg[len('-imagelinks:'):]
if not imagelinkstitle:
- imagelinkstitle = wikipedia.input(u'Images on which page should be processed?')
- imagelinksPage = wikipedia.Page(wikipedia.getSite(), imagelinkstitle)
+ imagelinkstitle = wikipedia.input(
+ u'Images on which page should be processed?')
+ imagelinksPage = wikipedia.Page(wikipedia.getSite(),
+ imagelinkstitle)
gen = ImagesPageGenerator(imagelinksPage)
elif arg.startswith('-search'):
mediawikiQuery = arg[8:]
if not mediawikiQuery:
- mediawikiQuery = wikipedia.input(u'What do you want to search for?')
+ mediawikiQuery = wikipedia.input(
+ u'What do you want to search for?')
# In order to be useful, all namespaces are required
gen = SearchPageGenerator(mediawikiQuery, namespaces = [])
elif arg.startswith('-google'):
@@ -981,7 +1008,8 @@
regex = wikipedia.input(u'What page names are you looking for?')
else:
regex = arg[7:]
- gen = RegexFilterPageGenerator(wikipedia.getSite().allpages(), regex)
+ gen = RegexFilterPageGenerator(wikipedia.getSite().allpages(),
+ regex)
elif arg.startswith('-yahoo'):
gen = YahooSearchPageGenerator(arg[7:])
else:
More information about the Pywikipedia-l mailing list