Revision: 4348
Author: wikipedian
Date: 2007-09-24 08:26:13 +0000 (Mon, 24 Sep 2007)
Log Message:
-----------
Applied patch [ 1800470 ] yahoo page generator by John Vandenberg - zeroj
I didn't test this because I don't have a Yahoo AppID.
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-09-24 08:21:58 UTC (rev 4347)
+++ trunk/pywikipedia/pagegenerators.py 2007-09-24 08:26:13 UTC (rev 4348)
@@ -32,6 +32,10 @@
-filelinks Work on all pages that use a certain image/media file.
Argument can also be given as "-file:filename".
+-yahoo Work on all pages that are found in a Yahoo search.
+ Depends on python module pYsearch. See yahoo_appid in
+ config.py for instructions.
+
-google Work on all pages that are found in a Google search.
You need a Google Web API license key. Note that Google
doesn't give out license keys anymore. See google_key in
@@ -286,6 +290,35 @@
yield wikipedia.Page(site, pagenameofthelink)
offset += step
+class YahooSearchPageGenerator:
+    '''
+    Yields the pages of the local site that a Yahoo web search finds.
+    To use this generator, install pYsearch
+    '''
+    def __init__(self, query = None, count = 100): # values larger than 100 fail
+        self.query = query or wikipedia.input(u'Please enter the search query:')
+        self.count = count
+
+    def queryYahoo(self, query):
+        '''Yield the URL of every result Yahoo returns for query.'''
+        # Function-scope import: pYsearch is only needed once a search runs.
+        from yahoo.search.web import WebSearch
+        srch = WebSearch(config.yahoo_appid, query=query, results=self.count)
+        results = srch.parse_results(srch.get_results())
+        for res in results:
+            yield res.Url
+
+    def __iter__(self):
+        '''Yield a wikipedia.Page for each result URL below the site's base URL.'''
+        site = wikipedia.getSite()
+        # restrict query to local site
+        localQuery = '%s site:%s' % (self.query, site.hostname())
+        base = 'http://%s%s' % (site.hostname(), site.nice_get_address(''))
+        for url in self.queryYahoo(localQuery):
+            # Hits elsewhere on the host are dropped; the URL remainder is the title.
+            if url[:len(base)] == base:
+                yield wikipedia.Page(site, url[len(base):])
+
class GoogleSearchPageGenerator:
'''
To use this generator, you must install the pyGoogle module from
@@ -707,6 +740,12 @@
else:
googleQuery = arg[8:]
gen = GoogleSearchPageGenerator(googleQuery)
+ elif arg.startswith('-yahoo'):
+ if len(arg) == 7:
+ query = wikipedia.input(u'What do you want to search for?')
+ else:
+ query = arg[7:]
+ gen = YahooSearchPageGenerator(query)
else:
return None
# make sure all yielded pages are unique