Revision: 4348 Author: wikipedian Date: 2007-09-24 08:26:13 +0000 (Mon, 24 Sep 2007)
Log Message: ----------- Applied patch [ 1800470 ] yahoo page generator by John Vandenberg - zeroj. I didn't test this because I don't have a Yahoo AppID.
Modified Paths: -------------- trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2007-09-24 08:21:58 UTC (rev 4347) +++ trunk/pywikipedia/pagegenerators.py 2007-09-24 08:26:13 UTC (rev 4348) @@ -32,6 +32,10 @@ -filelinks Work on all pages that use a certain image/media file. Argument can also be given as "-file:filename".
+-yahoo Work on all pages that are found in a Yahoo search. + Depends on python module pYsearch. See yahoo_appid in + config.py for instructions. + -google Work on all pages that are found in a Google search. You need a Google Web API license key. Note that Google doesn't give out license keys anymore. See google_key in @@ -286,6 +290,35 @@ yield wikipedia.Page(site, pagenameofthelink) offset += step
+class YahooSearchPageGenerator: + ''' + To use this generator, install pYsearch + ''' + def __init__(self, query = None, count = 100): # values larger than 100 fail + self.query = query or wikipedia.input(u'Please enter the search query:') + self.count = count; + + def queryYahoo(self, query): + from yahoo.search.web import WebSearch + srch = WebSearch(config.yahoo_appid, query=query, results=self.count) + + dom = srch.get_results() + results = srch.parse_results(dom) + for res in results: + url = res.Url + yield url + + def __iter__(self): + site = wikipedia.getSite() + # restrict query to local site + localQuery = '%s site:%s' % (self.query, site.hostname()) + base = 'http://%s%s' % (site.hostname(), site.nice_get_address('')) + for url in self.queryYahoo(localQuery): + if url[:len(base)] == base: + title = url[len(base):] + page = wikipedia.Page(site, title) + yield page + class GoogleSearchPageGenerator: ''' To use this generator, you must install the pyGoogle module from @@ -707,6 +740,12 @@ else: googleQuery = arg[8:] gen = GoogleSearchPageGenerator(googleQuery) + elif arg.startswith('-yahoo'): + if len(arg) == 7: + query = wikipedia.input(u'What do you want to search for?') + else: + query = arg[7:] + gen = YahooSearchPageGenerator(query) else: return None # make sure all yielded pages are unique