Revision: 7572 Author: alexsh Date: 2009-10-29 21:02:26 +0000 (Thu, 29 Oct 2009)
Log Message: ----------- Simple handling of Google search via the AJAX Search API
Modified Paths: -------------- trunk/pywikipedia/config.py trunk/pywikipedia/pagegenerators.py trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/config.py =================================================================== --- trunk/pywikipedia/config.py 2009-10-29 20:48:49 UTC (rev 7571) +++ trunk/pywikipedia/config.py 2009-10-29 21:02:26 UTC (rev 7572) @@ -324,8 +324,16 @@ # you must install the pyGoogle module from http://pygoogle.sf.net/ and have a # Google Web API license key. Note that Google doesn't give out license keys # anymore. +# -------------------- +# The Google Web API has been obsolete for a long time; now we can use the Google AJAX Search API. +# You can sign up for an API key at http://code.google.com/apis/ajaxsearch/signup.html. google_key = ''
+ +# The Google AJAX Search API requires a referer website; this variable stores the referer web address +# that you gave when you signed up for the key. +google_api_refer = '' + # Some scripts allow using the Yahoo! Search Web Services. To use this feature, # you must install the pYsearch module from http://pysearch.sourceforge.net/ # and get a Yahoo AppID from http://developer.yahoo.com
Modified: trunk/pywikipedia/pagegenerators.py =================================================================== --- trunk/pywikipedia/pagegenerators.py 2009-10-29 20:48:49 UTC (rev 7571) +++ trunk/pywikipedia/pagegenerators.py 2009-10-29 21:02:26 UTC (rev 7572) @@ -551,18 +551,43 @@ def queryGoogle(self, query): #if config.google_key: if True: - #try: + try: for url in self.queryViaSoapApi(query): yield url return - #except ImportError: - #pass + except ImportError: + for u in self.queryViaAPI(query): + yield u + return # No google license key, or pygoogle not installed. Do it the ugly way. #for url in self.queryViaWeb(query): # yield url
+ def queryViaAPI(self, query): + import json + url = u'http://ajax.googleapis.com/ajax/services/search/web?' + params = { + 'key': config.google_key, + 'v':'1.0', + 'q': query, + } + url += urllib.urlencode(params) + + while True: + try: + wikipedia.output(u'Querying Google AJAX Search API...') #, offset %i' % offset) + result = json.loads(self.site.getUrl(url, refer = config.google_api_refer, no_hostname=True)) + for res in result['responseData']['results']: + yield res['url'] + except: + wikipedia.output(u"An error occured. Retrying in 10 seconds...") + time.sleep(10) + continue + + def queryViaSoapApi(self, query): import google + google.LICENSE_KEY = config.google_key offset = 0 estimatedTotalResultsCount = None
Modified: trunk/pywikipedia/wikipedia.py =================================================================== --- trunk/pywikipedia/wikipedia.py 2009-10-29 20:48:49 UTC (rev 7571) +++ trunk/pywikipedia/wikipedia.py 2009-10-29 21:02:26 UTC (rev 7572) @@ -5448,8 +5448,8 @@
return response, data
- def getUrl(self, path, retry = None, sysop = False, data = None, - compress = True, no_hostname = False, cookie_only=False, back_response=False): + def getUrl(self, path, retry = None, sysop = False, data = None, compress = True, + no_hostname = False, cookie_only=False, refer=None, back_response=False): """ Low-level routine to get a URL from the wiki.
@@ -5486,6 +5486,8 @@ url = path # and other useful pages without using some other functions. else: url = '%s://%s%s' % (self.protocol(), self.hostname(), path) + if refer: + uo.addheader('Refer', refer) data = self.urlEncode(data)
# Try to retrieve the page until it was successfully loaded (just in
pywikipedia-svn@lists.wikimedia.org