Revision: 7572
Author: alexsh
Date: 2009-10-29 21:02:26 +0000 (Thu, 29 Oct 2009)
Log Message:
-----------
Add simple handling of Google search via the Google AJAX Search API
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2009-10-29 20:48:49 UTC (rev 7571)
+++ trunk/pywikipedia/config.py 2009-10-29 21:02:26 UTC (rev 7572)
@@ -324,8 +324,16 @@
# you must install the pyGoogle module from http://pygoogle.sf.net/ and have a
# Google Web API license key. Note that Google doesn't give out license keys
# anymore.
+# --------------------
+# The Google Web API has long been obsolete; the Google AJAX Search API can be used instead.
+# You can sign up for an API key at http://code.google.com/apis/ajaxsearch/signup.html.
google_key = ''
+
+# The Google AJAX Search API requires a referer website; this variable stores the
+# referer web address that was given when you signed up for the key.
+google_api_refer = ''
+
# Some scripts allow using the Yahoo! Search Web Services. To use this feature,
# you must install the pYsearch module from http://pysearch.sourceforge.net/
# and get a Yahoo AppID from http://developer.yahoo.com
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-10-29 20:48:49 UTC (rev 7571)
+++ trunk/pywikipedia/pagegenerators.py 2009-10-29 21:02:26 UTC (rev 7572)
@@ -551,18 +551,43 @@
def queryGoogle(self, query):
#if config.google_key:
if True:
- #try:
+ try:
for url in self.queryViaSoapApi(query):
yield url
return
- #except ImportError:
- #pass
+ except ImportError:
+ for u in self.queryViaAPI(query):
+ yield u
+ return
# No google license key, or pygoogle not installed. Do it the ugly way.
#for url in self.queryViaWeb(query):
# yield url
+ def queryViaAPI(self, query):
+ import json
+ url =
u'http://ajax.googleapis.com/ajax/services/search/web?'
+ params = {
+ 'key': config.google_key,
+ 'v':'1.0',
+ 'q': query,
+ }
+ url += urllib.urlencode(params)
+
+ while True:
+ try:
+ wikipedia.output(u'Querying Google AJAX Search API...') #, offset
%i' % offset)
+ result = json.loads(self.site.getUrl(url, refer =
config.google_api_refer, no_hostname=True))
+ for res in result['responseData']['results']:
+ yield res['url']
+ except:
+ wikipedia.output(u"An error occured. Retrying in 10
seconds...")
+ time.sleep(10)
+ continue
+
+
def queryViaSoapApi(self, query):
import google
+
google.LICENSE_KEY = config.google_key
offset = 0
estimatedTotalResultsCount = None
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-10-29 20:48:49 UTC (rev 7571)
+++ trunk/pywikipedia/wikipedia.py 2009-10-29 21:02:26 UTC (rev 7572)
@@ -5448,8 +5448,8 @@
return response, data
- def getUrl(self, path, retry = None, sysop = False, data = None,
- compress = True, no_hostname = False, cookie_only=False,
back_response=False):
+ def getUrl(self, path, retry = None, sysop = False, data = None, compress = True,
+ no_hostname = False, cookie_only=False, refer=None, back_response=False):
"""
Low-level routine to get a URL from the wiki.
@@ -5486,6 +5486,8 @@
url = path # and other useful pages without using some other
functions.
else:
url = '%s://%s%s' % (self.protocol(), self.hostname(), path)
+ if refer:
+ uo.addheader('Refer', refer)
data = self.urlEncode(data)
# Try to retrieve the page until it was successfully loaded (just in