Revision: 4090 Author: cosoleto Date: 2007-08-22 20:52:26 +0000 (Wed, 22 Aug 2007)
Log Message: ----------- Added support for Live Search
Modified Paths: -------------- trunk/pywikipedia/config.py trunk/pywikipedia/copyright.py
Modified: trunk/pywikipedia/config.py =================================================================== --- trunk/pywikipedia/config.py 2007-08-22 20:35:48 UTC (rev 4089) +++ trunk/pywikipedia/config.py 2007-08-22 20:52:26 UTC (rev 4090) @@ -288,13 +288,10 @@
############## SEARCH ENGINE SETTINGS ##############
-# Some scripts allow querying Google either via the Google Web API, or by -# just parsing the HTML from the Google website. -# To use the Google Web API, you must install the pyGoogle module from -# http://pygoogle.sf.net/ and have a Google Web API license key. Note that +# Some scripts allow querying Google via the Google Web API. To use this feature, you must +# install the pyGoogle module from http://pygoogle.sf.net/ and have a Google +# Web API license key. Note that # Google doesn't give out license keys anymore. -# If you don't enter a google license key in your user config file, the scripts -# will just parse the raw HTML code from the website. google_key = ''
# Some scripts allow using the Yahoo! Search Web Services. To use this feature, @@ -302,17 +299,23 @@ # and get a Yahoo AppID from http://developer.yahoo.com yahoo_appid = ''
+# To use Windows Live Search web service you must get an AppID from +# http://search.msn.com/developer +msn_appid = '' + ############## COPYRIGHT SETTINGS ##############
# Enable/disable search engine in copyright.py script copyright_google = True copyright_yahoo = True +copyright_msn = False
# Perform a deep check, loading URLs to search if 'Wikipedia' is present. # This may be useful to improve the number of correct results. If you don't have # a fast connection, you might want to keep them disabled. copyright_check_in_source_google = False copyright_check_in_source_yahoo = False +copyright_check_in_source_msn = False
# Limit the number of queries per page. copyright_max_query_for_page = 25
Modified: trunk/pywikipedia/copyright.py =================================================================== --- trunk/pywikipedia/copyright.py 2007-08-22 20:35:48 UTC (rev 4089) +++ trunk/pywikipedia/copyright.py 2007-08-22 20:52:26 UTC (rev 4090) @@ -596,7 +596,8 @@ def add_in_urllist(url, add_item, engine):
if (engine == 'google' and config.copyright_check_in_source_google) or \ - (engine == 'yahoo' and config.copyright_check_in_source_yahoo): + (engine == 'yahoo' and config.copyright_check_in_source_yahoo) or \ + (engine == 'msn' and config.copyright_check_in_source_msn): check_in_source = True else: check_in_source = False @@ -697,32 +698,40 @@ print "Got an error ->", err if search_request_retry: search_request_retry -= 1 - #if search_in_msn: - # ## max_query_len = 150? - # from __SOAPpy import WSDL - # print " msn query..." - # wsdl_url = 'http://soap.search.msn.com/webservices.asmx?wsdl' - # server = WSDL.Proxy(wsdl_url) - # params = {'AppID': config.msn_appid, 'Query': '-Wikipedia "' + query + '"', 'CultureInfo': 'en-US', 'SafeSearch': 'Off', 'Requests': { - # 'SourceRequest':{'Source': 'Web', 'Offset': 0, 'Count': 10, 'ResultFields': 'All',}}} - # - # search_request_retry = config.copyright_connection_tries - # results = '' - # while search_request_retry: - # try: - # server_results = server.Search(Request = params) - # search_request_retry = 0 - # if server_results.Responses[0].Results: - # results = server_results.Responses[0].Results[0] - # except Exception, err: - # print "Got an error ->", err - # search_request_retry -= 1 - # for entry in results: - # try: - # add_in_urllist(url, entry.Url, 'msn') - # except AttributeError: - # print "attrib ERROR" + if config.copyright_msn: + #max_query_len = 150? + from SOAPpy import WSDL + print " Live query..."
+ try: + server = WSDL.Proxy('http://soap.search.msn.com/webservices.asmx?wsdl') + except: + print "Live Search Error" + raise + params = {'AppID': config.msn_appid, 'Query': '-Wikipedia "' + query + '"', 'CultureInfo': 'en-US', 'SafeSearch': 'Off', 'Requests': { + 'SourceRequest':{'Source': 'Web', 'Offset': 0, 'Count': 10, 'ResultFields': 'All',}}} + + search_request_retry = config.copyright_connection_tries + results = '' + while search_request_retry: + try: + server_results = server.Search(Request = params) + search_request_retry = 0 + if server_results.Responses[0].Results: + results = server_results.Responses[0].Results[0] + except Exception, err: + print "Got an error ->", err + if search_request_retry: + search_request_retry -= 1 + + if results: + # list or instance? + if type(results) == type([]): + for entry in results: + add_in_urllist(url, entry.Url, 'msn') + else: + add_in_urllist(url, results.Url, 'msn') + offset = 0 for i in range(len(url)): if check_list(url[i + offset][0], excl_list, verbose = True):
pywikipedia-l@lists.wikimedia.org