Revision: 4090
Author: cosoleto
Date: 2007-08-22 20:52:26 +0000 (Wed, 22 Aug 2007)
Log Message:
-----------
Added support for Live Search
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/copyright.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2007-08-22 20:35:48 UTC (rev 4089)
+++ trunk/pywikipedia/config.py 2007-08-22 20:52:26 UTC (rev 4090)
@@ -288,13 +288,10 @@
############## SEARCH ENGINE SETTINGS ##############
-# Some scripts allow querying Google either via the Google Web API, or by
-# just parsing the HTML from the Google website.
-# To use the Google Web API, you must install the pyGoogle module from
-# http://pygoogle.sf.net/ and have a Google Web API license key. Note that
+# Some scripts allow querying Google via the Google Web API. To use this feature, you must
+# install the pyGoogle module from http://pygoogle.sf.net/ and have a Google
+# Web API license key. Note that
# Google doesn't give out license keys anymore.
-# If you don't enter a google license key in your user config file, the scripts
-# will just parse the raw HTML code from the website.
google_key = ''
# Some scripts allow using the Yahoo! Search Web Services. To use this feature,
@@ -302,17 +299,23 @@
# and get a Yahoo AppID from http://developer.yahoo.com
yahoo_appid = ''
+# To use Windows Live Search web service you must get an AppID from
+# http://search.msn.com/developer
+msn_appid = ''
+
############## COPYRIGHT SETTINGS ##############
# Enable/disable search engine in copyright.py script
copyright_google = True
copyright_yahoo = True
+copyright_msn = False
# Perform a deep check, loading URLs to search if 'Wikipedia' is present.
# This may be useful to improve the number of correct results. If you don't have
# a fast connection, you might want to keep them disabled.
copyright_check_in_source_google = False
copyright_check_in_source_yahoo = False
+copyright_check_in_source_msn = False
# Limit the number of queries per page.
copyright_max_query_for_page = 25
Modified: trunk/pywikipedia/copyright.py
===================================================================
--- trunk/pywikipedia/copyright.py 2007-08-22 20:35:48 UTC (rev 4089)
+++ trunk/pywikipedia/copyright.py 2007-08-22 20:52:26 UTC (rev 4090)
@@ -596,7 +596,8 @@
def add_in_urllist(url, add_item, engine):
if (engine == 'google' and config.copyright_check_in_source_google) or \
- (engine == 'yahoo' and config.copyright_check_in_source_yahoo):
+ (engine == 'yahoo' and config.copyright_check_in_source_yahoo) or \
+ (engine == 'msn' and config.copyright_check_in_source_msn):
check_in_source = True
else:
check_in_source = False
@@ -697,32 +698,40 @@
print "Got an error ->", err
if search_request_retry:
search_request_retry -= 1
- #if search_in_msn:
- # ## max_query_len = 150?
- # from __SOAPpy import WSDL
- # print " msn query..."
- # wsdl_url = 'http://soap.search.msn.com/webservices.asmx?wsdl'
- # server = WSDL.Proxy(wsdl_url)
- # params = {'AppID': config.msn_appid, 'Query': '-Wikipedia
"' + query + '"', 'CultureInfo': 'en-US',
'SafeSearch': 'Off', 'Requests': {
- # 'SourceRequest':{'Source': 'Web',
'Offset': 0, 'Count': 10, 'ResultFields': 'All',}}}
- #
- # search_request_retry = config.copyright_connection_tries
- # results = ''
- # while search_request_retry:
- # try:
- # server_results = server.Search(Request = params)
- # search_request_retry = 0
- # if server_results.Responses[0].Results:
- # results = server_results.Responses[0].Results[0]
- # except Exception, err:
- # print "Got an error ->", err
- # search_request_retry -= 1
- # for entry in results:
- # try:
- # add_in_urllist(url, entry.Url, 'msn')
- # except AttributeError:
- # print "attrib ERROR"
+ if config.copyright_msn:
+ #max_query_len = 150?
+ from SOAPpy import WSDL
+ print " Live query..."
+ try:
+ server =
WSDL.Proxy('http://soap.search.msn.com/webservices.asmx?wsdl')
+ except:
+ print "Live Search Error"
+ raise
+ params = {'AppID': config.msn_appid, 'Query': '-Wikipedia
"' + query + '"', 'CultureInfo': 'en-US',
'SafeSearch': 'Off', 'Requests': {
+ 'SourceRequest':{'Source': 'Web',
'Offset': 0, 'Count': 10, 'ResultFields': 'All',}}}
+
+ search_request_retry = config.copyright_connection_tries
+ results = ''
+ while search_request_retry:
+ try:
+ server_results = server.Search(Request = params)
+ search_request_retry = 0
+ if server_results.Responses[0].Results:
+ results = server_results.Responses[0].Results[0]
+ except Exception, err:
+ print "Got an error ->", err
+ if search_request_retry:
+ search_request_retry -= 1
+
+ if results:
+ # list or instance?
+ if type(results) == type([]):
+ for entry in results:
+ add_in_urllist(url, entry.Url, 'msn')
+ else:
+ add_in_urllist(url, results.Url, 'msn')
+
offset = 0
for i in range(len(url)):
if check_list(url[i + offset][0], excl_list, verbose = True):