Alex S.H. Lin has submitted this change and it was merged.
Change subject: remove obsolete imports, remove queryViaWeb() method
......................................................................
remove obsolete imports, remove queryViaWeb() method
queryViaWeb() had already been commented out because it is probably not in compliance with Google's "Terms of Service"; this change deletes it entirely, along with the now-unused sys and urllib2 imports.
Change-Id: Ibb9c34e40158bad8e2b498fe7ca207541054e9da
---
M pagegenerators.py
1 file changed, 0 insertions(+), 39 deletions(-)
Approvals:
  Alex S.H. Lin: Verified; Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pagegenerators.py b/pagegenerators.py
index 5b0c70d..933526a 100644
--- a/pagegenerators.py
+++ b/pagegenerators.py
@@ -21,13 +21,11 @@ __version__ = '$Id$'
 import re
-import sys
 import codecs
 import date
 import datetime
 import time
 import urllib
-import urllib2
 import traceback
 import wikipedia as pywikibot
 import config
@@ -1001,43 +999,6 @@
             estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount
             #print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount
             offset += 10
-
-#############
-## commented out because it is probably not in compliance with Google's
-## "Terms of service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
-##
-##    def queryViaWeb(self, query):
-##        """
-##        Google has stopped giving out API license keys, and sooner or later
-##        they will probably shut down the service.
-##        This is a quick and ugly solution: we just grab the search results from
-##        the normal web interface.
-##        """
-##        linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE)
-##        offset = 0
-##
-##        while True:
-##            pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
-##            address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" \
-##                      % (urllib.quote_plus(query), offset)
-##            # we fake being Firefox because Google blocks unknown browsers
-##            request = urllib2.Request(
-##                address, None,
-##                {'User-Agent':
-##                 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 '
-##                 'SUSE/1.5-0.1 Firefox/1.5'})
-##            urlfile = urllib2.urlopen(request)
-##            page = urlfile.read()
-##            urlfile.close()
-##            for url in linkR.findall(page):
-##                yield url
-##
-##            # Is there a "Next" link for next page of results?
-##            if "<div id=nn>" in page:
-##                offset += 100  # Yes, go to next page of results.
-##            else:
-##                return
-#############

     def __iter__(self):
         # restrict query to local site
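
For anyone who relied on the removed scraper, a Terms-of-Service-compliant route is Google's Custom Search JSON API, which serves results as JSON from an authenticated endpoint instead of the HTML results page. The sketch below is illustrative only and is not part of this change: query_via_api is a hypothetical helper mirroring the shape of the removed generator, and api_key and cx are placeholders for credentials the caller must obtain from the Google developer console.

import json
import urllib
import urllib2

def query_via_api(query, api_key, cx):
    """Yield result URLs for query via Google's Custom Search JSON API.

    api_key and cx (the custom search engine id) are placeholders that
    the caller must supply.  The API returns at most 10 results per
    request and pages through further results via the start parameter.
    """
    start = 1
    while True:
        params = urllib.urlencode({'key': api_key, 'cx': cx,
                                   'q': query, 'start': start})
        url = 'https://www.googleapis.com/customsearch/v1?%s' % params
        data = json.load(urllib2.urlopen(url))
        items = data.get('items', [])
        for item in items:
            yield item['link']
        # fewer than a full page of items means no further pages
        if len(items) < 10:
            return
        start += 10

Because the API identifies the client by key rather than by User-Agent, the browser-spoofing workaround from the old code is unnecessary here.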