Alex S.H. Lin has submitted this change and it was merged.
Change subject: remove obsolete imports, remove queryViaWeb() method
......................................................................
remove obsolete imports, remove queryViaWeb() method
queryViaWeb() was commented because it is probably not in
compliance with Google's "Terms of service"
Change-Id: Ibb9c34e40158bad8e2b498fe7ca207541054e9da
---
M pagegenerators.py
1 file changed, 0 insertions(+), 39 deletions(-)
Approvals:
Alex S.H. Lin: Verified; Looks good to me, approved
jenkins-bot: Verified
diff --git a/pagegenerators.py b/pagegenerators.py
index 5b0c70d..933526a 100644
--- a/pagegenerators.py
+++ b/pagegenerators.py
@@ -21,13 +21,11 @@
__version__ = '$Id$'
import re
-import sys
import codecs
import date
import datetime
import time
import urllib
-import urllib2
import traceback
import wikipedia as pywikibot
import config
@@ -1001,43 +999,6 @@
estimatedTotalResultsCount = data.meta.estimatedTotalResultsCount
#print 'estimatedTotalResultsCount: ', estimatedTotalResultsCount
offset += 10
-
-#############
-## commented out because it is probably not in compliance with Google's
-## "Terms of service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
-##
-## def queryViaWeb(self, query):
-## """
-## Google has stopped giving out API license keys, and sooner or later
-## they will probably shut down the service.
-## This is a quick and ugly solution: we just grab the search results from
-## the normal web interface.
-## """
-## linkR = re.compile(r'<a href="([^>"]+?)" class=l>', re.IGNORECASE)
-## offset = 0
-##
-## while True:
-## pywikibot.output("Google: Querying page %d" % (offset / 100 + 1))
-## address = "http://www.google.com/search?q=%s&num=100&hl=en&start=%d" \
-## % (urllib.quote_plus(query), offset)
-## # we fake being Firefox because Google blocks unknown browsers
-## request = urllib2.Request(
-## address, None,
-## {'User-Agent':
-## 'Mozilla/5.0 (X11; U; Linux i686; de; rv:1.8) Gecko/20051128 '
-## 'SUSE/1.5-0.1 Firefox/1.5'})
-## urlfile = urllib2.urlopen(request)
-## page = urlfile.read()
-## urlfile.close()
-## for url in linkR.findall(page):
-## yield url
-##
-## # Is there a "Next" link for next page of results?
-## if "<div id=nn>" in page:
-## offset += 100 # Yes, go to next page of results.
-## else:
-## return
-#############
def __iter__(self):
# restrict query to local site
--
To view, visit
https://gerrit.wikimedia.org/r/104333
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibb9c34e40158bad8e2b498fe7ca207541054e9da
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Alex S.H. Lin <alexsh(a)mail2000.com.tw>
Gerrit-Reviewer: Andre Engels <andreengels(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Purodha <publi(a)web.de>
Gerrit-Reviewer: Russell Blau <russblau(a)imapmail.org>
Gerrit-Reviewer: jenkins-bot