jenkins-bot has submitted this change and it was merged.
Change subject: [IMPROV] Replace openurl with http.fetch
......................................................................
[IMPROV] Replace openurl with http.fetch
This uses http.fetch in every place where urlopen was used to read a
complete response, i.e. not where it was used to download a file only
partially.
Bug: T68102
Change-Id: I1381b42dc273df6c49062962b17671737d2f7c77
---
M scripts/data_ingestion.py
M scripts/flickrripper.py
M scripts/imagerecat.py
3 files changed, 16 insertions(+), 18 deletions(-)
Approvals:
John Vandenberg: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/data_ingestion.py b/scripts/data_ingestion.py
index c30669e..8f72884 100755
--- a/scripts/data_ingestion.py
+++ b/scripts/data_ingestion.py
@@ -32,16 +32,15 @@
import pywikibot
from pywikibot import pagegenerators
+from pywikibot.comms.http import fetch
from pywikibot.tools import deprecated, deprecated_args
from scripts import upload
if sys.version_info[0] > 2:
from urllib.parse import urlparse
- from urllib.request import urlopen
else:
from urlparse import urlparse
- from urllib import urlopen
class Photo(pywikibot.FilePage):
@@ -85,7 +84,7 @@
TODO: Add exception handling
"""
if not self.contents:
- imageFile = urlopen(self.URL).read()
+ imageFile = fetch(self.URL).raw
self.contents = io.BytesIO(imageFile)
return self.contents
diff --git a/scripts/flickrripper.py b/scripts/flickrripper.py
index 38e97df..3f329e3 100755
--- a/scripts/flickrripper.py
+++ b/scripts/flickrripper.py
@@ -42,9 +42,8 @@
if sys.version_info[0] > 2:
from urllib.parse import urlencode
- from urllib.request import urlopen
else:
- from urllib import urlencode, urlopen
+ from urllib import urlencode
try:
import flickrapi # see:
http://stuvel.eu/projects/flickrapi
@@ -57,6 +56,8 @@
import pywikibot
from pywikibot import config, textlib
+from pywikibot.comms.http import fetch
+
from scripts import upload
try:
@@ -126,7 +127,7 @@
TODO: Add exception handling
"""
- imageFile = urlopen(photoUrl).read()
+ imageFile = fetch(photoUrl).raw
return io.BytesIO(imageFile)
@@ -162,10 +163,8 @@
"""
parameters = urlencode({'id': photo_id, 'raw': 'on'})
- rawDescription = urlopen(
- "http://wikipedia.ramselehof.de/flinfo.php?%s" % parameters).read()
-
- return rawDescription.decode('utf-8')
+ return fetch(
+ 'http://wikipedia.ramselehof.de/flinfo.php?%s' % parameters).content
def getFilename(photoInfo=None, site=None, project=u'Flickr'):
diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index d0e08d2..ae0c5c1 100755
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -44,12 +44,12 @@
import pywikibot
from pywikibot import pagegenerators, textlib
+from pywikibot.comms.http import fetch
if sys.version_info[0] > 2:
from urllib.parse import urlencode
- from urllib.request import urlopen
else:
- from urllib import urlencode, urlopen
+ from urllib import urlencode
category_blacklist = []
@@ -164,10 +164,10 @@
try:
if tries < maxtries:
tries += 1
- commonsHelperPage = urlopen(
+ commonsHelperPage = fetch(
"https://toolserver.org/~daniel/WikiSense/CommonSense.php?%s" % parameters)
matches = commonsenseRe.search(
- commonsHelperPage.read().decode('utf-8'))
+ commonsHelperPage.content)
gotInfo = True
else:
break
@@ -229,8 +229,8 @@
parameters = urlencode({'lat': latitude, 'lon': longitude,
'accept-language': 'en'})
while not gotInfo:
try:
- page = urlopen("https://nominatim.openstreetmap.org/reverse?format=xml&%s" % parameters)
- et = xml.etree.ElementTree.parse(page)
+ page = fetch('https://nominatim.openstreetmap.org/reverse?format=xml&%s' % parameters)
+ et = xml.etree.ElementTree.fromstring(page.content)
gotInfo = True
except IOError:
pywikibot.output(u'Got an IOError, let\'s try again')
@@ -382,10 +382,10 @@
'bot': '1'})
filterCategoriesRe = re.compile(r'\[\[Category:([^\]]*)\]\]')
try:
- filterCategoriesPage = urlopen(
+ filterCategoriesPage = fetch(
"https://toolserver.org/~multichill/filtercats.php?%s" %
parameters)
result = filterCategoriesRe.findall(
- filterCategoriesPage.read().decode('utf-8'))
+ filterCategoriesPage.content)
except IOError:
# Something is wrong, forget about this filter, and return the input
return categories
--
To view, visit
https://gerrit.wikimedia.org/r/208479
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I1381b42dc273df6c49062962b17671737d2f7c77
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: John Vandenberg <jayvdb(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: XZise <CommodoreFabianus(a)gmx.de>
Gerrit-Reviewer: jenkins-bot <>