jenkins-bot submitted this change.

View Change


Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

[IMPR] Enable FilePage.download() to download thumbnails

- add *url_width*, *url_height* and *url_param* to FilePage.download
- allow filename to be a PathLike object or an iterable of path sections
- use the title as filename if only a user path specifier is given
- adjust path suffix if required
- remove the try clause for the OSError exception because it only
re-raised the exception without any further handling
- update documentation
- add tests

Bug: T247095
Change-Id: I21f2bb9a15681540044b18fcd71d54061bc12913
---
M pywikibot/page/_filepage.py
M pywikibot/site/_apisite.py
M tests/file_tests.py
3 files changed, 177 insertions(+), 58 deletions(-)

diff --git a/pywikibot/page/_filepage.py b/pywikibot/page/_filepage.py
index 1916925..79f59a0 100644
--- a/pywikibot/page/_filepage.py
+++ b/pywikibot/page/_filepage.py
@@ -10,10 +10,14 @@
#
# Distributed under the terms of the MIT license.
#
-import os.path
from http import HTTPStatus
+from os import PathLike
+from pathlib import Path
+from typing import Optional, Union
+from urllib.parse import urlparse

import pywikibot
+from pywikibot.backports import Iterable
from pywikibot.comms import http
from pywikibot.exceptions import NoPageError
from pywikibot.page._page import Page
@@ -107,25 +111,31 @@
self._imagePageHtml = http.request(self.site, path).text
return self._imagePageHtml

- def get_file_url(self, url_width=None, url_height=None,
- url_param=None) -> str:
- """
- Return the url or the thumburl of the file described on this page.
+ def get_file_url(self,
+ url_width: Optional[int] = None,
+ url_height: Optional[int] = None,
+ url_param: Optional[int] = None) -> str:
+ """Return the url or the thumburl of the file described on this page.

Fetch the information if not available.

- Once retrieved, thumburl information will also be accessible as
- latest_file_info attributes, named as in [1]:
- - url, thumburl, thumbwidth and thumbheight
+ Once retrieved, file information will also be accessible as
+ :attr:`latest_file_info` attributes, named as in :api:`Imageinfo`.
+ If *url_width*, *url_height* or *url_param* is given, additional
+ properties ``thumbwidth``, ``thumbheight``, ``thumburl`` and
+ ``responsiveUrls`` are provided.

- Parameters correspond to iiprops in:
- [1] :api:`Imageinfo`
+ .. note:: Parameters validation and error handling left to the
+ API call.
+ .. seealso::

- Parameters validation and error handling left to the API call.
+ * :meth:`APISite.loadimageinfo()
+ <pywikibot.site._apisite.APISite.loadimageinfo>`
+ * :api:`Imageinfo`

- :param url_width: see iiurlwidth in [1]
- :param url_height: see iiurlheigth in [1]
- :param url_param: see iiurlparam in [1]
+ :param url_width: get info for a thumbnail with given width
+ :param url_height: get info for a thumbnail with given height
+ :param url_param: get info for a thumbnail with given param
:return: latest file url or thumburl
"""
# Plain url is requested.
@@ -267,47 +277,95 @@
return self.site.upload(self, source_filename=filename, source_url=url,
**kwargs)

- def download(self, filename=None, chunk_size=100 * 1024, revision=None):
- """
- Download to filename file of FilePage.
+ def download(self,
+ filename: Union[None, str, PathLike, Iterable[str]] = None,
+ chunk_size: int = 100 * 1024,
+ revision: Optional['FileInfo'] = None, *,
+ url_width: Optional[int] = None,
+ url_height: Optional[int] = None,
+ url_param: Optional[int] = None) -> bool:
+ """Download to filename file of FilePage.

- :param filename: filename where to save file:
- None: self.title(as_filename=True, with_ns=False)
- will be used
- str: provided filename will be used.
- :type filename: None or str
+ **Usage examples:**
+
+ Download an image:
+
+ >>> site = pywikibot.Site('wikipedia:test')
+ >>> file = pywikibot.FilePage(site, 'Pywikibot MW gear icon.svg')
+ >>> file.download()
+ True
+
+ Pywikibot_MW_gear_icon.svg was downloaded.
+
+ Download a thumbnail:
+
+ >>> file.download(url_param='120px')
+ True
+
+ The suffix has changed and Pywikibot_MW_gear_icon.png was
+ downloaded.
+
+ .. versionadded:: 8.2
+ *url_width*, *url_height* and *url_param* parameters.
+ .. versionchanged:: 8.2
+ *filename* argument may be also a path-like object or an
+ iterable of path segments.
+ .. note:: filename suffix is adjusted if target url's suffix is
+ different which may be the case if a thumbnail is loaded.
+ .. seealso:: :api:`Imageinfo` for new parameters
+
+ :param filename: filename where to save file. If ``None``,
+ ``self.title(as_filename=True, with_ns=False)`` will be used.
+ If an Iterable is specified the items will be used as path
+ segments. To specify the user directory path you have to use
+ either ``~`` or ``~user`` as first path segment e.g. ``~/foo``
+ or ``('~', 'foo')`` as filename. If only the user directory
+ specifier is given, the title is used as filename like for
+ None. If the suffix is missing or different from url (which
+ can happen if a *url_width*, *url_height* or *url_param*
+ argument is given), the file suffix is adjusted.
:param chunk_size: the size of each chunk to be received and
written to file.
- :type chunk_size: int
- :param revision: file revision to download:
- None: self.latest_file_info will be used
- FileInfo: provided revision will be used.
- :type revision: None or FileInfo
+ :param revision: file revision to download. If None
+ :attr:`latest_file_info` will be used; otherwise provided
+ revision will be used.
+ :param url_width: download thumbnail with given width
+ :param url_height: download thumbnail with given height
+ :param url_param: download thumbnail with given param
:return: True if download is successful, False otherwise.
:raise IOError: if filename cannot be written for any reason.
"""
- if filename is None:
- filename = self.title(as_filename=True, with_ns=False)
+ if not filename:
+ path = Path()
+ elif isinstance(filename, (str, PathLike)):
+ path = Path(filename)
+ else:
+ path = Path(*filename)

- filename = os.path.expanduser(filename)
+ if path.stem in ('', '~', '~user'):
+ path = path / self.title(as_filename=True, with_ns=False)

- if revision is None:
+ thumb = bool(url_width or url_height or url_param)
+ if thumb or revision is None:
+ url = self.get_file_url(url_width, url_height, url_param)
revision = self.latest_file_info
+ else:
+ url = revision.url

- req = http.fetch(revision.url, stream=True)
+ # adjust suffix
+ path = path.with_suffix(Path(urlparse(url).path).suffix)
+ # adjust user path
+ path = path.expanduser()
+ req = http.fetch(url, stream=True)
if req.status_code == HTTPStatus.OK:
- try:
- with open(filename, 'wb') as f:
- for chunk in req.iter_content(chunk_size):
- f.write(chunk)
- except OSError as e:
- raise e
+ with open(path, 'wb') as f:
+ for chunk in req.iter_content(chunk_size):
+ f.write(chunk)

- sha1 = compute_file_hash(filename)
- return sha1 == revision.sha1
+ return thumb or compute_file_hash(path) == revision.sha1
+
pywikibot.warning(
- 'Unsuccessful request ({}): {}'
- .format(req.status_code, req.url))
+ f'Unsuccessful request ({req.status_code}): {req.url}')
return False

def globalusage(self, total=None):
diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py
index a9decae..77a9a32 100644
--- a/pywikibot/site/_apisite.py
+++ b/pywikibot/site/_apisite.py
@@ -1372,24 +1372,34 @@
) -> None:
"""Load image info from api and save in page attributes.

- Parameters correspond to iiprops in:
- [1] :api:`Imageinfo`
+ The following properties are loaded: ``timestamp``, ``user``,
+ ``comment``, ``url``, ``size``, ``sha1``, ``mime``, ``mediatype``,
+ ``metadata``, ``archivename`` and ``bitdepth``. If *url_width*,
+ *url_height* or *url_param* is given, additional properties
+ ``thumbwidth``, ``thumbheight``, ``thumburl`` and
+ ``responsiveUrls`` are given.

- Parameters validation and error handling left to the API call.
+ .. note:: Parameters validation and error handling left to the
+ API call.
+ .. versionchanged:: 8.2
+ *mediatype* and *bitdepth* properties were added.
+ .. seealso:: :api:`Imageinfo`

:param history: if true, return the image's version history
- :param url_width: see iiurlwidth in [1]
- :param url_height: see iiurlheigth in [1]
- :param url_param: see iiurlparam in [1]
-
+ :param url_width: get info for a thumbnail with given width
+ :param url_height: get info for a thumbnail with given height
+ :param url_param: get info for a thumbnail with given param
"""
- args = {'titles': page.title(with_section=False),
- 'iiurlwidth': url_width,
- 'iiurlheight': url_height,
- 'iiurlparam': url_param,
- 'iiprop': ['timestamp', 'user', 'comment', 'url', 'size',
- 'sha1', 'mime', 'metadata', 'archivename']
- }
+ args = {
+ 'titles': page.title(with_section=False),
+ 'iiurlwidth': url_width,
+ 'iiurlheight': url_height,
+ 'iiurlparam': url_param,
+ 'iiprop': [
+ 'timestamp', 'user', 'comment', 'url', 'size', 'sha1', 'mime',
+ 'mediatype', 'metadata', 'archivename', 'bitdepth',
+ ]
+ }
if not history:
args['total'] = 1
query = self._generator(api.PropertyGenerator,
diff --git a/tests/file_tests.py b/tests/file_tests.py
index 38325c8..be0fafd 100755
--- a/tests/file_tests.py
+++ b/tests/file_tests.py
@@ -280,12 +280,44 @@
cached = True

def test_successful_download(self):
- """Test successful_download."""
+ """Test successful download."""
page = pywikibot.FilePage(self.site, 'File:Albert Einstein.jpg')
filename = join_images_path('Albert Einstein.jpg')
status_code = page.download(filename)
self.assertTrue(status_code)
- os.unlink(filename)
+ oldsize = os.stat(filename).st_size
+
+ status_code = page.download(filename, url_height=128)
+ self.assertTrue(status_code)
+ size = os.stat(filename).st_size
+ self.assertLess(size, oldsize)
+
+ status_code = page.download(filename, url_width=120)
+ self.assertTrue(status_code)
+ size = os.stat(filename).st_size
+ self.assertLess(size, oldsize)
+
+ status_code = page.download(filename, url_param='120px')
+ self.assertTrue(status_code)
+ self.assertEqual(size, os.stat(filename).st_size)
+
+ os.remove(filename)
+
+ def test_changed_title(self):
+ """Test changed title."""
+ page = pywikibot.FilePage(self.site, 'Pywikibot MW gear icon.svg')
+ filename = join_images_path('Pywikibot MW gear icon.svg')
+ status_code = page.download(filename)
+ self.assertTrue(status_code)
+ self.assertTrue(os.path.exists(filename))
+
+ status_code = page.download(filename, url_param='120px')
+ self.assertTrue(status_code)
+ new_filename = filename.replace('.svg', '.png')
+ self.assertTrue(os.path.exists(new_filename))
+
+ os.remove(filename)
+ os.remove(new_filename)

def test_not_existing_download(self):
"""Test not existing download."""

To view, visit change 924058. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: I21f2bb9a15681540044b18fcd71d54061bc12913
Gerrit-Change-Number: 924058
Gerrit-PatchSet: 10
Gerrit-Owner: Xqt <info@gno.de>
Gerrit-Reviewer: Framawiki <framawiki@tools.wmflabs.org>
Gerrit-Reviewer: TheSandDoctor <majorjohn1@mail.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged