jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/924058 )
Change subject: [IMPR] Enable FilePage.download() to download thumbnails
......................................................................
[IMPR] Enable FilePage.download() to download thumbnails
- add *url_width*, *url_height* and *url_param* to FilePage.download
- allow filename to be a PathLike object or an iterable of path sections
- use title if only a user path specifier is given
- adjust path suffix if required
- remove try clause for OSError exception because the exception is re-raised without further statements
- update documentation
- add tests
Bug: T247095
Change-Id: I21f2bb9a15681540044b18fcd71d54061bc12913
---
M pywikibot/page/_filepage.py
M pywikibot/site/_apisite.py
M tests/file_tests.py
3 files changed, 177 insertions(+), 58 deletions(-)
Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified
diff --git a/pywikibot/page/_filepage.py b/pywikibot/page/_filepage.py index 1916925..79f59a0 100644 --- a/pywikibot/page/_filepage.py +++ b/pywikibot/page/_filepage.py @@ -10,10 +10,14 @@ # # Distributed under the terms of the MIT license. # -import os.path from http import HTTPStatus +from os import PathLike +from pathlib import Path +from typing import Optional, Union +from urllib.parse import urlparse
import pywikibot +from pywikibot.backports import Iterable from pywikibot.comms import http from pywikibot.exceptions import NoPageError from pywikibot.page._page import Page @@ -107,25 +111,31 @@ self._imagePageHtml = http.request(self.site, path).text return self._imagePageHtml
- def get_file_url(self, url_width=None, url_height=None, - url_param=None) -> str: - """ - Return the url or the thumburl of the file described on this page. + def get_file_url(self, + url_width: Optional[int] = None, + url_height: Optional[int] = None, + url_param: Optional[int] = None) -> str: + """Return the url or the thumburl of the file described on this page.
Fetch the information if not available.
- Once retrieved, thumburl information will also be accessible as - latest_file_info attributes, named as in [1]: - - url, thumburl, thumbwidth and thumbheight + Once retrieved, file information will also be accessible as + :attr:`latest_file_info` attributes, named as in :api:`Imageinfo`. + If *url_width*, *url_height* or *url_param* is given, additional + properties ``thumbwidth``, ``thumbheight``, ``thumburl`` and + ``responsiveUrls`` are provided.
- Parameters correspond to iiprops in: - [1] :api:`Imageinfo` + .. note:: Parameters validation and error handling left to the + API call. + .. seealso::
- Parameters validation and error handling left to the API call. + * :meth:`APISite.loadimageinfo() + <pywikibot.site._apisite.APISite.loadimageinfo>` + * :api:`Imageinfo`
- :param url_width: see iiurlwidth in [1] - :param url_height: see iiurlheigth in [1] - :param url_param: see iiurlparam in [1] + :param url_width: get info for a thumbnail with given width + :param url_height: get info for a thumbnail with given height + :param url_param: get info for a thumbnail with given param :return: latest file url or thumburl """ # Plain url is requested. @@ -267,47 +277,95 @@ return self.site.upload(self, source_filename=filename, source_url=url, **kwargs)
- def download(self, filename=None, chunk_size=100 * 1024, revision=None): - """ - Download to filename file of FilePage. + def download(self, + filename: Union[None, str, PathLike, Iterable[str]] = None, + chunk_size: int = 100 * 1024, + revision: Optional['FileInfo'] = None, *, + url_width: Optional[int] = None, + url_height: Optional[int] = None, + url_param: Optional[int] = None) -> bool: + """Download to filename file of FilePage.
- :param filename: filename where to save file: - None: self.title(as_filename=True, with_ns=False) - will be used - str: provided filename will be used. - :type filename: None or str + **Usage examples:** + + Download an image: + + >>> site = pywikibot.Site('wikipedia:test') + >>> file = pywikibot.FilePage(site, 'Pywikibot MW gear icon.svg') + >>> file.download() + True + + Pywikibot_MW_gear_icon.svg was downloaded. + + Download a thumnail: + + >>> file.download(url_param='120px') + True + + The suffix has changed and Pywikibot_MW_gear_icon.png was + downloaded. + + .. versionadded:: 8.2 + *url_width*, *url_height* and *url_param* parameters. + .. versionchanged:: 8.2 + *filename* argument may be also a path-like object or an + iterable of path segments. + .. note:: filename suffix is adjusted if target url's suffix is + different which may be the case if a thumbnail is loaded. + .. seealso:: :api:`Imageinfo` for new parameters + + :param filename: filename where to save file. If ``None``, + ``self.title(as_filename=True, with_ns=False)`` will be used. + If an Iterable is specified the items will be used as path + segments. To specify the user directory path you have to use + either ``~`` or ``~user`` as first path segment e.g. ``~/foo`` + or ``('~', 'foo')`` as filename. If only the user directory + specifier is given, the title is used as filename like for + None. If the suffix is missing or different from url (which + can happen if a *url_width*, *url_height* or *url_param* + argument is given), the file suffix is adjusted. :param chunk_size: the size of each chunk to be received and written to file. - :type chunk_size: int - :param revision: file revision to download: - None: self.latest_file_info will be used - FileInfo: provided revision will be used. - :type revision: None or FileInfo + :param revision: file revision to download. If None + :attr:`latest_file_info` will be used; otherwise provided + revision will be used. 
+ :param url_width: download thumbnail with given width + :param url_height: download thumbnail with given height + :param url_param: download thumbnail with given param :return: True if download is successful, False otherwise. :raise IOError: if filename cannot be written for any reason. """ - if filename is None: - filename = self.title(as_filename=True, with_ns=False) + if not filename: + path = Path() + elif isinstance(filename, (str, PathLike)): + path = Path(filename) + else: + path = Path(*filename)
- filename = os.path.expanduser(filename) + if path.stem in ('', '~', '~user'): + path = path / self.title(as_filename=True, with_ns=False)
- if revision is None: + thumb = bool(url_width or url_height or url_param) + if thumb or revision is None: + url = self.get_file_url(url_width, url_height, url_param) revision = self.latest_file_info + else: + url = revision.url
- req = http.fetch(revision.url, stream=True) + # adjust suffix + path = path.with_suffix(Path(urlparse(url).path).suffix) + # adjust user path + path = path.expanduser() + req = http.fetch(url, stream=True) if req.status_code == HTTPStatus.OK: - try: - with open(filename, 'wb') as f: - for chunk in req.iter_content(chunk_size): - f.write(chunk) - except OSError as e: - raise e + with open(path, 'wb') as f: + for chunk in req.iter_content(chunk_size): + f.write(chunk)
- sha1 = compute_file_hash(filename) - return sha1 == revision.sha1 + return thumb or compute_file_hash(path) == revision.sha1 + pywikibot.warning( - 'Unsuccessful request ({}): {}' - .format(req.status_code, req.url)) + f'Unsuccessful request ({req.status_code}): {req.url}') return False
def globalusage(self, total=None): diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index a9decae..77a9a32 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -1372,24 +1372,34 @@ ) -> None: """Load image info from api and save in page attributes.
- Parameters correspond to iiprops in: - [1] :api:`Imageinfo` + The following properties are loaded: ``timestamp``, ``user``, + ``comment``, ``url``, ``size``, ``sha1``, ``mime``, ``mediatype``, + ``metadata``, ``archivename`` and ``bitdepth``. If *url_width*, + *url_height* or *url_param* is given, additional properties + ``thumbwidth``, ``thumbheight``, ``thumburl`` and + ``responsiveUrls`` are given.
- Parameters validation and error handling left to the API call. + .. note:: Parameters validation and error handling left to the + API call. + .. versionchanged:: 8.2 + *mediatype* and *bitdepth* properties were added. + .. seealso:: :api:`Imageinfo`
:param history: if true, return the image's version history - :param url_width: see iiurlwidth in [1] - :param url_height: see iiurlheigth in [1] - :param url_param: see iiurlparam in [1] - + :param url_width: get info for a thumbnail with given width + :param url_height: get info for a thumbnail with given height + :param url_param: get info for a thumbnail with given param """ - args = {'titles': page.title(with_section=False), - 'iiurlwidth': url_width, - 'iiurlheight': url_height, - 'iiurlparam': url_param, - 'iiprop': ['timestamp', 'user', 'comment', 'url', 'size', - 'sha1', 'mime', 'metadata', 'archivename'] - } + args = { + 'titles': page.title(with_section=False), + 'iiurlwidth': url_width, + 'iiurlheight': url_height, + 'iiurlparam': url_param, + 'iiprop': [ + 'timestamp', 'user', 'comment', 'url', 'size', 'sha1', 'mime', + 'mediatype', 'metadata', 'archivename', 'bitdepth', + ] + } if not history: args['total'] = 1 query = self._generator(api.PropertyGenerator, diff --git a/tests/file_tests.py b/tests/file_tests.py index 38325c8..be0fafd 100755 --- a/tests/file_tests.py +++ b/tests/file_tests.py @@ -280,12 +280,44 @@ cached = True
def test_successful_download(self): - """Test successful_download.""" + """Test successful download.""" page = pywikibot.FilePage(self.site, 'File:Albert Einstein.jpg') filename = join_images_path('Albert Einstein.jpg') status_code = page.download(filename) self.assertTrue(status_code) - os.unlink(filename) + oldsize = os.stat(filename).st_size + + status_code = page.download(filename, url_height=128) + self.assertTrue(status_code) + size = os.stat(filename).st_size + self.assertLess(size, oldsize) + + status_code = page.download(filename, url_width=120) + self.assertTrue(status_code) + size = os.stat(filename).st_size + self.assertLess(size, oldsize) + + status_code = page.download(filename, url_param='120px') + self.assertTrue(status_code) + self.assertEqual(size, os.stat(filename).st_size) + + os.remove(filename) + + def test_changed_title(self): + """Test changed title.""" + page = pywikibot.FilePage(self.site, 'Pywikibot MW gear icon.svg') + filename = join_images_path('Pywikibot MW gear icon.svg') + status_code = page.download(filename) + self.assertTrue(status_code) + self.assertTrue(os.path.exists(filename)) + + status_code = page.download(filename, url_param='120px') + self.assertTrue(status_code) + new_filename = filename.replace('.svg', '.png') + self.assertTrue(os.path.exists(new_filename)) + + os.remove(filename) + os.remove(new_filename)
def test_not_existing_download(self): """Test not existing download."""