jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/769276 )
Change subject: [IMPR] Provide an Uploader class to upload files ......................................................................
[IMPR] Provide an Uploader class to upload files
currently the upload function has an overhead of parameter which are called recursivly. Provide an Uploader class to hold the parameters.
- remove UploadMixin and add upload method to APISite which just creates the Uploader and calls the upload() method - add Uploader class which ruses the upload parameters to hold them - upload() method to call the inner _upload method - documentation added
Change-Id: Ifc83112400cbe2ba3a2ebf89099166c0c0fc3398 --- M docs/api_ref/pywikibot.site.rst M pywikibot/site/_apisite.py M pywikibot/site/_upload.py 3 files changed, 209 insertions(+), 179 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/docs/api_ref/pywikibot.site.rst b/docs/api_ref/pywikibot.site.rst index af145a8..cd97c76 100644 --- a/docs/api_ref/pywikibot.site.rst +++ b/docs/api_ref/pywikibot.site.rst @@ -35,3 +35,8 @@ TokenWallet Package ------------------- .. automodule:: pywikibot.site._tokenwallet + +Uploader Package +---------------- +.. automodule:: pywikibot.site._upload + diff --git a/pywikibot/site/_apisite.py b/pywikibot/site/_apisite.py index 07fd907..a1bbd11 100644 --- a/pywikibot/site/_apisite.py +++ b/pywikibot/site/_apisite.py @@ -65,7 +65,7 @@ from pywikibot.site._namespace import Namespace from pywikibot.site._siteinfo import Siteinfo from pywikibot.site._tokenwallet import TokenWallet -from pywikibot.site._upload import UploadMixin +from pywikibot.site._upload import Uploader from pywikibot.tools import ( MediaWikiVersion, deprecated, @@ -93,7 +93,6 @@ ThanksMixin, UrlShortenerMixin, WikibaseClientMixin, - UploadMixin, ):
"""API interface to MediaWiki site. @@ -2521,6 +2520,29 @@ siiprop=props) return req.submit()['query']['stashimageinfo'][0]
+ @need_right('upload') + def upload(self, filepage, **kwargs) -> bool: + """Upload a file to the wiki. + + :see: https://www.mediawiki.org/wiki/API:Upload + + Either source_filename or source_url, but not both, must be provided. + + .. versionchanged:: 6.0 + keyword arguments required for all parameters except `filepage` + + .. versionchanged:: 6.2: + asynchronous upload is used if `asynchronous` parameter is set. + + For keyword arguments refer :class:`pywikibot.site._upload.Uploader` + + :param filepage: a FilePage object from which the wiki-name of the + file will be obtained. + :return: It returns True if the upload was successful and False + otherwise. + """ + return Uploader(self, filepage, **kwargs).upload() + def get_property_names(self, force: bool = False): """ Get property names for pages_with_property(). diff --git a/pywikibot/site/_upload.py b/pywikibot/site/_upload.py index d09a505..a8608ae 100644 --- a/pywikibot/site/_upload.py +++ b/pywikibot/site/_upload.py @@ -7,7 +7,7 @@ import mimetypes import os from collections.abc import Iterable -from typing import Optional, Union +from typing import Optional from warnings import warn
import pywikibot @@ -16,95 +16,122 @@ Error, UploadError, ) -from pywikibot.site._decorators import need_right from pywikibot.tools import compute_file_hash
-__all__ = ('UploadMixin', ) +__all__ = ('Uploader', ) _logger = 'wiki.apisite'
-class UploadMixin: +class Uploader:
- """Site mixin to Upload a file to the wiki. + """Uploader class to upload a file to the wiki.
.. versionadded:: 7.1 + + :param site: The current site to work on + :param filepage: a FilePage object from which the wiki-name of the + file will be obtained. + :param source_filename: path to the file to be uploaded + :param source_url: URL of the file to be uploaded + :param comment: Edit summary; if this is not provided, then + filepage.text will be used. An empty summary is not permitted. + This may also serve as the initial page text (see below). + :param text: Initial page text; if this is not set, then + filepage.text will be used, or comment. + :param watch: If true, add filepage to the bot user's watchlist + :param chunk_size: The chunk size in bytes for chunked uploading (see + https://www.mediawiki.org/wiki/API:Upload#Chunked_uploading). + It will only upload in chunks, if the chunk size is positive + but lower than the file size. + :param asynchronous: Make potentially large file operations + asynchronous on the server side when possible. + :param ignore_warnings: It may be a static boolean, a callable + returning a boolean or an iterable. The callable gets a list of + UploadError instances and the iterable should contain the warning + codes for which an equivalent callable would return True if all + UploadError codes are in thet list. If the result is False it'll + not continue uploading the file and otherwise disable any warning + and reattempt to upload the file. NOTE: If report_success is True + or None it'll raise an UploadError exception if the static + boolean is False. + :type ignore_warnings: bool or callable or iterable of str + :param report_success: If the upload was successful it'll print a + success message and if ignore_warnings is set to False it'll + raise an UploadError if a warning occurred. If it's None + (default) it'll be True if ignore_warnings is a bool and False + otherwise. If it's True or None ignore_warnings must be a bool. """
- @need_right('upload') - def upload(self, filepage, *, - source_filename: Optional[str] = None, - source_url: Optional[str] = None, - comment: Optional[str] = None, - text: Optional[str] = None, - watch: bool = False, - ignore_warnings=False, - chunk_size: int = 0, - asynchronous: bool = False, - report_success: Optional[bool] = None, - _file_key: Optional[str] = None, - _offset: Union[bool, int] = 0) -> bool: + def __init__(self, + site: 'pywikibot.site.APISite', + filepage: 'pywikibot.FilePage', + *, + source_filename: Optional[str] = None, + source_url: Optional[str] = None, + comment: Optional[str] = None, + text: Optional[str] = None, + watch: bool = False, + chunk_size: int = 0, + asynchronous: bool = False, + ignore_warnings=False, + report_success: Optional[bool] = None) -> None: + """Initializer.""" + self.site = site + self.filepage = filepage + self.comment = comment + self.text = text + self.watch = watch + self.ignore_warnings = ignore_warnings + self.chunk_size = chunk_size + self.asynchronous = asynchronous + self.report_success = report_success + + if source_filename and source_url: + raise ValueError('APISite.upload: must provide either ' + 'source_filename or source_url, not both.') + + self.filename = source_filename + self.url = source_url + + def upload(self) -> bool: + """Check for required parameters to upload and run the job. + + :return: Whether the upload was successful. """ - Upload a file to the wiki. + if self.comment is None: + self.comment = self.filepage.text + if not self.comment: + raise ValueError('APISite.upload: cannot upload file without ' + 'a summary/description.')
- :see: https://www.mediawiki.org/wiki/API:Upload + if self.text is None: + self.text = self.filepage.text + if not self.text: + self.text = self.comment
- Either source_filename or source_url, but not both, must be provided. + return self._upload(self.ignore_warnings, self.report_success)
- .. versionchanged:: 6.0 - keyword arguments required for all parameters except *filepage* + def _upload(self, ignore_warnings, report_success, + file_key=None, offset=0) -> bool: + """Recursive Upload method.
- .. versionchanged:: 6.2: - asynchronous upload is used if *asynchronous* parameter is set. - - :param filepage: a FilePage object from which the wiki-name of the - file will be obtained. - :param source_filename: path to the file to be uploaded - :param source_url: URL of the file to be uploaded - :param comment: Edit summary; if this is not provided, then - filepage.text will be used. An empty summary is not permitted. - This may also serve as the initial page text (see below). - :param text: Initial page text; if this is not set, then - filepage.text will be used, or comment. - :param watch: If true, add filepage to the bot user's watchlist - :param ignore_warnings: It may be a static boolean, a callable - returning a boolean or an iterable. The callable gets a list of - UploadError instances and the iterable should contain the warning - codes for which an equivalent callable would return True if all - UploadError codes are in thet list. If the result is False it'll - not continue uploading the file and otherwise disable any warning - and reattempt to upload the file. NOTE: If report_success is True - or None it'll raise an UploadError exception if the static - boolean is False. - :type ignore_warnings: bool or callable or iterable of str - :param chunk_size: The chunk size in bytes for chunked uploading (see - https://www.mediawiki.org/wiki/API:Upload#Chunked_uploading). - It will only upload in chunks, if the chunk size is positive - but lower than the file size. - :param asynchronous: Make potentially large file operations - asynchronous on the server side when possible. - :param report_success: If the upload was successful it'll print a - success message and if ignore_warnings is set to False it'll - raise an UploadError if a warning occurred. If it's None - (default) it'll be True if ignore_warnings is a bool and False - otherwise. If it's True or None ignore_warnings must be a bool. - :param _file_key: Private parameter for upload recurion. Reuses - an already uploaded file using the filekey. If None (default) - it will upload the file. - :param _offset: Private parameter for upload recurion. When - file_key is not None this can be an integer to continue a - previously canceled chunked upload. If False it treats that - as a finished upload. If True it requests the stash info from - the server to determine the offset. By default starts at 0. - :return: It returns True if the upload was successful and False - otherwise. + :param file_key: Reuses an already uploaded file using the + filekey. If None (default) it will upload the file. + :param offset: When file_key is not None this can be an integer + to continue a previously canceled chunked upload. If False + it treats that as a finished upload. If True it requests the + stash info from the server to determine the offset. By + default starts at 0. + :return: Whether the upload was successful. """ + def create_warnings_list(response): return [ UploadError( warning, upload_warnings.get(warning, '{msg}').format(msg=data), - _file_key, response['offset']) + file_key, response['offset']) for warning, data in response['warnings'].items()]
# some warning keys have been changed @@ -139,16 +166,8 @@ }
# An offset != 0 doesn't make sense without a file key - assert(_offset == 0 or _file_key is not None) - # check for required parameters - if source_filename and source_url: - raise ValueError('APISite.upload: must provide either ' - 'source_filename or source_url, not both.') - if comment is None: - comment = filepage.text - if not comment: - raise ValueError('APISite.upload: cannot upload file without ' - 'a summary/description.') + assert(offset == 0 or file_key is not None) + if report_success is None: report_success = isinstance(ignore_warnings, bool) if report_success is True: @@ -162,51 +181,47 @@ return all(w.code in ignored_warnings for w in warnings)
ignore_all_warnings = not callable(ignore_warnings) and ignore_warnings - if text is None: - text = filepage.text - if not text: - text = comment - token = self.tokens['edit'] + + token = self.site.tokens['edit'] result = None - file_page_title = filepage.title(with_ns=False) + file_page_title = self.filepage.title(with_ns=False) file_size = None - offset = _offset
# make sure file actually exists - if source_filename: - if os.path.isfile(source_filename): - file_size = os.path.getsize(source_filename) + if self.filename: + if os.path.isfile(self.filename): + file_size = os.path.getsize(self.filename) elif offset is not False: raise ValueError("File '{}' does not exist." - .format(source_filename)) + .format(self.filename))
# Verify the stash when a file key and offset is given: # requests the SHA1 and file size uploaded and compares it to - # the local file. Also verify that _offset is matching the - # file size if the _offset is an int. If _offset is False if + # the local file. Also verify that offset is matching the + # file size if the offset is an int. If offset is False if # verifies that the file size match with the local file. verify_stash = False - if source_filename and _file_key: + if self.filename and file_key: assert offset is False or file_size is not None verify_stash = True if (offset is not False and offset is not True and offset > file_size): raise ValueError( 'For the file key "{}" the offset was set to {} ' - 'while the file is only {} bytes large.'.format( - _file_key, offset, file_size)) + 'while the file is only {} bytes large.' + .format(file_key, offset, file_size))
if verify_stash or offset is True: - if not _file_key: + if not file_key: raise ValueError('Without a file key it cannot request the ' 'stash information') - if not source_filename: + if not self.filename: raise ValueError('Can request stash information only when ' 'using a file name.') props = ['size'] if verify_stash: props += ['sha1'] - stash_info = self.stash_info(_file_key, props) + stash_info = self.site.stash_info(file_key, props) if offset is True: offset = stash_info['size'] elif offset is False: @@ -214,55 +229,55 @@ raise ValueError( 'For the file key "{}" the server reported a size ' '{} while the file size is {}' - .format(_file_key, stash_info['size'], file_size)) + .format(file_key, stash_info['size'], file_size)) elif offset is not False and offset != stash_info['size']: raise ValueError( 'For the file key "{}" the server reported a size {} ' - 'while the offset was {}'.format( - _file_key, stash_info['size'], offset)) + 'while the offset was {}' + .format(file_key, stash_info['size'], offset))
if verify_stash: # The SHA1 was also requested so calculate and compare it assert 'sha1' in stash_info, \ 'sha1 not in stash info: {}'.format(stash_info) - sha1 = compute_file_hash(source_filename, bytes_to_read=offset) + sha1 = compute_file_hash(self.filename, bytes_to_read=offset) if sha1 != stash_info['sha1']: raise ValueError( 'The SHA1 of {} bytes of the stashed "{}" is {} ' - 'while the local file is {}'.format( - offset, _file_key, stash_info['sha1'], sha1)) + 'while the local file is {}' + .format(offset, file_key, stash_info['sha1'], sha1))
assert offset is not True - if _file_key and file_size is None: + if file_key and file_size is None: assert offset is False
- if _file_key and offset is False or offset == file_size: - pywikibot.log('Reused already upload file using ' - 'filekey "{}"'.format(_file_key)) + if file_key and offset is False or offset == file_size: + pywikibot.log('Reused already upload file using filekey "{}"' + .format(file_key)) # TODO: Use sessionkey instead of filekey if necessary - final_request = self._request( + final_request = self.site._request( parameters={ 'action': 'upload', 'token': token, 'filename': file_page_title, - 'comment': comment, - 'text': text, - 'async': asynchronous, - 'filekey': _file_key + 'comment': self.comment, + 'text': self.text, + 'async': self.asynchronous, + 'filekey': file_key })
- elif source_filename: + elif self.filename: # TODO: Dummy value to allow also Unicode names, see bug T75661 mime_filename = 'FAKE-NAME' # upload local file throttle = True - filesize = os.path.getsize(source_filename) - chunked_upload = 0 < chunk_size < filesize - with open(source_filename, 'rb') as f: - final_request = self._request( + filesize = os.path.getsize(self.filename) + chunked_upload = 0 < self.chunk_size < filesize + with open(self.filename, 'rb') as f: + final_request = self.site._request( throttle=throttle, parameters={ - 'action': 'upload', 'token': token, 'text': text, - 'filename': file_page_title, 'comment': comment}) + 'action': 'upload', 'token': token, 'text': self.text, + 'filename': file_page_title, 'comment': self.comment}) if chunked_upload: if offset > 0: pywikibot.log('Continuing upload from byte {}' @@ -272,20 +287,20 @@
if poll: # run a poll; not possible in first iteration - assert _file_key - req = self._simple_request( + assert file_key + req = self.site._simple_request( action='upload', token=token, - filekey=_file_key, + filekey=file_key, checkstatus=True) else: f.seek(offset) - chunk = f.read(chunk_size) + chunk = f.read(self.chunk_size) # workaround (hack) for T132676 # append another '\r' so that one is the payload # and the second is used for newline when mangled # by email package. - if (len(chunk) < chunk_size + if (len(chunk) < self.chunk_size or (offset + len(chunk)) == filesize and chunk[-1] == b'\r'[0]): chunk += b'\r' @@ -295,7 +310,7 @@ ('application', 'octet-stream'), {'filename': mime_filename}) } - req = self._request( + req = self.site._request( throttle=throttle, mime=mime_params, parameters={ @@ -305,19 +320,19 @@ 'filesize': filesize, 'offset': offset, 'filename': file_page_title, - 'async': asynchronous, + 'async': self.asynchronous, 'ignorewarnings': ignore_all_warnings})
- if _file_key: - req['filekey'] = _file_key + if file_key: + req['filekey'] = file_key
try: data = req.submit()['upload'] - self._uploaddisabled = False + self.site._uploaddisabled = False except APIError as error: # TODO: catch and process foreseeable errors if error.code == 'uploaddisabled': - self._uploaddisabled = True + self.site._uploaddisabled = True elif error.code == 'stashfailed' \ and 'offset' in error.other: # TODO: Ask MediaWiki to change this @@ -348,7 +363,7 @@ break
# Polls may not contain file key in response - _file_key = data.get('filekey', _file_key) + file_key = data.get('filekey', file_key) if data['result'] == 'Warning': assert('warnings' in data and not ignore_all_warnings) @@ -366,17 +381,8 @@ # Future warnings of this run # can be ignored if restart: - return self.upload( - filepage, - source_filename=source_filename, - source_url=source_url, - comment=comment, - text=text, - watch=watch, + return self._upload( ignore_warnings=True, - chunk_size=chunk_size, - _file_key=None, - _offset=0, report_success=False )
@@ -395,10 +401,10 @@ new_offset = int(data['offset']) if offset + len(chunk) != new_offset: pywikibot.log('Old offset: {}; Returned ' - 'offset: {}; Chunk size: ' - '{}'.format(offset, - new_offset, - len(chunk))) + 'offset: {}; Chunk size: {}' + .format(offset, + new_offset, + len(chunk))) pywikibot.warning('Unexpected offset.') offset = new_offset else: @@ -410,19 +416,19 @@ 'assemble chunks.') elif data['result'] == 'Success': # finished pywikibot.log('Finished uploading last chunk.') - final_request['filekey'] = _file_key - final_request['async'] = asynchronous + final_request['filekey'] = file_key + final_request['async'] = self.asynchronous break else: raise Error( 'Unrecognized result: %s' % data['result'])
else: # not chunked upload - if _file_key: - final_request['filekey'] = _file_key + if file_key: + final_request['filekey'] = file_key else: file_contents = f.read() - filetype = (mimetypes.guess_type(source_filename)[0] + filetype = (mimetypes.guess_type(self.filename)[0] or 'application/octet-stream') final_request.mime = { 'file': (file_contents, filetype.split('/'), @@ -430,25 +436,25 @@ } else: # upload by URL - if not self.has_right('upload_by_url'): + if not self.site.has_right('upload_by_url'): raise Error( "User '{}' is not authorized to upload by URL on site {}." - .format(self.user(), self)) - final_request = self._simple_request( - action='upload', filename=file_page_title, - url=source_url, comment=comment, text=text, token=token) + .format(self.site.user(), self)) + final_request = self.site._simple_request( + action='upload', filename=file_page_title, url=self.url, + comment=self.comment, text=self.text, token=token)
while True: if not result: - final_request['watch'] = watch + final_request['watch'] = self.watch final_request['ignorewarnings'] = ignore_all_warnings try: result = final_request.submit() - self._uploaddisabled = False + self.site._uploaddisabled = False except APIError as error: # TODO: catch and process foreseeable errors if error.code == 'uploaddisabled': - self._uploaddisabled = True + self.site._uploaddisabled = True raise error result = result['upload'] pywikibot.debug(result, _logger) @@ -459,32 +465,29 @@ if result['result'] == 'Warning': assert 'warnings' in result and not ignore_all_warnings
- if source_filename: + if self.filename: if 'filekey' in result: - _file_key = result['filekey'] + file_key = result['filekey'] elif 'sessionkey' in result: # TODO: Probably needs to be reflected in the API call # above - _file_key = result['sessionkey'] + file_key = result['sessionkey'] pywikibot.warning( 'Using sessionkey instead of filekey.') else: - _file_key = None + file_key = None pywikibot.warning('No filekey defined.') else: - _file_key = None + file_key = None
if not report_success: - result.setdefault('offset', bool(source_filename)) - offset = result['offset'] if source_filename else False + result.setdefault('offset', bool(self.filename)) + offset = result['offset'] if self.filename else False if ignore_warnings(create_warnings_list(result)): - return self.upload( - filepage, source_filename=source_filename, - source_url=source_url, comment=comment, - text=text, watch=watch, ignore_warnings=True, - chunk_size=chunk_size, asynchronous=asynchronous, - _file_key=_file_key, _offset=offset, - report_success=False) + return self._upload(ignore_warnings=True, + report_success=False, + file_key=file_key, + offset=offset) return False
if len(result['warnings']) > 1: @@ -498,18 +501,18 @@ raise UploadError(warning, upload_warnings[warning] .format(msg=message), - file_key=_file_key, + file_key=file_key, offset=result.get('offset', False))
if result['result'] == 'Poll': # Polling is meaningless without a file key - assert _file_key + assert file_key pywikibot.log('Waiting for upload to be published.') result = None - final_request = self._simple_request( + final_request = self.site._simple_request( action='upload', token=token, - filekey=_file_key, + filekey=file_key, checkstatus=True) continue
@@ -519,7 +522,7 @@ # If we receive a nochange, that would mean we're in simulation # mode, don't attempt to access imageinfo if 'nochange' not in result: - filepage._load_file_revisions([result['imageinfo']]) + self.filepage._load_file_revisions([result['imageinfo']]) return True
raise Error('Unrecognized result: %s' % data['result'])
pywikibot-commits@lists.wikimedia.org