Revision: 7552
Author: russblau
Date: 2009-10-28 17:37:09 +0000 (Wed, 28 Oct 2009)
Log Message:
-----------
Create copy for rewrite branch.
Added Paths:
-----------
branches/rewrite/scripts/upload.py
Copied: branches/rewrite/scripts/upload.py (from rev 7545, trunk/pywikipedia/upload.py)
===================================================================
--- branches/rewrite/scripts/upload.py (rev 0)
+++ branches/rewrite/scripts/upload.py 2009-10-28 17:37:09 UTC (rev 7552)
@@ -0,0 +1,426 @@
+# -*- coding: utf-8 -*-
+"""
+Script to upload images to wikipedia.
+
+Arguments:
+
+  -keep            Keep the filename as is
+  -filename:NAME   Target filename (NAME is the text after the colon)
+  -noverify        Do not ask for verification of the upload description
+                   if one is given
+
+If any other arguments are given, the first is the URL or filename
+to upload, and the rest is a proposed description to go with the
+upload. If none of these are given, the user is asked for the
+file or URL to upload. The bot will then upload the image to the wiki.
+
+The script will ask for the location of an image, if not given as a parameter,
+and for a description.
+"""
+#
+# (C) Rob W.W. Hooft, Andre Engels 2003-2004
+#
+# Distributed under the terms of the MIT license.
+#
+# Version marker; '$Id$' looks like a Subversion keyword placeholder
+# (presumably expanded on checkout when svn:keywords is set -- confirm).
+__version__='$Id$'
+
+import os, sys, time
+import urllib, mimetypes
+import wikipedia, config, query
+
+def post_multipart(site, address, fields, files, cookies):
+    """
+    Post fields and files to an http host as multipart/form-data.
+
+    site is the object whose postData() method sends the request.
+    address is handed to site.postData() as the target of the POST.
+    fields is a sequence of (name, value) elements for regular form fields.
+    files is a sequence of (name, filename, value) elements for data to be
+    uploaded as files.
+    cookies is passed through to site.postData() unchanged.
+
+    Return whatever site.postData() returns (the server's response).
+    """
+    contentType, body = encode_multipart_formdata(fields, files)
+    return site.postData(address, body, contentType=contentType,
+                         cookies=cookies)
+
+def encode_multipart_formdata(fields, files):
+    """
+    Build the body of a multipart/form-data request.
+
+    fields is a sequence of (name, value) elements for regular form fields.
+    files is a sequence of (name, filename, value) elements for data to be
+    uploaded as files; the Content-Type of each part is guessed from its
+    filename by get_content_type().
+
+    Return (content_type, body) ready for httplib.HTTP instance.
+    """
+    # NOTE: the boundary is a fixed string; a payload that happens to
+    # contain it would produce a malformed body.
+    boundary = '----------ThIs_Is_tHe_bouNdaRY_$'
+    delimiter = '--' + boundary
+    lines = []
+    for (key, value) in fields:
+        lines.append(delimiter)
+        lines.append('Content-Disposition: form-data; name="%s"' % key)
+        lines.append('')
+        lines.append(value)
+    for (key, filename, value) in files:
+        lines.append(delimiter)
+        lines.append('Content-Disposition: form-data; name="%s"; '
+                     'filename="%s"' % (key, filename))
+        lines.append('Content-Type: %s' % get_content_type(filename))
+        lines.append('')
+        lines.append(value)
+    # Closing delimiter, followed by an empty element so that the joined
+    # body ends with CRLF.
+    lines.append(delimiter + '--')
+    lines.append('')
+    body = '\r\n'.join(lines)
+    content_type = 'multipart/form-data; boundary=%s' % boundary
+    return content_type, body
+
+def get_content_type(filename):
+    """
+    Return the MIME type guessed from filename, or
+    'application/octet-stream' when mimetypes has no guess.
+    """
+    guessed = mimetypes.guess_type(filename)[0]
+    return guessed or 'application/octet-stream'
+
+
+class UploadRobot:
+    """
+    Upload one file to a wiki.
+
+    The file is taken from a local path or a URL (self.url).  Its contents
+    are loaded by read_file_content(), the target name and the description
+    are settled interactively by process_filename(), and upload_image()
+    sends the result through the API or, as a fallback, through the HTML
+    upload form (_uploadImageOld()).
+    """
+
+    def __init__(self, url, urlEncoding=None, description=u'',
+                 useFilename=None, keepFilename=False,
+                 verifyDescription=True, ignoreWarning=False,
+                 targetSite=None, uploadByUrl=False):
+        """
+        url               - Local path or URL of the file to upload.
+        urlEncoding       - If set, the name taken from url is decoded with
+                            this encoding and then URL-unquoted.
+        description       - Proposed description for the upload.
+        useFilename       - If set, used as the target filename instead of
+                            the name derived from url.
+        keepFilename      - Set this to True to skip the prompt for a
+                            better filename.
+        verifyDescription - Set this to False to skip the prompt that
+                            offers to edit the description.
+        ignoreWarning     - Set this to True if you want to upload even if
+                            another file would be overwritten or another
+                            mistake would be risked.
+        targetSite        - Site to upload to.  Defaults to the commons
+                            site when config.upload_to_commons is set,
+                            otherwise to wikipedia.getSite().
+        uploadByUrl       - Set this to True to hand the URL to the wiki
+                            instead of sending the file contents.
+        """
+        self._retrieved = False
+        self.url = url
+        self.urlEncoding = urlEncoding
+        self.description = description
+        self.useFilename = useFilename
+        self.keepFilename = keepFilename
+        self.verifyDescription = verifyDescription
+        self.ignoreWarning = ignoreWarning
+        if config.upload_to_commons:
+            self.targetSite = targetSite or wikipedia.getSite('commons',
+                                                              'commons')
+        else:
+            self.targetSite = targetSite or wikipedia.getSite()
+        self.targetSite.forceLogin()
+        self.uploadByUrl = uploadByUrl
+
+    def urlOK(self):
+        """
+        Returns true iff the URL references an online site or an
+        existing local file.
+        """
+        return self.url != '' and ('://' in self.url or
+                                   os.path.exists(self.url))
+
+    def read_file_content(self):
+        """
+        Load the contents of self.url into self._contents.
+
+        A URL ('://' in self.url) is downloaded with
+        wikipedia.MyURLopener.  When the server announces a Content-Length
+        and the connection closes early, the download is retried after a
+        growing pause, resuming with a Range request if the server accepts
+        byte ranges.  If the server answers with an HTML page the download
+        is abandoned and the method returns without completing
+        self._contents.
+
+        Anything else is treated as a local path and read in binary mode.
+        """
+        # NOTE(review): with uploadByUrl set this still downloads the
+        # file on every call -- confirm that is intended.
+        if not self._retrieved or self.uploadByUrl:
+            # Get file contents
+            wikipedia.output(u'Reading file %s' % self.url)
+            if '://' in self.url:
+                resume = False
+                dt = 15
+
+                while not self._retrieved:
+                    uo = wikipedia.MyURLopener()
+                    if resume:
+                        wikipedia.output(u"Resume download...")
+                        uo.addheader('Range', 'bytes=%s-' % rlen)
+
+                    infile = uo.open(self.url)
+                    try:
+                        headers = infile.info()
+
+                        # getheader() gives None for a missing header
+                        content_type = headers.getheader('Content-Type')
+                        if 'text/html' in (content_type or ''):
+                            wikipedia.output(
+                                u"Couldn't download the image: the "
+                                u"requested URL was not found on this "
+                                u"server.")
+                            return
+
+                        content_len = headers.getheader('Content-Length')
+                        accept_ranges = \
+                            headers.getheader('Accept-Ranges') == 'bytes'
+
+                        if resume:
+                            # Bytes already held before this request; the
+                            # Content-Length of a ranged answer only
+                            # covers what comes after them.
+                            offset = rlen
+                            self._contents += infile.read()
+                        else:
+                            offset = 0
+                            self._contents = infile.read()
+                    finally:
+                        # Close the connection on every path, including
+                        # the early return above.
+                        infile.close()
+                    self._retrieved = True
+
+                    if content_len:
+                        rlen = len(self._contents)
+                        expected = offset + int(content_len)
+                        if rlen < expected:
+                            self._retrieved = False
+                            wikipedia.output(
+                                u"Connection closed at byte %s (%s left)"
+                                % (rlen, expected - rlen))
+                            if accept_ranges and rlen > 0:
+                                resume = True
+                            wikipedia.output(
+                                u"Sleeping for %d seconds..." % dt)
+                            time.sleep(dt)
+                            # Back off: 15 s steps up to 75 s, then 60 s
+                            # steps until the pause reaches 375 s.
+                            if dt <= 60:
+                                dt += 15
+                            elif dt < 360:
+                                dt += 60
+                    else:
+                        if wikipedia.verbose:
+                            wikipedia.output(
+                                u"WARNING: No check length to retrieved "
+                                u"data is possible.")
+            else:
+                # Opening local files with MyURLopener would be possible,
+                # but we don't do it because it only accepts ASCII
+                # characters in the filename.
+                infile = open(self.url, "rb")
+                try:
+                    self._contents = infile.read()
+                finally:
+                    infile.close()
+                self._retrieved = True
+
+    def process_filename(self):
+        """
+        Settle the target filename and let the user review the description.
+
+        The name starts as the last path component of self.url (decoded
+        and unquoted when self.urlEncoding is set) or self.useFilename if
+        given.  Unless self.keepFilename is set the user is asked for a
+        better name until it contains no forbidden character and has an
+        accepted (or explicitly confirmed) extension.  When
+        self.verifyDescription is set the user may edit self.description.
+
+        Returns the filename with spaces replaced by underscores.
+        """
+        # Isolate the pure name
+        filename = self.url
+
+        if '/' in filename:
+            filename = filename.split('/')[-1]
+
+        if '\\' in filename:
+            filename = filename.split('\\')[-1]
+
+        if self.urlEncoding:
+            filename = urllib.unquote(filename.decode(self.urlEncoding))
+
+        if self.useFilename:
+            filename = self.useFilename
+        if not self.keepFilename:
+            wikipedia.output(
+                u"The filename on the target wiki will default to: %s"
+                % filename)
+            # FIXME: these 2 belong somewhere else, presumably in family
+            forbidden = '/'  # to be extended
+            allowed_formats = (u'gif', u'jpg', u'jpeg', u'mid', u'midi',
+                               u'ogg', u'png', u'svg', u'xcf', u'djvu')
+            # ask newfn until it's valid
+            ok = False
+            while not ok:
+                ok = True
+                newfn = wikipedia.input(
+                    u'Enter a better name, or press enter to accept:')
+                if newfn == "":
+                    newfn = filename
+                ext = os.path.splitext(newfn)[1].lower().strip('.')
+                for c in forbidden:
+                    if c in newfn:
+                        wikipedia.output(
+                            u"Invalid character: %s. Please try again" % c)
+                        ok = False
+                if ext not in allowed_formats and ok:
+                    choice = wikipedia.inputChoice(
+                        u"File format is not one of [%s], but %s. "
+                        u"Continue?" % (u' '.join(allowed_formats), ext),
+                        ['yes', 'no'], ['y', 'N'], 'N')
+                    if choice == 'n':
+                        ok = False
+            # newfn is either the user's answer or the default name
+            filename = newfn
+        # MediaWiki doesn't allow spaces in the file name.
+        # Replace them here to avoid an extra confirmation form
+        filename = filename.replace(' ', '_')
+        # A proper description for the submission.
+        wikipedia.output(u"The suggested description is:")
+        wikipedia.output(self.description)
+        if self.verifyDescription:
+            choice = wikipedia.inputChoice(
+                u'Do you want to change this description?',
+                ['Yes', 'No'], ['y', 'N'], 'n')
+            if choice == 'y':
+                import editarticle
+                editor = editarticle.TextEditor()
+                newDescription = editor.edit(self.description)
+                # if user saved / didn't press Cancel
+                if newDescription:
+                    self.description = newDescription
+        return filename
+
+    def upload_image(self, debug=False):
+        """Gets the image at URL self.url, and uploads it to the target wiki.
+
+        The API is used when config.use_api is set, the site reports
+        version 16 or later and api_address() is implemented; otherwise
+        the work is delegated to _uploadImageOld().
+
+        Returns the filename which was used to upload the image.
+        If the API reports an error, or the user declines to ignore a
+        warning, returns None.
+        """
+        try:
+            if config.use_api and self.targetSite.versionnumber() >= 16:
+                # Only called to find out whether the site has an API
+                # address; it signals "no" with NotImplementedError.
+                self.targetSite.api_address()
+            else:
+                raise NotImplementedError
+        except NotImplementedError:
+            return self._uploadImageOld(debug)
+
+        if not hasattr(self, '_contents'):
+            self.read_file_content()
+
+        filename = self.process_filename()
+
+        params = {
+            'action': 'upload',
+            'token': self.targetSite.getToken(),
+            'comment': self.description,
+            'filename': filename,
+        }
+        if self.uploadByUrl:
+            params['url'] = self.url
+        else:
+            params['file'] = self._contents
+
+        if self.ignoreWarning:
+            params['ignorewarnings'] = 1
+
+        wikipedia.output(u'Uploading file to %s via API....'
+                         % self.targetSite)
+
+        data = query.GetData(params, self.targetSite)
+
+        if wikipedia.verbose:
+            wikipedia.output("%s" % data)
+
+        if 'error' in data:
+            # The API rejected the request; show the raw answer.
+            wikipedia.output("%s" % data)
+            return None
+
+        data = data['upload']
+        if data['result'] == u'Warning':
+            # The API answered with a warning instead of storing the file.
+            warn = data['warnings'].keys()[0]
+            wikipedia.output("We got a warning message:", newline=False)
+            warFn = data['warnings'][warn]
+            if warn == 'duplicate-archive':
+                wikipedia.output(
+                    "The file is duplicate a deleted file %s." % warFn)
+            elif warn == 'was-deleted':
+                wikipedia.output("This file was deleted for %s." % warFn)
+            elif warn == 'emptyfile':
+                wikipedia.output("File %s is an empty file." % warFn)
+            elif warn == 'exists':
+                wikipedia.output("File %s is exists." % warFn)
+            elif warn == 'duplicate':
+                wikipedia.output(
+                    "Uploaded file is duplicate with %s." % warFn)
+            elif warn == 'badfilename':
+                wikipedia.output("Target filename is invaild.")
+            elif warn == 'filetype-unwanted-type':
+                wikipedia.output(
+                    "File %s type is unwatched type." % warFn)
+            answer = wikipedia.inputChoice(u"Do you want to ignore?",
+                                           ['Yes', 'No'], ['y', 'N'], 'N')
+            if answer == "y":
+                self.ignoreWarning = 1
+                self.keepFilename = True
+                # Retry under the name settled above; without this the
+                # retry would fall back to the name derived from the URL.
+                self.useFilename = filename
+                return self.upload_image(debug)
+            wikipedia.output("Upload aborted.")
+            return None
+
+        if data['result'] == u'Success':
+            # No warning: the file is uploaded and online.
+            wikipedia.output(u"Upload successful.")
+            return filename
+        return None
+
+    def _uploadImageOld(self, debug=False):
+        """
+        Upload the file by posting the HTML upload form (non-API path).
+
+        debug - if True everything is prepared but nothing is sent, and
+                the filename is returned.
+
+        Returns the filename used for the upload, or None when the upload
+        failed or the user declined to continue.
+        """
+        if not hasattr(self, '_contents'):
+            self.read_file_content()
+
+        filename = self.process_filename()
+        encoding = self.targetSite.encoding()
+        # Convert the filename (currently Unicode) to the encoding used on
+        # the target wiki
+        encodedFilename = filename.encode(encoding)
+
+        formdata = {}
+        formdata["wpUploadDescription"] = self.description
+        formdata["wpUploadAffirm"] = "1"
+        formdata["wpUpload"] = "upload bestand"
+        # This somehow doesn't work.
+        if self.ignoreWarning:
+            formdata["wpIgnoreWarning"] = "1"
+
+        # Get an edit token so we can do the upload
+        formdata["wpEditToken"] = self.targetSite.getToken()
+
+        # Set the new filename
+        formdata["wpDestFile"] = filename
+
+        if self.uploadByUrl:
+            formdata["wpUploadFileURL"] = self.url
+            formdata["wpSourceType"] = 'Url'
+        # Not needed now. Might be needed in the future
+        # else:
+        #     formdata["wpSourceType"] = 'file'
+
+        # try to encode the strings to the encoding used by the target
+        # site.  if that's not possible (e.g. because there are
+        # non-Latin-1 characters and the home Wikipedia uses Latin-1),
+        # convert all non-ASCII characters to HTML entities.
+        for key in formdata:
+            assert isinstance(key, basestring), \
+                "ERROR: %s is not a string but %s" % (key, type(key))
+            try:
+                formdata[key] = formdata[key].encode(encoding)
+            except (UnicodeEncodeError, UnicodeDecodeError):
+                formdata[key] = wikipedia.UnicodeToAsciiHtml(
+                    formdata[key]).encode(encoding)
+
+        # don't upload if we're in debug mode
+        if not debug:
+            wikipedia.output(u'Uploading file to %s...' % self.targetSite)
+
+            if self.uploadByUrl:
+                # Just do a post with all the fields filled out
+                response, returned_html = self.targetSite.postForm(
+                    self.targetSite.upload_address(), formdata.items(),
+                    cookies=self.targetSite.cookies())
+            else:
+                response, returned_html = post_multipart(
+                    self.targetSite, self.targetSite.upload_address(),
+                    formdata.items(),
+                    (('wpUploadFile', encodedFilename, self._contents),),
+                    cookies=self.targetSite.cookies())
+            # There are 2 ways MediaWiki can react on success: either it
+            # gives a 200 with a success message, or it gives a 302
+            # (redirection).
+            # Do we know how the "success!" HTML page should look like?
+            # ATTENTION: if you changed your Wikimedia Commons account not
+            # to show an English interface, this detection will fail!
+            # A bare status check (200/302) is not enough, because the
+            # server also gives a 200 when something went wrong.
+            success_msg = self.targetSite.mediawiki_message(
+                'successfulupload')
+            if success_msg in returned_html or response.status == 302:
+                wikipedia.output(u"Upload successful.")
+            elif response.status == 301:
+                wikipedia.output(u"Following redirect...")
+                address = response.getheader('Location')
+                wikipedia.output(
+                    u"Changed upload address to %s. Please update %s.py"
+                    % (address, self.targetSite.family.__module__))
+                # Point this site object at the new address for the retry.
+                self.targetSite.upload_address = lambda: address
+                return self.upload_image(debug)
+            else:
+                start_marker = '<!-- start content -->'
+                try:
+                    # Try to find the error message within the HTML page.
+                    # If we can't find it, we just dump the entire HTML
+                    # page.
+                    returned_html = returned_html[
+                        returned_html.index(start_marker)
+                        + len(start_marker):
+                        returned_html.index('<!-- end content -->')]
+                except ValueError:
+                    # index() found no marker: keep the whole page
+                    pass
+                wikipedia.output(u'%s\n\n' % returned_html)
+                wikipedia.output(u'%i %s'
+                                 % (response.status, response.reason))
+
+                warning_msg = self.targetSite.mediawiki_message(
+                    'uploadwarning')
+                if warning_msg in returned_html:
+                    answer = wikipedia.inputChoice(
+                        u"You have recevied an upload warning message. "
+                        u"Ignore?", ['Yes', 'No'], ['y', 'N'], 'N')
+                    if answer == "y":
+                        self.ignoreWarning = 1
+                        self.keepFilename = True
+                        # Retry under the name settled above rather than
+                        # the one derived from the URL.
+                        self.useFilename = filename
+                        return self._uploadImageOld(debug)
+                    # Declined: nothing was uploaded, so do not report a
+                    # filename.
+                    return None
+                else:
+                    answer = wikipedia.inputChoice(
+                        u'Upload of %s probably failed. Above you see the '
+                        u'HTML page which was returned by MediaWiki. '
+                        u'Try again?' % filename,
+                        ['Yes', 'No'], ['y', 'N'], 'N')
+                    if answer == "y":
+                        return self._uploadImageOld(debug)
+                    else:
+                        return None
+        return filename
+
+    def run(self):
+        """
+        Ask for a file or URL until urlOK() accepts it, then upload it.
+
+        Returns the result of upload_image().
+        """
+        while not self.urlOK():
+            if not self.url:
+                wikipedia.output(u'No input filename given')
+            else:
+                wikipedia.output(
+                    u'Invalid input filename given. Try again.')
+            self.url = wikipedia.input(u'File or URL where image is now:')
+        return self.upload_image()
+
+def main(args):
+    """
+    Parse the command line, build an UploadRobot and run it.
+
+    NOTE: the args parameter is not used; it is replaced straight away by
+    the list returned from wikipedia.handleArgs().
+
+    Recognised arguments are -keep, -filename:NAME and -noverify.  The
+    first other argument is the file or URL to upload; all remaining
+    ones are joined with spaces to form the description.
+    """
+    url = u''
+    description = []
+    keepFilename = False
+    useFilename = None
+    verifyDescription = True
+
+    # call wikipedia.py function to process all global wikipedia args
+    # returns a list of non-global args, i.e. args for upload.py
+    args = wikipedia.handleArgs()
+
+    for arg in args:
+        if not arg:
+            continue
+        if arg.startswith('-keep'):
+            keepFilename = True
+        elif arg.startswith('-filename:'):
+            useFilename = arg[len('-filename:'):]
+        elif arg.startswith('-noverify'):
+            verifyDescription = False
+        elif url == u'':
+            url = arg
+        else:
+            description.append(arg)
+    description = u' '.join(description)
+    bot = UploadRobot(url, description=description,
+                      useFilename=useFilename, keepFilename=keepFilename,
+                      verifyDescription=verifyDescription)
+    bot.run()
+
+if __name__ == "__main__":
+    try:
+        main(sys.argv[1:])
+    finally:
+        # Always runs, whether main() returned or raised.
+        wikipedia.stopme()