## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2

import sys
import os, os.path
import time
import errno    # needed by the MD5 fallback in recv_file()
import httplib
import logging
import mimetypes
import re
from logging import debug, info, warning, error
from stat import ST_SIZE

try:
    from hashlib import md5
except ImportError:
    from md5 import md5

from Utils import *
from SortedDict import SortedDict
from AccessLog import AccessLog
from ACL import ACL, GranteeLogDelivery
from BidirMap import BidirMap
from Config import Config
from Exceptions import *
from MultiPart import MultiPartUpload
from S3Uri import S3Uri
from ConnMan import ConnMan

try:
    import magic, gzip
    try:
        ## https://github.com/ahupp/python-magic
        magic_ = magic.Magic(mime=True)
        def mime_magic_file(file):
            return magic_.from_file(file)
        def mime_magic_buffer(buffer):
            return magic_.from_buffer(buffer)
    except TypeError:
        ## http://pypi.python.org/pypi/filemagic
        try:
            magic_ = magic.Magic(flags=magic.MAGIC_MIME)
            def mime_magic_file(file):
                return magic_.id_filename(file)
            def mime_magic_buffer(buffer):
                return magic_.id_buffer(buffer)
        except TypeError:
            ## file-5.11 built-in python bindings
            magic_ = magic.open(magic.MAGIC_MIME)
            magic_.load()
            def mime_magic_file(file):
                return magic_.file(file)
            def mime_magic_buffer(buffer):
                return magic_.buffer(buffer)

    except AttributeError:
        ## Older python-magic versions
        magic_ = magic.open(magic.MAGIC_MIME)
        magic_.load()
        def mime_magic_file(file):
            return magic_.file(file)
        def mime_magic_buffer(buffer):
            return magic_.buffer(buffer)

    def mime_magic(file):
        type = mime_magic_file(file)
        if type != "application/x-gzip; charset=binary":
            return (type, None)
        else:
            return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip')

except ImportError, e:
    if str(e).find("magic") >= 0:
        magic_message = "Module python-magic is not available."
    else:
        magic_message = "Module python-magic can't be used (%s)." % e.message
    magic_message += " Guessing MIME types based on file extensions."
    magic_warned = False
    def mime_magic(file):
        global magic_warned
        if (not magic_warned):
            warning(magic_message)
            magic_warned = True
        return mimetypes.guess_type(file)

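## Whichever backend was picked above, mime_magic() returns a tuple of
## (mime_type, encoding), mirroring mimetypes.guess_type().  Illustrative
## values only (hypothetical file names, exact strings depend on libmagic):
##   mime_magic("report.pdf")    -> ("application/pdf; charset=binary", None)
##   mime_magic("report.txt.gz") -> (<type of the uncompressed content>, 'gzip')
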
__all__ = []
class S3Request(object):
    def __init__(self, s3, method_string, resource, headers, params = {}):
        self.s3 = s3
        self.headers = SortedDict(headers or {}, ignore_case = True)
        # Add in any extra headers from the s3 config object
        if self.s3.config.extra_headers:
            self.headers.update(self.s3.config.extra_headers)
        if len(self.s3.config.access_token) > 0:
            self.s3.config.role_refresh()
            self.headers['x-amz-security-token'] = self.s3.config.access_token
        self.resource = resource
        self.method_string = method_string
        self.params = params

        self.update_timestamp()
        self.sign()

    def update_timestamp(self):
        if self.headers.has_key("date"):
            del(self.headers["date"])
        self.headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    def format_param_str(self):
        """
        Formats URL parameters from self.params and returns
        ?parm1=val1&parm2=val2 or an empty string if there
        are no parameters.  Output of this function should
        be appended directly to self.resource['uri']
        """
        param_str = ""
        for param in self.params:
            if self.params[param] not in (None, ""):
                param_str += "&%s=%s" % (param, self.params[param])
            else:
                param_str += "&%s" % param
        return param_str and "?" + param_str[1:]

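    ## Illustration of format_param_str() output (hypothetical params dict,
    ## parameter order follows dict iteration order):
    ##   {}                                     -> ""
    ##   {'prefix': 'logs/', 'delimiter': '/'}  -> "?prefix=logs/&delimiter=/"
    ##   {'acl': None}                          -> "?acl"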

    def sign(self):
        h = self.method_string + "\n"
        h += self.headers.get("content-md5", "")+"\n"
        h += self.headers.get("content-type", "")+"\n"
        h += self.headers.get("date", "")+"\n"
        for header in self.headers.keys():
            if header.startswith("x-amz-"):
                h += header+":"+str(self.headers[header])+"\n"
        if self.resource['bucket']:
            h += "/" + self.resource['bucket']
        h += self.resource['uri']
        debug("SignHeaders: " + repr(h))
        signature = sign_string(h)

        self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature

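    ## The string signed above follows Amazon's "signature version 2" REST
    ## scheme: VERB, Content-MD5, Content-Type and Date lines, then the
    ## x-amz-* headers and the resource path.  Because update_timestamp()
    ## always moves the timestamp into "x-amz-date", the Date line stays
    ## empty and the date is signed as an amz header instead.  sign_string()
    ## (imported from Utils) is expected to return the base64-encoded
    ## HMAC-SHA1 over this string, which ends up in
    ## "Authorization: AWS <access_key>:<signature>".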

    def get_triplet(self):
        self.update_timestamp()
        self.sign()
        resource = dict(self.resource)  ## take a copy
        resource['uri'] += self.format_param_str()
        return (self.method_string, resource, self.headers)

class S3(object):
    http_methods = BidirMap(
        GET = 0x01,
        PUT = 0x02,
        HEAD = 0x04,
        DELETE = 0x08,
        POST = 0x10,
        MASK = 0x1F,
        )

    targets = BidirMap(
        SERVICE = 0x0100,
        BUCKET = 0x0200,
        OBJECT = 0x0400,
        MASK = 0x0700,
        )

    operations = BidirMap(
        UNDFINED = 0x0000,
        LIST_ALL_BUCKETS = targets["SERVICE"] | http_methods["GET"],
        BUCKET_CREATE = targets["BUCKET"] | http_methods["PUT"],
        BUCKET_LIST = targets["BUCKET"] | http_methods["GET"],
        BUCKET_DELETE = targets["BUCKET"] | http_methods["DELETE"],
        OBJECT_PUT = targets["OBJECT"] | http_methods["PUT"],
        OBJECT_GET = targets["OBJECT"] | http_methods["GET"],
        OBJECT_HEAD = targets["OBJECT"] | http_methods["HEAD"],
        OBJECT_DELETE = targets["OBJECT"] | http_methods["DELETE"],
        OBJECT_POST = targets["OBJECT"] | http_methods["POST"],
        )

    codes = {
        "NoSuchBucket" : "Bucket '%s' does not exist",
        "AccessDenied" : "Access to bucket '%s' was denied",
        "BucketAlreadyExists" : "Bucket '%s' already exists",
        }

    ## S3 sometimes sends an HTTP-307 redirect response
    redir_map = {}

    ## Maximum number of attempts for re-issuing failed requests
    _max_retries = 5

    def __init__(self, config):
        self.config = config

    def get_hostname(self, bucket):
        if bucket and check_bucket_name_dns_conformity(bucket):
            if self.redir_map.has_key(bucket):
                host = self.redir_map[bucket]
            else:
                host = getHostnameFromBucket(bucket)
        else:
            host = self.config.host_base
        debug('get_hostname(%s): %s' % (bucket, host))
        return host

    def set_hostname(self, bucket, redir_hostname):
        self.redir_map[bucket] = redir_hostname

    def format_uri(self, resource):
        if resource['bucket'] and not check_bucket_name_dns_conformity(resource['bucket']):
            uri = "/%s%s" % (resource['bucket'], resource['uri'])
        else:
            uri = resource['uri']
        if self.config.proxy_host != "":
            uri = "http://%s%s" % (self.get_hostname(resource['bucket']), uri)
        debug('format_uri(): ' + uri)
        return uri

    ## Commands / Actions
    def list_all_buckets(self):
        request = self.create_request("LIST_ALL_BUCKETS")
        response = self.send_request(request)
        response["list"] = getListFromXml(response["data"], "Bucket")
        return response

    def bucket_list(self, bucket, prefix = None, recursive = None):
        def _list_truncated(data):
            ## <IsTruncated> can either be "true" or "false" or be missing completely
            is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
            return is_truncated.lower() != "false"

        def _get_contents(data):
            return getListFromXml(data, "Contents")

        def _get_common_prefixes(data):
            return getListFromXml(data, "CommonPrefixes")

        uri_params = {}
        truncated = True
        list = []
        prefixes = []

        while truncated:
            response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
            current_list = _get_contents(response["data"])
            current_prefixes = _get_common_prefixes(response["data"])
            truncated = _list_truncated(response["data"])
            if truncated:
                if current_list:
                    uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"])
                else:
                    uri_params['marker'] = self.urlencode_string(current_prefixes[-1]["Prefix"])
                debug("Listing continues after '%s'" % uri_params['marker'])

            list += current_list
            prefixes += current_prefixes

        response['list'] = list
        response['common_prefixes'] = prefixes
        return response

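    ## bucket_list() hides S3's paging (at most 1000 keys per response by
    ## default): it keeps re-issuing BUCKET_LIST requests with an updated
    ## 'marker' until <IsTruncated> turns false, then returns everything at
    ## once.  Rough usage sketch (hypothetical bucket and prefix, a valid
    ## Config assumed):
    ##   s3 = S3(Config())
    ##   resp = s3.bucket_list("my-bucket", prefix = "logs/2012/")
    ##   for entry in resp["list"]: print entry["Key"]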

    def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}):
        if prefix:
            uri_params['prefix'] = self.urlencode_string(prefix)
        if not self.config.recursive and not recursive:
            uri_params['delimiter'] = "/"
        request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
        response = self.send_request(request)
        #debug(response)
        return response

    def bucket_create(self, bucket, bucket_location = None):
        headers = SortedDict(ignore_case = True)
        body = ""
        if bucket_location and bucket_location.strip().upper() != "US":
            bucket_location = bucket_location.strip()
            if bucket_location.upper() == "EU":
                bucket_location = bucket_location.upper()
            else:
                bucket_location = bucket_location.lower()
            body = "<CreateBucketConfiguration><LocationConstraint>"
            body += bucket_location
            body += "</LocationConstraint></CreateBucketConfiguration>"
            debug("bucket_location: " + body)
            check_bucket_name(bucket, dns_strict = True)
        else:
            check_bucket_name(bucket, dns_strict = False)
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers)
        response = self.send_request(request, body)
        return response

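    ## For a non-US location the request body built above looks like, e.g.:
    ##   <CreateBucketConfiguration><LocationConstraint>eu-west-1</LocationConstraint></CreateBucketConfiguration>
    ## For the default US region the body stays empty and the bucket is
    ## created in us-east-1.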

    def bucket_delete(self, bucket):
        request = self.create_request("BUCKET_DELETE", bucket = bucket)
        response = self.send_request(request)
        return response

    def get_bucket_location(self, uri):
        request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?location")
        response = self.send_request(request)
        location = getTextFromXml(response['data'], "LocationConstraint")
        if not location or location in [ "", "US" ]:
            location = "us-east-1"
        elif location == "EU":
            location = "eu-west-1"
        return location

    def bucket_info(self, uri):
        # For now reports only "Location". One day perhaps more.
        response = {}
        response['bucket-location'] = self.get_bucket_location(uri)
        return response

    def website_info(self, uri, bucket_location = None):
        headers = SortedDict(ignore_case = True)
        bucket = uri.bucket()
        body = ""

        request = self.create_request("BUCKET_LIST", bucket = bucket, extra="?website")
        try:
            response = self.send_request(request, body)
            response['index_document'] = getTextFromXml(response['data'], ".//IndexDocument//Suffix")
            response['error_document'] = getTextFromXml(response['data'], ".//ErrorDocument//Key")
            response['website_endpoint'] = self.config.website_endpoint % {
                "bucket" : uri.bucket(),
                "location" : self.get_bucket_location(uri)}
            return response
        except S3Error, e:
            if e.status == 404:
                debug("Could not get /?website - website probably not configured for this bucket")
                return None
            raise

    def website_create(self, uri, bucket_location = None):
        headers = SortedDict(ignore_case = True)
        bucket = uri.bucket()
        body = '<WebsiteConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">'
        body += ' <IndexDocument>'
        body += ('  <Suffix>%s</Suffix>' % self.config.website_index)
        body += ' </IndexDocument>'
        if self.config.website_error:
            body += ' <ErrorDocument>'
            body += ('  <Key>%s</Key>' % self.config.website_error)
            body += ' </ErrorDocument>'
        body += '</WebsiteConfiguration>'

        request = self.create_request("BUCKET_CREATE", bucket = bucket, extra="?website")
        debug("About to send request '%s' with body '%s'" % (request, body))
        response = self.send_request(request, body)
        debug("Received response '%s'" % (response))

        return response

    def website_delete(self, uri, bucket_location = None):
        headers = SortedDict(ignore_case = True)
        bucket = uri.bucket()
        body = ""

        request = self.create_request("BUCKET_DELETE", bucket = bucket, extra="?website")
        debug("About to send request '%s' with body '%s'" % (request, body))
        response = self.send_request(request, body)
        debug("Received response '%s'" % (response))

        if response['status'] != 204:
            raise S3ResponseError("Expected status 204: %s" % response)

        return response

    def add_encoding(self, filename, content_type):
        if content_type.find("charset=") != -1:
            return False
        exts = self.config.add_encoding_exts.split(',')
        if exts[0] == '':
            return False
        parts = filename.rsplit('.', 2)
        if len(parts) < 2:
            return False
        ext = parts[1]
        if ext in exts:
            return True
        else:
            return False

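    ## Behaviour sketch (hypothetical names, add_encoding_exts = "css,js"):
    ##   add_encoding("style.css", "text/css")                 -> True
    ##   add_encoding("style.css", "text/css; charset=utf-8")  -> False  (charset already set)
    ##   add_encoding("app.min.js", "application/javascript")  -> False
    ## The last case is because rsplit('.', 2) makes parts[1] the *middle*
    ## extension ("min"), not the final one.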

    def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
        # TODO TODO
        # Make it consistent with stream-oriented object_get()
        if uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)

        if filename != "-" and not os.path.isfile(filename):
            raise InvalidFileError(u"%s is not a regular file" % unicodise(filename))
        try:
            if filename == "-":
                file = sys.stdin
                size = 0
            else:
                file = open(filename, "rb")
                size = os.stat(filename)[ST_SIZE]
        except (IOError, OSError), e:
            raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror))

        headers = SortedDict(ignore_case = True)
        if extra_headers:
            headers.update(extra_headers)

        ## MIME-type handling
        content_type = self.config.mime_type
        content_encoding = None
        if filename != "-" and not content_type and self.config.guess_mime_type:
            (content_type, content_encoding) = mime_magic(filename)
        if not content_type:
            content_type = self.config.default_mime_type
        if not content_encoding:
            content_encoding = self.config.encoding.upper()

        ## add charset to content type
        if self.add_encoding(filename, content_type) and content_encoding is not None:
            content_type = content_type + "; charset=" + content_encoding

        headers["content-type"] = content_type
        if content_encoding is not None:
            headers["content-encoding"] = content_encoding

        ## Other Amazon S3 attributes
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        if self.config.reduced_redundancy:
            headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"

        ## Multipart decision
        multipart = False
        if not self.config.enable_multipart and filename == "-":
            raise ParameterError("Multi-part upload is required to upload from stdin")
        if self.config.enable_multipart:
            if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-":
                multipart = True
        if multipart:
            # Multipart requests are quite different... hand off to the multipart path here
            return self.send_file_multipart(file, headers, uri, size)

        ## Not multipart...
        headers["content-length"] = size
        request = self.create_request("OBJECT_PUT", uri = uri, headers = headers)
        labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label }
        response = self.send_file(request, file, labels)
        return response

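    ## Multipart decision illustrated: with multipart_chunk_size_mb = 15
    ## (the s3cmd default), a 20 MB file or any upload from stdin goes
    ## through send_file_multipart(), while a 10 MB file is sent as a single
    ## OBJECT_PUT via send_file().  With enable_multipart switched off,
    ## stdin uploads fail early with ParameterError because their size is
    ## not known up front.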

    def object_get(self, uri, stream, start_position = 0, extra_label = ""):
        if uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
        request = self.create_request("OBJECT_GET", uri = uri)
        labels = { 'source' : unicodise(uri.uri()), 'destination' : unicodise(stream.name), 'extra' : extra_label }
        response = self.recv_file(request, stream, labels, start_position)
        return response

    def object_delete(self, uri):
        if uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
        request = self.create_request("OBJECT_DELETE", uri = uri)
        response = self.send_request(request)
        return response

    def object_copy(self, src_uri, dst_uri, extra_headers = None):
        if src_uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
        if dst_uri.type != "s3":
            raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
        headers = SortedDict(ignore_case = True)
        headers['x-amz-copy-source'] = "/%s/%s" % (src_uri.bucket(), self.urlencode_string(src_uri.object()))
        ## TODO: For now COPY, later maybe add a switch?
        headers['x-amz-metadata-directive'] = "COPY"
        if self.config.acl_public:
            headers["x-amz-acl"] = "public-read"
        if self.config.reduced_redundancy:
            headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"
        # if extra_headers:
        #     headers.update(extra_headers)
        request = self.create_request("OBJECT_PUT", uri = dst_uri, headers = headers)
        response = self.send_request(request)
        return response

    def object_move(self, src_uri, dst_uri, extra_headers = None):
        response_copy = self.object_copy(src_uri, dst_uri, extra_headers)
        debug("Object %s copied to %s" % (src_uri, dst_uri))
        if getRootTagName(response_copy["data"]) == "CopyObjectResult":
            response_delete = self.object_delete(src_uri)
            debug("Object %s deleted" % src_uri)
        return response_copy

    def object_info(self, uri):
        request = self.create_request("OBJECT_HEAD", uri = uri)
        response = self.send_request(request)
        return response

    def get_acl(self, uri):
        if uri.has_object():
            request = self.create_request("OBJECT_GET", uri = uri, extra = "?acl")
        else:
            request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?acl")

        response = self.send_request(request)
        acl = ACL(response['data'])
        return acl

    def set_acl(self, uri, acl):
        if uri.has_object():
            request = self.create_request("OBJECT_PUT", uri = uri, extra = "?acl")
        else:
            request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?acl")

        body = str(acl)
        debug(u"set_acl(%s): acl-xml: %s" % (uri, body))
        response = self.send_request(request, body)
        return response

    def get_policy(self, uri):
        request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?policy")
        response = self.send_request(request)
        return response['data']

    def set_policy(self, uri, policy):
        headers = {}
        # TODO check policy is proper json string
        headers['content-type'] = 'application/json'
        request = self.create_request("BUCKET_CREATE", uri = uri,
                                      extra = "?policy", headers=headers)
        body = policy
        debug(u"set_policy(%s): policy-json: %s" % (uri, body))
        request.sign()
        response = self.send_request(request, body=body)
        return response

    def delete_policy(self, uri):
        request = self.create_request("BUCKET_DELETE", uri = uri, extra = "?policy")
        debug(u"delete_policy(%s)" % uri)
        response = self.send_request(request)
        return response

    def get_accesslog(self, uri):
        request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging")
        response = self.send_request(request)
        accesslog = AccessLog(response['data'])
        return accesslog

    def set_accesslog_acl(self, uri):
        acl = self.get_acl(uri)
        debug("Current ACL(%s): %s" % (uri.uri(), str(acl)))
        acl.appendGrantee(GranteeLogDelivery("READ_ACP"))
        acl.appendGrantee(GranteeLogDelivery("WRITE"))
        debug("Updated ACL(%s): %s" % (uri.uri(), str(acl)))
        self.set_acl(uri, acl)

    def set_accesslog(self, uri, enable, log_target_prefix_uri = None, acl_public = False):
        request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?logging")
        accesslog = AccessLog()
        if enable:
            accesslog.enableLogging(log_target_prefix_uri)
            accesslog.setAclPublic(acl_public)
        else:
            accesslog.disableLogging()
        body = str(accesslog)
        debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body))
        try:
            response = self.send_request(request, body)
        except S3Error, e:
            if e.info['Code'] == "InvalidTargetBucketForLogging":
                info("Setting up log-delivery ACL for target bucket.")
                self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket()))
                response = self.send_request(request, body)
            else:
                raise
        return accesslog, response

    ## Low level methods
    def urlencode_string(self, string, urlencoding_mode = None):
        if type(string) == unicode:
            string = string.encode("utf-8")

        if urlencoding_mode is None:
            urlencoding_mode = self.config.urlencoding_mode

        if urlencoding_mode == "verbatim":
            ## Don't do any pre-processing
            return string

        encoded = ""
        ## List of characters that must be escaped for S3.
        ## Haven't found this in any official docs
        ## but my tests show it's more or less correct.
        ## If you start getting InvalidSignature errors
        ## from S3, check the error headers returned
        ## by S3 to see whether the list has changed.
        for c in string:    # I'm not sure what encoding 'string' is in.
                            # Apparently "type(string) == str" but the
                            # contents are UTF-8 encoded bytes, e.g.
                            # '\xc4\x8d\xc5\xafr\xc3\xa1k'.
                            # No idea what it will do on non-UTF-8 systems.
                            # [hope that sounds reassuring ;-)]
            o = ord(c)
            if (o < 0x20 or o == 0x7f):
                if urlencoding_mode == "fixbucket":
                    encoded += "%%%02X" % o
                else:
                    error(u"Non-printable character 0x%02x in: %s" % (o, string))
                    error(u"Please report it to s3tools-bugs@lists.sourceforge.net")
                    encoded += replace_nonprintables(c)
            elif (o == 0x20 or  # Space
                  o == 0x22 or  # "
                  o == 0x23 or  # #
                  o == 0x25 or  # % (escape character)
                  o == 0x26 or  # &
                  o == 0x2B or  # + (or it would become <space>)
                  o == 0x3C or  # <
                  o == 0x3E or  # >
                  o == 0x3F or  # ?
                  o == 0x60 or  # `
                  o >= 123):    # { and above, including >= 128 for UTF-8
                encoded += "%%%02X" % o
            else:
                encoded += c
        debug("String '%s' encoded to '%s'" % (string, encoded))
        return encoded

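    ## Encoding examples (hypothetical key names):
    ##   "photos/2012/me.jpg"  -> "photos/2012/me.jpg"   (nothing to escape)
    ##   "my file+name.txt"    -> "my%20file%2Bname.txt"
    ## Multi-byte UTF-8 characters are escaped byte by byte (each byte
    ## >= 0x80 becomes its own %XX sequence).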

    def create_request(self, operation, uri = None, bucket = None, object = None, headers = None, extra = None, **params):
        resource = { 'bucket' : None, 'uri' : "/" }

        if uri and (bucket or object):
            raise ValueError("Both 'uri' and either 'bucket' or 'object' parameters supplied")
        ## If URI is given use that instead of bucket/object parameters
        if uri:
            bucket = uri.bucket()
            object = uri.has_object() and uri.object() or None

        if bucket:
            resource['bucket'] = str(bucket)
            if object:
                resource['uri'] = "/" + self.urlencode_string(object)
        if extra:
            resource['uri'] += extra

        method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"])

        request = S3Request(self, method_string, resource, headers, params)

        debug("CreateRequest: resource[uri]=" + resource['uri'])
        return request

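    ## How the method string is derived above, e.g. for "OBJECT_PUT":
    ##   operations["OBJECT_PUT"] == targets["OBJECT"] | http_methods["PUT"]
    ##                            == 0x0400 | 0x02 == 0x0402
    ##   0x0402 & http_methods["MASK"] (0x1F) == 0x02, and
    ##   http_methods.getkey(0x02) == "PUT"
    ## (getkey() is the reverse lookup provided by BidirMap.)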

    def _fail_wait(self, retries):
        # Wait a few seconds.  The more it fails the more we wait.
        return (self._max_retries - retries + 1) * 3

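    ## With _max_retries = 5 the back-off is linear: the waits before the
    ## successive retries are 3, 6, 9, 12 and 15 seconds.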

    def send_request(self, request, body = None, retries = _max_retries):
        method_string, resource, headers = request.get_triplet()
        debug("Processing request, please wait...")
        if not headers.has_key('content-length'):
            headers['content-length'] = body and len(body) or 0
        try:
            # "Stringify" all headers
            for header in headers.keys():
                headers[header] = str(headers[header])
            conn = ConnMan.get(self.get_hostname(resource['bucket']))
            uri = self.format_uri(resource)
            debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
            conn.c.request(method_string, uri, body, headers)
            response = {}
            http_response = conn.c.getresponse()
            response["status"] = http_response.status
            response["reason"] = http_response.reason
            response["headers"] = convertTupleListToDict(http_response.getheaders())
            response["data"] = http_response.read()
            debug("Response: " + str(response))
            ConnMan.put(conn)
        except ParameterError, e:
            raise
        except Exception, e:
            if retries:
                warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(request, body, retries - 1)
            else:
                raise S3RequestError("Request failed for: %s" % resource['uri'])

        if response["status"] == 307:
            ## TemporaryRedirect
            redir_bucket = getTextFromXml(response['data'], ".//Bucket")
            redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
            self.set_hostname(redir_bucket, redir_hostname)
            warning("Redirected to: %s" % (redir_hostname))
            return self.send_request(request, body)

        if response["status"] >= 500:
            e = S3Error(response)
            if retries:
                warning(u"Retrying failed request: %s" % resource['uri'])
                warning(unicode(e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(request, body, retries - 1)
            else:
                raise e

        if response["status"] < 200 or response["status"] > 299:
            raise S3Error(response)

        return response

    def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
        method_string, resource, headers = request.get_triplet()
        size_left = size_total = headers.get("content-length")
        if self.config.progress_meter:
            progress = self.config.progress_class(labels, size_total)
        else:
            info("Sending file '%s', please wait..." % file.name)
        timestamp_start = time.time()
        try:
            conn = ConnMan.get(self.get_hostname(resource['bucket']))
            conn.c.putrequest(method_string, self.format_uri(resource))
            for header in headers.keys():
                conn.c.putheader(header, str(headers[header]))
            conn.c.endheaders()
        except ParameterError, e:
            raise
        except Exception, e:
            if self.config.progress_meter:
                progress.done("failed")
            if retries:
                warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                # Connection error -> same throttle value
                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
            else:
                raise S3UploadError("Upload failed for: %s" % resource['uri'])
        if buffer == '':
            file.seek(offset)
        md5_hash = md5()
        try:
            while (size_left > 0):
                #debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left))
                if buffer == '':
                    data = file.read(min(self.config.send_chunk, size_left))
                else:
                    data = buffer
                md5_hash.update(data)
                conn.c.send(data)
                if self.config.progress_meter:
                    progress.update(delta_position = len(data))
                size_left -= len(data)
                if throttle:
                    time.sleep(throttle)
            md5_computed = md5_hash.hexdigest()
            response = {}
            http_response = conn.c.getresponse()
            response["status"] = http_response.status
            response["reason"] = http_response.reason
            response["headers"] = convertTupleListToDict(http_response.getheaders())
            response["data"] = http_response.read()
            response["size"] = size_total
            ConnMan.put(conn)
            debug(u"Response: %s" % response)
        except ParameterError, e:
            raise
        except Exception, e:
            if self.config.progress_meter:
                progress.done("failed")
            if retries:
                if retries < self._max_retries:
                    throttle = throttle and throttle * 5 or 0.01
                warning("Upload failed: %s (%s)" % (resource['uri'], e))
                warning("Retrying on lower speed (throttle=%0.2f)" % throttle)
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                # Connection error -> same throttle value
                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
            else:
                debug("Giving up on '%s' %s" % (file.name, e))
                raise S3UploadError("Upload failed for: %s" % resource['uri'])

        timestamp_end = time.time()
        response["elapsed"] = timestamp_end - timestamp_start
        response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)

        if self.config.progress_meter:
            ## Finalising the upload takes some time -> update() progress meter
            ## to correct the average speed. Otherwise people will complain that
            ## 'progress' and response["speed"] are inconsistent ;-)
            progress.update()
            progress.done("done")

        if response["status"] == 307:
            ## TemporaryRedirect
            redir_bucket = getTextFromXml(response['data'], ".//Bucket")
            redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
            self.set_hostname(redir_bucket, redir_hostname)
            warning("Redirected to: %s" % (redir_hostname))
            return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)

        # S3 from time to time doesn't send ETag back in a response :-(
        # Force re-upload here.
        if not response['headers'].has_key('etag'):
            response['headers']['etag'] = ''

        if response["status"] < 200 or response["status"] > 299:
            try_retry = False
            if response["status"] >= 500:
                ## AWS internal error - retry
                try_retry = True
            elif response["status"] >= 400:
                err = S3Error(response)
                ## Retriable client error?
                if err.code in [ 'BadDigest', 'OperationAborted', 'TokenRefreshRequired', 'RequestTimeout' ]:
                    try_retry = True

            if try_retry:
                if retries:
                    warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response)))
                    warning("Waiting %d sec..." % self._fail_wait(retries))
                    time.sleep(self._fail_wait(retries))
                    return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
                else:
                    warning("Too many failures. Giving up on '%s'" % (file.name))
                    raise S3UploadError

            ## Non-recoverable error
            raise S3Error(response)

        debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"]))
        if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest():
            warning("MD5 Sums don't match!")
            if retries:
                warning("Retrying upload of %s" % (file.name))
                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
            else:
                warning("Too many failures. Giving up on '%s'" % (file.name))
                raise S3UploadError

        return response

    def send_file_multipart(self, file, headers, uri, size):
        chunk_size = self.config.multipart_chunk_size_mb * 1024 * 1024
        timestamp_start = time.time()
        upload = MultiPartUpload(self, file, uri, headers)
        upload.upload_all_parts()
        response = upload.complete_multipart_upload()
        timestamp_end = time.time()
        response["elapsed"] = timestamp_end - timestamp_start
        response["size"] = size
        response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
        return response

    def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries):
        method_string, resource, headers = request.get_triplet()
        if self.config.progress_meter:
            progress = self.config.progress_class(labels, 0)
        else:
            info("Receiving file '%s', please wait..." % stream.name)
        timestamp_start = time.time()
        try:
            conn = ConnMan.get(self.get_hostname(resource['bucket']))
            conn.c.putrequest(method_string, self.format_uri(resource))
            for header in headers.keys():
                conn.c.putheader(header, str(headers[header]))
            if start_position > 0:
                debug("Requesting Range: %d .. end" % start_position)
                conn.c.putheader("Range", "bytes=%d-" % start_position)
            conn.c.endheaders()
            response = {}
            http_response = conn.c.getresponse()
            response["status"] = http_response.status
            response["reason"] = http_response.reason
            response["headers"] = convertTupleListToDict(http_response.getheaders())
            debug("Response: %s" % response)
        except ParameterError, e:
            raise
        except Exception, e:
            if self.config.progress_meter:
                progress.done("failed")
            if retries:
                warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                # Connection error -> same throttle value
                return self.recv_file(request, stream, labels, start_position, retries - 1)
            else:
                raise S3DownloadError("Download failed for: %s" % resource['uri'])

        if response["status"] == 307:
            ## TemporaryRedirect
            response['data'] = http_response.read()
            redir_bucket = getTextFromXml(response['data'], ".//Bucket")
            redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
            self.set_hostname(redir_bucket, redir_hostname)
            warning("Redirected to: %s" % (redir_hostname))
            return self.recv_file(request, stream, labels)

        if response["status"] < 200 or response["status"] > 299:
            raise S3Error(response)

        if start_position == 0:
            # Only compute MD5 on the fly if we're downloading from the beginning.
            # Otherwise we'd get nonsense.
            md5_hash = md5()
        size_left = int(response["headers"]["content-length"])
        size_total = start_position + size_left
        current_position = start_position

        if self.config.progress_meter:
            progress.total_size = size_total
            progress.initial_position = current_position
            progress.current_position = current_position

        try:
            while (current_position < size_total):
                this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
                data = http_response.read(this_chunk)
                if len(data) == 0:
                    raise S3Error("EOF from S3!")

                stream.write(data)
                if start_position == 0:
                    md5_hash.update(data)
                current_position += len(data)
                ## Call progress meter from here...
                if self.config.progress_meter:
                    progress.update(delta_position = len(data))
            ConnMan.put(conn)
        except Exception, e:
            if self.config.progress_meter:
                progress.done("failed")
            if retries:
                warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                # Connection error -> same throttle value
                return self.recv_file(request, stream, labels, current_position, retries - 1)
            else:
                raise S3DownloadError("Download failed for: %s" % resource['uri'])

        stream.flush()
        timestamp_end = time.time()

        if self.config.progress_meter:
            ## The above stream.flush() may take some time -> update() progress meter
            ## to correct the average speed. Otherwise people will complain that
            ## 'progress' and response["speed"] are inconsistent ;-)
            progress.update()
            progress.done("done")

        if start_position == 0:
            # Only compute MD5 on the fly if we were downloading from the beginning
            response["md5"] = md5_hash.hexdigest()
        else:
            # Otherwise try to compute MD5 of the output file
            try:
                response["md5"] = hash_file_md5(stream.name)
            except IOError, e:
                if e.errno != errno.ENOENT:
                    warning("Unable to open file: %s: %s" % (stream.name, e))
                warning("Unable to verify MD5. Assume it matches.")
                response["md5"] = response["headers"]["etag"]

        response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0
        response["elapsed"] = timestamp_end - timestamp_start
        response["size"] = current_position
        response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
        if response["size"] != start_position + long(response["headers"]["content-length"]):
            warning("Reported size (%s) does not match received size (%s)" % (
                start_position + long(response["headers"]["content-length"]), response["size"]))
        debug("ReceiveFile: Computed MD5 = %s" % response["md5"])
        if not response["md5match"]:
            warning("MD5 signatures do not match: computed=%s, received=%s" % (
                response["md5"], response["headers"]["etag"]))
        return response
__all__.append("S3")

# vim:et:ts=4:sts=4:ai