From fb0ab39f062a11ff0148fc34deecc966adbf2e4c Mon Sep 17 00:00:00 2001 From: jywsn Date: Thu, 26 Sep 2013 14:30:44 -0500 Subject: [PATCH] use common fablib; use timestamp as default version; commit/push build directory; custom banner on config.json --- config.json | 22 +- fabfile.py | 29 + fabfile/S3/ACL.py | 224 ---- fabfile/S3/AccessLog.py | 92 -- fabfile/S3/BidirMap.py | 42 - fabfile/S3/CloudFront.py | 773 -------------- fabfile/S3/Config.py | 294 ------ fabfile/S3/ConnMan.py | 71 -- fabfile/S3/Exceptions.py | 88 -- fabfile/S3/FileDict.py | 53 - fabfile/S3/FileLists.py | 517 ---------- fabfile/S3/HashCache.py | 53 - fabfile/S3/MultiPart.py | 137 --- fabfile/S3/PkgInfo.py | 14 - fabfile/S3/Progress.py | 173 ---- fabfile/S3/S3.py | 979 ------------------ fabfile/S3/S3Uri.py | 223 ---- fabfile/S3/SimpleDB.py | 178 ---- fabfile/S3/SortedDict.py | 66 -- fabfile/S3/Utils.py | 462 --------- fabfile/S3/__init__.py | 0 fabfile/__init__.py | 560 ---------- fabfile/s3cmd | 2116 -------------------------------------- requirements.txt | 1 + 24 files changed, 50 insertions(+), 7117 deletions(-) create mode 100644 fabfile.py delete mode 100644 fabfile/S3/ACL.py delete mode 100644 fabfile/S3/AccessLog.py delete mode 100644 fabfile/S3/BidirMap.py delete mode 100644 fabfile/S3/CloudFront.py delete mode 100644 fabfile/S3/Config.py delete mode 100644 fabfile/S3/ConnMan.py delete mode 100644 fabfile/S3/Exceptions.py delete mode 100644 fabfile/S3/FileDict.py delete mode 100644 fabfile/S3/FileLists.py delete mode 100644 fabfile/S3/HashCache.py delete mode 100644 fabfile/S3/MultiPart.py delete mode 100644 fabfile/S3/PkgInfo.py delete mode 100644 fabfile/S3/Progress.py delete mode 100644 fabfile/S3/S3.py delete mode 100644 fabfile/S3/S3Uri.py delete mode 100644 fabfile/S3/SimpleDB.py delete mode 100644 fabfile/S3/SortedDict.py delete mode 100644 fabfile/S3/Utils.py delete mode 100644 fabfile/S3/__init__.py delete mode 100644 fabfile/__init__.py delete mode 100755 fabfile/s3cmd diff 
--git a/config.json b/config.json index b536915..d04a90a 100644 --- a/config.json +++ b/config.json @@ -135,9 +135,27 @@ ], // // banner-ize files - // input: list of file paths + // input: list of objects specifying inputs + // @src: source file/directory + // @regex: regular expression to match files (if @src is directory) + // @template: template to use for banner (optional) // - "banner": [ "build/js", "build/css" ] + "banner": [ + { + "src": "build", + "regex": "(js|css)/.*\\.(css|js)$", + "template": [ + "/*", + " TimelineJS - ver. %(version)s - %(date)s", + " Copyright (c) 2012-2013 Northwestern University", + " a project of the Northwestern University Knight Lab, originally created by Zach Wise", + " https://github.com/NUKnightLab/TimelineJS", + " This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.", + " If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.", + "*/" + ] + } + ] }, // // stage diff --git a/fabfile.py b/fabfile.py new file mode 100644 index 0000000..d97df10 --- /dev/null +++ b/fabfile.py @@ -0,0 +1,29 @@ +from os.path import abspath, basename, dirname, join +import sys +from fabric.api import env + +# +# Project-specific settings, alter as needed +# +# env.project_name = basename(dirname(__file__)) +env.project_name = 'TimelineJS' + +# +# Add paths +# +def add_paths(*args): + """Make sure paths are in sys.path.""" + for p in args: + if p not in sys.path: + sys.path.append(p) + +project_path = dirname(abspath(__file__)) +repos_path = dirname(project_path) +fablib_path = join(repos_path, 'fablib') + +add_paths(project_path, repos_path, fablib_path) + +# +# Import from fablib +# +from fablib import * diff --git a/fabfile/S3/ACL.py b/fabfile/S3/ACL.py deleted file mode 100644 index a22c921..0000000 --- a/fabfile/S3/ACL.py +++ /dev/null @@ -1,224 +0,0 @@ -## Amazon S3 - Access Control List representation -## Author: Michal Ludvig -## http://www.logix.cz/michal -## 
License: GPL Version 2 - -from Utils import getTreeFromXml - -try: - import xml.etree.ElementTree as ET -except ImportError: - import elementtree.ElementTree as ET - -class Grantee(object): - ALL_USERS_URI = "http://acs.amazonaws.com/groups/global/AllUsers" - LOG_DELIVERY_URI = "http://acs.amazonaws.com/groups/s3/LogDelivery" - - def __init__(self): - self.xsi_type = None - self.tag = None - self.name = None - self.display_name = None - self.permission = None - - def __repr__(self): - return 'Grantee("%(tag)s", "%(name)s", "%(permission)s")' % { - "tag" : self.tag, - "name" : self.name, - "permission" : self.permission - } - - def isAllUsers(self): - return self.tag == "URI" and self.name == Grantee.ALL_USERS_URI - - def isAnonRead(self): - return self.isAllUsers() and (self.permission == "READ" or self.permission == "FULL_CONTROL") - - def getElement(self): - el = ET.Element("Grant") - grantee = ET.SubElement(el, "Grantee", { - 'xmlns:xsi' : 'http://www.w3.org/2001/XMLSchema-instance', - 'xsi:type' : self.xsi_type - }) - name = ET.SubElement(grantee, self.tag) - name.text = self.name - permission = ET.SubElement(el, "Permission") - permission.text = self.permission - return el - -class GranteeAnonRead(Grantee): - def __init__(self): - Grantee.__init__(self) - self.xsi_type = "Group" - self.tag = "URI" - self.name = Grantee.ALL_USERS_URI - self.permission = "READ" - -class GranteeLogDelivery(Grantee): - def __init__(self, permission): - """ - permission must be either READ_ACP or WRITE - """ - Grantee.__init__(self) - self.xsi_type = "Group" - self.tag = "URI" - self.name = Grantee.LOG_DELIVERY_URI - self.permission = permission - -class ACL(object): - EMPTY_ACL = "" - - def __init__(self, xml = None): - if not xml: - xml = ACL.EMPTY_ACL - - self.grantees = [] - self.owner_id = "" - self.owner_nick = "" - - tree = getTreeFromXml(xml) - self.parseOwner(tree) - self.parseGrants(tree) - - def parseOwner(self, tree): - self.owner_id = tree.findtext(".//Owner//ID") - 
self.owner_nick = tree.findtext(".//Owner//DisplayName") - - def parseGrants(self, tree): - for grant in tree.findall(".//Grant"): - grantee = Grantee() - g = grant.find(".//Grantee") - grantee.xsi_type = g.attrib['{http://www.w3.org/2001/XMLSchema-instance}type'] - grantee.permission = grant.find('Permission').text - for el in g: - if el.tag == "DisplayName": - grantee.display_name = el.text - else: - grantee.tag = el.tag - grantee.name = el.text - self.grantees.append(grantee) - - def getGrantList(self): - acl = [] - for grantee in self.grantees: - if grantee.display_name: - user = grantee.display_name - elif grantee.isAllUsers(): - user = "*anon*" - else: - user = grantee.name - acl.append({'grantee': user, 'permission': grantee.permission}) - return acl - - def getOwner(self): - return { 'id' : self.owner_id, 'nick' : self.owner_nick } - - def isAnonRead(self): - for grantee in self.grantees: - if grantee.isAnonRead(): - return True - return False - - def grantAnonRead(self): - if not self.isAnonRead(): - self.appendGrantee(GranteeAnonRead()) - - def revokeAnonRead(self): - self.grantees = [g for g in self.grantees if not g.isAnonRead()] - - def appendGrantee(self, grantee): - self.grantees.append(grantee) - - def hasGrant(self, name, permission): - name = name.lower() - permission = permission.upper() - - for grantee in self.grantees: - if grantee.name.lower() == name: - if grantee.permission == "FULL_CONTROL": - return True - elif grantee.permission.upper() == permission: - return True - - return False; - - def grant(self, name, permission): - if self.hasGrant(name, permission): - return - - name = name.lower() - permission = permission.upper() - - if "ALL" == permission: - permission = "FULL_CONTROL" - - if "FULL_CONTROL" == permission: - self.revoke(name, "ALL") - - grantee = Grantee() - grantee.name = name - grantee.permission = permission - - if name.find('@') <= -1: # ultra lame attempt to differenciate emails id from canonical ids - grantee.xsi_type = 
"CanonicalUser" - grantee.tag = "ID" - else: - grantee.xsi_type = "AmazonCustomerByEmail" - grantee.tag = "EmailAddress" - - self.appendGrantee(grantee) - - - def revoke(self, name, permission): - name = name.lower() - permission = permission.upper() - - if "ALL" == permission: - self.grantees = [g for g in self.grantees if not g.name.lower() == name] - else: - self.grantees = [g for g in self.grantees if not (g.name.lower() == name and g.permission.upper() == permission)] - - - def __str__(self): - tree = getTreeFromXml(ACL.EMPTY_ACL) - tree.attrib['xmlns'] = "http://s3.amazonaws.com/doc/2006-03-01/" - owner = tree.find(".//Owner//ID") - owner.text = self.owner_id - acl = tree.find(".//AccessControlList") - for grantee in self.grantees: - acl.append(grantee.getElement()) - return ET.tostring(tree) - -if __name__ == "__main__": - xml = """ - - - 12345678901234567890 - owner-nickname - - - - - 12345678901234567890 - owner-nickname - - FULL_CONTROL - - - - http://acs.amazonaws.com/groups/global/AllUsers - - READ - - - - """ - acl = ACL(xml) - print "Grants:", acl.getGrantList() - acl.revokeAnonRead() - print "Grants:", acl.getGrantList() - acl.grantAnonRead() - print "Grants:", acl.getGrantList() - print acl - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/AccessLog.py b/fabfile/S3/AccessLog.py deleted file mode 100644 index 7ae99ef..0000000 --- a/fabfile/S3/AccessLog.py +++ /dev/null @@ -1,92 +0,0 @@ -## Amazon S3 - Access Control List representation -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import S3Uri -from Exceptions import ParameterError -from Utils import getTreeFromXml -from ACL import GranteeAnonRead - -try: - import xml.etree.ElementTree as ET -except ImportError: - import elementtree.ElementTree as ET - -__all__ = [] -class AccessLog(object): - LOG_DISABLED = "" - LOG_TEMPLATE = "" - - def __init__(self, xml = None): - if not xml: - xml = self.LOG_DISABLED - self.tree = getTreeFromXml(xml) - 
self.tree.attrib['xmlns'] = "http://doc.s3.amazonaws.com/2006-03-01" - - def isLoggingEnabled(self): - return bool(self.tree.find(".//LoggingEnabled")) - - def disableLogging(self): - el = self.tree.find(".//LoggingEnabled") - if el: - self.tree.remove(el) - - def enableLogging(self, target_prefix_uri): - el = self.tree.find(".//LoggingEnabled") - if not el: - el = getTreeFromXml(self.LOG_TEMPLATE) - self.tree.append(el) - el.find(".//TargetBucket").text = target_prefix_uri.bucket() - el.find(".//TargetPrefix").text = target_prefix_uri.object() - - def targetPrefix(self): - if self.isLoggingEnabled(): - el = self.tree.find(".//LoggingEnabled") - target_prefix = "s3://%s/%s" % ( - self.tree.find(".//LoggingEnabled//TargetBucket").text, - self.tree.find(".//LoggingEnabled//TargetPrefix").text) - return S3Uri.S3Uri(target_prefix) - else: - return "" - - def setAclPublic(self, acl_public): - le = self.tree.find(".//LoggingEnabled") - if not le: - raise ParameterError("Logging not enabled, can't set default ACL for logs") - tg = le.find(".//TargetGrants") - if not acl_public: - if not tg: - ## All good, it's not been there - return - else: - le.remove(tg) - else: # acl_public == True - anon_read = GranteeAnonRead().getElement() - if not tg: - tg = ET.SubElement(le, "TargetGrants") - ## What if TargetGrants already exists? We should check if - ## AnonRead is there before appending a new one. Later... 
- tg.append(anon_read) - - def isAclPublic(self): - raise NotImplementedError() - - def __str__(self): - return ET.tostring(self.tree) -__all__.append("AccessLog") - -if __name__ == "__main__": - from S3Uri import S3Uri - log = AccessLog() - print log - log.enableLogging(S3Uri("s3://targetbucket/prefix/log-")) - print log - log.setAclPublic(True) - print log - log.setAclPublic(False) - print log - log.disableLogging() - print log - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/BidirMap.py b/fabfile/S3/BidirMap.py deleted file mode 100644 index 0d2849a..0000000 --- a/fabfile/S3/BidirMap.py +++ /dev/null @@ -1,42 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -class BidirMap(object): - def __init__(self, **map): - self.k2v = {} - self.v2k = {} - for key in map: - self.__setitem__(key, map[key]) - - def __setitem__(self, key, value): - if self.v2k.has_key(value): - if self.v2k[value] != key: - raise KeyError("Value '"+str(value)+"' already in use with key '"+str(self.v2k[value])+"'") - try: - del(self.v2k[self.k2v[key]]) - except KeyError: - pass - self.k2v[key] = value - self.v2k[value] = key - - def __getitem__(self, key): - return self.k2v[key] - - def __str__(self): - return self.v2k.__str__() - - def getkey(self, value): - return self.v2k[value] - - def getvalue(self, key): - return self.k2v[key] - - def keys(self): - return [key for key in self.k2v] - - def values(self): - return [value for value in self.v2k] - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/CloudFront.py b/fabfile/S3/CloudFront.py deleted file mode 100644 index 59bcff6..0000000 --- a/fabfile/S3/CloudFront.py +++ /dev/null @@ -1,773 +0,0 @@ -## Amazon CloudFront support -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import sys -import time -import httplib -import random -from datetime import datetime -from logging import debug, info, warning, error - -try: - import xml.etree.ElementTree 
as ET -except ImportError: - import elementtree.ElementTree as ET - -from Config import Config -from Exceptions import * -from Utils import getTreeFromXml, appendXmlTextNode, getDictFromTree, dateS3toPython, sign_string, getBucketFromHostname, getHostnameFromBucket -from S3Uri import S3Uri, S3UriS3 -from FileLists import fetch_remote_list - -cloudfront_api_version = "2010-11-01" -cloudfront_resource = "/%(api_ver)s/distribution" % { 'api_ver' : cloudfront_api_version } - -def output(message): - sys.stdout.write(message + "\n") - -def pretty_output(label, message): - #label = ("%s " % label).ljust(20, ".") - label = ("%s:" % label).ljust(15) - output("%s %s" % (label, message)) - -class DistributionSummary(object): - ## Example: - ## - ## - ## 1234567890ABC - ## Deployed - ## 2009-01-16T11:49:02.189Z - ## blahblahblah.cloudfront.net - ## - ## example.bucket.s3.amazonaws.com - ## - ## cdn.example.com - ## img.example.com - ## What Ever - ## true - ## - - def __init__(self, tree): - if tree.tag != "DistributionSummary": - raise ValueError("Expected xml, got: <%s />" % tree.tag) - self.parse(tree) - - def parse(self, tree): - self.info = getDictFromTree(tree) - self.info['Enabled'] = (self.info['Enabled'].lower() == "true") - if self.info.has_key("CNAME") and type(self.info['CNAME']) != list: - self.info['CNAME'] = [self.info['CNAME']] - - def uri(self): - return S3Uri("cf://%s" % self.info['Id']) - -class DistributionList(object): - ## Example: - ## - ## - ## - ## 100 - ## false - ## - ## ... handled by DistributionSummary() class ... 
- ## - ## - - def __init__(self, xml): - tree = getTreeFromXml(xml) - if tree.tag != "DistributionList": - raise ValueError("Expected xml, got: <%s />" % tree.tag) - self.parse(tree) - - def parse(self, tree): - self.info = getDictFromTree(tree) - ## Normalise some items - self.info['IsTruncated'] = (self.info['IsTruncated'].lower() == "true") - - self.dist_summs = [] - for dist_summ in tree.findall(".//DistributionSummary"): - self.dist_summs.append(DistributionSummary(dist_summ)) - -class Distribution(object): - ## Example: - ## - ## - ## 1234567890ABC - ## InProgress - ## 2009-01-16T13:07:11.319Z - ## blahblahblah.cloudfront.net - ## - ## ... handled by DistributionConfig() class ... - ## - ## - - def __init__(self, xml): - tree = getTreeFromXml(xml) - if tree.tag != "Distribution": - raise ValueError("Expected xml, got: <%s />" % tree.tag) - self.parse(tree) - - def parse(self, tree): - self.info = getDictFromTree(tree) - ## Normalise some items - self.info['LastModifiedTime'] = dateS3toPython(self.info['LastModifiedTime']) - - self.info['DistributionConfig'] = DistributionConfig(tree = tree.find(".//DistributionConfig")) - - def uri(self): - return S3Uri("cf://%s" % self.info['Id']) - -class DistributionConfig(object): - ## Example: - ## - ## - ## somebucket.s3.amazonaws.com - ## s3://somebucket/ - ## http://somebucket.s3.amazonaws.com/ - ## true - ## - ## bu.ck.et - ## /cf-somebucket/ - ## - ## - - EMPTY_CONFIG = "true" - xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version } - def __init__(self, xml = None, tree = None): - if xml is None: - xml = DistributionConfig.EMPTY_CONFIG - - if tree is None: - tree = getTreeFromXml(xml) - - if tree.tag != "DistributionConfig": - raise ValueError("Expected xml, got: <%s />" % tree.tag) - self.parse(tree) - - def parse(self, tree): - self.info = getDictFromTree(tree) - self.info['Enabled'] = (self.info['Enabled'].lower() == "true") - if not self.info.has_key("CNAME"): - 
self.info['CNAME'] = [] - if type(self.info['CNAME']) != list: - self.info['CNAME'] = [self.info['CNAME']] - self.info['CNAME'] = [cname.lower() for cname in self.info['CNAME']] - if not self.info.has_key("Comment"): - self.info['Comment'] = "" - if not self.info.has_key("DefaultRootObject"): - self.info['DefaultRootObject'] = "" - ## Figure out logging - complex node not parsed by getDictFromTree() - logging_nodes = tree.findall(".//Logging") - if logging_nodes: - logging_dict = getDictFromTree(logging_nodes[0]) - logging_dict['Bucket'], success = getBucketFromHostname(logging_dict['Bucket']) - if not success: - warning("Logging to unparsable bucket name: %s" % logging_dict['Bucket']) - self.info['Logging'] = S3UriS3("s3://%(Bucket)s/%(Prefix)s" % logging_dict) - else: - self.info['Logging'] = None - - def __str__(self): - tree = ET.Element("DistributionConfig") - tree.attrib['xmlns'] = DistributionConfig.xmlns - - ## Retain the order of the following calls! - s3org = appendXmlTextNode("S3Origin", '', tree) - appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org) - appendXmlTextNode("CallerReference", self.info['CallerReference'], tree) - for cname in self.info['CNAME']: - appendXmlTextNode("CNAME", cname.lower(), tree) - if self.info['Comment']: - appendXmlTextNode("Comment", self.info['Comment'], tree) - appendXmlTextNode("Enabled", str(self.info['Enabled']).lower(), tree) - # don't create a empty DefaultRootObject element as it would result in a MalformedXML error - if str(self.info['DefaultRootObject']): - appendXmlTextNode("DefaultRootObject", str(self.info['DefaultRootObject']), tree) - if self.info['Logging']: - logging_el = ET.Element("Logging") - appendXmlTextNode("Bucket", getHostnameFromBucket(self.info['Logging'].bucket()), logging_el) - appendXmlTextNode("Prefix", self.info['Logging'].object(), logging_el) - tree.append(logging_el) - return ET.tostring(tree) - -class Invalidation(object): - ## Example: - ## - ## - ## id - ## status - ## 
date - ## - ## /image1.jpg - ## /image2.jpg - ## /videos/movie.flv - ## my-batch - ## - ## - - def __init__(self, xml): - tree = getTreeFromXml(xml) - if tree.tag != "Invalidation": - raise ValueError("Expected xml, got: <%s />" % tree.tag) - self.parse(tree) - - def parse(self, tree): - self.info = getDictFromTree(tree) - - def __str__(self): - return str(self.info) - -class InvalidationList(object): - ## Example: - ## - ## - ## - ## Invalidation ID - ## 2 - ## true - ## - ## [Second Invalidation ID] - ## Completed - ## - ## - ## [First Invalidation ID] - ## Completed - ## - ## - - def __init__(self, xml): - tree = getTreeFromXml(xml) - if tree.tag != "InvalidationList": - raise ValueError("Expected xml, got: <%s />" % tree.tag) - self.parse(tree) - - def parse(self, tree): - self.info = getDictFromTree(tree) - - def __str__(self): - return str(self.info) - -class InvalidationBatch(object): - ## Example: - ## - ## - ## /image1.jpg - ## /image2.jpg - ## /videos/movie.flv - ## /sound%20track.mp3 - ## my-batch - ## - - def __init__(self, reference = None, distribution = None, paths = []): - if reference: - self.reference = reference - else: - if not distribution: - distribution="0" - self.reference = "%s.%s.%s" % (distribution, - datetime.strftime(datetime.now(),"%Y%m%d%H%M%S"), - random.randint(1000,9999)) - self.paths = [] - self.add_objects(paths) - - def add_objects(self, paths): - self.paths.extend(paths) - - def get_reference(self): - return self.reference - - def __str__(self): - tree = ET.Element("InvalidationBatch") - - for path in self.paths: - if len(path) < 1 or path[0] != "/": - path = "/" + path - appendXmlTextNode("Path", path, tree) - appendXmlTextNode("CallerReference", self.reference, tree) - return ET.tostring(tree) - -class CloudFront(object): - operations = { - "CreateDist" : { 'method' : "POST", 'resource' : "" }, - "DeleteDist" : { 'method' : "DELETE", 'resource' : "/%(dist_id)s" }, - "GetList" : { 'method' : "GET", 'resource' : "" }, - 
"GetDistInfo" : { 'method' : "GET", 'resource' : "/%(dist_id)s" }, - "GetDistConfig" : { 'method' : "GET", 'resource' : "/%(dist_id)s/config" }, - "SetDistConfig" : { 'method' : "PUT", 'resource' : "/%(dist_id)s/config" }, - "Invalidate" : { 'method' : "POST", 'resource' : "/%(dist_id)s/invalidation" }, - "GetInvalList" : { 'method' : "GET", 'resource' : "/%(dist_id)s/invalidation" }, - "GetInvalInfo" : { 'method' : "GET", 'resource' : "/%(dist_id)s/invalidation/%(request_id)s" }, - } - - ## Maximum attempts of re-issuing failed requests - _max_retries = 5 - dist_list = None - - def __init__(self, config): - self.config = config - - ## -------------------------------------------------- - ## Methods implementing CloudFront API - ## -------------------------------------------------- - - def GetList(self): - response = self.send_request("GetList") - response['dist_list'] = DistributionList(response['data']) - if response['dist_list'].info['IsTruncated']: - raise NotImplementedError("List is truncated. 
Ask s3cmd author to add support.") - ## TODO: handle Truncated - return response - - def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None): - dist_config = DistributionConfig() - dist_config.info['Enabled'] = True - dist_config.info['S3Origin']['DNSName'] = uri.host_name() - dist_config.info['CallerReference'] = str(uri) - dist_config.info['DefaultRootObject'] = default_root_object - if comment == None: - dist_config.info['Comment'] = uri.public_url() - else: - dist_config.info['Comment'] = comment - for cname in cnames_add: - if dist_config.info['CNAME'].count(cname) == 0: - dist_config.info['CNAME'].append(cname) - if logging: - dist_config.info['Logging'] = S3UriS3(logging) - request_body = str(dist_config) - debug("CreateDistribution(): request_body: %s" % request_body) - response = self.send_request("CreateDist", body = request_body) - response['distribution'] = Distribution(response['data']) - return response - - def ModifyDistribution(self, cfuri, cnames_add = [], cnames_remove = [], - comment = None, enabled = None, logging = None, - default_root_object = None): - if cfuri.type != "cf": - raise ValueError("Expected CFUri instead of: %s" % cfuri) - # Get current dist status (enabled/disabled) and Etag - info("Checking current status of %s" % cfuri) - response = self.GetDistConfig(cfuri) - dc = response['dist_config'] - if enabled != None: - dc.info['Enabled'] = enabled - if comment != None: - dc.info['Comment'] = comment - if default_root_object != None: - dc.info['DefaultRootObject'] = default_root_object - for cname in cnames_add: - if dc.info['CNAME'].count(cname) == 0: - dc.info['CNAME'].append(cname) - for cname in cnames_remove: - while dc.info['CNAME'].count(cname) > 0: - dc.info['CNAME'].remove(cname) - if logging != None: - if logging == False: - dc.info['Logging'] = False - else: - dc.info['Logging'] = S3UriS3(logging) - response = self.SetDistConfig(cfuri, dc, response['headers']['etag']) - 
return response - - def DeleteDistribution(self, cfuri): - if cfuri.type != "cf": - raise ValueError("Expected CFUri instead of: %s" % cfuri) - # Get current dist status (enabled/disabled) and Etag - info("Checking current status of %s" % cfuri) - response = self.GetDistConfig(cfuri) - if response['dist_config'].info['Enabled']: - info("Distribution is ENABLED. Disabling first.") - response['dist_config'].info['Enabled'] = False - response = self.SetDistConfig(cfuri, response['dist_config'], - response['headers']['etag']) - warning("Waiting for Distribution to become disabled.") - warning("This may take several minutes, please wait.") - while True: - response = self.GetDistInfo(cfuri) - d = response['distribution'] - if d.info['Status'] == "Deployed" and d.info['Enabled'] == False: - info("Distribution is now disabled") - break - warning("Still waiting...") - time.sleep(10) - headers = {} - headers['if-match'] = response['headers']['etag'] - response = self.send_request("DeleteDist", dist_id = cfuri.dist_id(), - headers = headers) - return response - - def GetDistInfo(self, cfuri): - if cfuri.type != "cf": - raise ValueError("Expected CFUri instead of: %s" % cfuri) - response = self.send_request("GetDistInfo", dist_id = cfuri.dist_id()) - response['distribution'] = Distribution(response['data']) - return response - - def GetDistConfig(self, cfuri): - if cfuri.type != "cf": - raise ValueError("Expected CFUri instead of: %s" % cfuri) - response = self.send_request("GetDistConfig", dist_id = cfuri.dist_id()) - response['dist_config'] = DistributionConfig(response['data']) - return response - - def SetDistConfig(self, cfuri, dist_config, etag = None): - if etag == None: - debug("SetDistConfig(): Etag not set. 
Fetching it first.") - etag = self.GetDistConfig(cfuri)['headers']['etag'] - debug("SetDistConfig(): Etag = %s" % etag) - request_body = str(dist_config) - debug("SetDistConfig(): request_body: %s" % request_body) - headers = {} - headers['if-match'] = etag - response = self.send_request("SetDistConfig", dist_id = cfuri.dist_id(), - body = request_body, headers = headers) - return response - - def InvalidateObjects(self, uri, paths, default_index_file, invalidate_default_index_on_cf, invalidate_default_index_root_on_cf): - # joseprio: if the user doesn't want to invalidate the default index - # path, or if the user wants to invalidate the root of the default - # index, we need to process those paths - if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf): - new_paths = [] - default_index_suffix = '/' + default_index_file - for path in paths: - if path.endswith(default_index_suffix) or path == default_index_file: - if invalidate_default_index_on_cf: - new_paths.append(path) - if invalidate_default_index_root_on_cf: - new_paths.append(path[:-len(default_index_file)]) - else: - new_paths.append(path) - paths = new_paths - - # uri could be either cf:// or s3:// uri - cfuri = self.get_dist_name_for_bucket(uri) - if len(paths) > 999: - try: - tmp_filename = Utils.mktmpfile() - f = open(tmp_filename, "w") - f.write("\n".join(paths)+"\n") - f.close() - warning("Request to invalidate %d paths (max 999 supported)" % len(paths)) - warning("All the paths are now saved in: %s" % tmp_filename) - except: - pass - raise ParameterError("Too many paths to invalidate") - invalbatch = InvalidationBatch(distribution = cfuri.dist_id(), paths = paths) - debug("InvalidateObjects(): request_body: %s" % invalbatch) - response = self.send_request("Invalidate", dist_id = cfuri.dist_id(), - body = str(invalbatch)) - response['dist_id'] = cfuri.dist_id() - if response['status'] == 201: - inval_info = Invalidation(response['data']).info 
- response['request_id'] = inval_info['Id'] - debug("InvalidateObjects(): response: %s" % response) - return response - - def GetInvalList(self, cfuri): - if cfuri.type != "cf": - raise ValueError("Expected CFUri instead of: %s" % cfuri) - response = self.send_request("GetInvalList", dist_id = cfuri.dist_id()) - response['inval_list'] = InvalidationList(response['data']) - return response - - def GetInvalInfo(self, cfuri): - if cfuri.type != "cf": - raise ValueError("Expected CFUri instead of: %s" % cfuri) - if cfuri.request_id() is None: - raise ValueError("Expected CFUri with Request ID") - response = self.send_request("GetInvalInfo", dist_id = cfuri.dist_id(), request_id = cfuri.request_id()) - response['inval_status'] = Invalidation(response['data']) - return response - - ## -------------------------------------------------- - ## Low-level methods for handling CloudFront requests - ## -------------------------------------------------- - - def send_request(self, op_name, dist_id = None, request_id = None, body = None, headers = {}, retries = _max_retries): - operation = self.operations[op_name] - if body: - headers['content-type'] = 'text/plain' - request = self.create_request(operation, dist_id, request_id, headers) - conn = self.get_connection() - debug("send_request(): %s %s" % (request['method'], request['resource'])) - conn.request(request['method'], request['resource'], body, request['headers']) - http_response = conn.getresponse() - response = {} - response["status"] = http_response.status - response["reason"] = http_response.reason - response["headers"] = dict(http_response.getheaders()) - response["data"] = http_response.read() - conn.close() - - debug("CloudFront: response: %r" % response) - - if response["status"] >= 500: - e = CloudFrontError(response) - if retries: - warning(u"Retrying failed request: %s" % op_name) - warning(unicode(e)) - warning("Waiting %d sec..." 
% self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - return self.send_request(op_name, dist_id, body, retries - 1) - else: - raise e - - if response["status"] < 200 or response["status"] > 299: - raise CloudFrontError(response) - - return response - - def create_request(self, operation, dist_id = None, request_id = None, headers = None): - resource = cloudfront_resource + ( - operation['resource'] % { 'dist_id' : dist_id, 'request_id' : request_id }) - - if not headers: - headers = {} - - if headers.has_key("date"): - if not headers.has_key("x-amz-date"): - headers["x-amz-date"] = headers["date"] - del(headers["date"]) - - if not headers.has_key("x-amz-date"): - headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) - - if len(self.config.access_token)>0: - self.config.refresh_role() - headers['x-amz-security-token']=self.config.access_token - - signature = self.sign_request(headers) - headers["Authorization"] = "AWS "+self.config.access_key+":"+signature - - request = {} - request['resource'] = resource - request['headers'] = headers - request['method'] = operation['method'] - - return request - - def sign_request(self, headers): - string_to_sign = headers['x-amz-date'] - signature = sign_string(string_to_sign) - debug(u"CloudFront.sign_request('%s') = %s" % (string_to_sign, signature)) - return signature - - def get_connection(self): - if self.config.proxy_host != "": - raise ParameterError("CloudFront commands don't work from behind a HTTP proxy") - return httplib.HTTPSConnection(self.config.cloudfront_host) - - def _fail_wait(self, retries): - # Wait a few seconds. The more it fails the more we wait. 
- return (self._max_retries - retries + 1) * 3 - - def get_dist_name_for_bucket(self, uri): - if (uri.type == "cf"): - return uri - if (uri.type != "s3"): - raise ParameterError("CloudFront or S3 URI required instead of: %s" % arg) - - debug("_get_dist_name_for_bucket(%r)" % uri) - if CloudFront.dist_list is None: - response = self.GetList() - CloudFront.dist_list = {} - for d in response['dist_list'].dist_summs: - if d.info.has_key("S3Origin"): - CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri() - elif d.info.has_key("CustomOrigin"): - # Aral: This used to skip over distributions with CustomOrigin, however, we mustn't - # do this since S3 buckets that are set up as websites use custom origins. - # Thankfully, the custom origin URLs they use start with the URL of the - # S3 bucket. Here, we make use this naming convention to support this use case. - distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0]; - distListIndex = distListIndex[:len(uri.bucket())] - CloudFront.dist_list[distListIndex] = d.uri() - else: - # Aral: I'm not sure when this condition will be reached, but keeping it in there. 
- continue - debug("dist_list: %s" % CloudFront.dist_list) - try: - return CloudFront.dist_list[uri.bucket()] - except Exception, e: - debug(e) - raise ParameterError("Unable to translate S3 URI to CloudFront distribution name: %s" % arg) - -class Cmd(object): - """ - Class that implements CloudFront commands - """ - - class Options(object): - cf_cnames_add = [] - cf_cnames_remove = [] - cf_comment = None - cf_enable = None - cf_logging = None - cf_default_root_object = None - - def option_list(self): - return [opt for opt in dir(self) if opt.startswith("cf_")] - - def update_option(self, option, value): - setattr(Cmd.options, option, value) - - options = Options() - - @staticmethod - def _parse_args(args): - cf = CloudFront(Config()) - cfuris = [] - for arg in args: - uri = cf.get_dist_name_for_bucket(S3Uri(arg)) - cfuris.append(uri) - return cfuris - - @staticmethod - def info(args): - cf = CloudFront(Config()) - if not args: - response = cf.GetList() - for d in response['dist_list'].dist_summs: - if d.info.has_key("S3Origin"): - origin = S3UriS3.httpurl_to_s3uri(d.info['S3Origin']['DNSName']) - elif d.info.has_key("CustomOrigin"): - origin = "http://%s/" % d.info['CustomOrigin']['DNSName'] - else: - origin = "" - pretty_output("Origin", origin) - pretty_output("DistId", d.uri()) - pretty_output("DomainName", d.info['DomainName']) - if d.info.has_key("CNAME"): - pretty_output("CNAMEs", ", ".join(d.info['CNAME'])) - pretty_output("Status", d.info['Status']) - pretty_output("Enabled", d.info['Enabled']) - output("") - else: - cfuris = Cmd._parse_args(args) - for cfuri in cfuris: - response = cf.GetDistInfo(cfuri) - d = response['distribution'] - dc = d.info['DistributionConfig'] - if dc.info.has_key("S3Origin"): - origin = S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']) - elif dc.info.has_key("CustomOrigin"): - origin = "http://%s/" % dc.info['CustomOrigin']['DNSName'] - else: - origin = "" - pretty_output("Origin", origin) - pretty_output("DistId", 
d.uri()) - pretty_output("DomainName", d.info['DomainName']) - if dc.info.has_key("CNAME"): - pretty_output("CNAMEs", ", ".join(dc.info['CNAME'])) - pretty_output("Status", d.info['Status']) - pretty_output("Comment", dc.info['Comment']) - pretty_output("Enabled", dc.info['Enabled']) - pretty_output("DfltRootObject", dc.info['DefaultRootObject']) - pretty_output("Logging", dc.info['Logging'] or "Disabled") - pretty_output("Etag", response['headers']['etag']) - - @staticmethod - def create(args): - cf = CloudFront(Config()) - buckets = [] - for arg in args: - uri = S3Uri(arg) - if uri.type != "s3": - raise ParameterError("Bucket can only be created from a s3:// URI instead of: %s" % arg) - if uri.object(): - raise ParameterError("Use s3:// URI with a bucket name only instead of: %s" % arg) - if not uri.is_dns_compatible(): - raise ParameterError("CloudFront can only handle lowercase-named buckets.") - buckets.append(uri) - if not buckets: - raise ParameterError("No valid bucket names found") - for uri in buckets: - info("Creating distribution from: %s" % uri) - response = cf.CreateDistribution(uri, cnames_add = Cmd.options.cf_cnames_add, - comment = Cmd.options.cf_comment, - logging = Cmd.options.cf_logging, - default_root_object = Cmd.options.cf_default_root_object) - d = response['distribution'] - dc = d.info['DistributionConfig'] - output("Distribution created:") - pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName'])) - pretty_output("DistId", d.uri()) - pretty_output("DomainName", d.info['DomainName']) - pretty_output("CNAMEs", ", ".join(dc.info['CNAME'])) - pretty_output("Comment", dc.info['Comment']) - pretty_output("Status", d.info['Status']) - pretty_output("Enabled", dc.info['Enabled']) - pretty_output("DefaultRootObject", dc.info['DefaultRootObject']) - pretty_output("Etag", response['headers']['etag']) - - @staticmethod - def delete(args): - cf = CloudFront(Config()) - cfuris = Cmd._parse_args(args) - for cfuri in cfuris: - 
response = cf.DeleteDistribution(cfuri) - if response['status'] >= 400: - error("Distribution %s could not be deleted: %s" % (cfuri, response['reason'])) - output("Distribution %s deleted" % cfuri) - - @staticmethod - def modify(args): - cf = CloudFront(Config()) - if len(args) > 1: - raise ParameterError("Too many parameters. Modify one Distribution at a time.") - try: - cfuri = Cmd._parse_args(args)[0] - except IndexError, e: - raise ParameterError("No valid Distribution URI found.") - response = cf.ModifyDistribution(cfuri, - cnames_add = Cmd.options.cf_cnames_add, - cnames_remove = Cmd.options.cf_cnames_remove, - comment = Cmd.options.cf_comment, - enabled = Cmd.options.cf_enable, - logging = Cmd.options.cf_logging, - default_root_object = Cmd.options.cf_default_root_object) - if response['status'] >= 400: - error("Distribution %s could not be modified: %s" % (cfuri, response['reason'])) - output("Distribution modified: %s" % cfuri) - response = cf.GetDistInfo(cfuri) - d = response['distribution'] - dc = d.info['DistributionConfig'] - pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName'])) - pretty_output("DistId", d.uri()) - pretty_output("DomainName", d.info['DomainName']) - pretty_output("Status", d.info['Status']) - pretty_output("CNAMEs", ", ".join(dc.info['CNAME'])) - pretty_output("Comment", dc.info['Comment']) - pretty_output("Enabled", dc.info['Enabled']) - pretty_output("DefaultRootObject", dc.info['DefaultRootObject']) - pretty_output("Etag", response['headers']['etag']) - - @staticmethod - def invalinfo(args): - cf = CloudFront(Config()) - cfuris = Cmd._parse_args(args) - requests = [] - for cfuri in cfuris: - if cfuri.request_id(): - requests.append(str(cfuri)) - else: - inval_list = cf.GetInvalList(cfuri) - try: - for i in inval_list['inval_list'].info['InvalidationSummary']: - requests.append("/".join(["cf:/", cfuri.dist_id(), i["Id"]])) - except: - continue - for req in requests: - cfuri = S3Uri(req) - inval_info = 
cf.GetInvalInfo(cfuri) - st = inval_info['inval_status'].info - pretty_output("URI", str(cfuri)) - pretty_output("Status", st['Status']) - pretty_output("Created", st['CreateTime']) - pretty_output("Nr of paths", len(st['InvalidationBatch']['Path'])) - pretty_output("Reference", st['InvalidationBatch']['CallerReference']) - output("") - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/Config.py b/fabfile/S3/Config.py deleted file mode 100644 index 04543c2..0000000 --- a/fabfile/S3/Config.py +++ /dev/null @@ -1,294 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import logging -from logging import debug, info, warning, error -import re -import os -import sys -import Progress -from SortedDict import SortedDict -import httplib -import json - -class Config(object): - _instance = None - _parsed_files = [] - _doc = {} - access_key = "" - secret_key = "" - access_token = "" - host_base = "s3.amazonaws.com" - host_bucket = "%(bucket)s.s3.amazonaws.com" - simpledb_host = "sdb.amazonaws.com" - cloudfront_host = "cloudfront.amazonaws.com" - verbosity = logging.WARNING - progress_meter = True - progress_class = Progress.ProgressCR - send_chunk = 4096 - recv_chunk = 4096 - list_md5 = False - human_readable_sizes = False - extra_headers = SortedDict(ignore_case = True) - force = False - enable = None - get_continue = False - skip_existing = False - recursive = False - acl_public = None - acl_grants = [] - acl_revokes = [] - proxy_host = "" - proxy_port = 3128 - encrypt = False - dry_run = False - add_encoding_exts = "" - preserve_attrs = True - preserve_attrs_list = [ - 'uname', # Verbose owner Name (e.g. 'root') - 'uid', # Numeric user ID (e.g. 0) - 'gname', # Group name (e.g. 'users') - 'gid', # Numeric group ID (e.g. 100) - 'atime', # Last access timestamp - 'mtime', # Modification timestamp - 'ctime', # Creation timestamp - 'mode', # File mode (e.g. 
rwxr-xr-x = 755) - 'md5', # File MD5 (if known) - #'acl', # Full ACL (not yet supported) - ] - delete_removed = False - delete_after = False - delete_after_fetch = False - _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted" - delay_updates = False - gpg_passphrase = "" - gpg_command = "" - gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s" - gpg_decrypt = "%(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s" - use_https = False - bucket_location = "US" - default_mime_type = "binary/octet-stream" - guess_mime_type = True - mime_type = "" - enable_multipart = True - multipart_chunk_size_mb = 15 # MB - # List of checks to be performed for 'sync' - sync_checks = ['size', 'md5'] # 'weak-timestamp' - # List of compiled REGEXPs - exclude = [] - include = [] - # Dict mapping compiled REGEXPs back to their textual form - debug_exclude = {} - debug_include = {} - encoding = "utf-8" - urlencoding_mode = "normal" - log_target_prefix = "" - reduced_redundancy = False - follow_symlinks = False - socket_timeout = 300 - invalidate_on_cf = False - # joseprio: new flags for default index invalidation - invalidate_default_index_on_cf = False - invalidate_default_index_root_on_cf = True - website_index = "index.html" - website_error = "" - website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/" - additional_destinations = [] - cache_file = "" - add_headers = "" - - ## Creating a singleton - def __new__(self, configfile = None): - if self._instance is None: - self._instance = object.__new__(self) - return self._instance - - def __init__(self, configfile = None): - if configfile: - try: - self.read_config_file(configfile) - except IOError, e: - if 'AWS_CREDENTIAL_FILE' in os.environ: - self.env_config() - if len(self.access_key)==0: - self.role_config() - - def 
role_config(self): - conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1) - try: - conn.request('GET', "/latest/meta-data/iam/security-credentials/") - resp = conn.getresponse() - files = resp.read() - if resp.status == 200 and len(files)>1: - conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files) - resp=conn.getresponse() - if resp.status == 200: - creds=json.load(resp) - Config().update_option('access_key', creds['AccessKeyId'].encode('ascii')) - Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii')) - Config().update_option('access_token', creds['Token'].encode('ascii')) - else: - raise IOError - else: - raise IOError - except: - raise - - def role_refresh(self): - try: - self.role_config() - except: - warning("Could not refresh role") - - def env_config(self): - cred_content = "" - try: - cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r') - cred_content = cred_file.read() - except IOError, e: - debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE'])) - r_data = re.compile("^\s*(?P\w+)\s*=\s*(?P.*)") - r_quotes = re.compile("^\"(.*)\"\s*$") - if len(cred_content)>0: - for line in cred_content.splitlines(): - is_data = r_data.match(line) - is_data = r_data.match(line) - if is_data: - data = is_data.groupdict() - if r_quotes.match(data["value"]): - data["value"] = data["value"][1:-1] - if data["orig_key"]=="AWSAccessKeyId": - data["key"] = "access_key" - elif data["orig_key"]=="AWSSecretKey": - data["key"] = "secret_key" - else: - del data["key"] - if "key" in data: - Config().update_option(data["key"], data["value"]) - if data["key"] in ("access_key", "secret_key", "gpg_passphrase"): - print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3) - else: - print_value = data["value"] - debug("env_Config: %s->%s" % (data["key"], print_value)) - - - - def option_list(self): - retval = [] - for option in dir(self): - ## Skip 
attributes that start with underscore or are not string, int or bool - option_type = type(getattr(Config, option)) - if option.startswith("_") or \ - not (option_type in ( - type("string"), # str - type(42), # int - type(True))): # bool - continue - retval.append(option) - return retval - - def read_config_file(self, configfile): - cp = ConfigParser(configfile) - for option in self.option_list(): - self.update_option(option, cp.get(option)) - - if cp.get('add_headers'): - for option in cp.get('add_headers').split(","): - (key, value) = option.split(':') - self.extra_headers[key.replace('_', '-').strip()] = value.strip() - - self._parsed_files.append(configfile) - - def dump_config(self, stream): - ConfigDumper(stream).dump("default", self) - - def update_option(self, option, value): - if value is None: - return - #### Handle environment reference - if str(value).startswith("$"): - return self.update_option(option, os.getenv(str(value)[1:])) - #### Special treatment of some options - ## verbosity must be known to "logging" module - if option == "verbosity": - try: - setattr(Config, "verbosity", logging._levelNames[value]) - except KeyError: - error("Config: verbosity level '%s' is not valid" % value) - ## allow yes/no, true/false, on/off and 1/0 for boolean options - elif type(getattr(Config, option)) is type(True): # bool - if str(value).lower() in ("true", "yes", "on", "1"): - setattr(Config, option, True) - elif str(value).lower() in ("false", "no", "off", "0"): - setattr(Config, option, False) - else: - error("Config: value of option '%s' must be Yes or No, not '%s'" % (option, value)) - elif type(getattr(Config, option)) is type(42): # int - try: - setattr(Config, option, int(value)) - except ValueError, e: - error("Config: value of option '%s' must be an integer, not '%s'" % (option, value)) - else: # string - setattr(Config, option, value) - -class ConfigParser(object): - def __init__(self, file, sections = []): - self.cfg = {} - self.parse_file(file, 
sections) - - def parse_file(self, file, sections = []): - debug("ConfigParser: Reading file '%s'" % file) - if type(sections) != type([]): - sections = [sections] - in_our_section = True - f = open(file, "r") - r_comment = re.compile("^\s*#.*") - r_empty = re.compile("^\s*$") - r_section = re.compile("^\[([^\]]+)\]") - r_data = re.compile("^\s*(?P\w+)\s*=\s*(?P.*)") - r_quotes = re.compile("^\"(.*)\"\s*$") - for line in f: - if r_comment.match(line) or r_empty.match(line): - continue - is_section = r_section.match(line) - if is_section: - section = is_section.groups()[0] - in_our_section = (section in sections) or (len(sections) == 0) - continue - is_data = r_data.match(line) - if is_data and in_our_section: - data = is_data.groupdict() - if r_quotes.match(data["value"]): - data["value"] = data["value"][1:-1] - self.__setitem__(data["key"], data["value"]) - if data["key"] in ("access_key", "secret_key", "gpg_passphrase"): - print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3) - else: - print_value = data["value"] - debug("ConfigParser: %s->%s" % (data["key"], print_value)) - continue - warning("Ignoring invalid line in '%s': %s" % (file, line)) - - def __getitem__(self, name): - return self.cfg[name] - - def __setitem__(self, name, value): - self.cfg[name] = value - - def get(self, name, default = None): - if self.cfg.has_key(name): - return self.cfg[name] - return default - -class ConfigDumper(object): - def __init__(self, stream): - self.stream = stream - - def dump(self, section, config): - self.stream.write("[%s]\n" % section) - for option in config.option_list(): - self.stream.write("%s = %s\n" % (option, getattr(config, option))) - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/ConnMan.py b/fabfile/S3/ConnMan.py deleted file mode 100644 index fbec921..0000000 --- a/fabfile/S3/ConnMan.py +++ /dev/null @@ -1,71 +0,0 @@ -import httplib -from urlparse import urlparse -from threading import Semaphore -from logging 
import debug, info, warning, error - -from Config import Config -from Exceptions import ParameterError - -__all__ = [ "ConnMan" ] - -class http_connection(object): - def __init__(self, id, hostname, ssl, cfg): - self.hostname = hostname - self.ssl = ssl - self.id = id - self.counter = 0 - if cfg.proxy_host != "": - self.c = httplib.HTTPConnection(cfg.proxy_host, cfg.proxy_port) - elif not ssl: - self.c = httplib.HTTPConnection(hostname) - else: - self.c = httplib.HTTPSConnection(hostname) - -class ConnMan(object): - conn_pool_sem = Semaphore() - conn_pool = {} - conn_max_counter = 800 ## AWS closes connection after some ~90 requests - - @staticmethod - def get(hostname, ssl = None): - cfg = Config() - if ssl == None: - ssl = cfg.use_https - conn = None - if cfg.proxy_host != "": - if ssl: - raise ParameterError("use_ssl=True can't be used with proxy") - conn_id = "proxy://%s:%s" % (cfg.proxy_host, cfg.proxy_port) - else: - conn_id = "http%s://%s" % (ssl and "s" or "", hostname) - ConnMan.conn_pool_sem.acquire() - if not ConnMan.conn_pool.has_key(conn_id): - ConnMan.conn_pool[conn_id] = [] - if len(ConnMan.conn_pool[conn_id]): - conn = ConnMan.conn_pool[conn_id].pop() - debug("ConnMan.get(): re-using connection: %s#%d" % (conn.id, conn.counter)) - ConnMan.conn_pool_sem.release() - if not conn: - debug("ConnMan.get(): creating new connection: %s" % conn_id) - conn = http_connection(conn_id, hostname, ssl, cfg) - conn.c.connect() - conn.counter += 1 - return conn - - @staticmethod - def put(conn): - if conn.id.startswith("proxy://"): - conn.c.close() - debug("ConnMan.put(): closing proxy connection (keep-alive not yet supported)") - return - - if conn.counter >= ConnMan.conn_max_counter: - conn.c.close() - debug("ConnMan.put(): closing over-used connection") - return - - ConnMan.conn_pool_sem.acquire() - ConnMan.conn_pool[conn.id].append(conn) - ConnMan.conn_pool_sem.release() - debug("ConnMan.put(): connection put back to pool (%s#%d)" % (conn.id, conn.counter)) - 
diff --git a/fabfile/S3/Exceptions.py b/fabfile/S3/Exceptions.py deleted file mode 100644 index b0671e5..0000000 --- a/fabfile/S3/Exceptions.py +++ /dev/null @@ -1,88 +0,0 @@ -## Amazon S3 manager - Exceptions library -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -from Utils import getTreeFromXml, unicodise, deunicodise -from logging import debug, info, warning, error - -try: - import xml.etree.ElementTree as ET -except ImportError: - import elementtree.ElementTree as ET - -class S3Exception(Exception): - def __init__(self, message = ""): - self.message = unicodise(message) - - def __str__(self): - ## Call unicode(self) instead of self.message because - ## __unicode__() method could be overriden in subclasses! - return deunicodise(unicode(self)) - - def __unicode__(self): - return self.message - - ## (Base)Exception.message has been deprecated in Python 2.6 - def _get_message(self): - return self._message - def _set_message(self, message): - self._message = message - message = property(_get_message, _set_message) - - -class S3Error (S3Exception): - def __init__(self, response): - self.status = response["status"] - self.reason = response["reason"] - self.info = { - "Code" : "", - "Message" : "", - "Resource" : "" - } - debug("S3Error: %s (%s)" % (self.status, self.reason)) - if response.has_key("headers"): - for header in response["headers"]: - debug("HttpHeader: %s: %s" % (header, response["headers"][header])) - if response.has_key("data") and response["data"]: - tree = getTreeFromXml(response["data"]) - error_node = tree - if not error_node.tag == "Error": - error_node = tree.find(".//Error") - for child in error_node.getchildren(): - if child.text != "": - debug("ErrorXML: " + child.tag + ": " + repr(child.text)) - self.info[child.tag] = child.text - self.code = self.info["Code"] - self.message = self.info["Message"] - self.resource = self.info["Resource"] - - def __unicode__(self): - retval = u"%d " % (self.status) - 
retval += (u"(%s)" % (self.info.has_key("Code") and self.info["Code"] or self.reason)) - if self.info.has_key("Message"): - retval += (u": %s" % self.info["Message"]) - return retval - -class CloudFrontError(S3Error): - pass - -class S3UploadError(S3Exception): - pass - -class S3DownloadError(S3Exception): - pass - -class S3RequestError(S3Exception): - pass - -class S3ResponseError(S3Exception): - pass - -class InvalidFileError(S3Exception): - pass - -class ParameterError(S3Exception): - pass - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/FileDict.py b/fabfile/S3/FileDict.py deleted file mode 100644 index c3a22ae..0000000 --- a/fabfile/S3/FileDict.py +++ /dev/null @@ -1,53 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -from SortedDict import SortedDict -import Utils - -class FileDict(SortedDict): - def __init__(self, mapping = {}, ignore_case = True, **kwargs): - SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs) - self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}} - self.by_md5 = dict() # {md5: set(relative_files)} - - def record_md5(self, relative_file, md5): - if md5 not in self.by_md5: - self.by_md5[md5] = set() - self.by_md5[md5].add(relative_file) - - def find_md5_one(self, md5): - try: - return list(self.by_md5.get(md5, set()))[0] - except: - return None - - def get_md5(self, relative_file): - """returns md5 if it can, or raises IOError if file is unreadable""" - md5 = None - if 'md5' in self[relative_file]: - return self[relative_file]['md5'] - md5 = self.get_hardlink_md5(relative_file) - if md5 is None: - md5 = Utils.hash_file_md5(self[relative_file]['full_name']) - self.record_md5(relative_file, md5) - self[relative_file]['md5'] = md5 - return md5 - - def record_hardlink(self, relative_file, dev, inode, md5): - if dev not in self.hardlinks: - self.hardlinks[dev] = dict() - if inode not in self.hardlinks[dev]: - 
self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set()) - self.hardlinks[dev][inode]['relative_files'].add(relative_file) - - def get_hardlink_md5(self, relative_file): - md5 = None - dev = self[relative_file]['dev'] - inode = self[relative_file]['inode'] - try: - md5 = self.hardlinks[dev][inode]['md5'] - except: - pass - return md5 diff --git a/fabfile/S3/FileLists.py b/fabfile/S3/FileLists.py deleted file mode 100644 index 488ae47..0000000 --- a/fabfile/S3/FileLists.py +++ /dev/null @@ -1,517 +0,0 @@ -## Create and compare lists of files/objects -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -from S3 import S3 -from Config import Config -from S3Uri import S3Uri -from FileDict import FileDict -from Utils import * -from Exceptions import ParameterError -from HashCache import HashCache - -from logging import debug, info, warning, error - -import os -import glob -import copy - -__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"] - -def _fswalk_follow_symlinks(path): - ''' - Walk filesystem, following symbolic links (but without recursion), on python2.4 and later - - If a symlink directory loop is detected, emit a warning and skip. 
- E.g.: dir1/dir2/sym-dir -> ../dir2 - ''' - assert os.path.isdir(path) # only designed for directory argument - walkdirs = set([path]) - for dirpath, dirnames, filenames in os.walk(path): - handle_exclude_include_walk(dirpath, dirnames, []) - real_dirpath = os.path.realpath(dirpath) - for dirname in dirnames: - current = os.path.join(dirpath, dirname) - real_current = os.path.realpath(current) - if os.path.islink(current): - if (real_dirpath == real_current or - real_dirpath.startswith(real_current + os.path.sep)): - warning("Skipping recursively symlinked directory %s" % dirname) - else: - walkdirs.add(current) - for walkdir in walkdirs: - for dirpath, dirnames, filenames in os.walk(walkdir): - handle_exclude_include_walk(dirpath, dirnames, []) - yield (dirpath, dirnames, filenames) - -def _fswalk_no_symlinks(path): - ''' - Directory tree generator - - path (str) is the root of the directory tree to walk - ''' - for dirpath, dirnames, filenames in os.walk(path): - handle_exclude_include_walk(dirpath, dirnames, filenames) - yield (dirpath, dirnames, filenames) - -def filter_exclude_include(src_list): - info(u"Applying --exclude/--include") - cfg = Config() - exclude_list = FileDict(ignore_case = False) - for file in src_list.keys(): - debug(u"CHECK: %s" % file) - excluded = False - for r in cfg.exclude: - if r.search(file): - excluded = True - debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) - break - if excluded: - ## No need to check for --include if not excluded - for r in cfg.include: - if r.search(file): - excluded = False - debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) - break - if excluded: - ## Still excluded - ok, action it - debug(u"EXCLUDE: %s" % file) - exclude_list[file] = src_list[file] - del(src_list[file]) - continue - else: - debug(u"PASS: %r" % (file)) - return src_list, exclude_list - -def handle_exclude_include_walk(root, dirs, files): - cfg = Config() - copydirs = copy.copy(dirs) - copyfiles = copy.copy(files) - - # exclude dir 
matches in the current directory - # this prevents us from recursing down trees we know we want to ignore - for x in copydirs: - d = os.path.join(root, x, '') - debug(u"CHECK: %r" % d) - excluded = False - for r in cfg.exclude: - if r.search(d): - excluded = True - debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) - break - if excluded: - ## No need to check for --include if not excluded - for r in cfg.include: - if r.search(d): - excluded = False - debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) - break - if excluded: - ## Still excluded - ok, action it - debug(u"EXCLUDE: %r" % d) - dirs.remove(x) - continue - else: - debug(u"PASS: %r" % (d)) - - # exclude file matches in the current directory - for x in copyfiles: - file = os.path.join(root, x) - debug(u"CHECK: %r" % file) - excluded = False - for r in cfg.exclude: - if r.search(file): - excluded = True - debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) - break - if excluded: - ## No need to check for --include if not excluded - for r in cfg.include: - if r.search(file): - excluded = False - debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) - break - if excluded: - ## Still excluded - ok, action it - debug(u"EXCLUDE: %s" % file) - files.remove(x) - continue - else: - debug(u"PASS: %r" % (file)) - -def fetch_local_list(args, recursive = None): - def _get_filelist_local(loc_list, local_uri, cache): - info(u"Compiling list of local files...") - - if deunicodise(local_uri.basename()) == "-": - loc_list["-"] = { - 'full_name_unicode' : '-', - 'full_name' : '-', - 'size' : -1, - 'mtime' : -1, - } - return loc_list, True - if local_uri.isdir(): - local_base = deunicodise(local_uri.basename()) - local_path = deunicodise(local_uri.path()) - if cfg.follow_symlinks: - filelist = _fswalk_follow_symlinks(local_path) - else: - filelist = _fswalk_no_symlinks(local_path) - single_file = False - else: - local_base = "" - local_path = deunicodise(local_uri.dirname()) - filelist = [( local_path, [], 
[deunicodise(local_uri.basename())] )] - single_file = True - for root, dirs, files in filelist: - rel_root = root.replace(local_path, local_base, 1) - for f in files: - full_name = os.path.join(root, f) - if not os.path.isfile(full_name): - continue - if os.path.islink(full_name): - if not cfg.follow_symlinks: - continue - relative_file = unicodise(os.path.join(rel_root, f)) - if os.path.sep != "/": - # Convert non-unix dir separators to '/' - relative_file = "/".join(relative_file.split(os.path.sep)) - if cfg.urlencoding_mode == "normal": - relative_file = replace_nonprintables(relative_file) - if relative_file.startswith('./'): - relative_file = relative_file[2:] - sr = os.stat_result(os.lstat(full_name)) - loc_list[relative_file] = { - 'full_name_unicode' : unicodise(full_name), - 'full_name' : full_name, - 'size' : sr.st_size, - 'mtime' : sr.st_mtime, - 'dev' : sr.st_dev, - 'inode' : sr.st_ino, - 'uid' : sr.st_uid, - 'gid' : sr.st_gid, - 'sr': sr # save it all, may need it in preserve_attrs_list - ## TODO: Possibly more to save here... 
- } - if 'md5' in cfg.sync_checks: - md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size) - if md5 is None: - try: - md5 = loc_list.get_md5(relative_file) # this does the file I/O - except IOError: - continue - cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5) - loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5) - return loc_list, single_file - - def _maintain_cache(cache, local_list): - if cfg.cache_file: - cache.mark_all_for_purge() - for i in local_list.keys(): - cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size']) - cache.purge() - cache.save(cfg.cache_file) - - cfg = Config() - - cache = HashCache() - if cfg.cache_file: - try: - cache.load(cfg.cache_file) - except IOError: - info(u"No cache file found, creating it.") - - local_uris = [] - local_list = FileDict(ignore_case = False) - single_file = False - - if type(args) not in (list, tuple): - args = [args] - - if recursive == None: - recursive = cfg.recursive - - for arg in args: - uri = S3Uri(arg) - if not uri.type == 'file': - raise ParameterError("Expecting filename or directory instead of: %s" % arg) - if uri.isdir() and not recursive: - raise ParameterError("Use --recursive to upload a directory: %s" % arg) - local_uris.append(uri) - - for uri in local_uris: - list_for_uri, single_file = _get_filelist_local(local_list, uri, cache) - - ## Single file is True if and only if the user - ## specified one local URI and that URI represents - ## a FILE. Ie it is False if the URI was of a DIR - ## and that dir contained only one FILE. That's not - ## a case of single_file==True. 
- if len(local_list) > 1: - single_file = False - - _maintain_cache(cache, local_list) - - return local_list, single_file - -def fetch_remote_list(args, require_attribs = False, recursive = None): - def _get_filelist_remote(remote_uri, recursive = True): - ## If remote_uri ends with '/' then all remote files will have - ## the remote_uri prefix removed in the relative path. - ## If, on the other hand, the remote_uri ends with something else - ## (probably alphanumeric symbol) we'll use the last path part - ## in the relative path. - ## - ## Complicated, eh? See an example: - ## _get_filelist_remote("s3://bckt/abc/def") may yield: - ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} } - ## _get_filelist_remote("s3://bckt/abc/def/") will yield: - ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} } - ## Furthermore a prefix-magic can restrict the return list: - ## _get_filelist_remote("s3://bckt/abc/def/x") yields: - ## { 'xyz/blah.txt' : {} } - - info(u"Retrieving list of remote files for %s ..." % remote_uri) - - s3 = S3(Config()) - response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive) - - rem_base_original = rem_base = remote_uri.object() - remote_uri_original = remote_uri - if rem_base != '' and rem_base[-1] != '/': - rem_base = rem_base[:rem_base.rfind('/')+1] - remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base)) - rem_base_len = len(rem_base) - rem_list = FileDict(ignore_case = False) - break_now = False - for object in response['list']: - if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep: - ## We asked for one file and we got that file :-) - key = os.path.basename(object['Key']) - object_uri_str = remote_uri_original.uri() - break_now = True - rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list - else: - key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !! 
- object_uri_str = remote_uri.uri() + key - rem_list[key] = { - 'size' : int(object['Size']), - 'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-( - 'md5' : object['ETag'][1:-1], - 'object_key' : object['Key'], - 'object_uri_str' : object_uri_str, - 'base_uri' : remote_uri, - 'dev' : None, - 'inode' : None, - } - md5 = object['ETag'][1:-1] - rem_list.record_md5(key, md5) - if break_now: - break - return rem_list - - cfg = Config() - remote_uris = [] - remote_list = FileDict(ignore_case = False) - - if type(args) not in (list, tuple): - args = [args] - - if recursive == None: - recursive = cfg.recursive - - for arg in args: - uri = S3Uri(arg) - if not uri.type == 's3': - raise ParameterError("Expecting S3 URI instead of '%s'" % arg) - remote_uris.append(uri) - - if recursive: - for uri in remote_uris: - objectlist = _get_filelist_remote(uri) - for key in objectlist: - remote_list[key] = objectlist[key] - remote_list.record_md5(key, objectlist.get_md5(key)) - else: - for uri in remote_uris: - uri_str = str(uri) - ## Wildcards used in remote URI? - ## If yes we'll need a bucket listing... - if uri_str.find('*') > -1 or uri_str.find('?') > -1: - first_wildcard = uri_str.find('*') - first_questionmark = uri_str.find('?') - if first_questionmark > -1 and first_questionmark < first_wildcard: - first_wildcard = first_questionmark - prefix = uri_str[:first_wildcard] - rest = uri_str[first_wildcard+1:] - ## Only request recursive listing if the 'rest' of the URI, - ## i.e. 
the part after first wildcard, contains '/' - need_recursion = rest.find('/') > -1 - objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion) - for key in objectlist: - ## Check whether the 'key' matches the requested wildcards - if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str): - remote_list[key] = objectlist[key] - else: - ## No wildcards - simply append the given URI to the list - key = os.path.basename(uri.object()) - if not key: - raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri()) - remote_item = { - 'base_uri': uri, - 'object_uri_str': unicode(uri), - 'object_key': uri.object() - } - if require_attribs: - response = S3(cfg).object_info(uri) - remote_item.update({ - 'size': int(response['headers']['content-length']), - 'md5': response['headers']['etag'].strip('"\''), - 'timestamp' : dateRFC822toUnix(response['headers']['date']) - }) - # get md5 from header if it's present. We would have set that during upload - if response['headers'].has_key('x-amz-meta-s3cmd-attrs'): - attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs']) - if attrs.has_key('md5'): - remote_item.update({'md5': attrs['md5']}) - - remote_list[key] = remote_item - return remote_list - -def parse_attrs_header(attrs_header): - attrs = {} - for attr in attrs_header.split("/"): - key, val = attr.split(":") - attrs[key] = val - return attrs - - -def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False): - def __direction_str(is_remote): - return is_remote and "remote" or "local" - - def _compare(src_list, dst_lst, src_remote, dst_remote, file): - """Return True if src_list[file] matches dst_list[file], else False""" - attribs_match = True - if not (src_list.has_key(file) and dst_list.has_key(file)): - info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file))) - return False - - ## check size first - 
if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']: - debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size'])) - attribs_match = False - - ## check md5 - compare_md5 = 'md5' in cfg.sync_checks - # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn" - if compare_md5: - if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0): - compare_md5 = False - info(u"disabled md5 check for %s" % file) - if attribs_match and compare_md5: - try: - src_md5 = src_list.get_md5(file) - dst_md5 = dst_list.get_md5(file) - except (IOError,OSError), e: - # md5 sum verification failed - ignore that file altogether - debug(u"IGNR: %s (disappeared)" % (file)) - warning(u"%s: file disappeared, ignoring." % (file)) - raise - - if src_md5 != dst_md5: - ## checksums are different. - attribs_match = False - debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5)) - - return attribs_match - - # we don't support local->local sync, use 'rsync' or something like that instead ;-) - assert(not(src_remote == False and dst_remote == False)) - - info(u"Verifying attributes...") - cfg = Config() - ## Items left on src_list will be transferred - ## Items left on update_list will be transferred after src_list - ## Items left on copy_pairs will be copied from dst1 to dst2 - update_list = FileDict(ignore_case = False) - ## Items left on dst_list will be deleted - copy_pairs = [] - - debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote))) - - for relative_file in src_list.keys(): - debug(u"CHECK: %s" % (relative_file)) - - if dst_list.has_key(relative_file): - ## Was --skip-existing requested? 
- if cfg.skip_existing: - debug(u"IGNR: %s (used --skip-existing)" % (relative_file)) - del(src_list[relative_file]) - del(dst_list[relative_file]) - continue - - try: - same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file) - except (IOError,OSError), e: - debug(u"IGNR: %s (disappeared)" % (relative_file)) - warning(u"%s: file disappeared, ignoring." % (relative_file)) - del(src_list[relative_file]) - del(dst_list[relative_file]) - continue - - if same_file: - debug(u"IGNR: %s (transfer not needed)" % relative_file) - del(src_list[relative_file]) - del(dst_list[relative_file]) - - else: - # look for matching file in src - try: - md5 = src_list.get_md5(relative_file) - except IOError: - md5 = None - if md5 is not None and dst_list.by_md5.has_key(md5): - # Found one, we want to copy - dst1 = list(dst_list.by_md5[md5])[0] - debug(u"DST COPY src: %s -> %s" % (dst1, relative_file)) - copy_pairs.append((src_list[relative_file], dst1, relative_file)) - del(src_list[relative_file]) - del(dst_list[relative_file]) - else: - # record that we will get this file transferred to us (before all the copies), so if we come across it later again, - # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter). - dst_list.record_md5(relative_file, md5) - update_list[relative_file] = src_list[relative_file] - del src_list[relative_file] - del dst_list[relative_file] - - else: - # dst doesn't have this file - # look for matching file elsewhere in dst - try: - md5 = src_list.get_md5(relative_file) - except IOError: - md5 = None - dst1 = dst_list.find_md5_one(md5) - if dst1 is not None: - # Found one, we want to copy - debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file)) - copy_pairs.append((src_list[relative_file], dst1, relative_file)) - del(src_list[relative_file]) - else: - # we don't have this file, and we don't have a copy of this file elsewhere. Get it. 
- # record that we will get this file transferred to us (before all the copies), so if we come across it later again, - # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter). - dst_list.record_md5(relative_file, md5) - - for f in dst_list.keys(): - if src_list.has_key(f) or update_list.has_key(f): - # leave only those not on src_list + update_list - del dst_list[f] - - return src_list, dst_list, update_list, copy_pairs - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/HashCache.py b/fabfile/S3/HashCache.py deleted file mode 100644 index 0c33cf0..0000000 --- a/fabfile/S3/HashCache.py +++ /dev/null @@ -1,53 +0,0 @@ -import cPickle as pickle - -class HashCache(object): - def __init__(self): - self.inodes = dict() - - def add(self, dev, inode, mtime, size, md5): - if dev not in self.inodes: - self.inodes[dev] = dict() - if inode not in self.inodes[dev]: - self.inodes[dev][inode] = dict() - self.inodes[dev][inode][mtime] = dict(md5=md5, size=size) - - def md5(self, dev, inode, mtime, size): - try: - d = self.inodes[dev][inode][mtime] - if d['size'] != size: - return None - except: - return None - return d['md5'] - - def mark_all_for_purge(self): - for d in self.inodes.keys(): - for i in self.inodes[d].keys(): - for c in self.inodes[d][i].keys(): - self.inodes[d][i][c]['purge'] = True - - def unmark_for_purge(self, dev, inode, mtime, size): - d = self.inodes[dev][inode][mtime] - if d['size'] == size and 'purge' in d: - del self.inodes[dev][inode][mtime]['purge'] - - def purge(self): - for d in self.inodes.keys(): - for i in self.inodes[d].keys(): - for m in self.inodes[d][i].keys(): - if 'purge' in self.inodes[d][i][m]: - del self.inodes[d][i] - break - - def save(self, f): - d = dict(inodes=self.inodes, version=1) - f = open(f, 'w') - p = pickle.dump(d, f) - f.close() - - def load(self, f): - f = open(f, 'r') - d = pickle.load(f) - f.close() - if d.get('version') == 1 and 'inodes' in d: - self.inodes = d['inodes'] diff --git 
a/fabfile/S3/MultiPart.py b/fabfile/S3/MultiPart.py deleted file mode 100644 index 3f57e13..0000000 --- a/fabfile/S3/MultiPart.py +++ /dev/null @@ -1,137 +0,0 @@ -## Amazon S3 Multipart upload support -## Author: Jerome Leclanche -## License: GPL Version 2 - -import os -from stat import ST_SIZE -from logging import debug, info, warning, error -from Utils import getTextFromXml, formatSize, unicodise -from Exceptions import S3UploadError - -class MultiPartUpload(object): - - MIN_CHUNK_SIZE_MB = 5 # 5MB - MAX_CHUNK_SIZE_MB = 5120 # 5GB - MAX_FILE_SIZE = 42949672960 # 5TB - - def __init__(self, s3, file, uri, headers_baseline = {}): - self.s3 = s3 - self.file = file - self.uri = uri - self.parts = {} - self.headers_baseline = headers_baseline - self.upload_id = self.initiate_multipart_upload() - - def initiate_multipart_upload(self): - """ - Begin a multipart upload - http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadInitiate.html - """ - request = self.s3.create_request("OBJECT_POST", uri = self.uri, headers = self.headers_baseline, extra = "?uploads") - response = self.s3.send_request(request) - data = response["data"] - self.upload_id = getTextFromXml(data, "UploadId") - return self.upload_id - - def upload_all_parts(self): - """ - Execute a full multipart upload on a file - Returns the seq/etag dict - TODO use num_processes to thread it - """ - if not self.upload_id: - raise RuntimeError("Attempting to use a multipart upload that has not been initiated.") - - self.chunk_size = self.s3.config.multipart_chunk_size_mb * 1024 * 1024 - - if self.file.name != "": - size_left = file_size = os.stat(self.file.name)[ST_SIZE] - nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1) - debug("MultiPart: Uploading %s in %d parts" % (self.file.name, nr_parts)) - else: - debug("MultiPart: Uploading from %s" % (self.file.name)) - - seq = 1 - if self.file.name != "": - while size_left > 0: - offset = self.chunk_size * (seq - 1) - 
current_chunk_size = min(file_size - offset, self.chunk_size) - size_left -= current_chunk_size - labels = { - 'source' : unicodise(self.file.name), - 'destination' : unicodise(self.uri.uri()), - 'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True)) - } - try: - self.upload_part(seq, offset, current_chunk_size, labels) - except: - error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq)) - self.abort_upload() - raise - seq += 1 - else: - while True: - buffer = self.file.read(self.chunk_size) - offset = self.chunk_size * (seq - 1) - current_chunk_size = len(buffer) - labels = { - 'source' : unicodise(self.file.name), - 'destination' : unicodise(self.uri.uri()), - 'extra' : "[part %d, %s]" % (seq, "%d%sB" % formatSize(current_chunk_size, human_readable = True)) - } - if len(buffer) == 0: # EOF - break - try: - self.upload_part(seq, offset, current_chunk_size, labels, buffer) - except: - error(u"Upload of '%s' part %d failed. Aborting multipart upload." 
% (self.file.name, seq)) - self.abort_upload() - raise - seq += 1 - - debug("MultiPart: Upload finished: %d parts", seq - 1) - - def upload_part(self, seq, offset, chunk_size, labels, buffer = ''): - """ - Upload a file chunk - http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadUploadPart.html - """ - # TODO implement Content-MD5 - debug("Uploading part %i of %r (%s bytes)" % (seq, self.upload_id, chunk_size)) - headers = { "content-length": chunk_size } - query_string = "?partNumber=%i&uploadId=%s" % (seq, self.upload_id) - request = self.s3.create_request("OBJECT_PUT", uri = self.uri, headers = headers, extra = query_string) - response = self.s3.send_file(request, self.file, labels, buffer, offset = offset, chunk_size = chunk_size) - self.parts[seq] = response["headers"]["etag"] - return response - - def complete_multipart_upload(self): - """ - Finish a multipart upload - http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadComplete.html - """ - debug("MultiPart: Completing upload: %s" % self.upload_id) - - parts_xml = [] - part_xml = "%i%s" - for seq, etag in self.parts.items(): - parts_xml.append(part_xml % (seq, etag)) - body = "%s" % ("".join(parts_xml)) - - headers = { "content-length": len(body) } - request = self.s3.create_request("OBJECT_POST", uri = self.uri, headers = headers, extra = "?uploadId=%s" % (self.upload_id)) - response = self.s3.send_request(request, body = body) - - return response - - def abort_upload(self): - """ - Abort multipart upload - http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadAbort.html - """ - debug("MultiPart: Aborting upload: %s" % self.upload_id) - request = self.s3.create_request("OBJECT_DELETE", uri = self.uri, extra = "?uploadId=%s" % (self.upload_id)) - response = self.s3.send_request(request) - return response - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/PkgInfo.py b/fabfile/S3/PkgInfo.py deleted file mode 100644 index b38cd3d..0000000 --- 
a/fabfile/S3/PkgInfo.py +++ /dev/null @@ -1,14 +0,0 @@ -package = "s3cmd" -version = "1.5.0-alpha3" -url = "http://s3tools.org" -license = "GPL version 2" -short_description = "Command line tool for managing Amazon S3 and CloudFront services" -long_description = """ -S3cmd lets you copy files from/to Amazon S3 -(Simple Storage Service) using a simple to use -command line client. Supports rsync-like backup, -GPG encryption, and more. Also supports management -of Amazon's CloudFront content delivery network. -""" - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/Progress.py b/fabfile/S3/Progress.py deleted file mode 100644 index 2b5571b..0000000 --- a/fabfile/S3/Progress.py +++ /dev/null @@ -1,173 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import sys -import datetime -import time -import Utils - -class Progress(object): - _stdout = sys.stdout - _last_display = 0 - - def __init__(self, labels, total_size): - self._stdout = sys.stdout - self.new_file(labels, total_size) - - def new_file(self, labels, total_size): - self.labels = labels - self.total_size = total_size - # Set initial_position to something in the - # case we're not counting from 0. For instance - # when appending to a partially downloaded file. - # Setting initial_position will let the speed - # be computed right. 
- self.initial_position = 0 - self.current_position = self.initial_position - self.time_start = datetime.datetime.now() - self.time_last = self.time_start - self.time_current = self.time_start - - self.display(new_file = True) - - def update(self, current_position = -1, delta_position = -1): - self.time_last = self.time_current - self.time_current = datetime.datetime.now() - if current_position > -1: - self.current_position = current_position - elif delta_position > -1: - self.current_position += delta_position - #else: - # no update, just call display() - self.display() - - def done(self, message): - self.display(done_message = message) - - def output_labels(self): - self._stdout.write(u"%(source)s -> %(destination)s %(extra)s\n" % self.labels) - self._stdout.flush() - - def _display_needed(self): - # We only need to update the display every so often. - if time.time() - self._last_display > 1: - self._last_display = time.time() - return True - return False - - def display(self, new_file = False, done_message = None): - """ - display(new_file = False[/True], done = False[/True]) - - Override this method to provide a nicer output. 
- """ - if new_file: - self.output_labels() - self.last_milestone = 0 - return - - if self.current_position == self.total_size: - print_size = Utils.formatSize(self.current_position, True) - if print_size[1] != "": print_size[1] += "B" - timedelta = self.time_current - self.time_start - sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0 - print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True) - self._stdout.write("100%% %s%s in %.2fs (%.2f %sB/s)\n" % - (print_size[0], print_size[1], sec_elapsed, print_speed[0], print_speed[1])) - self._stdout.flush() - return - - rel_position = selfself.current_position * 100 / self.total_size - if rel_position >= self.last_milestone: - self.last_milestone = (int(rel_position) / 5) * 5 - self._stdout.write("%d%% ", self.last_milestone) - self._stdout.flush() - return - -class ProgressANSI(Progress): - ## http://en.wikipedia.org/wiki/ANSI_escape_code - SCI = '\x1b[' - ANSI_hide_cursor = SCI + "?25l" - ANSI_show_cursor = SCI + "?25h" - ANSI_save_cursor_pos = SCI + "s" - ANSI_restore_cursor_pos = SCI + "u" - ANSI_move_cursor_to_column = SCI + "%uG" - ANSI_erase_to_eol = SCI + "0K" - ANSI_erase_current_line = SCI + "2K" - - def display(self, new_file = False, done_message = None): - """ - display(new_file = False[/True], done_message = None) - """ - if new_file: - self.output_labels() - self._stdout.write(self.ANSI_save_cursor_pos) - self._stdout.flush() - return - - # Only display progress every so often - if not (new_file or done_message) and not self._display_needed(): - return - - timedelta = self.time_current - self.time_start - sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0 - if (sec_elapsed > 0): - print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True) - else: - print_speed = (0, "") - 
self._stdout.write(self.ANSI_restore_cursor_pos) - self._stdout.write(self.ANSI_erase_to_eol) - self._stdout.write("%(current)s of %(total)s %(percent)3d%% in %(elapsed)ds %(speed).2f %(speed_coeff)sB/s" % { - "current" : str(self.current_position).rjust(len(str(self.total_size))), - "total" : self.total_size, - "percent" : self.total_size and (self.current_position * 100 / self.total_size) or 0, - "elapsed" : sec_elapsed, - "speed" : print_speed[0], - "speed_coeff" : print_speed[1] - }) - - if done_message: - self._stdout.write(" %s\n" % done_message) - - self._stdout.flush() - -class ProgressCR(Progress): - ## Uses CR char (Carriage Return) just like other progress bars do. - CR_char = chr(13) - - def display(self, new_file = False, done_message = None): - """ - display(new_file = False[/True], done_message = None) - """ - if new_file: - self.output_labels() - return - - # Only display progress every so often - if not (new_file or done_message) and not self._display_needed(): - return - - timedelta = self.time_current - self.time_start - sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0 - if (sec_elapsed > 0): - print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True) - else: - print_speed = (0, "") - self._stdout.write(self.CR_char) - output = " %(current)s of %(total)s %(percent)3d%% in %(elapsed)4ds %(speed)7.2f %(speed_coeff)sB/s" % { - "current" : str(self.current_position).rjust(len(str(self.total_size))), - "total" : self.total_size, - "percent" : self.total_size and (self.current_position * 100 / self.total_size) or 0, - "elapsed" : sec_elapsed, - "speed" : print_speed[0], - "speed_coeff" : print_speed[1] - } - self._stdout.write(output) - if done_message: - self._stdout.write(" %s\n" % done_message) - - self._stdout.flush() - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/S3.py b/fabfile/S3/S3.py deleted file mode 100644 index 4882d3e..0000000 --- 
a/fabfile/S3/S3.py +++ /dev/null @@ -1,979 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import sys -import os, os.path -import time -import httplib -import logging -import mimetypes -import re -from logging import debug, info, warning, error -from stat import ST_SIZE - -try: - from hashlib import md5 -except ImportError: - from md5 import md5 - -from Utils import * -from SortedDict import SortedDict -from AccessLog import AccessLog -from ACL import ACL, GranteeLogDelivery -from BidirMap import BidirMap -from Config import Config -from Exceptions import * -from MultiPart import MultiPartUpload -from S3Uri import S3Uri -from ConnMan import ConnMan - -try: - import magic, gzip - try: - ## https://github.com/ahupp/python-magic - magic_ = magic.Magic(mime=True) - def mime_magic_file(file): - return magic_.from_file(file) - def mime_magic_buffer(buffer): - return magic_.from_buffer(buffer) - except TypeError: - ## http://pypi.python.org/pypi/filemagic - try: - magic_ = magic.Magic(flags=magic.MAGIC_MIME) - def mime_magic_file(file): - return magic_.id_filename(file) - def mime_magic_buffer(buffer): - return magic_.id_buffer(buffer) - except TypeError: - ## file-5.11 built-in python bindings - magic_ = magic.open(magic.MAGIC_MIME) - magic_.load() - def mime_magic_file(file): - return magic_.file(file) - def mime_magic_buffer(buffer): - return magic_.buffer(buffer) - - except AttributeError: - ## Older python-magic versions - magic_ = magic.open(magic.MAGIC_MIME) - magic_.load() - def mime_magic_file(file): - return magic_.file(file) - def mime_magic_buffer(buffer): - return magic_.buffer(buffer) - - def mime_magic(file): - type = mime_magic_file(file) - if type != "application/x-gzip; charset=binary": - return (type, None) - else: - return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip') - -except ImportError, e: - if str(e).find("magic") >= 0: - magic_message = "Module python-magic is not 
available." - else: - magic_message = "Module python-magic can't be used (%s)." % e.message - magic_message += " Guessing MIME types based on file extensions." - magic_warned = False - def mime_magic(file): - global magic_warned - if (not magic_warned): - warning(magic_message) - magic_warned = True - return mimetypes.guess_type(file) - -__all__ = [] -class S3Request(object): - def __init__(self, s3, method_string, resource, headers, params = {}): - self.s3 = s3 - self.headers = SortedDict(headers or {}, ignore_case = True) - # Add in any extra headers from s3 config object - if self.s3.config.extra_headers: - self.headers.update(self.s3.config.extra_headers) - if len(self.s3.config.access_token)>0: - self.s3.config.role_refresh() - self.headers['x-amz-security-token']=self.s3.config.access_token - self.resource = resource - self.method_string = method_string - self.params = params - - self.update_timestamp() - self.sign() - - def update_timestamp(self): - if self.headers.has_key("date"): - del(self.headers["date"]) - self.headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) - - def format_param_str(self): - """ - Format URL parameters from self.params and returns - ?parm1=val1&parm2=val2 or an empty string if there - are no parameters. Output of this function should - be appended directly to self.resource['uri'] - """ - param_str = "" - for param in self.params: - if self.params[param] not in (None, ""): - param_str += "&%s=%s" % (param, self.params[param]) - else: - param_str += "&%s" % param - return param_str and "?" 
+ param_str[1:] - - def sign(self): - h = self.method_string + "\n" - h += self.headers.get("content-md5", "")+"\n" - h += self.headers.get("content-type", "")+"\n" - h += self.headers.get("date", "")+"\n" - for header in self.headers.keys(): - if header.startswith("x-amz-"): - h += header+":"+str(self.headers[header])+"\n" - if self.resource['bucket']: - h += "/" + self.resource['bucket'] - h += self.resource['uri'] - debug("SignHeaders: " + repr(h)) - signature = sign_string(h) - - self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature - - def get_triplet(self): - self.update_timestamp() - self.sign() - resource = dict(self.resource) ## take a copy - resource['uri'] += self.format_param_str() - return (self.method_string, resource, self.headers) - -class S3(object): - http_methods = BidirMap( - GET = 0x01, - PUT = 0x02, - HEAD = 0x04, - DELETE = 0x08, - POST = 0x10, - MASK = 0x1F, - ) - - targets = BidirMap( - SERVICE = 0x0100, - BUCKET = 0x0200, - OBJECT = 0x0400, - MASK = 0x0700, - ) - - operations = BidirMap( - UNDFINED = 0x0000, - LIST_ALL_BUCKETS = targets["SERVICE"] | http_methods["GET"], - BUCKET_CREATE = targets["BUCKET"] | http_methods["PUT"], - BUCKET_LIST = targets["BUCKET"] | http_methods["GET"], - BUCKET_DELETE = targets["BUCKET"] | http_methods["DELETE"], - OBJECT_PUT = targets["OBJECT"] | http_methods["PUT"], - OBJECT_GET = targets["OBJECT"] | http_methods["GET"], - OBJECT_HEAD = targets["OBJECT"] | http_methods["HEAD"], - OBJECT_DELETE = targets["OBJECT"] | http_methods["DELETE"], - OBJECT_POST = targets["OBJECT"] | http_methods["POST"], - ) - - codes = { - "NoSuchBucket" : "Bucket '%s' does not exist", - "AccessDenied" : "Access to bucket '%s' was denied", - "BucketAlreadyExists" : "Bucket '%s' already exists", - } - - ## S3 sometimes sends HTTP-307 response - redir_map = {} - - ## Maximum attempts of re-issuing failed requests - _max_retries = 5 - - def __init__(self, config): - self.config = config - - def 
get_hostname(self, bucket): - if bucket and check_bucket_name_dns_conformity(bucket): - if self.redir_map.has_key(bucket): - host = self.redir_map[bucket] - else: - host = getHostnameFromBucket(bucket) - else: - host = self.config.host_base - debug('get_hostname(%s): %s' % (bucket, host)) - return host - - def set_hostname(self, bucket, redir_hostname): - self.redir_map[bucket] = redir_hostname - - def format_uri(self, resource): - if resource['bucket'] and not check_bucket_name_dns_conformity(resource['bucket']): - uri = "/%s%s" % (resource['bucket'], resource['uri']) - else: - uri = resource['uri'] - if self.config.proxy_host != "": - uri = "http://%s%s" % (self.get_hostname(resource['bucket']), uri) - debug('format_uri(): ' + uri) - return uri - - ## Commands / Actions - def list_all_buckets(self): - request = self.create_request("LIST_ALL_BUCKETS") - response = self.send_request(request) - response["list"] = getListFromXml(response["data"], "Bucket") - return response - - def bucket_list(self, bucket, prefix = None, recursive = None): - def _list_truncated(data): - ## can either be "true" or "false" or be missing completely - is_truncated = getTextFromXml(data, ".//IsTruncated") or "false" - return is_truncated.lower() != "false" - - def _get_contents(data): - return getListFromXml(data, "Contents") - - def _get_common_prefixes(data): - return getListFromXml(data, "CommonPrefixes") - - uri_params = {} - truncated = True - list = [] - prefixes = [] - - while truncated: - response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params) - current_list = _get_contents(response["data"]) - current_prefixes = _get_common_prefixes(response["data"]) - truncated = _list_truncated(response["data"]) - if truncated: - if current_list: - uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"]) - else: - uri_params['marker'] = self.urlencode_string(current_prefixes[-1]["Prefix"]) - debug("Listing continues after '%s'" % uri_params['marker']) - - list += 
current_list - prefixes += current_prefixes - - response['list'] = list - response['common_prefixes'] = prefixes - return response - - def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}): - if prefix: - uri_params['prefix'] = self.urlencode_string(prefix) - if not self.config.recursive and not recursive: - uri_params['delimiter'] = "/" - request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params) - response = self.send_request(request) - #debug(response) - return response - - def bucket_create(self, bucket, bucket_location = None): - headers = SortedDict(ignore_case = True) - body = "" - if bucket_location and bucket_location.strip().upper() != "US": - bucket_location = bucket_location.strip() - if bucket_location.upper() == "EU": - bucket_location = bucket_location.upper() - else: - bucket_location = bucket_location.lower() - body = "" - body += bucket_location - body += "" - debug("bucket_location: " + body) - check_bucket_name(bucket, dns_strict = True) - else: - check_bucket_name(bucket, dns_strict = False) - if self.config.acl_public: - headers["x-amz-acl"] = "public-read" - request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers) - response = self.send_request(request, body) - return response - - def bucket_delete(self, bucket): - request = self.create_request("BUCKET_DELETE", bucket = bucket) - response = self.send_request(request) - return response - - def get_bucket_location(self, uri): - request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?location") - response = self.send_request(request) - location = getTextFromXml(response['data'], "LocationConstraint") - if not location or location in [ "", "US" ]: - location = "us-east-1" - elif location == "EU": - location = "eu-west-1" - return location - - def bucket_info(self, uri): - # For now reports only "Location". One day perhaps more. 
- response = {} - response['bucket-location'] = self.get_bucket_location(uri) - return response - - def website_info(self, uri, bucket_location = None): - headers = SortedDict(ignore_case = True) - bucket = uri.bucket() - body = "" - - request = self.create_request("BUCKET_LIST", bucket = bucket, extra="?website") - try: - response = self.send_request(request, body) - response['index_document'] = getTextFromXml(response['data'], ".//IndexDocument//Suffix") - response['error_document'] = getTextFromXml(response['data'], ".//ErrorDocument//Key") - response['website_endpoint'] = self.config.website_endpoint % { - "bucket" : uri.bucket(), - "location" : self.get_bucket_location(uri)} - return response - except S3Error, e: - if e.status == 404: - debug("Could not get /?website - website probably not configured for this bucket") - return None - raise - - def website_create(self, uri, bucket_location = None): - headers = SortedDict(ignore_case = True) - bucket = uri.bucket() - body = '' - body += ' ' - body += (' %s' % self.config.website_index) - body += ' ' - if self.config.website_error: - body += ' ' - body += (' %s' % self.config.website_error) - body += ' ' - body += '' - - request = self.create_request("BUCKET_CREATE", bucket = bucket, extra="?website") - debug("About to send request '%s' with body '%s'" % (request, body)) - response = self.send_request(request, body) - debug("Received response '%s'" % (response)) - - return response - - def website_delete(self, uri, bucket_location = None): - headers = SortedDict(ignore_case = True) - bucket = uri.bucket() - body = "" - - request = self.create_request("BUCKET_DELETE", bucket = bucket, extra="?website") - debug("About to send request '%s' with body '%s'" % (request, body)) - response = self.send_request(request, body) - debug("Received response '%s'" % (response)) - - if response['status'] != 204: - raise S3ResponseError("Expected status 204: %s" % response) - - return response - - def add_encoding(self, filename, 
content_type): - if content_type.find("charset=") != -1: - return False - exts = self.config.add_encoding_exts.split(',') - if exts[0]=='': - return False - parts = filename.rsplit('.',2) - if len(parts) < 2: - return False - ext = parts[1] - if ext in exts: - return True - else: - return False - - def object_put(self, filename, uri, extra_headers = None, extra_label = ""): - # TODO TODO - # Make it consistent with stream-oriented object_get() - if uri.type != "s3": - raise ValueError("Expected URI type 's3', got '%s'" % uri.type) - - if filename != "-" and not os.path.isfile(filename): - raise InvalidFileError(u"%s is not a regular file" % unicodise(filename)) - try: - if filename == "-": - file = sys.stdin - size = 0 - else: - file = open(filename, "rb") - size = os.stat(filename)[ST_SIZE] - except (IOError, OSError), e: - raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror)) - - headers = SortedDict(ignore_case = True) - if extra_headers: - headers.update(extra_headers) - - ## MIME-type handling - content_type = self.config.mime_type - content_encoding = None - if filename != "-" and not content_type and self.config.guess_mime_type: - (content_type, content_encoding) = mime_magic(filename) - if not content_type: - content_type = self.config.default_mime_type - if not content_encoding: - content_encoding = self.config.encoding.upper() - - ## add charset to content type - if self.add_encoding(filename, content_type) and content_encoding is not None: - content_type = content_type + "; charset=" + content_encoding - - headers["content-type"] = content_type - if content_encoding is not None: - headers["content-encoding"] = content_encoding - - ## Other Amazon S3 attributes - if self.config.acl_public: - headers["x-amz-acl"] = "public-read" - if self.config.reduced_redundancy: - headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY" - - ## Multipart decision - multipart = False - if not self.config.enable_multipart and filename == "-": - raise 
ParameterError("Multi-part upload is required to upload from stdin") - if self.config.enable_multipart: - if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-": - multipart = True - if multipart: - # Multipart requests are quite different... drop here - return self.send_file_multipart(file, headers, uri, size) - - ## Not multipart... - headers["content-length"] = size - request = self.create_request("OBJECT_PUT", uri = uri, headers = headers) - labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label } - response = self.send_file(request, file, labels) - return response - - def object_get(self, uri, stream, start_position = 0, extra_label = ""): - if uri.type != "s3": - raise ValueError("Expected URI type 's3', got '%s'" % uri.type) - request = self.create_request("OBJECT_GET", uri = uri) - labels = { 'source' : unicodise(uri.uri()), 'destination' : unicodise(stream.name), 'extra' : extra_label } - response = self.recv_file(request, stream, labels, start_position) - return response - - def object_delete(self, uri): - if uri.type != "s3": - raise ValueError("Expected URI type 's3', got '%s'" % uri.type) - request = self.create_request("OBJECT_DELETE", uri = uri) - response = self.send_request(request) - return response - - def object_copy(self, src_uri, dst_uri, extra_headers = None): - if src_uri.type != "s3": - raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type) - if dst_uri.type != "s3": - raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type) - headers = SortedDict(ignore_case = True) - headers['x-amz-copy-source'] = "/%s/%s" % (src_uri.bucket(), self.urlencode_string(src_uri.object())) - ## TODO: For now COPY, later maybe add a switch? 
- headers['x-amz-metadata-directive'] = "COPY" - if self.config.acl_public: - headers["x-amz-acl"] = "public-read" - if self.config.reduced_redundancy: - headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY" - # if extra_headers: - # headers.update(extra_headers) - request = self.create_request("OBJECT_PUT", uri = dst_uri, headers = headers) - response = self.send_request(request) - return response - - def object_move(self, src_uri, dst_uri, extra_headers = None): - response_copy = self.object_copy(src_uri, dst_uri, extra_headers) - debug("Object %s copied to %s" % (src_uri, dst_uri)) - if getRootTagName(response_copy["data"]) == "CopyObjectResult": - response_delete = self.object_delete(src_uri) - debug("Object %s deleted" % src_uri) - return response_copy - - def object_info(self, uri): - request = self.create_request("OBJECT_HEAD", uri = uri) - response = self.send_request(request) - return response - - def get_acl(self, uri): - if uri.has_object(): - request = self.create_request("OBJECT_GET", uri = uri, extra = "?acl") - else: - request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?acl") - - response = self.send_request(request) - acl = ACL(response['data']) - return acl - - def set_acl(self, uri, acl): - if uri.has_object(): - request = self.create_request("OBJECT_PUT", uri = uri, extra = "?acl") - else: - request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?acl") - - body = str(acl) - debug(u"set_acl(%s): acl-xml: %s" % (uri, body)) - response = self.send_request(request, body) - return response - - def get_policy(self, uri): - request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?policy") - response = self.send_request(request) - return response['data'] - - def set_policy(self, uri, policy): - headers = {} - # TODO check policy is proper json string - headers['content-type'] = 'application/json' - request = self.create_request("BUCKET_CREATE", uri = uri, - extra = "?policy", 
headers=headers) - body = policy - debug(u"set_policy(%s): policy-json: %s" % (uri, body)) - request.sign() - response = self.send_request(request, body=body) - return response - - def delete_policy(self, uri): - request = self.create_request("BUCKET_DELETE", uri = uri, extra = "?policy") - debug(u"delete_policy(%s)" % uri) - response = self.send_request(request) - return response - - def get_accesslog(self, uri): - request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging") - response = self.send_request(request) - accesslog = AccessLog(response['data']) - return accesslog - - def set_accesslog_acl(self, uri): - acl = self.get_acl(uri) - debug("Current ACL(%s): %s" % (uri.uri(), str(acl))) - acl.appendGrantee(GranteeLogDelivery("READ_ACP")) - acl.appendGrantee(GranteeLogDelivery("WRITE")) - debug("Updated ACL(%s): %s" % (uri.uri(), str(acl))) - self.set_acl(uri, acl) - - def set_accesslog(self, uri, enable, log_target_prefix_uri = None, acl_public = False): - request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?logging") - accesslog = AccessLog() - if enable: - accesslog.enableLogging(log_target_prefix_uri) - accesslog.setAclPublic(acl_public) - else: - accesslog.disableLogging() - body = str(accesslog) - debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body)) - try: - response = self.send_request(request, body) - except S3Error, e: - if e.info['Code'] == "InvalidTargetBucketForLogging": - info("Setting up log-delivery ACL for target bucket.") - self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket())) - response = self.send_request(request, body) - else: - raise - return accesslog, response - - ## Low level methods - def urlencode_string(self, string, urlencoding_mode = None): - if type(string) == unicode: - string = string.encode("utf-8") - - if urlencoding_mode is None: - urlencoding_mode = self.config.urlencoding_mode - - if urlencoding_mode == "verbatim": - ## Don't do any 
pre-processing - return string - - encoded = "" - ## List of characters that must be escaped for S3 - ## Haven't found this in any official docs - ## but my tests show it's more less correct. - ## If you start getting InvalidSignature errors - ## from S3 check the error headers returned - ## from S3 to see whether the list hasn't - ## changed. - for c in string: # I'm not sure how to know in what encoding - # 'object' is. Apparently "type(object)==str" - # but the contents is a string of unicode - # bytes, e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k' - # Don't know what it will do on non-utf8 - # systems. - # [hope that sounds reassuring ;-)] - o = ord(c) - if (o < 0x20 or o == 0x7f): - if urlencoding_mode == "fixbucket": - encoded += "%%%02X" % o - else: - error(u"Non-printable character 0x%02x in: %s" % (o, string)) - error(u"Please report it to s3tools-bugs@lists.sourceforge.net") - encoded += replace_nonprintables(c) - elif (o == 0x20 or # Space and below - o == 0x22 or # " - o == 0x23 or # # - o == 0x25 or # % (escape character) - o == 0x26 or # & - o == 0x2B or # + (or it would become ) - o == 0x3C or # < - o == 0x3E or # > - o == 0x3F or # ? 
- o == 0x60 or # ` - o >= 123): # { and above, including >= 128 for UTF-8 - encoded += "%%%02X" % o - else: - encoded += c - debug("String '%s' encoded to '%s'" % (string, encoded)) - return encoded - - def create_request(self, operation, uri = None, bucket = None, object = None, headers = None, extra = None, **params): - resource = { 'bucket' : None, 'uri' : "/" } - - if uri and (bucket or object): - raise ValueError("Both 'uri' and either 'bucket' or 'object' parameters supplied") - ## If URI is given use that instead of bucket/object parameters - if uri: - bucket = uri.bucket() - object = uri.has_object() and uri.object() or None - - if bucket: - resource['bucket'] = str(bucket) - if object: - resource['uri'] = "/" + self.urlencode_string(object) - if extra: - resource['uri'] += extra - - method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"]) - - request = S3Request(self, method_string, resource, headers, params) - - debug("CreateRequest: resource[uri]=" + resource['uri']) - return request - - def _fail_wait(self, retries): - # Wait a few seconds. The more it fails the more we wait. 
- return (self._max_retries - retries + 1) * 3 - - def send_request(self, request, body = None, retries = _max_retries): - method_string, resource, headers = request.get_triplet() - debug("Processing request, please wait...") - if not headers.has_key('content-length'): - headers['content-length'] = body and len(body) or 0 - try: - # "Stringify" all headers - for header in headers.keys(): - headers[header] = str(headers[header]) - conn = ConnMan.get(self.get_hostname(resource['bucket'])) - uri = self.format_uri(resource) - debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or ""))) - conn.c.request(method_string, uri, body, headers) - response = {} - http_response = conn.c.getresponse() - response["status"] = http_response.status - response["reason"] = http_response.reason - response["headers"] = convertTupleListToDict(http_response.getheaders()) - response["data"] = http_response.read() - debug("Response: " + str(response)) - ConnMan.put(conn) - except ParameterError, e: - raise - except Exception, e: - if retries: - warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) - warning("Waiting %d sec..." % self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - return self.send_request(request, body, retries - 1) - else: - raise S3RequestError("Request failed for: %s" % resource['uri']) - - if response["status"] == 307: - ## RedirectPermanent - redir_bucket = getTextFromXml(response['data'], ".//Bucket") - redir_hostname = getTextFromXml(response['data'], ".//Endpoint") - self.set_hostname(redir_bucket, redir_hostname) - warning("Redirected to: %s" % (redir_hostname)) - return self.send_request(request, body) - - if response["status"] >= 500: - e = S3Error(response) - if retries: - warning(u"Retrying failed request: %s" % resource['uri']) - warning(unicode(e)) - warning("Waiting %d sec..." 
% self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - return self.send_request(request, body, retries - 1) - else: - raise e - - if response["status"] < 200 or response["status"] > 299: - raise S3Error(response) - - return response - - def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1): - method_string, resource, headers = request.get_triplet() - size_left = size_total = headers.get("content-length") - if self.config.progress_meter: - progress = self.config.progress_class(labels, size_total) - else: - info("Sending file '%s', please wait..." % file.name) - timestamp_start = time.time() - try: - conn = ConnMan.get(self.get_hostname(resource['bucket'])) - conn.c.putrequest(method_string, self.format_uri(resource)) - for header in headers.keys(): - conn.c.putheader(header, str(headers[header])) - conn.c.endheaders() - except ParameterError, e: - raise - except Exception, e: - if self.config.progress_meter: - progress.done("failed") - if retries: - warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) - warning("Waiting %d sec..." 
% self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - # Connection error -> same throttle value - return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) - else: - raise S3UploadError("Upload failed for: %s" % resource['uri']) - if buffer == '': - file.seek(offset) - md5_hash = md5() - try: - while (size_left > 0): - #debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left)) - if buffer == '': - data = file.read(min(self.config.send_chunk, size_left)) - else: - data = buffer - md5_hash.update(data) - conn.c.send(data) - if self.config.progress_meter: - progress.update(delta_position = len(data)) - size_left -= len(data) - if throttle: - time.sleep(throttle) - md5_computed = md5_hash.hexdigest() - response = {} - http_response = conn.c.getresponse() - response["status"] = http_response.status - response["reason"] = http_response.reason - response["headers"] = convertTupleListToDict(http_response.getheaders()) - response["data"] = http_response.read() - response["size"] = size_total - ConnMan.put(conn) - debug(u"Response: %s" % response) - except ParameterError, e: - raise - except Exception, e: - if self.config.progress_meter: - progress.done("failed") - if retries: - if retries < self._max_retries: - throttle = throttle and throttle * 5 or 0.01 - warning("Upload failed: %s (%s)" % (resource['uri'], e)) - warning("Retrying on lower speed (throttle=%0.2f)" % throttle) - warning("Waiting %d sec..." 
% self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - # Connection error -> same throttle value - return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) - else: - debug("Giving up on '%s' %s" % (file.name, e)) - raise S3UploadError("Upload failed for: %s" % resource['uri']) - - timestamp_end = time.time() - response["elapsed"] = timestamp_end - timestamp_start - response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1) - - if self.config.progress_meter: - ## Finalising the upload takes some time -> update() progress meter - ## to correct the average speed. Otherwise people will complain that - ## 'progress' and response["speed"] are inconsistent ;-) - progress.update() - progress.done("done") - - if response["status"] == 307: - ## RedirectPermanent - redir_bucket = getTextFromXml(response['data'], ".//Bucket") - redir_hostname = getTextFromXml(response['data'], ".//Endpoint") - self.set_hostname(redir_bucket, redir_hostname) - warning("Redirected to: %s" % (redir_hostname)) - return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size) - - # S3 from time to time doesn't send ETag back in a response :-( - # Force re-upload here. - if not response['headers'].has_key('etag'): - response['headers']['etag'] = '' - - if response["status"] < 200 or response["status"] > 299: - try_retry = False - if response["status"] >= 500: - ## AWS internal error - retry - try_retry = True - elif response["status"] >= 400: - err = S3Error(response) - ## Retriable client error? - if err.code in [ 'BadDigest', 'OperationAborted', 'TokenRefreshRequired', 'RequestTimeout' ]: - try_retry = True - - if try_retry: - if retries: - warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response))) - warning("Waiting %d sec..." 
% self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) - else: - warning("Too many failures. Giving up on '%s'" % (file.name)) - raise S3UploadError - - ## Non-recoverable error - raise S3Error(response) - - debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"])) - if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest(): - warning("MD5 Sums don't match!") - if retries: - warning("Retrying upload of %s" % (file.name)) - return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) - else: - warning("Too many failures. Giving up on '%s'" % (file.name)) - raise S3UploadError - - return response - - def send_file_multipart(self, file, headers, uri, size): - chunk_size = self.config.multipart_chunk_size_mb * 1024 * 1024 - timestamp_start = time.time() - upload = MultiPartUpload(self, file, uri, headers) - upload.upload_all_parts() - response = upload.complete_multipart_upload() - timestamp_end = time.time() - response["elapsed"] = timestamp_end - timestamp_start - response["size"] = size - response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1) - return response - - def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries): - method_string, resource, headers = request.get_triplet() - if self.config.progress_meter: - progress = self.config.progress_class(labels, 0) - else: - info("Receiving file '%s', please wait..." % stream.name) - timestamp_start = time.time() - try: - conn = ConnMan.get(self.get_hostname(resource['bucket'])) - conn.c.putrequest(method_string, self.format_uri(resource)) - for header in headers.keys(): - conn.c.putheader(header, str(headers[header])) - if start_position > 0: - debug("Requesting Range: %d .. 
end" % start_position) - conn.c.putheader("Range", "bytes=%d-" % start_position) - conn.c.endheaders() - response = {} - http_response = conn.c.getresponse() - response["status"] = http_response.status - response["reason"] = http_response.reason - response["headers"] = convertTupleListToDict(http_response.getheaders()) - debug("Response: %s" % response) - except ParameterError, e: - raise - except Exception, e: - if self.config.progress_meter: - progress.done("failed") - if retries: - warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) - warning("Waiting %d sec..." % self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - # Connection error -> same throttle value - return self.recv_file(request, stream, labels, start_position, retries - 1) - else: - raise S3DownloadError("Download failed for: %s" % resource['uri']) - - if response["status"] == 307: - ## RedirectPermanent - response['data'] = http_response.read() - redir_bucket = getTextFromXml(response['data'], ".//Bucket") - redir_hostname = getTextFromXml(response['data'], ".//Endpoint") - self.set_hostname(redir_bucket, redir_hostname) - warning("Redirected to: %s" % (redir_hostname)) - return self.recv_file(request, stream, labels) - - if response["status"] < 200 or response["status"] > 299: - raise S3Error(response) - - if start_position == 0: - # Only compute MD5 on the fly if we're downloading from beginning - # Otherwise we'd get a nonsense. 
- md5_hash = md5() - size_left = int(response["headers"]["content-length"]) - size_total = start_position + size_left - current_position = start_position - - if self.config.progress_meter: - progress.total_size = size_total - progress.initial_position = current_position - progress.current_position = current_position - - try: - while (current_position < size_total): - this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left - data = http_response.read(this_chunk) - if len(data) == 0: - raise S3Error("EOF from S3!") - - stream.write(data) - if start_position == 0: - md5_hash.update(data) - current_position += len(data) - ## Call progress meter from here... - if self.config.progress_meter: - progress.update(delta_position = len(data)) - ConnMan.put(conn) - except Exception, e: - if self.config.progress_meter: - progress.done("failed") - if retries: - warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) - warning("Waiting %d sec..." % self._fail_wait(retries)) - time.sleep(self._fail_wait(retries)) - # Connection error -> same throttle value - return self.recv_file(request, stream, labels, current_position, retries - 1) - else: - raise S3DownloadError("Download failed for: %s" % resource['uri']) - - stream.flush() - timestamp_end = time.time() - - if self.config.progress_meter: - ## The above stream.flush() may take some time -> update() progress meter - ## to correct the average speed. Otherwise people will complain that - ## 'progress' and response["speed"] are inconsistent ;-) - progress.update() - progress.done("done") - - if start_position == 0: - # Only compute MD5 on the fly if we were downloading from the beginning - response["md5"] = md5_hash.hexdigest() - else: - # Otherwise try to compute MD5 of the output file - try: - response["md5"] = hash_file_md5(stream.name) - except IOError, e: - if e.errno != errno.ENOENT: - warning("Unable to open file: %s: %s" % (stream.name, e)) - warning("Unable to verify MD5. 
Assume it matches.") - response["md5"] = response["headers"]["etag"] - - response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0 - response["elapsed"] = timestamp_end - timestamp_start - response["size"] = current_position - response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1) - if response["size"] != start_position + long(response["headers"]["content-length"]): - warning("Reported size (%s) does not match received size (%s)" % ( - start_position + response["headers"]["content-length"], response["size"])) - debug("ReceiveFile: Computed MD5 = %s" % response["md5"]) - if not response["md5match"]: - warning("MD5 signatures do not match: computed=%s, received=%s" % ( - response["md5"], response["headers"]["etag"])) - return response -__all__.append("S3") - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/S3Uri.py b/fabfile/S3/S3Uri.py deleted file mode 100644 index 48692ec..0000000 --- a/fabfile/S3/S3Uri.py +++ /dev/null @@ -1,223 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import os -import re -import sys -from BidirMap import BidirMap -from logging import debug -import S3 -from Utils import unicodise, check_bucket_name_dns_conformity -import Config - -class S3Uri(object): - type = None - _subclasses = None - - def __new__(self, string): - if not self._subclasses: - ## Generate a list of all subclasses of S3Uri - self._subclasses = [] - dict = sys.modules[__name__].__dict__ - for something in dict: - if type(dict[something]) is not type(self): - continue - if issubclass(dict[something], self) and dict[something] != self: - self._subclasses.append(dict[something]) - for subclass in self._subclasses: - try: - instance = object.__new__(subclass) - instance.__init__(string) - return instance - except ValueError, e: - continue - raise ValueError("%s: not a recognized URI" % string) - - def __str__(self): - return self.uri() - - def 
__unicode__(self): - return self.uri() - - def __repr__(self): - return "<%s: %s>" % (self.__class__.__name__, self.__unicode__()) - - def public_url(self): - raise ValueError("This S3 URI does not have Anonymous URL representation") - - def basename(self): - return self.__unicode__().split("/")[-1] - -class S3UriS3(S3Uri): - type = "s3" - _re = re.compile("^s3://([^/]+)/?(.*)", re.IGNORECASE) - def __init__(self, string): - match = self._re.match(string) - if not match: - raise ValueError("%s: not a S3 URI" % string) - groups = match.groups() - self._bucket = groups[0] - self._object = unicodise(groups[1]) - - def bucket(self): - return self._bucket - - def object(self): - return self._object - - def has_bucket(self): - return bool(self._bucket) - - def has_object(self): - return bool(self._object) - - def uri(self): - return "/".join(["s3:/", self._bucket, self._object]) - - def is_dns_compatible(self): - return check_bucket_name_dns_conformity(self._bucket) - - def public_url(self): - if self.is_dns_compatible(): - return "http://%s.%s/%s" % (self._bucket, Config.Config().host_base, self._object) - else: - return "http://%s/%s/%s" % (self._bucket, Config.Config().host_base, self._object) - - def host_name(self): - if self.is_dns_compatible(): - return "%s.s3.amazonaws.com" % (self._bucket) - else: - return "s3.amazonaws.com" - - @staticmethod - def compose_uri(bucket, object = ""): - return "s3://%s/%s" % (bucket, object) - - @staticmethod - def httpurl_to_s3uri(http_url): - m=re.match("(https?://)?([^/]+)/?(.*)", http_url, re.IGNORECASE) - hostname, object = m.groups()[1:] - hostname = hostname.lower() - if hostname == "s3.amazonaws.com": - ## old-style url: http://s3.amazonaws.com/bucket/object - if object.count("/") == 0: - ## no object given - bucket = object - object = "" - else: - ## bucket/object - bucket, object = object.split("/", 1) - elif hostname.endswith(".s3.amazonaws.com"): - ## new-style url: http://bucket.s3.amazonaws.com/object - bucket = 
hostname[:-(len(".s3.amazonaws.com"))] - else: - raise ValueError("Unable to parse URL: %s" % http_url) - return S3Uri("s3://%(bucket)s/%(object)s" % { - 'bucket' : bucket, - 'object' : object }) - -class S3UriS3FS(S3Uri): - type = "s3fs" - _re = re.compile("^s3fs://([^/]*)/?(.*)", re.IGNORECASE) - def __init__(self, string): - match = self._re.match(string) - if not match: - raise ValueError("%s: not a S3fs URI" % string) - groups = match.groups() - self._fsname = groups[0] - self._path = unicodise(groups[1]).split("/") - - def fsname(self): - return self._fsname - - def path(self): - return "/".join(self._path) - - def uri(self): - return "/".join(["s3fs:/", self._fsname, self.path()]) - -class S3UriFile(S3Uri): - type = "file" - _re = re.compile("^(\w+://)?(.*)") - def __init__(self, string): - match = self._re.match(string) - groups = match.groups() - if groups[0] not in (None, "file://"): - raise ValueError("%s: not a file:// URI" % string) - self._path = unicodise(groups[1]).split("/") - - def path(self): - return "/".join(self._path) - - def uri(self): - return "/".join(["file:/", self.path()]) - - def isdir(self): - return os.path.isdir(self.path()) - - def dirname(self): - return os.path.dirname(self.path()) - -class S3UriCloudFront(S3Uri): - type = "cf" - _re = re.compile("^cf://([^/]*)/*(.*)", re.IGNORECASE) - def __init__(self, string): - match = self._re.match(string) - if not match: - raise ValueError("%s: not a CloudFront URI" % string) - groups = match.groups() - self._dist_id = groups[0] - self._request_id = groups[1] != "/" and groups[1] or None - - def dist_id(self): - return self._dist_id - - def request_id(self): - return self._request_id - - def uri(self): - uri = "cf://" + self.dist_id() - if self.request_id(): - uri += "/" + self.request_id() - return uri - -if __name__ == "__main__": - uri = S3Uri("s3://bucket/object") - print "type() =", type(uri) - print "uri =", uri - print "uri.type=", uri.type - print "bucket =", uri.bucket() - print 
"object =", uri.object() - print - - uri = S3Uri("s3://bucket") - print "type() =", type(uri) - print "uri =", uri - print "uri.type=", uri.type - print "bucket =", uri.bucket() - print - - uri = S3Uri("s3fs://filesystem1/path/to/remote/file.txt") - print "type() =", type(uri) - print "uri =", uri - print "uri.type=", uri.type - print "path =", uri.path() - print - - uri = S3Uri("/path/to/local/file.txt") - print "type() =", type(uri) - print "uri =", uri - print "uri.type=", uri.type - print "path =", uri.path() - print - - uri = S3Uri("cf://1234567890ABCD/") - print "type() =", type(uri) - print "uri =", uri - print "uri.type=", uri.type - print "dist_id =", uri.dist_id() - print - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/SimpleDB.py b/fabfile/S3/SimpleDB.py deleted file mode 100644 index f35ac61..0000000 --- a/fabfile/S3/SimpleDB.py +++ /dev/null @@ -1,178 +0,0 @@ -## Amazon SimpleDB library -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -""" -Low-level class for working with Amazon SimpleDB -""" - -import time -import urllib -import base64 -import hmac -import sha -import httplib -from logging import debug, info, warning, error - -from Utils import convertTupleListToDict -from SortedDict import SortedDict -from Exceptions import * - -class SimpleDB(object): - # API Version - # See http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/ - Version = "2007-11-07" - SignatureVersion = 1 - - def __init__(self, config): - self.config = config - - ## ------------------------------------------------ - ## Methods implementing SimpleDB API - ## ------------------------------------------------ - - def ListDomains(self, MaxNumberOfDomains = 100): - ''' - Lists all domains associated with our Access Key. Returns - domain names up to the limit set by MaxNumberOfDomains. 
- ''' - parameters = SortedDict() - parameters['MaxNumberOfDomains'] = MaxNumberOfDomains - return self.send_request("ListDomains", DomainName = None, parameters = parameters) - - def CreateDomain(self, DomainName): - return self.send_request("CreateDomain", DomainName = DomainName) - - def DeleteDomain(self, DomainName): - return self.send_request("DeleteDomain", DomainName = DomainName) - - def PutAttributes(self, DomainName, ItemName, Attributes): - parameters = SortedDict() - parameters['ItemName'] = ItemName - seq = 0 - for attrib in Attributes: - if type(Attributes[attrib]) == type(list()): - for value in Attributes[attrib]: - parameters['Attribute.%d.Name' % seq] = attrib - parameters['Attribute.%d.Value' % seq] = unicode(value) - seq += 1 - else: - parameters['Attribute.%d.Name' % seq] = attrib - parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib]) - seq += 1 - ## TODO: - ## - support for Attribute.N.Replace - ## - support for multiple values for one attribute - return self.send_request("PutAttributes", DomainName = DomainName, parameters = parameters) - - def GetAttributes(self, DomainName, ItemName, Attributes = []): - parameters = SortedDict() - parameters['ItemName'] = ItemName - seq = 0 - for attrib in Attributes: - parameters['AttributeName.%d' % seq] = attrib - seq += 1 - return self.send_request("GetAttributes", DomainName = DomainName, parameters = parameters) - - def DeleteAttributes(self, DomainName, ItemName, Attributes = {}): - """ - Remove specified Attributes from ItemName. - Attributes parameter can be either: - - not specified, in which case the whole Item is removed - - list, e.g. ['Attr1', 'Attr2'] in which case these parameters are removed - - dict, e.g. {'Attr' : 'One', 'Attr' : 'Two'} in which case the - specified values are removed from multi-value attributes. 
- """ - parameters = SortedDict() - parameters['ItemName'] = ItemName - seq = 0 - for attrib in Attributes: - parameters['Attribute.%d.Name' % seq] = attrib - if type(Attributes) == type(dict()): - parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib]) - seq += 1 - return self.send_request("DeleteAttributes", DomainName = DomainName, parameters = parameters) - - def Query(self, DomainName, QueryExpression = None, MaxNumberOfItems = None, NextToken = None): - parameters = SortedDict() - if QueryExpression: - parameters['QueryExpression'] = QueryExpression - if MaxNumberOfItems: - parameters['MaxNumberOfItems'] = MaxNumberOfItems - if NextToken: - parameters['NextToken'] = NextToken - return self.send_request("Query", DomainName = DomainName, parameters = parameters) - ## Handle NextToken? Or maybe not - let the upper level do it - - ## ------------------------------------------------ - ## Low-level methods for handling SimpleDB requests - ## ------------------------------------------------ - - def send_request(self, *args, **kwargs): - request = self.create_request(*args, **kwargs) - #debug("Request: %s" % repr(request)) - conn = self.get_connection() - conn.request("GET", self.format_uri(request['uri_params'])) - http_response = conn.getresponse() - response = {} - response["status"] = http_response.status - response["reason"] = http_response.reason - response["headers"] = convertTupleListToDict(http_response.getheaders()) - response["data"] = http_response.read() - conn.close() - - if response["status"] < 200 or response["status"] > 299: - debug("Response: " + str(response)) - raise S3Error(response) - - return response - - def create_request(self, Action, DomainName, parameters = None): - if not parameters: - parameters = SortedDict() - if len(self.config.access_token) > 0: - self.config.refresh_role() - parameters['Signature']=self.config.access_token - parameters['AWSAccessKeyId'] = self.config.access_key - parameters['Version'] = self.Version - 
parameters['SignatureVersion'] = self.SignatureVersion - parameters['Action'] = Action - parameters['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) - if DomainName: - parameters['DomainName'] = DomainName - parameters['Signature'] = self.sign_request(parameters) - parameters.keys_return_lowercase = False - uri_params = urllib.urlencode(parameters) - request = {} - request['uri_params'] = uri_params - request['parameters'] = parameters - return request - - def sign_request(self, parameters): - h = "" - parameters.keys_sort_lowercase = True - parameters.keys_return_lowercase = False - for key in parameters: - h += "%s%s" % (key, parameters[key]) - #debug("SignRequest: %s" % h) - return base64.encodestring(hmac.new(self.config.secret_key, h, sha).digest()).strip() - - def get_connection(self): - if self.config.proxy_host != "": - return httplib.HTTPConnection(self.config.proxy_host, self.config.proxy_port) - else: - if self.config.use_https: - return httplib.HTTPSConnection(self.config.simpledb_host) - else: - return httplib.HTTPConnection(self.config.simpledb_host) - - def format_uri(self, uri_params): - if self.config.proxy_host != "": - uri = "http://%s/?%s" % (self.config.simpledb_host, uri_params) - else: - uri = "/?%s" % uri_params - #debug('format_uri(): ' + uri) - return uri - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/SortedDict.py b/fabfile/S3/SortedDict.py deleted file mode 100644 index b7b2247..0000000 --- a/fabfile/S3/SortedDict.py +++ /dev/null @@ -1,66 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -from BidirMap import BidirMap -import Utils - -class SortedDictIterator(object): - def __init__(self, sorted_dict, keys): - self.sorted_dict = sorted_dict - self.keys = keys - - def next(self): - try: - return self.keys.pop(0) - except IndexError: - raise StopIteration - -class SortedDict(dict): - def __init__(self, mapping = {}, ignore_case = True, **kwargs): - """ - 
WARNING: SortedDict() with ignore_case==True will - drop entries differing only in capitalisation! - Eg: SortedDict({'auckland':1, 'Auckland':2}).keys() => ['Auckland'] - With ignore_case==False it's all right - """ - dict.__init__(self, mapping, **kwargs) - self.ignore_case = ignore_case - - def keys(self): - keys = dict.keys(self) - if self.ignore_case: - # Translation map - xlat_map = BidirMap() - for key in keys: - xlat_map[key.lower()] = key - # Lowercase keys - lc_keys = xlat_map.keys() - lc_keys.sort() - return [xlat_map[k] for k in lc_keys] - else: - keys.sort() - return keys - - def __iter__(self): - return SortedDictIterator(self, self.keys()) - - - -if __name__ == "__main__": - d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 } - sd = SortedDict(d) - print "Wanted: Action, america, Auckland, AWS, [ignore case]" - print "Got: ", - for key in sd: - print "%s," % key, - print " [used: __iter__()]" - d = SortedDict(d, ignore_case = False) - print "Wanted: AWS, Action, Auckland, america, [case sensitive]" - print "Got: ", - for key in d.keys(): - print "%s," % key, - print " [used: keys()]" - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/Utils.py b/fabfile/S3/Utils.py deleted file mode 100644 index bb98c67..0000000 --- a/fabfile/S3/Utils.py +++ /dev/null @@ -1,462 +0,0 @@ -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import datetime -import os -import sys -import time -import re -import string -import random -import rfc822 -import hmac -import base64 -import errno -import urllib - -from logging import debug, info, warning, error - - -import Config -import Exceptions - -# hashlib backported to python 2.4 / 2.5 is not compatible with hmac! 
-if sys.version_info[0] == 2 and sys.version_info[1] < 6: - from md5 import md5 - import sha as sha1 -else: - from hashlib import md5, sha1 - -try: - import xml.etree.ElementTree as ET -except ImportError: - import elementtree.ElementTree as ET -from xml.parsers.expat import ExpatError - -__all__ = [] -def parseNodes(nodes): - ## WARNING: Ignores text nodes from mixed xml/text. - ## For instance some textother text - ## will be ignore "some text" node - retval = [] - for node in nodes: - retval_item = {} - for child in node.getchildren(): - name = child.tag - if child.getchildren(): - retval_item[name] = parseNodes([child]) - else: - retval_item[name] = node.findtext(".//%s" % child.tag) - retval.append(retval_item) - return retval -__all__.append("parseNodes") - -def stripNameSpace(xml): - """ - removeNameSpace(xml) -- remove top-level AWS namespace - """ - r = re.compile('^(]+?>\s?)(<\w+) xmlns=[\'"](http://[^\'"]+)[\'"](.*)', re.MULTILINE) - if r.match(xml): - xmlns = r.match(xml).groups()[2] - xml = r.sub("\\1\\2\\4", xml) - else: - xmlns = None - return xml, xmlns -__all__.append("stripNameSpace") - -def getTreeFromXml(xml): - xml, xmlns = stripNameSpace(xml) - try: - tree = ET.fromstring(xml) - if xmlns: - tree.attrib['xmlns'] = xmlns - return tree - except ExpatError, e: - error(e) - raise Exceptions.ParameterError("Bucket contains invalid filenames. Please run: s3cmd fixbucket s3://your-bucket/") -__all__.append("getTreeFromXml") - -def getListFromXml(xml, node): - tree = getTreeFromXml(xml) - nodes = tree.findall('.//%s' % (node)) - return parseNodes(nodes) -__all__.append("getListFromXml") - -def getDictFromTree(tree): - ret_dict = {} - for child in tree.getchildren(): - if child.getchildren(): - ## Complex-type child. 
Recurse - content = getDictFromTree(child) - else: - content = child.text - if ret_dict.has_key(child.tag): - if not type(ret_dict[child.tag]) == list: - ret_dict[child.tag] = [ret_dict[child.tag]] - ret_dict[child.tag].append(content or "") - else: - ret_dict[child.tag] = content or "" - return ret_dict -__all__.append("getDictFromTree") - -def getTextFromXml(xml, xpath): - tree = getTreeFromXml(xml) - if tree.tag.endswith(xpath): - return tree.text - else: - return tree.findtext(xpath) -__all__.append("getTextFromXml") - -def getRootTagName(xml): - tree = getTreeFromXml(xml) - return tree.tag -__all__.append("getRootTagName") - -def xmlTextNode(tag_name, text): - el = ET.Element(tag_name) - el.text = unicode(text) - return el -__all__.append("xmlTextNode") - -def appendXmlTextNode(tag_name, text, parent): - """ - Creates a new Node and sets - its content to 'text'. Then appends the - created Node to 'parent' element if given. - Returns the newly created Node. - """ - el = xmlTextNode(tag_name, text) - parent.append(el) - return el -__all__.append("appendXmlTextNode") - -def dateS3toPython(date): - date = re.compile("(\.\d*)?Z").sub(".000Z", date) - return time.strptime(date, "%Y-%m-%dT%H:%M:%S.000Z") -__all__.append("dateS3toPython") - -def dateS3toUnix(date): - ## FIXME: This should be timezone-aware. - ## Currently the argument to strptime() is GMT but mktime() - ## treats it as "localtime". Anyway... 
- return time.mktime(dateS3toPython(date)) -__all__.append("dateS3toUnix") - -def dateRFC822toPython(date): - return rfc822.parsedate(date) -__all__.append("dateRFC822toPython") - -def dateRFC822toUnix(date): - return time.mktime(dateRFC822toPython(date)) -__all__.append("dateRFC822toUnix") - -def formatSize(size, human_readable = False, floating_point = False): - size = floating_point and float(size) or int(size) - if human_readable: - coeffs = ['k', 'M', 'G', 'T'] - coeff = "" - while size > 2048: - size /= 1024 - coeff = coeffs.pop(0) - return (size, coeff) - else: - return (size, "") -__all__.append("formatSize") - -def formatDateTime(s3timestamp): - try: - import pytz - timezone = pytz.timezone(os.environ.get('TZ', 'UTC')) - tz = pytz.timezone('UTC') - ## Can't unpack args and follow that with kwargs in python 2.5 - ## So we pass them all as kwargs - params = zip(('year', 'month', 'day', 'hour', 'minute', 'second', 'tzinfo'), - dateS3toPython(s3timestamp)[0:6] + (tz,)) - params = dict(params) - utc_dt = datetime.datetime(**params) - dt_object = utc_dt.astimezone(timezone) - except ImportError: - dt_object = datetime.datetime(*dateS3toPython(s3timestamp)[0:6]) - return dt_object.strftime("%Y-%m-%d %H:%M") -__all__.append("formatDateTime") - -def convertTupleListToDict(list): - retval = {} - for tuple in list: - retval[tuple[0]] = tuple[1] - return retval -__all__.append("convertTupleListToDict") - -_rnd_chars = string.ascii_letters+string.digits -_rnd_chars_len = len(_rnd_chars) -def rndstr(len): - retval = "" - while len > 0: - retval += _rnd_chars[random.randint(0, _rnd_chars_len-1)] - len -= 1 - return retval -__all__.append("rndstr") - -def mktmpsomething(prefix, randchars, createfunc): - old_umask = os.umask(0077) - tries = 5 - while tries > 0: - dirname = prefix + rndstr(randchars) - try: - createfunc(dirname) - break - except OSError, e: - if e.errno != errno.EEXIST: - os.umask(old_umask) - raise - tries -= 1 - - os.umask(old_umask) - return dirname 
-__all__.append("mktmpsomething") - -def mktmpdir(prefix = "/tmp/tmpdir-", randchars = 10): - return mktmpsomething(prefix, randchars, os.mkdir) -__all__.append("mktmpdir") - -def mktmpfile(prefix = "/tmp/tmpfile-", randchars = 20): - createfunc = lambda filename : os.close(os.open(filename, os.O_CREAT | os.O_EXCL)) - return mktmpsomething(prefix, randchars, createfunc) -__all__.append("mktmpfile") - -def hash_file_md5(filename): - h = md5() - f = open(filename, "rb") - while True: - # Hash 32kB chunks - data = f.read(32*1024) - if not data: - break - h.update(data) - f.close() - return h.hexdigest() -__all__.append("hash_file_md5") - -def mkdir_with_parents(dir_name): - """ - mkdir_with_parents(dst_dir) - - Create directory 'dir_name' with all parent directories - - Returns True on success, False otherwise. - """ - pathmembers = dir_name.split(os.sep) - tmp_stack = [] - while pathmembers and not os.path.isdir(os.sep.join(pathmembers)): - tmp_stack.append(pathmembers.pop()) - while tmp_stack: - pathmembers.append(tmp_stack.pop()) - cur_dir = os.sep.join(pathmembers) - try: - debug("mkdir(%s)" % cur_dir) - os.mkdir(cur_dir) - except (OSError, IOError), e: - warning("%s: can not make directory: %s" % (cur_dir, e.strerror)) - return False - except Exception, e: - warning("%s: %s" % (cur_dir, e)) - return False - return True -__all__.append("mkdir_with_parents") - -def unicodise(string, encoding = None, errors = "replace"): - """ - Convert 'string' to Unicode or raise an exception. 
- """ - - if not encoding: - encoding = Config.Config().encoding - - if type(string) == unicode: - return string - debug("Unicodising %r using %s" % (string, encoding)) - try: - return string.decode(encoding, errors) - except UnicodeDecodeError: - raise UnicodeDecodeError("Conversion to unicode failed: %r" % string) -__all__.append("unicodise") - -def deunicodise(string, encoding = None, errors = "replace"): - """ - Convert unicode 'string' to , by default replacing - all invalid characters with '?' or raise an exception. - """ - - if not encoding: - encoding = Config.Config().encoding - - if type(string) != unicode: - return str(string) - debug("DeUnicodising %r using %s" % (string, encoding)) - try: - return string.encode(encoding, errors) - except UnicodeEncodeError: - raise UnicodeEncodeError("Conversion from unicode failed: %r" % string) -__all__.append("deunicodise") - -def unicodise_safe(string, encoding = None): - """ - Convert 'string' to Unicode according to current encoding - and replace all invalid characters with '?' - """ - - return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?') -__all__.append("unicodise_safe") - -def replace_nonprintables(string): - """ - replace_nonprintables(string) - - Replaces all non-printable characters 'ch' in 'string' - where ord(ch) <= 26 with ^@, ^A, ... ^Z - """ - new_string = "" - modified = 0 - for c in string: - o = ord(c) - if (o <= 31): - new_string += "^" + chr(ord('@') + o) - modified += 1 - elif (o == 127): - new_string += "^?" - modified += 1 - else: - new_string += c - if modified and Config.Config().urlencoding_mode != "fixbucket": - warning("%d non-printable characters replaced in: %s" % (modified, new_string)) - return new_string -__all__.append("replace_nonprintables") - -def sign_string(string_to_sign): - """Sign a string with the secret key, returning base64 encoded results. - By default the configured secret key is used, but may be overridden as - an argument. 
- - Useful for REST authentication. See http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html - """ - signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip() - return signature -__all__.append("sign_string") - -def sign_url(url_to_sign, expiry): - """Sign a URL in s3://bucket/object form with the given expiry - time. The object will be accessible via the signed URL until the - AWS key and secret are revoked or the expiry time is reached, even - if the object is otherwise private. - - See: http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html - """ - return sign_url_base( - bucket = url_to_sign.bucket(), - object = url_to_sign.object(), - expiry = expiry - ) -__all__.append("sign_url") - -def sign_url_base(**parms): - """Shared implementation of sign_url methods. Takes a hash of 'bucket', 'object' and 'expiry' as args.""" - parms['expiry']=time_to_epoch(parms['expiry']) - parms['access_key']=Config.Config().access_key - debug("Expiry interpreted as epoch time %s", parms['expiry']) - signtext = 'GET\n\n\n%(expiry)d\n/%(bucket)s/%(object)s' % parms - debug("Signing plaintext: %r", signtext) - parms['sig'] = urllib.quote_plus(sign_string(signtext)) - debug("Urlencoded signature: %s", parms['sig']) - return "http://%(bucket)s.s3.amazonaws.com/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s" % parms - -def time_to_epoch(t): - """Convert time specified in a variety of forms into UNIX epoch time. 
- Accepts datetime.datetime, int, anything that has a strftime() method, and standard time 9-tuples - """ - if isinstance(t, int): - # Already an int - return t - elif isinstance(t, tuple) or isinstance(t, time.struct_time): - # Assume it's a time 9-tuple - return int(time.mktime(t)) - elif hasattr(t, 'timetuple'): - # Looks like a datetime object or compatible - return int(time.mktime(ex.timetuple())) - elif hasattr(t, 'strftime'): - # Looks like the object supports standard srftime() - return int(t.strftime('%s')) - elif isinstance(t, str) or isinstance(t, unicode): - # See if it's a string representation of an epoch - try: - return int(t) - except ValueError: - # Try to parse it as a timestamp string - try: - return time.strptime(t) - except ValueError, ex: - # Will fall through - debug("Failed to parse date with strptime: %s", ex) - pass - raise Exceptions.ParameterError('Unable to convert %r to an epoch time. Pass an epoch time. Try `date -d \'now + 1 year\' +%%s` (shell) or time.mktime (Python).' % t) - - -def check_bucket_name(bucket, dns_strict = True): - if dns_strict: - invalid = re.search("([^a-z0-9\.-])", bucket) - if invalid: - raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: lowercase us-ascii letters (a-z), digits (0-9), dot (.) and hyphen (-)." % (bucket, invalid.groups()[0])) - else: - invalid = re.search("([^A-Za-z0-9\._-])", bucket) - if invalid: - raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: us-ascii letters (a-z, A-Z), digits (0-9), dot (.), hyphen (-) and underscore (_)." 
% (bucket, invalid.groups()[0])) - - if len(bucket) < 3: - raise Exceptions.ParameterError("Bucket name '%s' is too short (min 3 characters)" % bucket) - if len(bucket) > 255: - raise Exceptions.ParameterError("Bucket name '%s' is too long (max 255 characters)" % bucket) - if dns_strict: - if len(bucket) > 63: - raise Exceptions.ParameterError("Bucket name '%s' is too long (max 63 characters)" % bucket) - if re.search("-\.", bucket): - raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '-.' for DNS compatibility" % bucket) - if re.search("\.\.", bucket): - raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '..' for DNS compatibility" % bucket) - if not re.search("^[0-9a-z]", bucket): - raise Exceptions.ParameterError("Bucket name '%s' must start with a letter or a digit" % bucket) - if not re.search("[0-9a-z]$", bucket): - raise Exceptions.ParameterError("Bucket name '%s' must end with a letter or a digit" % bucket) - return True -__all__.append("check_bucket_name") - -def check_bucket_name_dns_conformity(bucket): - try: - return check_bucket_name(bucket, dns_strict = True) - except Exceptions.ParameterError: - return False -__all__.append("check_bucket_name_dns_conformity") - -def getBucketFromHostname(hostname): - """ - bucket, success = getBucketFromHostname(hostname) - - Only works for hostnames derived from bucket names - using Config.host_bucket pattern. - - Returns bucket name and a boolean success flag. 
- """ - - # Create RE pattern from Config.host_bucket - pattern = Config.Config().host_bucket % { 'bucket' : '(?P.*)' } - m = re.match(pattern, hostname) - if not m: - return (hostname, False) - return m.groups()[0], True -__all__.append("getBucketFromHostname") - -def getHostnameFromBucket(bucket): - return Config.Config().host_bucket % { 'bucket' : bucket } -__all__.append("getHostnameFromBucket") - -# vim:et:ts=4:sts=4:ai diff --git a/fabfile/S3/__init__.py b/fabfile/S3/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/fabfile/__init__.py b/fabfile/__init__.py deleted file mode 100644 index 471d5aa..0000000 --- a/fabfile/__init__.py +++ /dev/null @@ -1,560 +0,0 @@ -# -*- coding: utf-8 -*- -import sys -import os -from os.path import dirname, abspath, join -from datetime import date -import json -import codecs -import shutil -import fnmatch -import re -import collections -from fabric.api import env, settings, hide, local, lcd -from fabric.decorators import task -from fabric.operations import prompt -from fabric.utils import puts, abort, warn - -env.debug = False - -# -# Set paths -# -env.project_path = dirname(dirname(abspath(__file__))) -env.sites_path = dirname(env.project_path) -env.build_path = join(env.project_path, 'build') -env.source_path = join(env.project_path, 'source') - -# -# Read config.json and update vars -# -with open(join(env.project_path, 'config.json')) as fp: - s = fp.read() - s = re.sub(r'//.*', '', s) - s = re.sub(r'/\*.*?\*/', '', s, flags=re.DOTALL) - CONFIG = json.loads(s, object_pairs_hook=collections.OrderedDict) - -today = date.today() -CONFIG['date'] = today -CONFIG['year'] = today.year - -# Path to cdn deployment -env.cdn_path = abspath(join( - env.sites_path, 'cdn.knightlab.com', 'app', 'libs', CONFIG['name'])) - -# Path to s3cmd.cnf in secrets repository -env.s3cmd_cfg = join(env.sites_path, 'secrets', 's3cmd.cfg') - -# Banner for the top of CSS and JS files -BANNER = """ -/* - TimelineJS - ver. 
%(version)s - %(date)s - Copyright (c) 2012-%(year)s Northwestern University - a project of the Northwestern University Knight Lab, originally created by Zach Wise - https://github.com/NUKnightLab/TimelineJS - This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. - If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - */ -""".lstrip() - - -def _check_path(path): - """Check for the existence of a path""" - if not os.path.exists(path): - abort('Could not find %s.' % path) - -def _clean(path): - """Delete directory contents""" - path = os.path.abspath(path) - puts('clean: %s' % path) - - if os.path.exists(path): - if os.path.isdir(path): - for item in [join(path, x) for x in os.listdir(path)]: - if os.path.isfile(item): - os.unlink(item) - else: - shutil.rmtree(item) - else: - os.unlink(path) - -def _find_file(file_name, cur_dir): - """Find a file. Look first in cur_dir, then env.source_path""" - file_path = os.path.abspath(join(cur_dir, file_name)) - if os.path.exists(file_path): - return file_path - for dirpath, dirs, files in os.walk(env.source_path): - if file_name in files: - return os.path.join(dirpath, file_name) - raise Exception('Could not find "%s" in %s' % (file_name, env.source_path)) - -def _match_files(src, regex): - """Return relative filepaths matching regex in src""" - re_match = re.compile(regex) - - for (dirpath, dirnames, filenames) in os.walk(src): - rel_dir = _relpath(src, dirpath) - - for f in filter(lambda x: not x.startswith('.'), filenames): - rel_path = join(rel_dir, f) - if re_match.match(rel_path): - yield rel_path - -def _makedirs(path, isfile=False): - """Make directories in path""" - if isfile: - path = dirname(path) - if not os.path.exists(path): - os.makedirs(path) - -def _open_file(path, mode, encoding=''): - """Open a file with character encoding detection""" - if mode.startswith('r'): - bytes = min(32, os.path.getsize(path)) - - with open(path, 
'rb') as fd: - raw = fd.read() - if raw.startswith(codecs.BOM_UTF8): - encoding = 'utf-8-sig' - else: - encoding = 'utf-8' - - return codecs.open(path, mode, encoding) - -def _relpath(root_path, path): - """Get relative path from root_path""" - if root_path == path: - return '' - return os.path.relpath(path, root_path) - -# -# tagging -# - -def _get_tags(): - """Get list of current tags from the repo""" - tags = os.popen('cd %(project_path)s;git tag' % env).read().strip() - if tags: - return [x.strip() for x in tags.split('\n')] - return [] - -def _last_version_tag(): - """Get the last version tag""" - re_num = re.compile('[^0-9.]') - - tags = sorted([map(int, re_num.sub('', t).split('.')) for t in _get_tags()]) - if tags: - return '.'.join(map(str, tags[-1])) - return None - -def _get_version_tag(): - """Get a new version tag from user""" - tags = _get_tags() - puts('This project has the following tags:') - puts(tags) - - while True: - version = prompt("Enter a new version number: ").strip() - - if not re.match(r'^[0-9]+\.[0-9]+\.[0-9]+$', version): - warn('Invalid version number, must be in the format:' \ - ' major.minor.revision') - elif version in tags: - warn('Invalid version number, tag already exists') - else: - break - - return version - -def _render_templates(src_path, dst_path): - """Render flask templates""" - puts('render: %s >> %s' % (src_path, dst_path)) - from website import app - from flask import g, request - - compiled_includes = [] - - for f in _match_files(src_path, '^[^_].*$'): - with app.app.test_request_context(): - g.compile_includes = True - g.compiled_includes = compiled_includes - content = app.catch_all(f) - compiled_includes = g.compiled_includes - - page_file = join(dst_path, f) - puts(' %s' % page_file) - _makedirs(page_file, isfile=True) - with open(page_file, 'w') as fd: - fd.write(content.encode('utf-8')) - - -# -# build steps -# - -def banner(conf): - """ - Place banner at top of js and css files in-place. 
- """ - _banner_text = BANNER % CONFIG - - def _do(file_path): - puts(' %s' % file_path) - with _open_file(file_path, 'r+') as fd: - s = fd.read() - fd.seek(0) - fd.write(_banner_text+s) - - for r in conf: - src = join(env.project_path, r) - puts('banner: %s' % src) - if os.path.isdir(src): - for f in _match_files(src, '.*\.(css|js)$'): - _do(join(src, f)) - else: - _do(src) - -def concat(conf): - """ - Concatenate files - """ - for r in conf: - dst = join(env.project_path, r['dst']) - src = map(lambda x: join(env.project_path, x), r['src']) - _makedirs(dst, isfile=True) - local('cat %s > %s' % (' '.join(src), dst)) - -def copy(conf): - """ - Copy files - """ - def _do(src_path, dst_path): - puts(' %s' % src_path) - _makedirs(dst_path, isfile=True) - shutil.copy2(src_path, dst_path) - - for r in conf: - src = join(env.project_path, r['src']) - dst = join(env.project_path, r['dst']) - puts('copy: %s >> %s' % (src, dst)) - if os.path.isdir(src): - regex = r['regex'] if 'regex' in r else '.*' - for f in _match_files(src, regex): - _do(join(src, f), join(dst, f)) - else: - _do(src, dst) - -def lessc(conf): - """ - Compile LESS - """ - def _do(src_path, dst_path): - _makedirs(dst_path, isfile=True) - with hide('warnings'), settings(warn_only=True): - result = local('lessc -x %s %s' % (src_path, dst_path)) - if result.failed: - abort('Error running lessc on %s' % src_path) - - if not os.popen('which lessc').read().strip(): - abort('You must install the LESS compiler') - - for r in conf: - src = join(env.project_path, r['src']) - dst = join(env.project_path, r['dst']) - - if os.path.isdir(src): - regex = r['regex'] if 'regex' in r else '.*' - for f in _match_files(src, regex): - (base, ext) = os.path.splitext(join(dst, f)) - _do(join(src, f), base+".css") - else: - _do(src, dst) - - -def minify(conf): - """ - Minify javascript - """ - def _do(src_path, dst_path, opt): - local('uglifyjs %s --output %s %s' % (opt, dst_path, src_path)) - - for r in conf: - src = 
join(env.project_path, r['src']) - dst = join(env.project_path, r['dst']) - puts('minify: %s >> %s' % (src, dst)) - - opt = r['opt'] if ('opt' in r) else '' - out_ext = r['ext'] if ('ext' in r) else '' - - if os.path.isdir(src): - _makedirs(dst, isfile=False) - for f in _match_files(src, '.*\.js'): - (base, in_ext) = os.path.splitext(join(dst, f)) - _do(join(src, f), base+out_ext+in_ext, opt) - else: - _makedirs(dst, isfile=True) - _do(src, dst, opt) - - -def process(conf): - """ - Process codekit style imports - """ - _re_prepend = re.compile(r'@codekit-prepend\s*[\'"](?P.+)[\'"]\s*;') - _re_append = re.compile(r'@codekit-append\s*[\'"](?P.+)[\'"]\s*;') - - def _mark(f_out, path): - f_out.write(""" -/* ********************************************** - Begin %s -********************************************** */ - -""" % os.path.basename(path)) - - def _do(f_out, path, imported): - s = '' - dirpath = dirname(path) - with _open_file(path, 'r') as f_in: - s = f_in.read() - - # Write out prepends - for m in _re_prepend.finditer(s): - file_path = _find_file(m.group('file'), dirpath) - if not file_path in imported: - puts(' prepend: %s' % file_path) - imported.append(file_path) - _do(f_out, file_path, imported) - - # Write out file - _mark(f_out, os.path.basename(path)) - f_out.write(s+'\n') - - # Write out appends - for m in _re_append.finditer(s): - file_path = _find_file(m.group('file'), dirpath) - if not file_path in imported: - puts(' append: %s' % file_path) - imported.append(file_path) - _do(f_out, file_path, imported) - - for r in conf: - src = join(env.project_path, r['src']) - dst = join(env.project_path, r['dst']) - puts('process: %s >> %s' % (src, dst)) - - _makedirs(dst, isfile=True) - with _open_file(dst, 'w', 'utf-8') as out_file: - _do(out_file, src, []) - - -def usemin(conf): - """ - Replaces usemin-style build blocks with a reference to a single file. 
- - Build blocks take the format: - - - (references to unoptimized files go here) - - - where: - type = css | js - path = reference to the optimized file - - Any leading backslashes will be stripped, but the path will otherwise - by used as it appears within the opening build tag. - """ - _re_build = re.compile(r""" - - .*? - - """, - re.VERBOSE | re.DOTALL) - - def _sub(m): - type = m.group('type') - dest = m.group('dest').strip('\\') - - if type == 'css': - return '' % dest - elif type == 'js': - return '' % dest - else: - warn('Unknown build block type (%s)' % type) - return m.group(0) - - def _do(file_path): - with _open_file(file_path, 'r+') as fd: - s = fd.read() - (new_s, n) = _re_build.subn(_sub, s) - if n: - puts(' (%d) %s' % (n, file_path)) - fd.seek(0) - fd.write(new_s) - fd.truncate() - - for r in conf: - src = join(env.project_path, r) - puts('usemin: %s' % src) - - if os.path.isdir(src): - for f in _match_files(src, '.*\.html'): - _do(join(src, f)) - else: - _do(src) - - -# -# tasks -# - -@task -def debug(): - """Setup debug settings""" - warn('DEBUG IS ON:') - CONFIG['deploy']['bucket'] = 'test.knilab.com' - CONFIG['version'] = '0.0.0' - - print 'deploy.bucket:', CONFIG['deploy']['bucket'] - print 'version:', CONFIG['version'] - print 'version tagging is OFF' - print '' - - doit = prompt("Continue? (y/n): ").strip() - if doit != 'y': - abort('Stopped') - - env.debug = True - -@task -def serve(): - """Run the local version of the documentation site (timeline.knightlab.com)""" - with lcd(join(env.project_path)): - local('python website/app.py') - - -@task -def build(): - """Build version""" - # Get build config - if not 'build' in CONFIG: - abort('Could not find "build" in config file') - - # Determine version - if not 'version' in CONFIG: - CONFIG['version'] = _last_version_tag() - if not CONFIG['version']: - abort('No available version tag') - - print 'Building version %(version)s...' 
% CONFIG - - # Clean build directory - _clean(env.build_path) - - for key, param in CONFIG['build'].iteritems(): - getattr(sys.modules[__name__], key)(param) - - -@task -def stage(): - """ - Build version, copy to local cdn repository, tag last commit - """ - if not 'stage' in CONFIG: - abort('Could not find "stage" in config file') - - # Make sure cdn exists - _check_path(dirname(env.cdn_path)) - - # Ask user for a new version - if not env.debug: - CONFIG['version'] = _get_version_tag() - - build() - - cdn_path = join(env.cdn_path, CONFIG['version']) - - _clean(cdn_path) - - for r in CONFIG['stage']: - copy([{"src": r['src'], "dst": cdn_path, "regex": r['regex']}]) - - if not env.debug: - with lcd(env.project_path): - local('git tag %(version)s' % CONFIG) - local('git push origin %(version)s' % CONFIG) - - -@task -def stage_latest(): - """ - Copy version to latest within local cdn repository - """ - if 'version' in CONFIG: - version = CONFIG['version'] - else: - tags = _get_tags() - puts('This project has the following tags:') - puts(tags) - - while True: - version = prompt("Which version to stage as 'latest'? ").strip() - if not version in tags: - warn('You must enter an existing version') - else: - break - - print 'stage_latest: %s' % version - - # Make sure version has been staged - version_cdn_path = join(env.cdn_path, version) - if not os.path.exists(version_cdn_path): - abort("Version '%s' has not been staged" % version) - - # Stage version as latest - latest_cdn_path = join(env.cdn_path, 'latest') - _clean(latest_cdn_path) - copy([{"src": version_cdn_path, "dst": latest_cdn_path}]) - - -@task -def deploy(): - """Deploy to S3 bucket""" - if not 'deploy' in CONFIG: - abort('Could not find "deploy" in config file') - - # Make sure s3cmd.cnf exists - _check_path(env.s3cmd_cfg) - - # Do we need to build anything here?!? 
- #build() - - template_path = join(env.project_path, 'website', 'templates') - deploy_path = join(env.project_path, 'build', 'website') - - _clean(deploy_path) - - # render templates and run usemin - _render_templates(template_path, deploy_path) - usemin([deploy_path]) - - # copy static fiels - copy([{ - "src": join(env.project_path, 'website', 'static'), - "dst": join(deploy_path, 'static') - }]) - - # additional copy? - if 'copy' in CONFIG['deploy']: - copy(CONFIG['deploy']['copy']) - - # sync to S3 - with lcd(env.project_path): - local('fabfile/s3cmd --config=%s sync' \ - ' --rexclude ".*/\.[^/]*$"' \ - ' --delete-removed --acl-public' \ - ' %s/ s3://%s/' \ - % (env.s3cmd_cfg, deploy_path, CONFIG['deploy']['bucket']) - ) - - - \ No newline at end of file diff --git a/fabfile/s3cmd b/fabfile/s3cmd deleted file mode 100755 index 2db03e2..0000000 --- a/fabfile/s3cmd +++ /dev/null @@ -1,2116 +0,0 @@ -#!/usr/bin/python - -## Amazon S3 manager -## Author: Michal Ludvig -## http://www.logix.cz/michal -## License: GPL Version 2 - -import sys - -if float("%d.%d" %(sys.version_info[0], sys.version_info[1])) < 2.4: - sys.stderr.write("ERROR: Python 2.4 or higher required, sorry.\n") - sys.exit(1) - -import logging -import time -import os -import re -import errno -import glob -import traceback -import codecs -import locale -import subprocess -import htmlentitydefs -import socket -import shutil -import tempfile -import S3.Exceptions - -from copy import copy -from optparse import OptionParser, Option, OptionValueError, IndentedHelpFormatter -from logging import debug, info, warning, error -from distutils.spawn import find_executable - -def output(message): - sys.stdout.write(message + "\n") - sys.stdout.flush() - -def check_args_type(args, type, verbose_type): - for arg in args: - if S3Uri(arg).type != type: - raise ParameterError("Expecting %s instead of '%s'" % (verbose_type, arg)) - -def cmd_du(args): - s3 = S3(Config()) - if len(args) > 0: - uri = S3Uri(args[0]) - if 
uri.type == "s3" and uri.has_bucket(): - subcmd_bucket_usage(s3, uri) - return - subcmd_bucket_usage_all(s3) - -def subcmd_bucket_usage_all(s3): - response = s3.list_all_buckets() - - buckets_size = 0 - for bucket in response["list"]: - size = subcmd_bucket_usage(s3, S3Uri("s3://" + bucket["Name"])) - if size != None: - buckets_size += size - total_size, size_coeff = formatSize(buckets_size, Config().human_readable_sizes) - total_size_str = str(total_size) + size_coeff - output(u"".rjust(8, "-")) - output(u"%s Total" % (total_size_str.ljust(8))) - -def subcmd_bucket_usage(s3, uri): - bucket = uri.bucket() - object = uri.object() - - if object.endswith('*'): - object = object[:-1] - - bucket_size = 0 - # iterate and store directories to traverse, while summing objects: - dirs = [object] - while dirs: - try: - response = s3.bucket_list(bucket, prefix=dirs.pop()) - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % bucket) - return - else: - raise - - # objects in the current scope: - for obj in response["list"]: - bucket_size += int(obj["Size"]) - - # directories found in current scope: - for obj in response["common_prefixes"]: - dirs.append(obj["Prefix"]) - - total_size, size_coeff = formatSize(bucket_size, Config().human_readable_sizes) - total_size_str = str(total_size) + size_coeff - output(u"%s %s" % (total_size_str.ljust(8), uri)) - return bucket_size - -def cmd_ls(args): - s3 = S3(Config()) - if len(args) > 0: - uri = S3Uri(args[0]) - if uri.type == "s3" and uri.has_bucket(): - subcmd_bucket_list(s3, uri) - return - subcmd_buckets_list_all(s3) - -def cmd_buckets_list_all_all(args): - s3 = S3(Config()) - - response = s3.list_all_buckets() - - for bucket in response["list"]: - subcmd_bucket_list(s3, S3Uri("s3://" + bucket["Name"])) - output(u"") - - -def subcmd_buckets_list_all(s3): - response = s3.list_all_buckets() - for bucket in response["list"]: - output(u"%s s3://%s" % ( - formatDateTime(bucket["CreationDate"]), - 
bucket["Name"], - )) - -def subcmd_bucket_list(s3, uri): - bucket = uri.bucket() - prefix = uri.object() - - debug(u"Bucket 's3://%s':" % bucket) - if prefix.endswith('*'): - prefix = prefix[:-1] - try: - response = s3.bucket_list(bucket, prefix = prefix) - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % bucket) - return - else: - raise - - if cfg.list_md5: - format_string = u"%(timestamp)16s %(size)9s%(coeff)1s %(md5)32s %(uri)s" - else: - format_string = u"%(timestamp)16s %(size)9s%(coeff)1s %(uri)s" - - for prefix in response['common_prefixes']: - output(format_string % { - "timestamp": "", - "size": "DIR", - "coeff": "", - "md5": "", - "uri": uri.compose_uri(bucket, prefix["Prefix"])}) - - for object in response["list"]: - size, size_coeff = formatSize(object["Size"], Config().human_readable_sizes) - output(format_string % { - "timestamp": formatDateTime(object["LastModified"]), - "size" : str(size), - "coeff": size_coeff, - "md5" : object['ETag'].strip('"'), - "uri": uri.compose_uri(bucket, object["Key"]), - }) - -def cmd_bucket_create(args): - s3 = S3(Config()) - for arg in args: - uri = S3Uri(arg) - if not uri.type == "s3" or not uri.has_bucket() or uri.has_object(): - raise ParameterError("Expecting S3 URI with just the bucket name set instead of '%s'" % arg) - try: - response = s3.bucket_create(uri.bucket(), cfg.bucket_location) - output(u"Bucket '%s' created" % uri.uri()) - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % uri.bucket()) - return - else: - raise - -def cmd_website_info(args): - s3 = S3(Config()) - for arg in args: - uri = S3Uri(arg) - if not uri.type == "s3" or not uri.has_bucket() or uri.has_object(): - raise ParameterError("Expecting S3 URI with just the bucket name set instead of '%s'" % arg) - try: - response = s3.website_info(uri, cfg.bucket_location) - if response: - output(u"Bucket %s: Website configuration" % uri.uri()) - output(u"Website 
endpoint: %s" % response['website_endpoint']) - output(u"Index document: %s" % response['index_document']) - output(u"Error document: %s" % response['error_document']) - else: - output(u"Bucket %s: Unable to receive website configuration." % (uri.uri())) - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % uri.bucket()) - return - else: - raise - -def cmd_website_create(args): - s3 = S3(Config()) - for arg in args: - uri = S3Uri(arg) - if not uri.type == "s3" or not uri.has_bucket() or uri.has_object(): - raise ParameterError("Expecting S3 URI with just the bucket name set instead of '%s'" % arg) - try: - response = s3.website_create(uri, cfg.bucket_location) - output(u"Bucket '%s': website configuration created." % (uri.uri())) - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % uri.bucket()) - return - else: - raise - -def cmd_website_delete(args): - s3 = S3(Config()) - for arg in args: - uri = S3Uri(arg) - if not uri.type == "s3" or not uri.has_bucket() or uri.has_object(): - raise ParameterError("Expecting S3 URI with just the bucket name set instead of '%s'" % arg) - try: - response = s3.website_delete(uri, cfg.bucket_location) - output(u"Bucket '%s': website configuration deleted." % (uri.uri())) - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % uri.bucket()) - return - else: - raise - -def cmd_bucket_delete(args): - def _bucket_delete_one(uri): - try: - response = s3.bucket_delete(uri.bucket()) - except S3Error, e: - if e.info['Code'] == 'BucketNotEmpty' and (cfg.force or cfg.recursive): - warning(u"Bucket is not empty. Removing all the objects from it first. 
This may take some time...") - subcmd_object_del_uri(uri.uri(), recursive = True) - return _bucket_delete_one(uri) - elif S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % uri.bucket()) - return - else: - raise - - s3 = S3(Config()) - for arg in args: - uri = S3Uri(arg) - if not uri.type == "s3" or not uri.has_bucket() or uri.has_object(): - raise ParameterError("Expecting S3 URI with just the bucket name set instead of '%s'" % arg) - _bucket_delete_one(uri) - output(u"Bucket '%s' removed" % uri.uri()) - -def cmd_object_put(args): - cfg = Config() - s3 = S3(cfg) - - if len(args) == 0: - raise ParameterError("Nothing to upload. Expecting a local file or directory and a S3 URI destination.") - - ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash) - destination_base_uri = S3Uri(args.pop()) - if destination_base_uri.type != 's3': - raise ParameterError("Destination must be S3Uri. Got: %s" % destination_base_uri) - destination_base = str(destination_base_uri) - - if len(args) == 0: - raise ParameterError("Nothing to upload. 
Expecting a local file or directory.") - - local_list, single_file_local = fetch_local_list(args) - - local_list, exclude_list = filter_exclude_include(local_list) - - local_count = len(local_list) - - info(u"Summary: %d local files to upload" % local_count) - - if local_count > 0: - if not single_file_local and '-' in local_list.keys(): - raise ParameterError("Cannot specify multiple local files if uploading from '-' (ie stdin)") - elif single_file_local and local_list.keys()[0] == "-" and destination_base.endswith("/"): - raise ParameterError("Destination S3 URI must not end with '/' when uploading from stdin.") - elif not destination_base.endswith("/"): - if not single_file_local: - raise ParameterError("Destination S3 URI must end with '/' (ie must refer to a directory on the remote side).") - local_list[local_list.keys()[0]]['remote_uri'] = unicodise(destination_base) - else: - for key in local_list: - local_list[key]['remote_uri'] = unicodise(destination_base + key) - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - for key in local_list: - if key != "-": - nicekey = local_list[key]['full_name_unicode'] - else: - nicekey = "" - output(u"upload: %s -> %s" % (nicekey, local_list[key]['remote_uri'])) - - warning(u"Exiting now because of --dry-run") - return - - seq = 0 - for key in local_list: - seq += 1 - - uri_final = S3Uri(local_list[key]['remote_uri']) - - extra_headers = copy(cfg.extra_headers) - full_name_orig = local_list[key]['full_name'] - full_name = full_name_orig - seq_label = "[%d of %d]" % (seq, local_count) - if Config().encrypt: - exitcode, full_name, extra_headers["x-amz-meta-s3tools-gpgenc"] = gpg_encrypt(full_name_orig) - try: - response = s3.object_put(full_name, uri_final, extra_headers, extra_label = seq_label) - except S3UploadError, e: - error(u"Upload of '%s' failed too many times. Skipping that file." 
% full_name_orig) - continue - except InvalidFileError, e: - warning(u"File can not be uploaded: %s" % e) - continue - speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) - if not Config().progress_meter: - output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % - (unicodise(full_name_orig), uri_final, response["size"], response["elapsed"], - speed_fmt[0], speed_fmt[1], seq_label)) - if Config().acl_public: - output(u"Public URL of the object is: %s" % - (uri_final.public_url())) - if Config().encrypt and full_name != full_name_orig: - debug(u"Removing temporary encrypted file: %s" % unicodise(full_name)) - os.remove(full_name) - -def cmd_object_get(args): - cfg = Config() - s3 = S3(cfg) - - ## Check arguments: - ## if not --recursive: - ## - first N arguments must be S3Uri - ## - if the last one is S3 make current dir the destination_base - ## - if the last one is a directory: - ## - take all 'basenames' of the remote objects and - ## make the destination name be 'destination_base'+'basename' - ## - if the last one is a file or not existing: - ## - if the number of sources (N, above) == 1 treat it - ## as a filename and save the object there. - ## - if there's more sources -> Error - ## if --recursive: - ## - first N arguments must be S3Uri - ## - for each Uri get a list of remote objects with that Uri as a prefix - ## - apply exclude/include rules - ## - each list item will have MD5sum, Timestamp and pointer to S3Uri - ## used as a prefix. - ## - the last arg may be '-' (stdout) - ## - the last arg may be a local directory - destination_base - ## - if the last one is S3 make current dir the destination_base - ## - if the last one doesn't exist check remote list: - ## - if there is only one item and its_prefix==its_name - ## download that item to the name given in last arg. - ## - if there are more remote items use the last arg as a destination_base - ## and try to create the directory (incl. 
all parents). - ## - ## In both cases we end up with a list mapping remote object names (keys) to local file names. - - ## Each item will be a dict with the following attributes - # {'remote_uri', 'local_filename'} - download_list = [] - - if len(args) == 0: - raise ParameterError("Nothing to download. Expecting S3 URI.") - - if S3Uri(args[-1]).type == 'file': - destination_base = args.pop() - else: - destination_base = "." - - if len(args) == 0: - raise ParameterError("Nothing to download. Expecting S3 URI.") - - remote_list = fetch_remote_list(args, require_attribs = False) - remote_list, exclude_list = filter_exclude_include(remote_list) - - remote_count = len(remote_list) - - info(u"Summary: %d remote files to download" % remote_count) - - if remote_count > 0: - if destination_base == "-": - ## stdout is ok for multiple remote files! - for key in remote_list: - remote_list[key]['local_filename'] = "-" - elif not os.path.isdir(destination_base): - ## We were either given a file name (existing or not) - if remote_count > 1: - raise ParameterError("Destination must be a directory or stdout when downloading multiple sources.") - remote_list[remote_list.keys()[0]]['local_filename'] = deunicodise(destination_base) - elif os.path.isdir(destination_base): - if destination_base[-1] != os.path.sep: - destination_base += os.path.sep - for key in remote_list: - remote_list[key]['local_filename'] = destination_base + key - else: - raise InternalError("WTF? Is it a dir or not? 
-- %s" % destination_base) - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - for key in remote_list: - output(u"download: %s -> %s" % (remote_list[key]['object_uri_str'], remote_list[key]['local_filename'])) - - warning(u"Exiting now because of --dry-run") - return - - seq = 0 - for key in remote_list: - seq += 1 - item = remote_list[key] - uri = S3Uri(item['object_uri_str']) - ## Encode / Decode destination with "replace" to make sure it's compatible with current encoding - destination = unicodise_safe(item['local_filename']) - seq_label = "[%d of %d]" % (seq, remote_count) - - start_position = 0 - - if destination == "-": - ## stdout - dst_stream = sys.__stdout__ - else: - ## File - try: - file_exists = os.path.exists(destination) - try: - dst_stream = open(destination, "ab") - except IOError, e: - if e.errno == errno.ENOENT: - basename = destination[:destination.rindex(os.path.sep)] - info(u"Creating directory: %s" % basename) - os.makedirs(basename) - dst_stream = open(destination, "ab") - else: - raise - if file_exists: - if Config().get_continue: - start_position = dst_stream.tell() - elif Config().force: - start_position = 0L - dst_stream.seek(0L) - dst_stream.truncate() - elif Config().skip_existing: - info(u"Skipping over existing file: %s" % (destination)) - continue - else: - dst_stream.close() - raise ParameterError(u"File %s already exists. Use either of --force / --continue / --skip-existing or give it a new name." % destination) - except IOError, e: - error(u"Skipping %s: %s" % (destination, e.strerror)) - continue - try: - response = s3.object_get(uri, dst_stream, start_position = start_position, extra_label = seq_label) - except S3Error, e: - if not file_exists: # Delete, only if file didn't exist before! - debug(u"object_get failed for '%s', deleting..." 
% (destination,)) - os.unlink(destination) - raise - - if response["headers"].has_key("x-amz-meta-s3tools-gpgenc"): - gpg_decrypt(destination, response["headers"]["x-amz-meta-s3tools-gpgenc"]) - response["size"] = os.stat(destination)[6] - if not Config().progress_meter and destination != "-": - speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) - output(u"File %s saved as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s)" % - (uri, destination, response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1])) - if Config().delete_after_fetch: - s3.object_delete(uri) - output(u"File %s removed after fetch" % (uri)) - -def cmd_object_del(args): - for uri_str in args: - uri = S3Uri(uri_str) - if uri.type != "s3": - raise ParameterError("Expecting S3 URI instead of '%s'" % uri_str) - if not uri.has_object(): - if Config().recursive and not Config().force: - raise ParameterError("Please use --force to delete ALL contents of %s" % uri_str) - elif not Config().recursive: - raise ParameterError("File name required, not only the bucket name. 
Alternatively use --recursive") - subcmd_object_del_uri(uri_str) - -def subcmd_object_del_uri(uri_str, recursive = None): - s3 = S3(cfg) - - if recursive is None: - recursive = cfg.recursive - - remote_list = fetch_remote_list(uri_str, require_attribs = False, recursive = recursive) - remote_list, exclude_list = filter_exclude_include(remote_list) - - remote_count = len(remote_list) - - info(u"Summary: %d remote files to delete" % remote_count) - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - for key in remote_list: - output(u"delete: %s" % remote_list[key]['object_uri_str']) - - warning(u"Exiting now because of --dry-run") - return - - for key in remote_list: - item = remote_list[key] - response = s3.object_delete(S3Uri(item['object_uri_str'])) - output(u"File %s deleted" % item['object_uri_str']) - -def subcmd_cp_mv(args, process_fce, action_str, message): - if len(args) < 2: - raise ParameterError("Expecting two or more S3 URIs for " + action_str) - dst_base_uri = S3Uri(args.pop()) - if dst_base_uri.type != "s3": - raise ParameterError("Destination must be S3 URI. 
To download a file use 'get' or 'sync'.") - destination_base = dst_base_uri.uri() - - remote_list = fetch_remote_list(args, require_attribs = False) - remote_list, exclude_list = filter_exclude_include(remote_list) - - remote_count = len(remote_list) - - info(u"Summary: %d remote files to %s" % (remote_count, action_str)) - - if cfg.recursive: - if not destination_base.endswith("/"): - destination_base += "/" - for key in remote_list: - remote_list[key]['dest_name'] = destination_base + key - else: - for key in remote_list: - if destination_base.endswith("/"): - remote_list[key]['dest_name'] = destination_base + key - else: - remote_list[key]['dest_name'] = destination_base - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - for key in remote_list: - output(u"%s: %s -> %s" % (action_str, remote_list[key]['object_uri_str'], remote_list[key]['dest_name'])) - - warning(u"Exiting now because of --dry-run") - return - - seq = 0 - for key in remote_list: - seq += 1 - seq_label = "[%d of %d]" % (seq, remote_count) - - item = remote_list[key] - src_uri = S3Uri(item['object_uri_str']) - dst_uri = S3Uri(item['dest_name']) - - extra_headers = copy(cfg.extra_headers) - response = process_fce(src_uri, dst_uri, extra_headers) - output(message % { "src" : src_uri, "dst" : dst_uri }) - if Config().acl_public: - info(u"Public URL is: %s" % dst_uri.public_url()) - -def cmd_cp(args): - s3 = S3(Config()) - subcmd_cp_mv(args, s3.object_copy, "copy", "File %(src)s copied to %(dst)s") - -def cmd_mv(args): - s3 = S3(Config()) - subcmd_cp_mv(args, s3.object_move, "move", "File %(src)s moved to %(dst)s") - -def cmd_info(args): - s3 = S3(Config()) - - while (len(args)): - uri_arg = args.pop(0) - uri = S3Uri(uri_arg) - if uri.type != "s3" or not uri.has_bucket(): - raise ParameterError("Expecting S3 URI instead of '%s'" % uri_arg) - - try: - if uri.has_object(): - info = s3.object_info(uri) - output(u"%s (object):" % uri.uri()) - output(u" File size: %s" 
% info['headers']['content-length']) - output(u" Last mod: %s" % info['headers']['last-modified']) - output(u" MIME type: %s" % info['headers']['content-type']) - output(u" MD5 sum: %s" % info['headers']['etag'].strip('"')) - else: - info = s3.bucket_info(uri) - output(u"%s (bucket):" % uri.uri()) - output(u" Location: %s" % info['bucket-location']) - acl = s3.get_acl(uri) - acl_grant_list = acl.getGrantList() - - try: - policy = s3.get_policy(uri) - output(u" policy: %s" % policy) - except: - output(u" policy: none") - - for grant in acl_grant_list: - output(u" ACL: %s: %s" % (grant['grantee'], grant['permission'])) - if acl.isAnonRead(): - output(u" URL: %s" % uri.public_url()) - - except S3Error, e: - if S3.codes.has_key(e.info["Code"]): - error(S3.codes[e.info["Code"]] % uri.bucket()) - return - else: - raise - -def cmd_sync_remote2remote(args): - def _do_deletes(s3, dst_list): - # Delete items in destination that are not in source - if cfg.dry_run: - for key in dst_list: - output(u"delete: %s" % dst_list[key]['object_uri_str']) - else: - for key in dst_list: - uri = S3Uri(dst_list[key]['object_uri_str']) - s3.object_delete(uri) - output(u"deleted: '%s'" % uri) - - s3 = S3(Config()) - - # Normalise s3://uri (e.g. 
assert trailing slash) - destination_base = unicode(S3Uri(args[-1])) - - src_list = fetch_remote_list(args[:-1], recursive = True, require_attribs = True) - dst_list = fetch_remote_list(destination_base, recursive = True, require_attribs = True) - - src_count = len(src_list) - dst_count = len(dst_list) - - info(u"Found %d source files, %d destination files" % (src_count, dst_count)) - - src_list, exclude_list = filter_exclude_include(src_list) - - src_list, dst_list, update_list, copy_pairs = compare_filelists(src_list, dst_list, src_remote = True, dst_remote = True, delay_updates = cfg.delay_updates) - - src_count = len(src_list) - update_count = len(update_list) - dst_count = len(dst_list) - - print(u"Summary: %d source files to copy, %d files at destination to delete" % (src_count, dst_count)) - - ### Populate 'target_uri' only if we've got something to sync from src to dst - for key in src_list: - src_list[key]['target_uri'] = destination_base + key - for key in update_list: - update_list[key]['target_uri'] = destination_base + key - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - if cfg.delete_removed: - for key in dst_list: - output(u"delete: %s" % dst_list[key]['object_uri_str']) - for key in src_list: - output(u"Sync: %s -> %s" % (src_list[key]['object_uri_str'], src_list[key]['target_uri'])) - warning(u"Exiting now because of --dry-run") - return - - # if there are copy pairs, we can't do delete_before, on the chance - # we need one of the to-be-deleted files as a copy source. 
- if len(copy_pairs) > 0: - cfg.delete_after = True - - # Delete items in destination that are not in source - if cfg.delete_removed and not cfg.delete_after: - _do_deletes(s3, dst_list) - - def _upload(src_list, seq, src_count): - file_list = src_list.keys() - file_list.sort() - for file in file_list: - seq += 1 - item = src_list[file] - src_uri = S3Uri(item['object_uri_str']) - dst_uri = S3Uri(item['target_uri']) - seq_label = "[%d of %d]" % (seq, src_count) - extra_headers = copy(cfg.extra_headers) - try: - response = s3.object_copy(src_uri, dst_uri, extra_headers) - output("File %(src)s copied to %(dst)s" % { "src" : src_uri, "dst" : dst_uri }) - except S3Error, e: - error("File %(src)s could not be copied: %(e)s" % { "src" : src_uri, "e" : e }) - return seq - - # Perform the synchronization of files - timestamp_start = time.time() - seq = 0 - seq = _upload(src_list, seq, src_count + update_count) - seq = _upload(update_list, seq, src_count + update_count) - n_copied, bytes_saved = remote_copy(s3, copy_pairs, destination_base) - - total_elapsed = time.time() - timestamp_start - outstr = "Done. 
Copied %d files in %0.1f seconds, %0.2f files/s" % (seq, total_elapsed, seq/total_elapsed) - if seq > 0: - output(outstr) - else: - info(outstr) - - # Delete items in destination that are not in source - if cfg.delete_removed and cfg.delete_after: - _do_deletes(s3, dst_list) - -def cmd_sync_remote2local(args): - def _do_deletes(local_list): - for key in local_list: - os.unlink(local_list[key]['full_name']) - output(u"deleted: %s" % local_list[key]['full_name_unicode']) - - s3 = S3(Config()) - - destination_base = args[-1] - local_list, single_file_local = fetch_local_list(destination_base, recursive = True) - remote_list = fetch_remote_list(args[:-1], recursive = True, require_attribs = True) - - local_count = len(local_list) - remote_count = len(remote_list) - - info(u"Found %d remote files, %d local files" % (remote_count, local_count)) - - remote_list, exclude_list = filter_exclude_include(remote_list) - - remote_list, local_list, update_list, copy_pairs = compare_filelists(remote_list, local_list, src_remote = True, dst_remote = False, delay_updates = cfg.delay_updates) - - local_count = len(local_list) - remote_count = len(remote_list) - update_count = len(update_list) - copy_pairs_count = len(copy_pairs) - - info(u"Summary: %d remote files to download, %d local files to delete, %d local files to hardlink" % (remote_count + update_count, local_count, copy_pairs_count)) - - def _set_local_filename(remote_list, destination_base): - if len(remote_list) == 0: - return - if not os.path.isdir(destination_base): - ## We were either given a file name (existing or not) or want STDOUT - if len(remote_list) > 1: - raise ParameterError("Destination must be a directory when downloading multiple sources.") - remote_list[remote_list.keys()[0]]['local_filename'] = deunicodise(destination_base) - else: - if destination_base[-1] != os.path.sep: - destination_base += os.path.sep - for key in remote_list: - local_filename = destination_base + key - if os.path.sep != "/": - 
local_filename = os.path.sep.join(local_filename.split("/")) - remote_list[key]['local_filename'] = deunicodise(local_filename) - - _set_local_filename(remote_list, destination_base) - _set_local_filename(update_list, destination_base) - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - if cfg.delete_removed: - for key in local_list: - output(u"delete: %s" % local_list[key]['full_name_unicode']) - for key in remote_list: - output(u"download: %s -> %s" % (unicodise(remote_list[key]['object_uri_str']), unicodise(remote_list[key]['local_filename']))) - for key in update_list: - output(u"download: %s -> %s" % (update_list[key]['object_uri_str'], update_list[key]['local_filename'])) - - warning(u"Exiting now because of --dry-run") - return - - # if there are copy pairs, we can't do delete_before, on the chance - # we need one of the to-be-deleted files as a copy source. - if len(copy_pairs) > 0: - cfg.delete_after = True - - if cfg.delete_removed and not cfg.delete_after: - _do_deletes(local_list) - - def _download(remote_list, seq, total, total_size, dir_cache): - file_list = remote_list.keys() - file_list.sort() - for file in file_list: - seq += 1 - item = remote_list[file] - uri = S3Uri(item['object_uri_str']) - dst_file = item['local_filename'] - seq_label = "[%d of %d]" % (seq, total) - try: - dst_dir = os.path.dirname(dst_file) - if not dir_cache.has_key(dst_dir): - dir_cache[dst_dir] = Utils.mkdir_with_parents(dst_dir) - if dir_cache[dst_dir] == False: - warning(u"%s: destination directory not writable: %s" % (file, dst_dir)) - continue - try: - debug(u"dst_file=%s" % unicodise(dst_file)) - # create temporary files (of type .s3cmd.XXXX.tmp) in the same directory - # for downloading and then rename once downloaded - chkptfd, chkptfname = tempfile.mkstemp(".tmp",".s3cmd.",os.path.dirname(dst_file)) - debug(u"created chkptfname=%s" % unicodise(chkptfname)) - dst_stream = os.fdopen(chkptfd, "wb") - response = s3.object_get(uri, 
dst_stream, extra_label = seq_label) - dst_stream.close() - # download completed, rename the file to destination - os.rename(chkptfname, dst_file) - - # set permissions on destination file - original_umask = os.umask(0); - os.umask(original_umask); - mode = 0777 - original_umask; - debug(u"mode=%s" % oct(mode)) - - os.chmod(dst_file, mode); - - debug(u"renamed chkptfname=%s to dst_file=%s" % (unicodise(chkptfname), unicodise(dst_file))) - if response['headers'].has_key('x-amz-meta-s3cmd-attrs') and cfg.preserve_attrs: - attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs']) - if attrs.has_key('mode'): - os.chmod(dst_file, int(attrs['mode'])) - if attrs.has_key('mtime') or attrs.has_key('atime'): - mtime = attrs.has_key('mtime') and int(attrs['mtime']) or int(time.time()) - atime = attrs.has_key('atime') and int(attrs['atime']) or int(time.time()) - os.utime(dst_file, (atime, mtime)) - ## FIXME: uid/gid / uname/gname handling comes here! TODO - except OSError, e: - try: - dst_stream.close() - os.remove(chkptfname) - except: pass - if e.errno == errno.EEXIST: - warning(u"%s exists - not overwriting" % (dst_file)) - continue - if e.errno in (errno.EPERM, errno.EACCES): - warning(u"%s not writable: %s" % (dst_file, e.strerror)) - continue - if e.errno == errno.EISDIR: - warning(u"%s is a directory - skipping over" % dst_file) - continue - raise e - except KeyboardInterrupt: - try: - dst_stream.close() - os.remove(chkptfname) - except: pass - warning(u"Exiting after keyboard interrupt") - return - except Exception, e: - try: - dst_stream.close() - os.remove(chkptfname) - except: pass - error(u"%s: %s" % (file, e)) - continue - # We have to keep repeating this call because - # Python 2.4 doesn't support try/except/finally - # construction :-( - try: - dst_stream.close() - os.remove(chkptfname) - except: pass - except S3DownloadError, e: - error(u"%s: download failed too many times. Skipping that file." 
% file) - continue - speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) - if not Config().progress_meter: - output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % - (uri, unicodise(dst_file), response["size"], response["elapsed"], speed_fmt[0], speed_fmt[1], - seq_label)) - total_size += response["size"] - if Config().delete_after_fetch: - s3.object_delete(uri) - output(u"File '%s' removed after syncing" % (uri)) - return seq, total_size - - total_size = 0 - total_elapsed = 0.0 - timestamp_start = time.time() - dir_cache = {} - seq = 0 - seq, total_size = _download(remote_list, seq, remote_count + update_count, total_size, dir_cache) - seq, total_size = _download(update_list, seq, remote_count + update_count, total_size, dir_cache) - - failed_copy_list = local_copy(copy_pairs, destination_base) - _set_local_filename(failed_copy_list, destination_base) - seq, total_size = _download(failed_copy_list, seq, len(failed_copy_list) + remote_count + update_count, total_size, dir_cache) - - total_elapsed = time.time() - timestamp_start - speed_fmt = formatSize(total_size/total_elapsed, human_readable = True, floating_point = True) - - # Only print out the result if any work has been done or - # if the user asked for verbose output - outstr = "Done. Downloaded %d bytes in %0.1f seconds, %0.2f %sB/s" % (total_size, total_elapsed, speed_fmt[0], speed_fmt[1]) - if total_size > 0: - output(outstr) - else: - info(outstr) - - if cfg.delete_removed and cfg.delete_after: - _do_deletes(local_list) - -def local_copy(copy_pairs, destination_base): - # Do NOT hardlink local files by default, that'd be silly - # For instance all empty files would become hardlinked together! 
- - failed_copy_list = FileDict() - for (src_obj, dst1, relative_file) in copy_pairs: - src_file = os.path.join(destination_base, dst1) - dst_file = os.path.join(destination_base, relative_file) - dst_dir = os.path.dirname(dst_file) - try: - if not os.path.isdir(dst_dir): - debug("MKDIR %s" % dst_dir) - os.makedirs(dst_dir) - debug(u"Copying %s to %s" % (src_file, dst_file)) - shutil.copy2(src_file, dst_file) - except (IOError, OSError), e: - warning(u'Unable to hardlink or copy files %s -> %s: %s' % (src_file, dst_file, e)) - failed_copy_list[relative_file] = src_obj - return failed_copy_list - -def remote_copy(s3, copy_pairs, destination_base): - saved_bytes = 0 - for (src_obj, dst1, dst2) in copy_pairs: - debug(u"Remote Copying from %s to %s" % (dst1, dst2)) - dst1_uri = S3Uri(destination_base + dst1) - dst2_uri = S3Uri(destination_base + dst2) - extra_headers = copy(cfg.extra_headers) - try: - s3.object_copy(dst1_uri, dst2_uri, extra_headers) - info = s3.object_info(dst2_uri) - saved_bytes = saved_bytes + int(info['headers']['content-length']) - output(u"remote copy: %s -> %s" % (dst1, dst2)) - except: - raise - return (len(copy_pairs), saved_bytes) - - -def cmd_sync_local2remote(args): - def _build_attr_header(local_list, src): - import pwd, grp - attrs = {} - for attr in cfg.preserve_attrs_list: - if attr == 'uname': - try: - val = pwd.getpwuid(local_list[src]['uid']).pw_name - except KeyError: - attr = "uid" - val = local_list[src].get('uid') - warning(u"%s: Owner username not known. Storing UID=%d instead." % (src, val)) - elif attr == 'gname': - try: - val = grp.getgrgid(local_list[src].get('gid')).gr_name - except KeyError: - attr = "gid" - val = local_list[src].get('gid') - warning(u"%s: Owner groupname not known. Storing GID=%d instead." 
% (src, val)) - elif attr == 'md5': - try: - val = local_list.get_md5(src) - except IOError: - val = None - else: - val = getattr(local_list[src]['sr'], 'st_' + attr) - attrs[attr] = val - - if 'md5' in attrs and attrs['md5'] is None: - del attrs['md5'] - - result = "" - for k in attrs: result += "%s:%s/" % (k, attrs[k]) - return { 'x-amz-meta-s3cmd-attrs' : result[:-1] } - - def _do_deletes(s3, remote_list): - for key in remote_list: - uri = S3Uri(remote_list[key]['object_uri_str']) - s3.object_delete(uri) - output(u"deleted: '%s'" % uri) - - def _single_process(local_list): - for dest in destinations: - ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash) - destination_base_uri = S3Uri(dest) - if destination_base_uri.type != 's3': - raise ParameterError("Destination must be S3Uri. Got: %s" % destination_base_uri) - destination_base = str(destination_base_uri) - _child(destination_base, local_list) - return destination_base_uri - - def _parent(): - # Now that we've done all the disk I/O to look at the local file system and - # calculate the md5 for each file, fork for each destination to upload to them separately - # and in parallel - child_pids = [] - - for dest in destinations: - ## Normalize URI to convert s3://bkt to s3://bkt/ (trailing slash) - destination_base_uri = S3Uri(dest) - if destination_base_uri.type != 's3': - raise ParameterError("Destination must be S3Uri. 
Got: %s" % destination_base_uri) - destination_base = str(destination_base_uri) - child_pid = os.fork() - if child_pid == 0: - _child(destination_base, local_list) - os._exit(0) - else: - child_pids.append(child_pid) - - while len(child_pids): - (pid, status) = os.wait() - child_pids.remove(pid) - - return - - def _child(destination_base, local_list): - def _set_remote_uri(local_list, destination_base, single_file_local): - if len(local_list) > 0: - ## Populate 'remote_uri' only if we've got something to upload - if not destination_base.endswith("/"): - if not single_file_local: - raise ParameterError("Destination S3 URI must end with '/' (ie must refer to a directory on the remote side).") - local_list[local_list.keys()[0]]['remote_uri'] = unicodise(destination_base) - else: - for key in local_list: - local_list[key]['remote_uri'] = unicodise(destination_base + key) - - def _upload(local_list, seq, total, total_size): - file_list = local_list.keys() - file_list.sort() - for file in file_list: - seq += 1 - item = local_list[file] - src = item['full_name'] - uri = S3Uri(item['remote_uri']) - seq_label = "[%d of %d]" % (seq, total) - extra_headers = copy(cfg.extra_headers) - try: - if cfg.preserve_attrs: - attr_header = _build_attr_header(local_list, file) - debug(u"attr_header: %s" % attr_header) - extra_headers.update(attr_header) - response = s3.object_put(src, uri, extra_headers, extra_label = seq_label) - except InvalidFileError, e: - warning(u"File can not be uploaded: %s" % e) - continue - except S3UploadError, e: - error(u"%s: upload failed too many times. Skipping that file." 
% item['full_name_unicode']) - continue - speed_fmt = formatSize(response["speed"], human_readable = True, floating_point = True) - if not cfg.progress_meter: - output(u"File '%s' stored as '%s' (%d bytes in %0.1f seconds, %0.2f %sB/s) %s" % - (item['full_name_unicode'], uri, response["size"], response["elapsed"], - speed_fmt[0], speed_fmt[1], seq_label)) - total_size += response["size"] - uploaded_objects_list.append(uri.object()) - return seq, total_size - - remote_list = fetch_remote_list(destination_base, recursive = True, require_attribs = True) - - local_count = len(local_list) - remote_count = len(remote_list) - - info(u"Found %d local files, %d remote files" % (local_count, remote_count)) - - local_list, exclude_list = filter_exclude_include(local_list) - - if single_file_local and len(local_list) == 1 and len(remote_list) == 1: - ## Make remote_key same as local_key for comparison if we're dealing with only one file - remote_list_entry = remote_list[remote_list.keys()[0]] - # Flush remote_list, by the way - remote_list = FileDict() - remote_list[local_list.keys()[0]] = remote_list_entry - - local_list, remote_list, update_list, copy_pairs = compare_filelists(local_list, remote_list, src_remote = False, dst_remote = True, delay_updates = cfg.delay_updates) - - local_count = len(local_list) - update_count = len(update_list) - copy_count = len(copy_pairs) - remote_count = len(remote_list) - - info(u"Summary: %d local files to upload, %d files to remote copy, %d remote files to delete" % (local_count + update_count, copy_count, remote_count)) - - _set_remote_uri(local_list, destination_base, single_file_local) - _set_remote_uri(update_list, destination_base, single_file_local) - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - for key in local_list: - output(u"upload: %s -> %s" % (local_list[key]['full_name_unicode'], local_list[key]['remote_uri'])) - for key in update_list: - output(u"upload: %s -> %s" % 
(update_list[key]['full_name_unicode'], update_list[key]['remote_uri'])) - for (src_obj, dst1, dst2) in copy_pairs: - output(u"remote copy: %s -> %s" % (dst1, dst2)) - if cfg.delete_removed: - for key in remote_list: - output(u"delete: %s" % remote_list[key]['object_uri_str']) - - warning(u"Exiting now because of --dry-run") - return - - # if there are copy pairs, we can't do delete_before, on the chance - # we need one of the to-be-deleted files as a copy source. - if len(copy_pairs) > 0: - cfg.delete_after = True - - if cfg.delete_removed and not cfg.delete_after: - _do_deletes(s3, remote_list) - - total_size = 0 - total_elapsed = 0.0 - timestamp_start = time.time() - n, total_size = _upload(local_list, 0, local_count, total_size) - n, total_size = _upload(update_list, n, local_count, total_size) - n_copies, saved_bytes = remote_copy(s3, copy_pairs, destination_base) - if cfg.delete_removed and cfg.delete_after: - _do_deletes(s3, remote_list) - total_elapsed = time.time() - timestamp_start - total_speed = total_elapsed and total_size/total_elapsed or 0.0 - speed_fmt = formatSize(total_speed, human_readable = True, floating_point = True) - - # Only print out the result if any work has been done or - # if the user asked for verbose output - outstr = "Done. Uploaded %d bytes in %0.1f seconds, %0.2f %sB/s. Copied %d files saving %d bytes transfer." 
% (total_size, total_elapsed, speed_fmt[0], speed_fmt[1], n_copies, saved_bytes) - if total_size + saved_bytes > 0: - output(outstr) - else: - info(outstr) - - return - - def _invalidate_on_cf(destination_base_uri): - cf = CloudFront(cfg) - default_index_file = None - if cfg.invalidate_default_index_on_cf or cfg.invalidate_default_index_root_on_cf: - info_response = s3.website_info(destination_base_uri, cfg.bucket_location) - if info_response: - default_index_file = info_response['index_document'] - if len(default_index_file) < 1: - default_index_file = None - - result = cf.InvalidateObjects(destination_base_uri, uploaded_objects_list, default_index_file, cfg.invalidate_default_index_on_cf, cfg.invalidate_default_index_root_on_cf) - if result['status'] == 201: - output("Created invalidation request for %d paths" % len(uploaded_objects_list)) - output("Check progress with: s3cmd cfinvalinfo cf://%s/%s" % (result['dist_id'], result['request_id'])) - - - # main execution - s3 = S3(cfg) - uploaded_objects_list = [] - - if cfg.encrypt: - error(u"S3cmd 'sync' doesn't yet support GPG encryption, sorry.") - error(u"Either use unconditional 's3cmd put --recursive'") - error(u"or disable encryption with --no-encrypt parameter.") - sys.exit(1) - - local_list, single_file_local = fetch_local_list(args[:-1], recursive = True) - - destinations = [args[-1]] - if cfg.additional_destinations: - destinations = destinations + cfg.additional_destinations - - if 'fork' not in os.__all__ or len(destinations) < 2: - destination_base_uri = _single_process(local_list) - if cfg.invalidate_on_cf: - if len(uploaded_objects_list) == 0: - info("Nothing to invalidate in CloudFront") - else: - _invalidate_on_cf(destination_base_uri) - else: - _parent() - if cfg.invalidate_on_cf: - error(u"You cannot use both --cf-invalidate and --add-destination.") - -def cmd_sync(args): - if (len(args) < 2): - raise ParameterError("Too few parameters! 
Expected: %s" % commands['sync']['param']) - - if S3Uri(args[0]).type == "file" and S3Uri(args[-1]).type == "s3": - return cmd_sync_local2remote(args) - if S3Uri(args[0]).type == "s3" and S3Uri(args[-1]).type == "file": - return cmd_sync_remote2local(args) - if S3Uri(args[0]).type == "s3" and S3Uri(args[-1]).type == "s3": - return cmd_sync_remote2remote(args) - raise ParameterError("Invalid source/destination: '%s'" % "' '".join(args)) - -def cmd_setacl(args): - s3 = S3(cfg) - - set_to_acl = cfg.acl_public and "Public" or "Private" - - if not cfg.recursive: - old_args = args - args = [] - for arg in old_args: - uri = S3Uri(arg) - if not uri.has_object(): - if cfg.acl_public != None: - info("Setting bucket-level ACL for %s to %s" % (uri.uri(), set_to_acl)) - else: - info("Setting bucket-level ACL for %s" % (uri.uri())) - if not cfg.dry_run: - update_acl(s3, uri) - else: - args.append(arg) - - remote_list = fetch_remote_list(args) - remote_list, exclude_list = filter_exclude_include(remote_list) - - remote_count = len(remote_list) - - info(u"Summary: %d remote files to update" % remote_count) - - if cfg.dry_run: - for key in exclude_list: - output(u"exclude: %s" % unicodise(key)) - for key in remote_list: - output(u"setacl: %s" % remote_list[key]['object_uri_str']) - - warning(u"Exiting now because of --dry-run") - return - - seq = 0 - for key in remote_list: - seq += 1 - seq_label = "[%d of %d]" % (seq, remote_count) - uri = S3Uri(remote_list[key]['object_uri_str']) - update_acl(s3, uri, seq_label) - -def cmd_setpolicy(args): - s3 = S3(cfg) - uri = S3Uri(args[1]) - policy_file = args[0] - policy = open(policy_file, 'r').read() - - if cfg.dry_run: return - - response = s3.set_policy(uri, policy) - - #if retsponse['status'] == 200: - debug(u"response - %s" % response['status']) - if response['status'] == 204: - output(u"%s: Policy updated" % uri) - -def cmd_delpolicy(args): - s3 = S3(cfg) - uri = S3Uri(args[0]) - if cfg.dry_run: return - - response = 
s3.delete_policy(uri) - - #if retsponse['status'] == 200: - debug(u"response - %s" % response['status']) - output(u"%s: Policy deleted" % uri) - - -def cmd_accesslog(args): - s3 = S3(cfg) - bucket_uri = S3Uri(args.pop()) - if bucket_uri.object(): - raise ParameterError("Only bucket name is required for [accesslog] command") - if cfg.log_target_prefix == False: - accesslog, response = s3.set_accesslog(bucket_uri, enable = False) - elif cfg.log_target_prefix: - log_target_prefix_uri = S3Uri(cfg.log_target_prefix) - if log_target_prefix_uri.type != "s3": - raise ParameterError("--log-target-prefix must be a S3 URI") - accesslog, response = s3.set_accesslog(bucket_uri, enable = True, log_target_prefix_uri = log_target_prefix_uri, acl_public = cfg.acl_public) - else: # cfg.log_target_prefix == None - accesslog = s3.get_accesslog(bucket_uri) - - output(u"Access logging for: %s" % bucket_uri.uri()) - output(u" Logging Enabled: %s" % accesslog.isLoggingEnabled()) - if accesslog.isLoggingEnabled(): - output(u" Target prefix: %s" % accesslog.targetPrefix().uri()) - #output(u" Public Access: %s" % accesslog.isAclPublic()) - -def cmd_sign(args): - string_to_sign = args.pop() - debug("string-to-sign: %r" % string_to_sign) - signature = Utils.sign_string(string_to_sign) - output("Signature: %s" % signature) - -def cmd_signurl(args): - expiry = args.pop() - url_to_sign = S3Uri(args.pop()) - if url_to_sign.type != 's3': - raise ParameterError("Must be S3Uri. Got: %s" % url_to_sign) - debug("url to sign: %r" % url_to_sign) - signed_url = Utils.sign_url(url_to_sign, expiry) - output(signed_url) - -def cmd_fixbucket(args): - def _unescape(text): - ## - # Removes HTML or XML character references and entities from a text string. - # - # @param text The HTML (or XML) source text. - # @return The plain text, as a Unicode string, if necessary. 
- # - # From: http://effbot.org/zone/re-sub.htm#unescape-html - def _unescape_fixup(m): - text = m.group(0) - if not htmlentitydefs.name2codepoint.has_key('apos'): - htmlentitydefs.name2codepoint['apos'] = ord("'") - if text[:2] == "&#": - # character reference - try: - if text[:3] == "&#x": - return unichr(int(text[3:-1], 16)) - else: - return unichr(int(text[2:-1])) - except ValueError: - pass - else: - # named entity - try: - text = unichr(htmlentitydefs.name2codepoint[text[1:-1]]) - except KeyError: - pass - return text # leave as is - text = text.encode('ascii', 'xmlcharrefreplace') - return re.sub("&#?\w+;", _unescape_fixup, text) - - cfg.urlencoding_mode = "fixbucket" - s3 = S3(cfg) - - count = 0 - for arg in args: - culprit = S3Uri(arg) - if culprit.type != "s3": - raise ParameterError("Expecting S3Uri instead of: %s" % arg) - response = s3.bucket_list_noparse(culprit.bucket(), culprit.object(), recursive = True) - r_xent = re.compile("&#x[\da-fA-F]+;") - response['data'] = unicode(response['data'], 'UTF-8') - keys = re.findall("(.*?)", response['data'], re.MULTILINE) - debug("Keys: %r" % keys) - for key in keys: - if r_xent.search(key): - info("Fixing: %s" % key) - debug("Step 1: Transforming %s" % key) - key_bin = _unescape(key) - debug("Step 2: ... to %s" % key_bin) - key_new = replace_nonprintables(key_bin) - debug("Step 3: ... then to %s" % key_new) - src = S3Uri("s3://%s/%s" % (culprit.bucket(), key_bin)) - dst = S3Uri("s3://%s/%s" % (culprit.bucket(), key_new)) - resp_move = s3.object_move(src, dst) - if resp_move['status'] == 200: - output("File %r renamed to %s" % (key_bin, key_new)) - count += 1 - else: - error("Something went wrong for: %r" % key) - error("Please report the problem to s3tools-bugs@lists.sourceforge.net") - if count > 0: - warning("Fixed %d files' names. Their ACL were reset to Private." % count) - warning("Use 's3cmd setacl --acl-public s3://...' 
to make") - warning("them publicly readable if required.") - -def resolve_list(lst, args): - retval = [] - for item in lst: - retval.append(item % args) - return retval - -def gpg_command(command, passphrase = ""): - debug("GPG command: " + " ".join(command)) - p = subprocess.Popen(command, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) - p_stdout, p_stderr = p.communicate(passphrase + "\n") - debug("GPG output:") - for line in p_stdout.split("\n"): - debug("GPG: " + line) - p_exitcode = p.wait() - return p_exitcode - -def gpg_encrypt(filename): - tmp_filename = Utils.mktmpfile() - args = { - "gpg_command" : cfg.gpg_command, - "passphrase_fd" : "0", - "input_file" : filename, - "output_file" : tmp_filename, - } - info(u"Encrypting file %(input_file)s to %(output_file)s..." % args) - command = resolve_list(cfg.gpg_encrypt.split(" "), args) - code = gpg_command(command, cfg.gpg_passphrase) - return (code, tmp_filename, "gpg") - -def gpg_decrypt(filename, gpgenc_header = "", in_place = True): - tmp_filename = Utils.mktmpfile(filename) - args = { - "gpg_command" : cfg.gpg_command, - "passphrase_fd" : "0", - "input_file" : filename, - "output_file" : tmp_filename, - } - info(u"Decrypting file %(input_file)s to %(output_file)s..." 
% args) - command = resolve_list(cfg.gpg_decrypt.split(" "), args) - code = gpg_command(command, cfg.gpg_passphrase) - if code == 0 and in_place: - debug(u"Renaming %s to %s" % (tmp_filename, filename)) - os.unlink(filename) - os.rename(tmp_filename, filename) - tmp_filename = filename - return (code, tmp_filename) - -def run_configure(config_file, args): - cfg = Config() - options = [ - ("access_key", "Access Key", "Access key and Secret key are your identifiers for Amazon S3"), - ("secret_key", "Secret Key"), - ("gpg_passphrase", "Encryption password", "Encryption password is used to protect your files from reading\nby unauthorized persons while in transfer to S3"), - ("gpg_command", "Path to GPG program"), - ("use_https", "Use HTTPS protocol", "When using secure HTTPS protocol all communication with Amazon S3\nservers is protected from 3rd party eavesdropping. This method is\nslower than plain HTTP and can't be used if you're behind a proxy"), - ("proxy_host", "HTTP Proxy server name", "On some networks all internet access must go through a HTTP proxy.\nTry setting it here if you can't conect to S3 directly"), - ("proxy_port", "HTTP Proxy server port"), - ] - ## Option-specfic defaults - if getattr(cfg, "gpg_command") == "": - setattr(cfg, "gpg_command", find_executable("gpg")) - - if getattr(cfg, "proxy_host") == "" and os.getenv("http_proxy"): - re_match=re.match("(http://)?([^:]+):(\d+)", os.getenv("http_proxy")) - if re_match: - setattr(cfg, "proxy_host", re_match.groups()[1]) - setattr(cfg, "proxy_port", re_match.groups()[2]) - - try: - while 1: - output(u"\nEnter new values or accept defaults in brackets with Enter.") - output(u"Refer to user manual for detailed description of all options.") - for option in options: - prompt = option[1] - ## Option-specific handling - if option[0] == 'proxy_host' and getattr(cfg, 'use_https') == True: - setattr(cfg, option[0], "") - continue - if option[0] == 'proxy_port' and getattr(cfg, 'proxy_host') == "": - 
setattr(cfg, option[0], 0) - continue - - try: - val = getattr(cfg, option[0]) - if type(val) is bool: - val = val and "Yes" or "No" - if val not in (None, ""): - prompt += " [%s]" % val - except AttributeError: - pass - - if len(option) >= 3: - output(u"\n%s" % option[2]) - - val = raw_input(prompt + ": ") - if val != "": - if type(getattr(cfg, option[0])) is bool: - # Turn 'Yes' into True, everything else into False - val = val.lower().startswith('y') - setattr(cfg, option[0], val) - output(u"\nNew settings:") - for option in options: - output(u" %s: %s" % (option[1], getattr(cfg, option[0]))) - val = raw_input("\nTest access with supplied credentials? [Y/n] ") - if val.lower().startswith("y") or val == "": - try: - # Default, we try to list 'all' buckets which requires - # ListAllMyBuckets permission - if len(args) == 0: - output(u"Please wait, attempting to list all buckets...") - S3(Config()).bucket_list("", "") - else: - # If user specified a bucket name directly, we check it and only it. - # Thus, access check can succeed even if user only has access to - # to a single bucket and not ListAllMyBuckets permission. - output(u"Please wait, attempting to list bucket: " + args[0]) - uri = S3Uri(args[0]) - if uri.type == "s3" and uri.has_bucket(): - S3(Config()).bucket_list(uri.bucket(), "") - else: - raise Exception(u"Invalid bucket uri: " + args[0]) - - output(u"Success. Your access key and secret key worked fine :-)") - - output(u"\nNow verifying that encryption works...") - if not getattr(cfg, "gpg_command") or not getattr(cfg, "gpg_passphrase"): - output(u"Not configured. 
Never mind.") - else: - if not getattr(cfg, "gpg_command"): - raise Exception("Path to GPG program not set") - if not os.path.isfile(getattr(cfg, "gpg_command")): - raise Exception("GPG program not found") - filename = Utils.mktmpfile() - f = open(filename, "w") - f.write(os.sys.copyright) - f.close() - ret_enc = gpg_encrypt(filename) - ret_dec = gpg_decrypt(ret_enc[1], ret_enc[2], False) - hash = [ - Utils.hash_file_md5(filename), - Utils.hash_file_md5(ret_enc[1]), - Utils.hash_file_md5(ret_dec[1]), - ] - os.unlink(filename) - os.unlink(ret_enc[1]) - os.unlink(ret_dec[1]) - if hash[0] == hash[2] and hash[0] != hash[1]: - output ("Success. Encryption and decryption worked fine :-)") - else: - raise Exception("Encryption verification error.") - - except Exception, e: - error(u"Test failed: %s" % (e)) - val = raw_input("\nRetry configuration? [Y/n] ") - if val.lower().startswith("y") or val == "": - continue - - - val = raw_input("\nSave settings? [y/N] ") - if val.lower().startswith("y"): - break - val = raw_input("Retry configuration? [Y/n] ") - if val.lower().startswith("n"): - raise EOFError() - - ## Overwrite existing config file, make it user-readable only - old_mask = os.umask(0077) - try: - os.remove(config_file) - except OSError, e: - if e.errno != errno.ENOENT: - raise - f = open(config_file, "w") - os.umask(old_mask) - cfg.dump_config(f) - f.close() - output(u"Configuration saved to '%s'" % config_file) - - except (EOFError, KeyboardInterrupt): - output(u"\nConfiguration aborted. 
Changes were NOT saved.") - return - - except IOError, e: - error(u"Writing config file failed: %s: %s" % (config_file, e.strerror)) - sys.exit(1) - -def process_patterns_from_file(fname, patterns_list): - try: - fn = open(fname, "rt") - except IOError, e: - error(e) - sys.exit(1) - for pattern in fn: - pattern = pattern.strip() - if re.match("^#", pattern) or re.match("^\s*$", pattern): - continue - debug(u"%s: adding rule: %s" % (fname, pattern)) - patterns_list.append(pattern) - - return patterns_list - -def process_patterns(patterns_list, patterns_from, is_glob, option_txt = ""): - """ - process_patterns(patterns, patterns_from, is_glob, option_txt = "") - Process --exclude / --include GLOB and REGEXP patterns. - 'option_txt' is 'exclude' / 'include' / 'rexclude' / 'rinclude' - Returns: patterns_compiled, patterns_text - """ - - patterns_compiled = [] - patterns_textual = {} - - if patterns_list is None: - patterns_list = [] - - if patterns_from: - ## Append patterns from glob_from - for fname in patterns_from: - debug(u"processing --%s-from %s" % (option_txt, fname)) - patterns_list = process_patterns_from_file(fname, patterns_list) - - for pattern in patterns_list: - debug(u"processing %s rule: %s" % (option_txt, patterns_list)) - if is_glob: - pattern = glob.fnmatch.translate(pattern) - r = re.compile(pattern) - patterns_compiled.append(r) - patterns_textual[r] = pattern - - return patterns_compiled, patterns_textual - -def get_commands_list(): - return [ - {"cmd":"mb", "label":"Make bucket", "param":"s3://BUCKET", "func":cmd_bucket_create, "argc":1}, - {"cmd":"rb", "label":"Remove bucket", "param":"s3://BUCKET", "func":cmd_bucket_delete, "argc":1}, - {"cmd":"ls", "label":"List objects or buckets", "param":"[s3://BUCKET[/PREFIX]]", "func":cmd_ls, "argc":0}, - {"cmd":"la", "label":"List all object in all buckets", "param":"", "func":cmd_buckets_list_all_all, "argc":0}, - {"cmd":"put", "label":"Put file into bucket", "param":"FILE [FILE...] 
s3://BUCKET[/PREFIX]", "func":cmd_object_put, "argc":2}, - {"cmd":"get", "label":"Get file from bucket", "param":"s3://BUCKET/OBJECT LOCAL_FILE", "func":cmd_object_get, "argc":1}, - {"cmd":"del", "label":"Delete file from bucket", "param":"s3://BUCKET/OBJECT", "func":cmd_object_del, "argc":1}, - #{"cmd":"mkdir", "label":"Make a virtual S3 directory", "param":"s3://BUCKET/path/to/dir", "func":cmd_mkdir, "argc":1}, - {"cmd":"sync", "label":"Synchronize a directory tree to S3", "param":"LOCAL_DIR s3://BUCKET[/PREFIX] or s3://BUCKET[/PREFIX] LOCAL_DIR", "func":cmd_sync, "argc":2}, - {"cmd":"du", "label":"Disk usage by buckets", "param":"[s3://BUCKET[/PREFIX]]", "func":cmd_du, "argc":0}, - {"cmd":"info", "label":"Get various information about Buckets or Files", "param":"s3://BUCKET[/OBJECT]", "func":cmd_info, "argc":1}, - {"cmd":"cp", "label":"Copy object", "param":"s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]", "func":cmd_cp, "argc":2}, - {"cmd":"mv", "label":"Move object", "param":"s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]", "func":cmd_mv, "argc":2}, - {"cmd":"setacl", "label":"Modify Access control list for Bucket or Files", "param":"s3://BUCKET[/OBJECT]", "func":cmd_setacl, "argc":1}, - - {"cmd":"setpolicy", "label":"Modify Bucket Policy", "param":"FILE s3://BUCKET", "func":cmd_setpolicy, "argc":2}, - {"cmd":"delpolicy", "label":"Delete Bucket Policy", "param":"s3://BUCKET", "func":cmd_delpolicy, "argc":1}, - - {"cmd":"accesslog", "label":"Enable/disable bucket access logging", "param":"s3://BUCKET", "func":cmd_accesslog, "argc":1}, - {"cmd":"sign", "label":"Sign arbitrary string using the secret key", "param":"STRING-TO-SIGN", "func":cmd_sign, "argc":1}, - {"cmd":"signurl", "label":"Sign an S3 URL to provide limited public access with expiry", "param":"s3://BUCKET/OBJECT expiry_epoch", "func":cmd_signurl, "argc":2}, - {"cmd":"fixbucket", "label":"Fix invalid file names in a bucket", "param":"s3://BUCKET[/PREFIX]", "func":cmd_fixbucket, "argc":1}, - - ## Website 
commands - {"cmd":"ws-create", "label":"Create Website from bucket", "param":"s3://BUCKET", "func":cmd_website_create, "argc":1}, - {"cmd":"ws-delete", "label":"Delete Website", "param":"s3://BUCKET", "func":cmd_website_delete, "argc":1}, - {"cmd":"ws-info", "label":"Info about Website", "param":"s3://BUCKET", "func":cmd_website_info, "argc":1}, - - ## CloudFront commands - {"cmd":"cflist", "label":"List CloudFront distribution points", "param":"", "func":CfCmd.info, "argc":0}, - {"cmd":"cfinfo", "label":"Display CloudFront distribution point parameters", "param":"[cf://DIST_ID]", "func":CfCmd.info, "argc":0}, - {"cmd":"cfcreate", "label":"Create CloudFront distribution point", "param":"s3://BUCKET", "func":CfCmd.create, "argc":1}, - {"cmd":"cfdelete", "label":"Delete CloudFront distribution point", "param":"cf://DIST_ID", "func":CfCmd.delete, "argc":1}, - {"cmd":"cfmodify", "label":"Change CloudFront distribution point parameters", "param":"cf://DIST_ID", "func":CfCmd.modify, "argc":1}, - #{"cmd":"cfinval", "label":"Invalidate CloudFront objects", "param":"s3://BUCKET/OBJECT [s3://BUCKET/OBJECT ...]", "func":CfCmd.invalidate, "argc":1}, - {"cmd":"cfinvalinfo", "label":"Display CloudFront invalidation request(s) status", "param":"cf://DIST_ID[/INVAL_ID]", "func":CfCmd.invalinfo, "argc":1}, - ] - -def format_commands(progname, commands_list): - help = "Commands:\n" - for cmd in commands_list: - help += " %s\n %s %s %s\n" % (cmd["label"], progname, cmd["cmd"], cmd["param"]) - return help - - -def update_acl(s3, uri, seq_label=""): - something_changed = False - acl = s3.get_acl(uri) - debug(u"acl: %s - %r" % (uri, acl.grantees)) - if cfg.acl_public == True: - if acl.isAnonRead(): - info(u"%s: already Public, skipping %s" % (uri, seq_label)) - else: - acl.grantAnonRead() - something_changed = True - elif cfg.acl_public == False: # we explicitely check for False, because it could be None - if not acl.isAnonRead(): - info(u"%s: already Private, skipping %s" % (uri, 
seq_label)) - else: - acl.revokeAnonRead() - something_changed = True - - # update acl with arguments - # grant first and revoke later, because revoke has priority - if cfg.acl_grants: - something_changed = True - for grant in cfg.acl_grants: - acl.grant(**grant) - - if cfg.acl_revokes: - something_changed = True - for revoke in cfg.acl_revokes: - acl.revoke(**revoke) - - if not something_changed: - return - - retsponse = s3.set_acl(uri, acl) - if retsponse['status'] == 200: - if cfg.acl_public in (True, False): - set_to_acl = cfg.acl_public and "Public" or "Private" - output(u"%s: ACL set to %s %s" % (uri, set_to_acl, seq_label)) - else: - output(u"%s: ACL updated" % uri) - -class OptionMimeType(Option): - def check_mimetype(option, opt, value): - if re.compile("^[a-z0-9]+/[a-z0-9+\.-]+(;.*)?$", re.IGNORECASE).match(value): - return value - raise OptionValueError("option %s: invalid MIME-Type format: %r" % (opt, value)) - -class OptionS3ACL(Option): - def check_s3acl(option, opt, value): - permissions = ('read', 'write', 'read_acp', 'write_acp', 'full_control', 'all') - try: - permission, grantee = re.compile("^(\w+):(.+)$", re.IGNORECASE).match(value).groups() - if not permission or not grantee: - raise - if permission in permissions: - return { 'name' : grantee, 'permission' : permission.upper() } - else: - raise OptionValueError("option %s: invalid S3 ACL permission: %s (valid values: %s)" % - (opt, permission, ", ".join(permissions))) - except: - raise OptionValueError("option %s: invalid S3 ACL format: %r" % (opt, value)) - -class OptionAll(OptionMimeType, OptionS3ACL): - TYPE_CHECKER = copy(Option.TYPE_CHECKER) - TYPE_CHECKER["mimetype"] = OptionMimeType.check_mimetype - TYPE_CHECKER["s3acl"] = OptionS3ACL.check_s3acl - TYPES = Option.TYPES + ("mimetype", "s3acl") - -class MyHelpFormatter(IndentedHelpFormatter): - def format_epilog(self, epilog): - if epilog: - return "\n" + epilog + "\n" - else: - return "" - -def main(): - global cfg - - commands_list = 
get_commands_list() - commands = {} - - ## Populate "commands" from "commands_list" - for cmd in commands_list: - if cmd.has_key("cmd"): - commands[cmd["cmd"]] = cmd - - default_verbosity = Config().verbosity - optparser = OptionParser(option_class=OptionAll, formatter=MyHelpFormatter()) - #optparser.disable_interspersed_args() - - config_file = None - if os.getenv("HOME"): - config_file = os.path.join(os.getenv("HOME"), ".s3cfg") - elif os.name == "nt" and os.getenv("USERPROFILE"): - config_file = os.path.join(os.getenv("USERPROFILE").decode('mbcs'), "Application Data", "s3cmd.ini") - - preferred_encoding = locale.getpreferredencoding() or "UTF-8" - - optparser.set_defaults(encoding = preferred_encoding) - optparser.set_defaults(config = config_file) - optparser.set_defaults(verbosity = default_verbosity) - - optparser.add_option( "--configure", dest="run_configure", action="store_true", help="Invoke interactive (re)configuration tool. Optionally use as '--configure s3://come-bucket' to test access to a specific bucket instead of attempting to list them all.") - optparser.add_option("-c", "--config", dest="config", metavar="FILE", help="Config file name. Defaults to %default") - optparser.add_option( "--dump-config", dest="dump_config", action="store_true", help="Dump current configuration after parsing config files and command line options and exit.") - optparser.add_option( "--access_key", dest="access_key", help="AWS Access Key") - optparser.add_option( "--secret_key", dest="secret_key", help="AWS Secret Key") - - optparser.add_option("-n", "--dry-run", dest="dry_run", action="store_true", help="Only show what should be uploaded or downloaded but don't actually do it. 
May still perform S3 requests to get bucket listings and other information though (only for file transfer commands)") - - optparser.add_option("-e", "--encrypt", dest="encrypt", action="store_true", help="Encrypt files before uploading to S3.") - optparser.add_option( "--no-encrypt", dest="encrypt", action="store_false", help="Don't encrypt files.") - optparser.add_option("-f", "--force", dest="force", action="store_true", help="Force overwrite and other dangerous operations.") - optparser.add_option( "--continue", dest="get_continue", action="store_true", help="Continue getting a partially downloaded file (only for [get] command).") - optparser.add_option( "--skip-existing", dest="skip_existing", action="store_true", help="Skip over files that exist at the destination (only for [get] and [sync] commands).") - optparser.add_option("-r", "--recursive", dest="recursive", action="store_true", help="Recursive upload, download or removal.") - optparser.add_option( "--check-md5", dest="check_md5", action="store_true", help="Check MD5 sums when comparing files for [sync]. (default)") - optparser.add_option( "--no-check-md5", dest="check_md5", action="store_false", help="Do not check MD5 sums when comparing files for [sync]. Only size will be compared. May significantly speed up transfer but may also miss some changed files.") - optparser.add_option("-P", "--acl-public", dest="acl_public", action="store_true", help="Store objects with ACL allowing read for anyone.") - optparser.add_option( "--acl-private", dest="acl_public", action="store_false", help="Store objects with default ACL allowing access for you only.") - optparser.add_option( "--acl-grant", dest="acl_grants", type="s3acl", action="append", metavar="PERMISSION:EMAIL or USER_CANONICAL_ID", help="Grant stated permission to a given amazon user. 
Permission is one of: read, write, read_acp, write_acp, full_control, all") - optparser.add_option( "--acl-revoke", dest="acl_revokes", type="s3acl", action="append", metavar="PERMISSION:USER_CANONICAL_ID", help="Revoke stated permission for a given amazon user. Permission is one of: read, write, read_acp, wr ite_acp, full_control, all") - - optparser.add_option( "--delete-removed", dest="delete_removed", action="store_true", help="Delete remote objects with no corresponding local file [sync]") - optparser.add_option( "--no-delete-removed", dest="delete_removed", action="store_false", help="Don't delete remote objects.") - optparser.add_option( "--delete-after", dest="delete_after", action="store_true", help="Perform deletes after new uploads [sync]") - optparser.add_option( "--delay-updates", dest="delay_updates", action="store_true", help="Put all updated files into place at end [sync]") - optparser.add_option( "--add-destination", dest="additional_destinations", action="append", help="Additional destination for parallel uploads, in addition to last arg. May be repeated.") - optparser.add_option( "--delete-after-fetch", dest="delete_after_fetch", action="store_true", help="Delete remote objects after fetching to local file (only for [get] and [sync] commands).") - optparser.add_option("-p", "--preserve", dest="preserve_attrs", action="store_true", help="Preserve filesystem attributes (mode, ownership, timestamps). 
Default for [sync] command.") - optparser.add_option( "--no-preserve", dest="preserve_attrs", action="store_false", help="Don't store FS attributes") - optparser.add_option( "--exclude", dest="exclude", action="append", metavar="GLOB", help="Filenames and paths matching GLOB will be excluded from sync") - optparser.add_option( "--exclude-from", dest="exclude_from", action="append", metavar="FILE", help="Read --exclude GLOBs from FILE") - optparser.add_option( "--rexclude", dest="rexclude", action="append", metavar="REGEXP", help="Filenames and paths matching REGEXP (regular expression) will be excluded from sync") - optparser.add_option( "--rexclude-from", dest="rexclude_from", action="append", metavar="FILE", help="Read --rexclude REGEXPs from FILE") - optparser.add_option( "--include", dest="include", action="append", metavar="GLOB", help="Filenames and paths matching GLOB will be included even if previously excluded by one of --(r)exclude(-from) patterns") - optparser.add_option( "--include-from", dest="include_from", action="append", metavar="FILE", help="Read --include GLOBs from FILE") - optparser.add_option( "--rinclude", dest="rinclude", action="append", metavar="REGEXP", help="Same as --include but uses REGEXP (regular expression) instead of GLOB") - optparser.add_option( "--rinclude-from", dest="rinclude_from", action="append", metavar="FILE", help="Read --rinclude REGEXPs from FILE") - - optparser.add_option( "--bucket-location", dest="bucket_location", help="Datacentre to create bucket in. As of now the datacenters are: US (default), EU, ap-northeast-1, ap-southeast-1, sa-east-1, us-west-1 and us-west-2") - optparser.add_option( "--reduced-redundancy", "--rr", dest="reduced_redundancy", action="store_true", help="Store object with 'Reduced redundancy'. Lower per-GB price. 
[put, cp, mv]") - - optparser.add_option( "--access-logging-target-prefix", dest="log_target_prefix", help="Target prefix for access logs (S3 URI) (for [cfmodify] and [accesslog] commands)") - optparser.add_option( "--no-access-logging", dest="log_target_prefix", action="store_false", help="Disable access logging (for [cfmodify] and [accesslog] commands)") - - optparser.add_option( "--default-mime-type", dest="default_mime_type", action="store_true", help="Default MIME-type for stored objects. Application default is binary/octet-stream.") - optparser.add_option("-M", "--guess-mime-type", dest="guess_mime_type", action="store_true", help="Guess MIME-type of files by their extension or mime magic. Fall back to default MIME-Type as specified by --default-mime-type option") - optparser.add_option( "--no-guess-mime-type", dest="guess_mime_type", action="store_false", help="Don't guess MIME-type and use the default type instead.") - optparser.add_option("-m", "--mime-type", dest="mime_type", type="mimetype", metavar="MIME/TYPE", help="Force MIME-type. Override both --default-mime-type and --guess-mime-type.") - - optparser.add_option( "--add-header", dest="add_header", action="append", metavar="NAME:VALUE", help="Add a given HTTP header to the upload request. Can be used multiple times. For instance set 'Expires' or 'Cache-Control' headers (or both) using this options if you like.") - - optparser.add_option( "--encoding", dest="encoding", metavar="ENCODING", help="Override autodetected terminal and filesystem encoding (character set). Autodetected: %s" % preferred_encoding) - optparser.add_option( "--add-encoding-exts", dest="add_encoding_exts", metavar="EXTENSIONs", help="Add encoding to these comma delimited extensions i.e. (css,js,html) when uploading to S3 )") - optparser.add_option( "--verbatim", dest="urlencoding_mode", action="store_const", const="verbatim", help="Use the S3 name as given on the command line. No pre-processing, encoding, etc. 
Use with caution!") - - optparser.add_option( "--disable-multipart", dest="enable_multipart", action="store_false", help="Disable multipart upload on files bigger than --multipart-chunk-size-mb") - optparser.add_option( "--multipart-chunk-size-mb", dest="multipart_chunk_size_mb", type="int", action="store", metavar="SIZE", help="Size of each chunk of a multipart upload. Files bigger than SIZE are automatically uploaded as multithreaded-multipart, smaller files are uploaded using the traditional method. SIZE is in Mega-Bytes, default chunk size is %defaultMB, minimum allowed chunk size is 5MB, maximum is 5GB.") - - optparser.add_option( "--list-md5", dest="list_md5", action="store_true", help="Include MD5 sums in bucket listings (only for 'ls' command).") - optparser.add_option("-H", "--human-readable-sizes", dest="human_readable_sizes", action="store_true", help="Print sizes in human readable form (eg 1kB instead of 1234).") - - optparser.add_option( "--ws-index", dest="website_index", action="store", help="Name of error-document (only for [ws-create] command)") - optparser.add_option( "--ws-error", dest="website_error", action="store", help="Name of index-document (only for [ws-create] command)") - - optparser.add_option( "--progress", dest="progress_meter", action="store_true", help="Display progress meter (default on TTY).") - optparser.add_option( "--no-progress", dest="progress_meter", action="store_false", help="Don't display progress meter (default on non-TTY).") - optparser.add_option( "--enable", dest="enable", action="store_true", help="Enable given CloudFront distribution (only for [cfmodify] command)") - optparser.add_option( "--disable", dest="enable", action="store_false", help="Enable given CloudFront distribution (only for [cfmodify] command)") - optparser.add_option( "--cf-invalidate", dest="invalidate_on_cf", action="store_true", help="Invalidate the uploaded filed in CloudFront. 
Also see [cfinval] command.") - # joseprio: adding options to invalidate the default index and the default - # index root - optparser.add_option( "--cf-invalidate-default-index", dest="invalidate_default_index_on_cf", action="store_true", help="When using Custom Origin and S3 static website, invalidate the default index file.") - optparser.add_option( "--cf-no-invalidate-default-index-root", dest="invalidate_default_index_root_on_cf", action="store_false", help="When using Custom Origin and S3 static website, don't invalidate the path to the default index file.") - optparser.add_option( "--cf-add-cname", dest="cf_cnames_add", action="append", metavar="CNAME", help="Add given CNAME to a CloudFront distribution (only for [cfcreate] and [cfmodify] commands)") - optparser.add_option( "--cf-remove-cname", dest="cf_cnames_remove", action="append", metavar="CNAME", help="Remove given CNAME from a CloudFront distribution (only for [cfmodify] command)") - optparser.add_option( "--cf-comment", dest="cf_comment", action="store", metavar="COMMENT", help="Set COMMENT for a given CloudFront distribution (only for [cfcreate] and [cfmodify] commands)") - optparser.add_option( "--cf-default-root-object", dest="cf_default_root_object", action="store", metavar="DEFAULT_ROOT_OBJECT", help="Set the default root object to return when no object is specified in the URL. Use a relative path, i.e. default/index.html instead of /default/index.html or s3://bucket/default/index.html (only for [cfcreate] and [cfmodify] commands)") - optparser.add_option("-v", "--verbose", dest="verbosity", action="store_const", const=logging.INFO, help="Enable verbose output.") - optparser.add_option("-d", "--debug", dest="verbosity", action="store_const", const=logging.DEBUG, help="Enable debug output.") - optparser.add_option( "--version", dest="show_version", action="store_true", help="Show s3cmd version (%s) and exit." 
% (PkgInfo.version)) - optparser.add_option("-F", "--follow-symlinks", dest="follow_symlinks", action="store_true", default=False, help="Follow symbolic links as if they are regular files") - optparser.add_option( "--cache-file", dest="cache_file", action="store", default="", metavar="FILE", help="Cache FILE containing local source MD5 values") - optparser.add_option("-q", "--quiet", dest="quiet", action="store_true", default=False, help="Silence output on stdout") - - optparser.set_usage(optparser.usage + " COMMAND [parameters]") - optparser.set_description('S3cmd is a tool for managing objects in '+ - 'Amazon S3 storage. It allows for making and removing '+ - '"buckets" and uploading, downloading and removing '+ - '"objects" from these buckets.') - optparser.epilog = format_commands(optparser.get_prog_name(), commands_list) - optparser.epilog += ("\nFor more informations see the progect homepage:\n%s\n" % PkgInfo.url) - optparser.epilog += ("\nConsider a donation if you have found s3cmd useful:\n%s/donate\n" % PkgInfo.url) - - (options, args) = optparser.parse_args() - - ## Some mucking with logging levels to enable - ## debugging/verbose output for config file parser on request - logging.basicConfig(level=options.verbosity, - format='%(levelname)s: %(message)s', - stream = sys.stderr) - - if options.show_version: - output(u"s3cmd version %s" % PkgInfo.version) - sys.exit(0) - - if options.quiet: - try: - f = open("/dev/null", "w") - sys.stdout.close() - sys.stdout = f - except IOError: - warning(u"Unable to open /dev/null: --quiet disabled.") - - ## Now finally parse the config file - if not options.config: - error(u"Can't find a config file. 
Please use --config option.") - sys.exit(1) - - try: - cfg = Config(options.config) - except IOError, e: - if options.run_configure: - cfg = Config() - else: - error(u"%s: %s" % (options.config, e.strerror)) - error(u"Configuration file not available.") - error(u"Consider using --configure parameter to create one.") - sys.exit(1) - - ## And again some logging level adjustments - ## according to configfile and command line parameters - if options.verbosity != default_verbosity: - cfg.verbosity = options.verbosity - logging.root.setLevel(cfg.verbosity) - - ## Default to --progress on TTY devices, --no-progress elsewhere - ## Can be overriden by actual --(no-)progress parameter - cfg.update_option('progress_meter', sys.stdout.isatty()) - - ## Unsupported features on Win32 platform - if os.name == "nt": - if cfg.preserve_attrs: - error(u"Option --preserve is not yet supported on MS Windows platform. Assuming --no-preserve.") - cfg.preserve_attrs = False - if cfg.progress_meter: - error(u"Option --progress is not yet supported on MS Windows platform. 
Assuming --no-progress.") - cfg.progress_meter = False - - ## Pre-process --add-header's and put them to Config.extra_headers SortedDict() - if options.add_header: - for hdr in options.add_header: - try: - key, val = hdr.split(":", 1) - except ValueError: - raise ParameterError("Invalid header format: %s" % hdr) - key_inval = re.sub("[a-zA-Z0-9-.]", "", key) - if key_inval: - key_inval = key_inval.replace(" ", "") - key_inval = key_inval.replace("\t", "") - raise ParameterError("Invalid character(s) in header name '%s': \"%s\"" % (key, key_inval)) - debug(u"Updating Config.Config extra_headers[%s] -> %s" % (key.strip(), val.strip())) - cfg.extra_headers[key.strip()] = val.strip() - - ## --acl-grant/--acl-revoke arguments are pre-parsed by OptionS3ACL() - if options.acl_grants: - for grant in options.acl_grants: - cfg.acl_grants.append(grant) - - if options.acl_revokes: - for grant in options.acl_revokes: - cfg.acl_revokes.append(grant) - - ## Process --(no-)check-md5 - if options.check_md5 == False: - try: - cfg.sync_checks.remove("md5") - except Exception: - pass - if options.check_md5 == True and cfg.sync_checks.count("md5") == 0: - cfg.sync_checks.append("md5") - - ## Update Config with other parameters - for option in cfg.option_list(): - try: - if getattr(options, option) != None: - debug(u"Updating Config.Config %s -> %s" % (option, getattr(options, option))) - cfg.update_option(option, getattr(options, option)) - except AttributeError: - ## Some Config() options are not settable from command line - pass - - ## Special handling for tri-state options (True, False, None) - cfg.update_option("enable", options.enable) - cfg.update_option("acl_public", options.acl_public) - - ## Check multipart chunk constraints - if cfg.multipart_chunk_size_mb < MultiPartUpload.MIN_CHUNK_SIZE_MB: - raise ParameterError("Chunk size %d MB is too small, must be >= %d MB. 
Please adjust --multipart-chunk-size-mb" % (cfg.multipart_chunk_size_mb, MultiPartUpload.MIN_CHUNK_SIZE_MB)) - if cfg.multipart_chunk_size_mb > MultiPartUpload.MAX_CHUNK_SIZE_MB: - raise ParameterError("Chunk size %d MB is too large, must be <= %d MB. Please adjust --multipart-chunk-size-mb" % (cfg.multipart_chunk_size_mb, MultiPartUpload.MAX_CHUNK_SIZE_MB)) - - ## CloudFront's cf_enable and Config's enable share the same --enable switch - options.cf_enable = options.enable - - ## CloudFront's cf_logging and Config's log_target_prefix share the same --log-target-prefix switch - options.cf_logging = options.log_target_prefix - - ## Update CloudFront options if some were set - for option in CfCmd.options.option_list(): - try: - if getattr(options, option) != None: - debug(u"Updating CloudFront.Cmd %s -> %s" % (option, getattr(options, option))) - CfCmd.options.update_option(option, getattr(options, option)) - except AttributeError: - ## Some CloudFront.Cmd.Options() options are not settable from command line - pass - - if options.additional_destinations: - cfg.additional_destinations = options.additional_destinations - - ## Set output and filesystem encoding for printing out filenames. 
- sys.stdout = codecs.getwriter(cfg.encoding)(sys.stdout, "replace") - sys.stderr = codecs.getwriter(cfg.encoding)(sys.stderr, "replace") - - ## Process --exclude and --exclude-from - patterns_list, patterns_textual = process_patterns(options.exclude, options.exclude_from, is_glob = True, option_txt = "exclude") - cfg.exclude.extend(patterns_list) - cfg.debug_exclude.update(patterns_textual) - - ## Process --rexclude and --rexclude-from - patterns_list, patterns_textual = process_patterns(options.rexclude, options.rexclude_from, is_glob = False, option_txt = "rexclude") - cfg.exclude.extend(patterns_list) - cfg.debug_exclude.update(patterns_textual) - - ## Process --include and --include-from - patterns_list, patterns_textual = process_patterns(options.include, options.include_from, is_glob = True, option_txt = "include") - cfg.include.extend(patterns_list) - cfg.debug_include.update(patterns_textual) - - ## Process --rinclude and --rinclude-from - patterns_list, patterns_textual = process_patterns(options.rinclude, options.rinclude_from, is_glob = False, option_txt = "rinclude") - cfg.include.extend(patterns_list) - cfg.debug_include.update(patterns_textual) - - ## Set socket read()/write() timeout - socket.setdefaulttimeout(cfg.socket_timeout) - - if cfg.encrypt and cfg.gpg_passphrase == "": - error(u"Encryption requested but no passphrase set in config file.") - error(u"Please re-run 's3cmd --configure' and supply it.") - sys.exit(1) - - if options.dump_config: - cfg.dump_config(sys.stdout) - sys.exit(0) - - if options.run_configure: - # 'args' may contain the test-bucket URI - run_configure(options.config, args) - sys.exit(0) - - if len(args) < 1: - error(u"Missing command. 
Please run with --help for more information.") - sys.exit(1) - - ## Unicodise all remaining arguments: - args = [unicodise(arg) for arg in args] - - command = args.pop(0) - try: - debug(u"Command: %s" % commands[command]["cmd"]) - ## We must do this lookup in extra step to - ## avoid catching all KeyError exceptions - ## from inner functions. - cmd_func = commands[command]["func"] - except KeyError, e: - error(u"Invalid command: %s" % e) - sys.exit(1) - - if len(args) < commands[command]["argc"]: - error(u"Not enough parameters for command '%s'" % command) - sys.exit(1) - - try: - cmd_func(args) - except S3Error, e: - error(u"S3 error: %s" % e) - sys.exit(1) - -def report_exception(e): - sys.stderr.write(""" -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - An unexpected error has occurred. - Please report the following lines to: - s3tools-bugs@lists.sourceforge.net -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - -""") - tb = traceback.format_exc(sys.exc_info()) - e_class = str(e.__class__) - e_class = e_class[e_class.rfind(".")+1 : -2] - sys.stderr.write(u"Problem: %s: %s\n" % (e_class, e)) - try: - sys.stderr.write("S3cmd: %s\n" % PkgInfo.version) - except NameError: - sys.stderr.write("S3cmd: unknown version. Module import problem?\n") - sys.stderr.write("\n") - sys.stderr.write(unicode(tb, errors="replace")) - - if type(e) == ImportError: - sys.stderr.write("\n") - sys.stderr.write("Your sys.path contains these entries:\n") - for path in sys.path: - sys.stderr.write(u"\t%s\n" % path) - sys.stderr.write("Now the question is where have the s3cmd modules been installed?\n") - - sys.stderr.write(""" -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - An unexpected error has occurred. - Please report the above lines to: - s3tools-bugs@lists.sourceforge.net -!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
-""") - -if __name__ == '__main__': - try: - ## Our modules - ## Keep them in try/except block to - ## detect any syntax errors in there - from S3.Exceptions import * - from S3 import PkgInfo - from S3.S3 import S3 - from S3.Config import Config - from S3.SortedDict import SortedDict - from S3.FileDict import FileDict - from S3.S3Uri import S3Uri - from S3 import Utils - from S3.Utils import * - from S3.Progress import Progress - from S3.CloudFront import Cmd as CfCmd - from S3.CloudFront import CloudFront - from S3.FileLists import * - from S3.MultiPart import MultiPartUpload - - main() - sys.exit(0) - - except ImportError, e: - report_exception(e) - sys.exit(1) - - except ParameterError, e: - error(u"Parameter problem: %s" % e) - sys.exit(1) - - except SystemExit, e: - sys.exit(e.code) - - except KeyboardInterrupt: - sys.stderr.write("See ya!\n") - sys.exit(1) - - except Exception, e: - report_exception(e) - sys.exit(1) - -# vim:et:ts=4:sts=4:ai diff --git a/requirements.txt b/requirements.txt index 0f29967..9b3e380 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ Flask==0.10.1 Jinja2==2.7 MarkupSafe==0.18 Werkzeug==0.9.3 +boto==2.13.3 itsdangerous==0.22 paramiko==1.10.1 ply==3.4