Browse Source

use common fablib; use timestamp as default version; commit/push build directory; custom banner on config.json

pull/512/head
jywsn 11 years ago
parent
commit
fb0ab39f06
  1. 22
      config.json
  2. 29
      fabfile.py
  3. 224
      fabfile/S3/ACL.py
  4. 92
      fabfile/S3/AccessLog.py
  5. 42
      fabfile/S3/BidirMap.py
  6. 773
      fabfile/S3/CloudFront.py
  7. 294
      fabfile/S3/Config.py
  8. 71
      fabfile/S3/ConnMan.py
  9. 88
      fabfile/S3/Exceptions.py
  10. 53
      fabfile/S3/FileDict.py
  11. 517
      fabfile/S3/FileLists.py
  12. 53
      fabfile/S3/HashCache.py
  13. 137
      fabfile/S3/MultiPart.py
  14. 14
      fabfile/S3/PkgInfo.py
  15. 173
      fabfile/S3/Progress.py
  16. 979
      fabfile/S3/S3.py
  17. 223
      fabfile/S3/S3Uri.py
  18. 178
      fabfile/S3/SimpleDB.py
  19. 66
      fabfile/S3/SortedDict.py
  20. 462
      fabfile/S3/Utils.py
  21. 0
      fabfile/S3/__init__.py
  22. 560
      fabfile/__init__.py
  23. 2116
      fabfile/s3cmd
  24. 1
      requirements.txt

22
config.json

@ -135,9 +135,27 @@
], ],
// //
// banner-ize files // banner-ize files
// input: list of file paths // input: list of objects specifying inputs
// @src: source file/directory
// @regex: regular expression to match files (if @src is directory)
// @template: template to use for banner (optional)
// //
"banner": [ "build/js", "build/css" ] "banner": [
{
"src": "build",
"regex": "(js|css)/.*\\.(css|js)$",
"template": [
"/*",
" TimelineJS - ver. %(version)s - %(date)s",
" Copyright (c) 2012-2013 Northwestern University",
" a project of the Northwestern University Knight Lab, originally created by Zach Wise",
" https://github.com/NUKnightLab/TimelineJS",
" This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.",
" If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.",
"*/"
]
}
]
}, },
// //
// stage // stage

29
fabfile.py vendored

@ -0,0 +1,29 @@
from os.path import abspath, basename, dirname, join
import sys
from fabric.api import env
#
# Project-specific settings, alter as needed
#
# Commented-out default would derive the name from the containing directory;
# it is overridden with an explicit value below.
# env.project_name = basename(dirname(__file__))
env.project_name = 'TimelineJS'  # Fabric env setting consumed by fablib tasks
#
# Add paths
#
def add_paths(*args):
    """Append each given path to sys.path, skipping ones already present."""
    for path in args:
        if path not in sys.path:
            sys.path.append(path)
# Make the project, its parent "repos" directory and the shared fablib
# checkout importable before "from fablib import *" below.
project_path = dirname(abspath(__file__))
repos_path = dirname(project_path)
# assumes fablib is checked out as a sibling repository -- TODO confirm
fablib_path = join(repos_path, 'fablib')
add_paths(project_path, repos_path, fablib_path)
#
# Import from fablib
#
from fablib import *

224
fabfile/S3/ACL.py

@ -1,224 +0,0 @@
## Amazon S3 - Access Control List representation
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
from Utils import getTreeFromXml
try:
import xml.etree.ElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
class Grantee(object):
    """One ACL grant: the grantee's identity plus the permission they hold."""
    ALL_USERS_URI = "http://acs.amazonaws.com/groups/global/AllUsers"
    LOG_DELIVERY_URI = "http://acs.amazonaws.com/groups/s3/LogDelivery"

    def __init__(self):
        # xsi_type: "CanonicalUser", "AmazonCustomerByEmail" or "Group"
        self.xsi_type = None
        # tag/name: identifying element ("ID", "EmailAddress", "URI") and its text
        self.tag = None
        self.name = None
        self.display_name = None
        self.permission = None

    def __repr__(self):
        return 'Grantee("%s", "%s", "%s")' % (self.tag, self.name, self.permission)

    def isAllUsers(self):
        # the anonymous "AllUsers" group is identified by a well-known URI
        return self.tag == "URI" and self.name == Grantee.ALL_USERS_URI

    def isAnonRead(self):
        # FULL_CONTROL implies READ
        return self.isAllUsers() and self.permission in ("READ", "FULL_CONTROL")

    def getElement(self):
        """Serialise this grant back into a <Grant> Element."""
        grant_el = ET.Element("Grant")
        grantee_el = ET.SubElement(grant_el, "Grantee", {
            'xmlns:xsi' : 'http://www.w3.org/2001/XMLSchema-instance',
            'xsi:type' : self.xsi_type
        })
        ET.SubElement(grantee_el, self.tag).text = self.name
        ET.SubElement(grant_el, "Permission").text = self.permission
        return grant_el
class GranteeAnonRead(Grantee):
    """Pre-built grant giving READ to the AllUsers group (public read)."""
    def __init__(self):
        Grantee.__init__(self)
        self.permission = "READ"
        self.name = Grantee.ALL_USERS_URI
        self.tag = "URI"
        self.xsi_type = "Group"
class GranteeLogDelivery(Grantee):
    """Pre-built grant for the S3 LogDelivery group.

    permission must be either READ_ACP or WRITE.
    """
    def __init__(self, permission):
        Grantee.__init__(self)
        self.permission = permission
        self.name = Grantee.LOG_DELIVERY_URI
        self.tag = "URI"
        self.xsi_type = "Group"
class ACL(object):
    """Parse, query and serialise an S3 AccessControlPolicy document."""
    EMPTY_ACL = "<AccessControlPolicy><Owner><ID></ID></Owner><AccessControlList></AccessControlList></AccessControlPolicy>"
    def __init__(self, xml = None):
        # xml: raw <AccessControlPolicy> string; None/empty -> empty policy
        if not xml:
            xml = ACL.EMPTY_ACL
        self.grantees = []
        self.owner_id = ""
        self.owner_nick = ""
        tree = getTreeFromXml(xml)
        self.parseOwner(tree)
        self.parseGrants(tree)
    def parseOwner(self, tree):
        # findtext() yields None when the element is absent
        self.owner_id = tree.findtext(".//Owner//ID")
        self.owner_nick = tree.findtext(".//Owner//DisplayName")
    def parseGrants(self, tree):
        # Build one Grantee object per <Grant> element in the policy
        for grant in tree.findall(".//Grant"):
            grantee = Grantee()
            g = grant.find(".//Grantee")
            # the xsi:type attribute is namespace-expanded by ElementTree
            grantee.xsi_type = g.attrib['{http://www.w3.org/2001/XMLSchema-instance}type']
            grantee.permission = grant.find('Permission').text
            for el in g:
                if el.tag == "DisplayName":
                    grantee.display_name = el.text
                else:
                    # remaining child (ID / EmailAddress / URI) names the grantee
                    grantee.tag = el.tag
                    grantee.name = el.text
            self.grantees.append(grantee)
    def getGrantList(self):
        """Return grants as [{'grantee': ..., 'permission': ...}, ...]."""
        acl = []
        for grantee in self.grantees:
            if grantee.display_name:
                user = grantee.display_name
            elif grantee.isAllUsers():
                user = "*anon*"
            else:
                user = grantee.name
            acl.append({'grantee': user, 'permission': grantee.permission})
        return acl
    def getOwner(self):
        return { 'id' : self.owner_id, 'nick' : self.owner_nick }
    def isAnonRead(self):
        # True when any grant gives READ (or FULL_CONTROL) to AllUsers
        for grantee in self.grantees:
            if grantee.isAnonRead():
                return True
        return False
    def grantAnonRead(self):
        # Idempotent: only appends the AllUsers/READ grant when absent
        if not self.isAnonRead():
            self.appendGrantee(GranteeAnonRead())
    def revokeAnonRead(self):
        self.grantees = [g for g in self.grantees if not g.isAnonRead()]
    def appendGrantee(self, grantee):
        self.grantees.append(grantee)
    def hasGrant(self, name, permission):
        # Case-insensitive on name and permission; FULL_CONTROL implies all.
        # NOTE(review): assumes every grantee has a non-None name -- confirm.
        name = name.lower()
        permission = permission.upper()
        for grantee in self.grantees:
            if grantee.name.lower() == name:
                if grantee.permission == "FULL_CONTROL":
                    return True
                elif grantee.permission.upper() == permission:
                    return True
        return False;
    def grant(self, name, permission):
        """Add a grant for *name* ("ALL" is treated as FULL_CONTROL)."""
        if self.hasGrant(name, permission):
            return
        name = name.lower()
        permission = permission.upper()
        if "ALL" == permission:
            permission = "FULL_CONTROL"
        if "FULL_CONTROL" == permission:
            # FULL_CONTROL supersedes any narrower grants for this name
            self.revoke(name, "ALL")
        grantee = Grantee()
        grantee.name = name
        grantee.permission = permission
        if name.find('@') <= -1: # ultra lame attempt to differenciate emails id from canonical ids
            grantee.xsi_type = "CanonicalUser"
            grantee.tag = "ID"
        else:
            grantee.xsi_type = "AmazonCustomerByEmail"
            grantee.tag = "EmailAddress"
        self.appendGrantee(grantee)
    def revoke(self, name, permission):
        """Remove grants for *name*; permission "ALL" removes every one."""
        name = name.lower()
        permission = permission.upper()
        if "ALL" == permission:
            self.grantees = [g for g in self.grantees if not g.name.lower() == name]
        else:
            self.grantees = [g for g in self.grantees if not (g.name.lower() == name and g.permission.upper() == permission)]
    def __str__(self):
        # Serialise back into AccessControlPolicy XML
        tree = getTreeFromXml(ACL.EMPTY_ACL)
        tree.attrib['xmlns'] = "http://s3.amazonaws.com/doc/2006-03-01/"
        owner = tree.find(".//Owner//ID")
        owner.text = self.owner_id
        acl = tree.find(".//AccessControlList")
        for grantee in self.grantees:
            acl.append(grantee.getElement())
        return ET.tostring(tree)
if __name__ == "__main__":
    # Manual self-test: parse a sample policy, revoke then re-grant anonymous
    # READ, and dump the resulting XML. Uses Python 2 print statements.
    xml = """<?xml version="1.0" encoding="UTF-8"?>
<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<Owner>
    <ID>12345678901234567890</ID>
    <DisplayName>owner-nickname</DisplayName>
</Owner>
<AccessControlList>
    <Grant>
        <Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser">
            <ID>12345678901234567890</ID>
            <DisplayName>owner-nickname</DisplayName>
        </Grantee>
        <Permission>FULL_CONTROL</Permission>
    </Grant>
    <Grant>
        <Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group">
            <URI>http://acs.amazonaws.com/groups/global/AllUsers</URI>
        </Grantee>
        <Permission>READ</Permission>
    </Grant>
</AccessControlList>
</AccessControlPolicy>
"""
    acl = ACL(xml)
    print "Grants:", acl.getGrantList()
    acl.revokeAnonRead()
    print "Grants:", acl.getGrantList()
    acl.grantAnonRead()
    print "Grants:", acl.getGrantList()
    print acl
# vim:et:ts=4:sts=4:ai

92
fabfile/S3/AccessLog.py

@ -1,92 +0,0 @@
## Amazon S3 - Access Control List representation
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import S3Uri
from Exceptions import ParameterError
from Utils import getTreeFromXml
from ACL import GranteeAnonRead
try:
import xml.etree.ElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
__all__ = []
class AccessLog(object):
    """Represent and manipulate an S3 <BucketLoggingStatus> document."""
    LOG_DISABLED = "<BucketLoggingStatus></BucketLoggingStatus>"
    LOG_TEMPLATE = "<LoggingEnabled><TargetBucket></TargetBucket><TargetPrefix></TargetPrefix></LoggingEnabled>"

    def __init__(self, xml = None):
        # xml: raw <BucketLoggingStatus> string; defaults to logging disabled
        if not xml:
            xml = self.LOG_DISABLED
        self.tree = getTreeFromXml(xml)
        self.tree.attrib['xmlns'] = "http://doc.s3.amazonaws.com/2006-03-01"

    def isLoggingEnabled(self):
        # BUG FIX: compare against None. An Element with no children is falsy,
        # so the old bool(...) misreported present-but-empty elements as absent.
        return self.tree.find(".//LoggingEnabled") is not None

    def disableLogging(self):
        el = self.tree.find(".//LoggingEnabled")
        if el is not None:
            self.tree.remove(el)

    def enableLogging(self, target_prefix_uri):
        # target_prefix_uri: S3Uri naming the target bucket and key prefix
        el = self.tree.find(".//LoggingEnabled")
        if el is None:
            el = getTreeFromXml(self.LOG_TEMPLATE)
            self.tree.append(el)
        el.find(".//TargetBucket").text = target_prefix_uri.bucket()
        el.find(".//TargetPrefix").text = target_prefix_uri.object()

    def targetPrefix(self):
        """Return the log target as an S3Uri, or "" when logging is off."""
        if self.isLoggingEnabled():
            target_prefix = "s3://%s/%s" % (
                self.tree.find(".//LoggingEnabled//TargetBucket").text,
                self.tree.find(".//LoggingEnabled//TargetPrefix").text)
            return S3Uri.S3Uri(target_prefix)
        else:
            return ""

    def setAclPublic(self, acl_public):
        """Toggle public-read ACL (<TargetGrants>) on the log objects.

        Raises ParameterError when logging is not enabled.
        """
        le = self.tree.find(".//LoggingEnabled")
        if le is None:
            raise ParameterError("Logging not enabled, can't set default ACL for logs")
        tg = le.find(".//TargetGrants")
        if not acl_public:
            if tg is None:
                ## All good, it's not been there
                return
            else:
                le.remove(tg)
        else: # acl_public == True
            anon_read = GranteeAnonRead().getElement()
            # BUG FIX: "is None" instead of truthiness -- an existing empty
            # <TargetGrants> used to be falsy, causing a duplicate element.
            if tg is None:
                tg = ET.SubElement(le, "TargetGrants")
            ## What if TargetGrants already exists? We should check if
            ## AnonRead is there before appending a new one. Later...
            tg.append(anon_read)

    def isAclPublic(self):
        raise NotImplementedError()

    def __str__(self):
        return ET.tostring(self.tree)
# Export the public class
__all__.append("AccessLog")

if __name__ == "__main__":
    # Manual self-test: toggle logging and the public-read ACL on the log
    # target, printing the XML at each step. Uses Python 2 print statements.
    from S3Uri import S3Uri
    log = AccessLog()
    print log
    log.enableLogging(S3Uri("s3://targetbucket/prefix/log-"))
    print log
    log.setAclPublic(True)
    print log
    log.setAclPublic(False)
    print log
    log.disableLogging()
    print log
# vim:et:ts=4:sts=4:ai

42
fabfile/S3/BidirMap.py

@ -1,42 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
class BidirMap(object):
    """A strictly one-to-one (bidirectional) mapping of keys and values."""
    def __init__(self, **mapping):
        # Forward (key->value) and reverse (value->key) indices kept in sync
        self.k2v = {}
        self.v2k = {}
        for key in mapping:
            self.__setitem__(key, mapping[key])

    def __setitem__(self, key, value):
        """Bind key<->value; raise KeyError if value belongs to another key."""
        # BUG FIX: "in" instead of dict.has_key(), which no longer exists on
        # Python 3 (and "in" is equivalent on Python 2).
        if value in self.v2k:
            if self.v2k[value] != key:
                raise KeyError("Value '"+str(value)+"' already in use with key '"+str(self.v2k[value])+"'")
        # Rebinding an existing key drops its old reverse entry first
        try:
            del(self.v2k[self.k2v[key]])
        except KeyError:
            pass
        self.k2v[key] = value
        self.v2k[value] = key

    def __getitem__(self, key):
        return self.k2v[key]

    def __str__(self):
        return self.v2k.__str__()

    def getkey(self, value):
        # Reverse lookup: value -> key
        return self.v2k[value]

    def getvalue(self, key):
        return self.k2v[key]

    def keys(self):
        return [key for key in self.k2v]

    def values(self):
        return [value for value in self.v2k]
# vim:et:ts=4:sts=4:ai

773
fabfile/S3/CloudFront.py

@ -1,773 +0,0 @@
## Amazon CloudFront support
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import sys
import time
import httplib
import random
from datetime import datetime
from logging import debug, info, warning, error
try:
import xml.etree.ElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
from Config import Config
from Exceptions import *
from Utils import getTreeFromXml, appendXmlTextNode, getDictFromTree, dateS3toPython, sign_string, getBucketFromHostname, getHostnameFromBucket
from S3Uri import S3Uri, S3UriS3
from FileLists import fetch_remote_list
# CloudFront REST API version and the base resource path derived from it
cloudfront_api_version = "2010-11-01"
cloudfront_resource = "/%(api_ver)s/distribution" % { 'api_ver' : cloudfront_api_version }
def output(message):
    """Write *message* plus a trailing newline to stdout."""
    sys.stdout.write("%s\n" % message)
def pretty_output(label, message):
    """Print "label:" left-aligned in a 15-character column, then message."""
    padded = ("%s:" % label).ljust(15)
    output("%s %s" % (padded, message))
class DistributionSummary(object):
    """Wrapper around one <DistributionSummary> element of a listing."""
    ## Example:
    ##
    ## <DistributionSummary>
    ##  <Id>1234567890ABC</Id>
    ##  <Status>Deployed</Status>
    ##  <LastModifiedTime>2009-01-16T11:49:02.189Z</LastModifiedTime>
    ##  <DomainName>blahblahblah.cloudfront.net</DomainName>
    ##  <S3Origin>
    ##     <DNSName>example.bucket.s3.amazonaws.com</DNSName>
    ##  </S3Origin>
    ##  <CNAME>cdn.example.com</CNAME>
    ##  <CNAME>img.example.com</CNAME>
    ##  <Comment>What Ever</Comment>
    ##  <Enabled>true</Enabled>
    ## </DistributionSummary>
    def __init__(self, tree):
        # tree: an already-parsed <DistributionSummary> Element, not raw XML
        if tree.tag != "DistributionSummary":
            raise ValueError("Expected <DistributionSummary /> xml, got: <%s />" % tree.tag)
        self.parse(tree)
    def parse(self, tree):
        # Flatten into a dict; normalise 'Enabled' to bool, 'CNAME' to a list
        self.info = getDictFromTree(tree)
        self.info['Enabled'] = (self.info['Enabled'].lower() == "true")
        # NOTE(review): dict.has_key() is Python 2 only
        if self.info.has_key("CNAME") and type(self.info['CNAME']) != list:
            self.info['CNAME'] = [self.info['CNAME']]
    def uri(self):
        # cf:// URI carrying the distribution Id
        return S3Uri("cf://%s" % self.info['Id'])
class DistributionList(object):
    """Parsed <DistributionList> response: flags plus DistributionSummary items."""
    ## Example:
    ##
    ## <DistributionList xmlns="http://cloudfront.amazonaws.com/doc/2010-07-15/">
    ##  <Marker />
    ##  <MaxItems>100</MaxItems>
    ##  <IsTruncated>false</IsTruncated>
    ##  <DistributionSummary>
    ##  ... handled by DistributionSummary() class ...
    ##  </DistributionSummary>
    ## </DistributionList>
    def __init__(self, xml):
        # xml: raw <DistributionList> document string
        tree = getTreeFromXml(xml)
        if tree.tag != "DistributionList":
            raise ValueError("Expected <DistributionList /> xml, got: <%s />" % tree.tag)
        self.parse(tree)
    def parse(self, tree):
        self.info = getDictFromTree(tree)
        ## Normalise some items
        self.info['IsTruncated'] = (self.info['IsTruncated'].lower() == "true")
        # One DistributionSummary wrapper per <DistributionSummary> element
        self.dist_summs = []
        for dist_summ in tree.findall(".//DistributionSummary"):
            self.dist_summs.append(DistributionSummary(dist_summ))
class Distribution(object):
    """Wrapper around a full <Distribution> response document."""
    ## Example:
    ##
    ## <Distribution xmlns="http://cloudfront.amazonaws.com/doc/2010-07-15/">
    ##  <Id>1234567890ABC</Id>
    ##  <Status>InProgress</Status>
    ##  <LastModifiedTime>2009-01-16T13:07:11.319Z</LastModifiedTime>
    ##  <DomainName>blahblahblah.cloudfront.net</DomainName>
    ##  <DistributionConfig>
    ##  ... handled by DistributionConfig() class ...
    ##  </DistributionConfig>
    ## </Distribution>
    def __init__(self, xml):
        # xml: raw <Distribution> document string
        tree = getTreeFromXml(xml)
        if tree.tag != "Distribution":
            raise ValueError("Expected <Distribution /> xml, got: <%s />" % tree.tag)
        self.parse(tree)
    def parse(self, tree):
        self.info = getDictFromTree(tree)
        ## Normalise some items
        self.info['LastModifiedTime'] = dateS3toPython(self.info['LastModifiedTime'])
        # Nested config element is handled by its own wrapper class
        self.info['DistributionConfig'] = DistributionConfig(tree = tree.find(".//DistributionConfig"))
    def uri(self):
        return S3Uri("cf://%s" % self.info['Id'])
class DistributionConfig(object):
    """Mutable distribution configuration; parses and re-serialises the
    <DistributionConfig> element (the only part of a distribution that can
    be modified via the API)."""
    ## Example:
    ##
    ## <DistributionConfig>
    ##  <Origin>somebucket.s3.amazonaws.com</Origin>
    ##  <CallerReference>s3://somebucket/</CallerReference>
    ##  <Comment>http://somebucket.s3.amazonaws.com/</Comment>
    ##  <Enabled>true</Enabled>
    ##  <Logging>
    ##    <Bucket>bu.ck.et</Bucket>
    ##    <Prefix>/cf-somebucket/</Prefix>
    ##  </Logging>
    ## </DistributionConfig>
    EMPTY_CONFIG = "<DistributionConfig><S3Origin><DNSName/></S3Origin><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
    xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
    def __init__(self, xml = None, tree = None):
        # Accepts either a raw XML string or an already-parsed Element
        if xml is None:
            xml = DistributionConfig.EMPTY_CONFIG
        if tree is None:
            tree = getTreeFromXml(xml)
        if tree.tag != "DistributionConfig":
            raise ValueError("Expected <DistributionConfig /> xml, got: <%s />" % tree.tag)
        self.parse(tree)
    def parse(self, tree):
        # Flatten into a dict and normalise optional fields to stable shapes.
        # NOTE(review): dict.has_key() is Python 2 only
        self.info = getDictFromTree(tree)
        self.info['Enabled'] = (self.info['Enabled'].lower() == "true")
        if not self.info.has_key("CNAME"):
            self.info['CNAME'] = []
        if type(self.info['CNAME']) != list:
            self.info['CNAME'] = [self.info['CNAME']]
        self.info['CNAME'] = [cname.lower() for cname in self.info['CNAME']]
        if not self.info.has_key("Comment"):
            self.info['Comment'] = ""
        if not self.info.has_key("DefaultRootObject"):
            self.info['DefaultRootObject'] = ""
        ## Figure out logging - complex node not parsed by getDictFromTree()
        logging_nodes = tree.findall(".//Logging")
        if logging_nodes:
            logging_dict = getDictFromTree(logging_nodes[0])
            logging_dict['Bucket'], success = getBucketFromHostname(logging_dict['Bucket'])
            if not success:
                warning("Logging to unparsable bucket name: %s" % logging_dict['Bucket'])
            # Logging target stored as an s3:// URI, or None when disabled
            self.info['Logging'] = S3UriS3("s3://%(Bucket)s/%(Prefix)s" % logging_dict)
        else:
            self.info['Logging'] = None
    def __str__(self):
        # Serialise back to XML. The API validates element order, hence:
        ## Retain the order of the following calls!
        tree = ET.Element("DistributionConfig")
        tree.attrib['xmlns'] = DistributionConfig.xmlns
        s3org = appendXmlTextNode("S3Origin", '', tree)
        appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
        appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
        for cname in self.info['CNAME']:
            appendXmlTextNode("CNAME", cname.lower(), tree)
        if self.info['Comment']:
            appendXmlTextNode("Comment", self.info['Comment'], tree)
        appendXmlTextNode("Enabled", str(self.info['Enabled']).lower(), tree)
        # don't create a empty DefaultRootObject element as it would result in a MalformedXML error
        if str(self.info['DefaultRootObject']):
            appendXmlTextNode("DefaultRootObject", str(self.info['DefaultRootObject']), tree)
        if self.info['Logging']:
            logging_el = ET.Element("Logging")
            appendXmlTextNode("Bucket", getHostnameFromBucket(self.info['Logging'].bucket()), logging_el)
            appendXmlTextNode("Prefix", self.info['Logging'].object(), logging_el)
            tree.append(logging_el)
        return ET.tostring(tree)
class Invalidation(object):
    """Parsed <Invalidation> response document."""
    ## Example:
    ##
    ## <Invalidation xmlns="http://cloudfront.amazonaws.com/doc/2010-11-01/">
    ##   <Id>id</Id>
    ##   <Status>status</Status>
    ##   <CreateTime>date</CreateTime>
    ##   <InvalidationBatch>
    ##       <Path>/image1.jpg</Path>
    ##       <Path>/image2.jpg</Path>
    ##       <Path>/videos/movie.flv</Path>
    ##       <CallerReference>my-batch</CallerReference>
    ##   </InvalidationBatch>
    ## </Invalidation>
    def __init__(self, xml):
        root = getTreeFromXml(xml)
        if root.tag != "Invalidation":
            raise ValueError("Expected <Invalidation /> xml, got: <%s />" % root.tag)
        self.parse(root)

    def parse(self, tree):
        # Flatten the document into a plain dict
        self.info = getDictFromTree(tree)

    def __str__(self):
        return str(self.info)
class InvalidationList(object):
    """Parsed <InvalidationList> response document."""
    ## Example:
    ##
    ## <InvalidationList>
    ##   <Marker/>
    ##   <NextMarker>Invalidation ID</NextMarker>
    ##   <MaxItems>2</MaxItems>
    ##   <IsTruncated>true</IsTruncated>
    ##   <InvalidationSummary>
    ##     <Id>[Second Invalidation ID]</Id>
    ##     <Status>Completed</Status>
    ##   </InvalidationSummary>
    ##   <InvalidationSummary>
    ##     <Id>[First Invalidation ID]</Id>
    ##     <Status>Completed</Status>
    ##   </InvalidationSummary>
    ## </InvalidationList>
    def __init__(self, xml):
        root = getTreeFromXml(xml)
        if root.tag != "InvalidationList":
            raise ValueError("Expected <InvalidationList /> xml, got: <%s />" % root.tag)
        self.parse(root)

    def parse(self, tree):
        # Flatten the document into a plain dict
        self.info = getDictFromTree(tree)

    def __str__(self):
        return str(self.info)
class InvalidationBatch(object):
    """Build the <InvalidationBatch> body of a CloudFront invalidation."""
    ## Example:
    ##
    ## <InvalidationBatch>
    ##   <Path>/image1.jpg</Path>
    ##   <Path>/image2.jpg</Path>
    ##   <Path>/videos/movie.flv</Path>
    ##   <Path>/sound%20track.mp3</Path>
    ##   <CallerReference>my-batch</CallerReference>
    ## </InvalidationBatch>
    def __init__(self, reference = None, distribution = None, paths = None):
        # reference: explicit CallerReference; generated when not supplied.
        # BUG FIX: 'paths' defaults to None instead of a shared mutable [].
        if paths is None:
            paths = []
        if reference:
            self.reference = reference
        else:
            if not distribution:
                distribution = "0"
            # unique-ish reference: distribution id + timestamp + random suffix
            self.reference = "%s.%s.%s" % (distribution,
                datetime.strftime(datetime.now(),"%Y%m%d%H%M%S"),
                random.randint(1000,9999))
        self.paths = []
        self.add_objects(paths)

    def add_objects(self, paths):
        """Queue additional paths for invalidation."""
        self.paths.extend(paths)

    def get_reference(self):
        return self.reference

    def __str__(self):
        """Serialise to <InvalidationBatch> XML; paths get a leading slash."""
        tree = ET.Element("InvalidationBatch")
        for path in self.paths:
            if len(path) < 1 or path[0] != "/":
                path = "/" + path
            appendXmlTextNode("Path", path, tree)
        appendXmlTextNode("CallerReference", self.reference, tree)
        return ET.tostring(tree)
class CloudFront(object):
    """Thin client for the CloudFront REST API: distribution management
    and object invalidation."""
    operations = {
        "CreateDist" : { 'method' : "POST", 'resource' : "" },
        "DeleteDist" : { 'method' : "DELETE", 'resource' : "/%(dist_id)s" },
        "GetList" : { 'method' : "GET", 'resource' : "" },
        "GetDistInfo" : { 'method' : "GET", 'resource' : "/%(dist_id)s" },
        "GetDistConfig" : { 'method' : "GET", 'resource' : "/%(dist_id)s/config" },
        "SetDistConfig" : { 'method' : "PUT", 'resource' : "/%(dist_id)s/config" },
        "Invalidate" : { 'method' : "POST", 'resource' : "/%(dist_id)s/invalidation" },
        "GetInvalList" : { 'method' : "GET", 'resource' : "/%(dist_id)s/invalidation" },
        "GetInvalInfo" : { 'method' : "GET", 'resource' : "/%(dist_id)s/invalidation/%(request_id)s" },
    }

    ## Maximum attempts of re-issuing failed requests
    _max_retries = 5

    ## Class-wide cache mapping bucket names to cf:// URIs,
    ## filled lazily by get_dist_name_for_bucket()
    dist_list = None

    def __init__(self, config):
        # config: Config object providing credentials, proxy and host names
        self.config = config

    ## --------------------------------------------------
    ## Methods implementing CloudFront API
    ## --------------------------------------------------

    def GetList(self):
        """List all distributions; raises if the listing is truncated."""
        response = self.send_request("GetList")
        response['dist_list'] = DistributionList(response['data'])
        if response['dist_list'].info['IsTruncated']:
            raise NotImplementedError("List is truncated. Ask s3cmd author to add support.")
        ## TODO: handle Truncated
        return response

    def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
        """Create a distribution whose S3 origin is the bucket of *uri*."""
        dist_config = DistributionConfig()
        dist_config.info['Enabled'] = True
        dist_config.info['S3Origin']['DNSName'] = uri.host_name()
        dist_config.info['CallerReference'] = str(uri)
        dist_config.info['DefaultRootObject'] = default_root_object
        if comment == None:
            dist_config.info['Comment'] = uri.public_url()
        else:
            dist_config.info['Comment'] = comment
        for cname in cnames_add:
            if dist_config.info['CNAME'].count(cname) == 0:
                dist_config.info['CNAME'].append(cname)
        if logging:
            dist_config.info['Logging'] = S3UriS3(logging)
        request_body = str(dist_config)
        debug("CreateDistribution(): request_body: %s" % request_body)
        response = self.send_request("CreateDist", body = request_body)
        response['distribution'] = Distribution(response['data'])
        return response

    def ModifyDistribution(self, cfuri, cnames_add = [], cnames_remove = [],
                           comment = None, enabled = None, logging = None,
                           default_root_object = None):
        """Update selected attributes of an existing distribution.

        Only non-None keyword arguments are applied; logging=False disables
        logging, any other non-None value is taken as the target s3:// URI.
        """
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        # Get current dist status (enabled/disabled) and Etag
        info("Checking current status of %s" % cfuri)
        response = self.GetDistConfig(cfuri)
        dc = response['dist_config']
        if enabled != None:
            dc.info['Enabled'] = enabled
        if comment != None:
            dc.info['Comment'] = comment
        if default_root_object != None:
            dc.info['DefaultRootObject'] = default_root_object
        for cname in cnames_add:
            if dc.info['CNAME'].count(cname) == 0:
                dc.info['CNAME'].append(cname)
        for cname in cnames_remove:
            while dc.info['CNAME'].count(cname) > 0:
                dc.info['CNAME'].remove(cname)
        if logging != None:
            if logging == False:
                dc.info['Logging'] = False
            else:
                dc.info['Logging'] = S3UriS3(logging)
        response = self.SetDistConfig(cfuri, dc, response['headers']['etag'])
        return response

    def DeleteDistribution(self, cfuri):
        """Delete a distribution, disabling it first when necessary
        (CloudFront refuses to delete an enabled distribution)."""
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        # Get current dist status (enabled/disabled) and Etag
        info("Checking current status of %s" % cfuri)
        response = self.GetDistConfig(cfuri)
        if response['dist_config'].info['Enabled']:
            info("Distribution is ENABLED. Disabling first.")
            response['dist_config'].info['Enabled'] = False
            response = self.SetDistConfig(cfuri, response['dist_config'],
                                          response['headers']['etag'])
            warning("Waiting for Distribution to become disabled.")
            warning("This may take several minutes, please wait.")
            # Poll until the disable operation is fully deployed
            while True:
                response = self.GetDistInfo(cfuri)
                d = response['distribution']
                if d.info['Status'] == "Deployed" and d.info['Enabled'] == False:
                    info("Distribution is now disabled")
                    break
                warning("Still waiting...")
                time.sleep(10)
        headers = {}
        headers['if-match'] = response['headers']['etag']
        response = self.send_request("DeleteDist", dist_id = cfuri.dist_id(),
                                     headers = headers)
        return response

    def GetDistInfo(self, cfuri):
        """Fetch full <Distribution> info for a cf:// URI."""
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        response = self.send_request("GetDistInfo", dist_id = cfuri.dist_id())
        response['distribution'] = Distribution(response['data'])
        return response

    def GetDistConfig(self, cfuri):
        """Fetch the modifiable DistributionConfig for a cf:// URI."""
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        response = self.send_request("GetDistConfig", dist_id = cfuri.dist_id())
        response['dist_config'] = DistributionConfig(response['data'])
        return response

    def SetDistConfig(self, cfuri, dist_config, etag = None):
        """PUT a DistributionConfig back; fetches the Etag when not given."""
        if etag == None:
            debug("SetDistConfig(): Etag not set. Fetching it first.")
            etag = self.GetDistConfig(cfuri)['headers']['etag']
        debug("SetDistConfig(): Etag = %s" % etag)
        request_body = str(dist_config)
        debug("SetDistConfig(): request_body: %s" % request_body)
        headers = {}
        headers['if-match'] = etag
        response = self.send_request("SetDistConfig", dist_id = cfuri.dist_id(),
                                     body = request_body, headers = headers)
        return response

    def InvalidateObjects(self, uri, paths, default_index_file, invalidate_default_index_on_cf, invalidate_default_index_root_on_cf):
        """Submit an invalidation batch for *paths* on the distribution
        serving *uri* (which may be a cf:// or s3:// URI)."""
        # joseprio: if the user doesn't want to invalidate the default index
        # path, or if the user wants to invalidate the root of the default
        # index, we need to process those paths
        if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
            new_paths = []
            default_index_suffix = '/' + default_index_file
            for path in paths:
                if path.endswith(default_index_suffix) or path == default_index_file:
                    if invalidate_default_index_on_cf:
                        new_paths.append(path)
                    if invalidate_default_index_root_on_cf:
                        new_paths.append(path[:-len(default_index_file)])
                else:
                    new_paths.append(path)
            paths = new_paths

        # uri could be either cf:// or s3:// uri
        cfuri = self.get_dist_name_for_bucket(uri)
        if len(paths) > 999:
            try:
                # BUG FIX: this used to call Utils.mktmpfile(), but this module
                # only imports names *from* Utils -- the resulting NameError
                # was silently swallowed by a bare "except:", so the warnings
                # below never appeared. Use the stdlib tempfile module instead.
                import tempfile
                tmp_file = tempfile.NamedTemporaryFile(mode = "w", prefix = "s3cmd-", delete = False)
                tmp_filename = tmp_file.name
                tmp_file.write("\n".join(paths)+"\n")
                tmp_file.close()
                warning("Request to invalidate %d paths (max 999 supported)" % len(paths))
                warning("All the paths are now saved in: %s" % tmp_filename)
            except Exception:
                pass
            raise ParameterError("Too many paths to invalidate")
        invalbatch = InvalidationBatch(distribution = cfuri.dist_id(), paths = paths)
        debug("InvalidateObjects(): request_body: %s" % invalbatch)
        response = self.send_request("Invalidate", dist_id = cfuri.dist_id(),
                                     body = str(invalbatch))
        response['dist_id'] = cfuri.dist_id()
        # 201 Created carries the <Invalidation> document with the request Id
        if response['status'] == 201:
            inval_info = Invalidation(response['data']).info
            response['request_id'] = inval_info['Id']
        debug("InvalidateObjects(): response: %s" % response)
        return response

    def GetInvalList(self, cfuri):
        """List invalidations for a cf:// URI."""
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        response = self.send_request("GetInvalList", dist_id = cfuri.dist_id())
        response['inval_list'] = InvalidationList(response['data'])
        return response

    def GetInvalInfo(self, cfuri):
        """Fetch the status of one invalidation (URI must carry a request id)."""
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        if cfuri.request_id() is None:
            raise ValueError("Expected CFUri with Request ID")
        response = self.send_request("GetInvalInfo", dist_id = cfuri.dist_id(), request_id = cfuri.request_id())
        response['inval_status'] = Invalidation(response['data'])
        return response

    ## --------------------------------------------------
    ## Low-level methods for handling CloudFront requests
    ## --------------------------------------------------

    def send_request(self, op_name, dist_id = None, request_id = None, body = None, headers = None, retries = _max_retries):
        """Issue one signed request, retrying 5xx responses with back-off."""
        # BUG FIX: 'headers' used to default to a shared mutable {} which this
        # method mutates ('content-type') -- every call now gets a fresh dict.
        if headers is None:
            headers = {}
        operation = self.operations[op_name]
        if body:
            headers['content-type'] = 'text/plain'
        request = self.create_request(operation, dist_id, request_id, headers)
        conn = self.get_connection()
        debug("send_request(): %s %s" % (request['method'], request['resource']))
        conn.request(request['method'], request['resource'], body, request['headers'])
        http_response = conn.getresponse()
        response = {}
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = dict(http_response.getheaders())
        response["data"] = http_response.read()
        conn.close()
        debug("CloudFront: response: %r" % response)
        if response["status"] >= 500:
            e = CloudFrontError(response)
            if retries:
                warning(u"Retrying failed request: %s" % op_name)
                warning(unicode(e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                # BUG FIX: the retry used to call
                #   send_request(op_name, dist_id, body, retries - 1)
                # which passed 'body' into the 'request_id' slot and dropped
                # the real body/headers. Re-issue with all arguments in place.
                return self.send_request(op_name, dist_id = dist_id,
                                         request_id = request_id, body = body,
                                         headers = headers, retries = retries - 1)
            else:
                raise e
        if response["status"] < 200 or response["status"] > 299:
            raise CloudFrontError(response)
        return response

    def create_request(self, operation, dist_id = None, request_id = None, headers = None):
        """Build the resource path, date and auth headers for *operation*."""
        resource = cloudfront_resource + (
            operation['resource'] % { 'dist_id' : dist_id, 'request_id' : request_id })

        if not headers:
            headers = {}

        # CloudFront signs x-amz-date; promote a plain 'date' header if given
        # ('in' instead of Python-2-only dict.has_key())
        if "date" in headers:
            if "x-amz-date" not in headers:
                headers["x-amz-date"] = headers["date"]
            del(headers["date"])

        if "x-amz-date" not in headers:
            headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

        if len(self.config.access_token) > 0:
            # Temporary (role) credentials need refreshing and a token header
            self.config.refresh_role()
            headers['x-amz-security-token'] = self.config.access_token

        signature = self.sign_request(headers)
        headers["Authorization"] = "AWS "+self.config.access_key+":"+signature

        request = {}
        request['resource'] = resource
        request['headers'] = headers
        request['method'] = operation['method']

        return request

    def sign_request(self, headers):
        # CloudFront requests are signed over the x-amz-date value only
        string_to_sign = headers['x-amz-date']
        signature = sign_string(string_to_sign)
        debug(u"CloudFront.sign_request('%s') = %s" % (string_to_sign, signature))
        return signature

    def get_connection(self):
        if self.config.proxy_host != "":
            raise ParameterError("CloudFront commands don't work from behind a HTTP proxy")
        return httplib.HTTPSConnection(self.config.cloudfront_host)

    def _fail_wait(self, retries):
        # Wait a few seconds. The more it fails the more we wait.
        return (self._max_retries - retries + 1) * 3

    def get_dist_name_for_bucket(self, uri):
        """Map an s3:// (or cf://) URI to the cf:// URI of its distribution,
        caching the bucket->distribution table class-wide."""
        if (uri.type == "cf"):
            return uri
        if (uri.type != "s3"):
            # BUG FIX: the message used to interpolate the undefined name 'arg'
            raise ParameterError("CloudFront or S3 URI required instead of: %s" % uri)

        debug("_get_dist_name_for_bucket(%r)" % uri)
        if CloudFront.dist_list is None:
            response = self.GetList()
            CloudFront.dist_list = {}
            for d in response['dist_list'].dist_summs:
                if "S3Origin" in d.info:
                    CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri()
                elif "CustomOrigin" in d.info:
                    # Aral: This used to skip over distributions with CustomOrigin, however, we mustn't
                    #       do this since S3 buckets that are set up as websites use custom origins.
                    #       Thankfully, the custom origin URLs they use start with the URL of the
                    #       S3 bucket. Here, we make use this naming convention to support this use case.
                    distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0]
                    distListIndex = distListIndex[:len(uri.bucket())]
                    CloudFront.dist_list[distListIndex] = d.uri()
                else:
                    # Aral: I'm not sure when this condition will be reached, but keeping it in there.
                    continue
            debug("dist_list: %s" % CloudFront.dist_list)
        try:
            return CloudFront.dist_list[uri.bucket()]
        except Exception as e:
            debug(e)
            raise ParameterError("Unable to translate S3 URI to CloudFront distribution name: %s" % uri)
class Cmd(object):
    """
    Class that implements CloudFront commands
    """

    class Options(object):
        # Defaults for the cf_* command-line options; update_option()
        # overwrites them on the shared Cmd.options instance.
        cf_cnames_add = []
        cf_cnames_remove = []
        cf_comment = None
        cf_enable = None
        cf_logging = None
        cf_default_root_object = None

        def option_list(self):
            # All CloudFront command options share the "cf_" prefix.
            return [opt for opt in dir(self) if opt.startswith("cf_")]

        def update_option(self, option, value):
            setattr(Cmd.options, option, value)

    # Shared options instance mutated by update_option().
    options = Options()

    @staticmethod
    def _parse_args(args):
        # Translate each argument (s3:// bucket or cf:// URI) into the
        # cf:// URI of the matching distribution.
        cf = CloudFront(Config())
        cfuris = []
        for arg in args:
            uri = cf.get_dist_name_for_bucket(S3Uri(arg))
            cfuris.append(uri)
        return cfuris

    @staticmethod
    def info(args):
        # With no arguments: brief listing of all distributions.
        # With URIs: detailed information for each distribution.
        cf = CloudFront(Config())
        if not args:
            response = cf.GetList()
            for d in response['dist_list'].dist_summs:
                if d.info.has_key("S3Origin"):
                    origin = S3UriS3.httpurl_to_s3uri(d.info['S3Origin']['DNSName'])
                elif d.info.has_key("CustomOrigin"):
                    origin = "http://%s/" % d.info['CustomOrigin']['DNSName']
                else:
                    origin = "<unknown>"
                pretty_output("Origin", origin)
                pretty_output("DistId", d.uri())
                pretty_output("DomainName", d.info['DomainName'])
                if d.info.has_key("CNAME"):
                    pretty_output("CNAMEs", ", ".join(d.info['CNAME']))
                pretty_output("Status", d.info['Status'])
                pretty_output("Enabled", d.info['Enabled'])
                output("")
        else:
            cfuris = Cmd._parse_args(args)
            for cfuri in cfuris:
                response = cf.GetDistInfo(cfuri)
                d = response['distribution']
                dc = d.info['DistributionConfig']
                if dc.info.has_key("S3Origin"):
                    origin = S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName'])
                elif dc.info.has_key("CustomOrigin"):
                    origin = "http://%s/" % dc.info['CustomOrigin']['DNSName']
                else:
                    origin = "<unknown>"
                pretty_output("Origin", origin)
                pretty_output("DistId", d.uri())
                pretty_output("DomainName", d.info['DomainName'])
                if dc.info.has_key("CNAME"):
                    pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
                pretty_output("Status", d.info['Status'])
                pretty_output("Comment", dc.info['Comment'])
                pretty_output("Enabled", dc.info['Enabled'])
                pretty_output("DfltRootObject", dc.info['DefaultRootObject'])
                pretty_output("Logging", dc.info['Logging'] or "Disabled")
                pretty_output("Etag", response['headers']['etag'])

    @staticmethod
    def create(args):
        # Create one distribution per given s3:// bucket URI.
        cf = CloudFront(Config())
        buckets = []
        for arg in args:
            uri = S3Uri(arg)
            if uri.type != "s3":
                raise ParameterError("Bucket can only be created from a s3:// URI instead of: %s" % arg)
            if uri.object():
                raise ParameterError("Use s3:// URI with a bucket name only instead of: %s" % arg)
            if not uri.is_dns_compatible():
                raise ParameterError("CloudFront can only handle lowercase-named buckets.")
            buckets.append(uri)
        if not buckets:
            raise ParameterError("No valid bucket names found")
        for uri in buckets:
            info("Creating distribution from: %s" % uri)
            response = cf.CreateDistribution(uri, cnames_add = Cmd.options.cf_cnames_add,
                                             comment = Cmd.options.cf_comment,
                                             logging = Cmd.options.cf_logging,
                                             default_root_object = Cmd.options.cf_default_root_object)
            d = response['distribution']
            dc = d.info['DistributionConfig']
            output("Distribution created:")
            pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
            pretty_output("DistId", d.uri())
            pretty_output("DomainName", d.info['DomainName'])
            pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
            pretty_output("Comment", dc.info['Comment'])
            pretty_output("Status", d.info['Status'])
            pretty_output("Enabled", dc.info['Enabled'])
            pretty_output("DefaultRootObject", dc.info['DefaultRootObject'])
            pretty_output("Etag", response['headers']['etag'])

    @staticmethod
    def delete(args):
        cf = CloudFront(Config())
        cfuris = Cmd._parse_args(args)
        for cfuri in cfuris:
            response = cf.DeleteDistribution(cfuri)
            if response['status'] >= 400:
                error("Distribution %s could not be deleted: %s" % (cfuri, response['reason']))
            # NOTE(review): the success message below prints even after the
            # error() above - presumably error() aborts; confirm upstream.
            output("Distribution %s deleted" % cfuri)

    @staticmethod
    def modify(args):
        # Modify exactly one distribution, then re-fetch and display it.
        cf = CloudFront(Config())
        if len(args) > 1:
            raise ParameterError("Too many parameters. Modify one Distribution at a time.")
        try:
            cfuri = Cmd._parse_args(args)[0]
        except IndexError, e:
            raise ParameterError("No valid Distribution URI found.")
        response = cf.ModifyDistribution(cfuri,
                                         cnames_add = Cmd.options.cf_cnames_add,
                                         cnames_remove = Cmd.options.cf_cnames_remove,
                                         comment = Cmd.options.cf_comment,
                                         enabled = Cmd.options.cf_enable,
                                         logging = Cmd.options.cf_logging,
                                         default_root_object = Cmd.options.cf_default_root_object)
        if response['status'] >= 400:
            error("Distribution %s could not be modified: %s" % (cfuri, response['reason']))
        output("Distribution modified: %s" % cfuri)
        response = cf.GetDistInfo(cfuri)
        d = response['distribution']
        dc = d.info['DistributionConfig']
        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
        pretty_output("DistId", d.uri())
        pretty_output("DomainName", d.info['DomainName'])
        pretty_output("Status", d.info['Status'])
        pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
        pretty_output("Comment", dc.info['Comment'])
        pretty_output("Enabled", dc.info['Enabled'])
        pretty_output("DefaultRootObject", dc.info['DefaultRootObject'])
        pretty_output("Etag", response['headers']['etag'])

    @staticmethod
    def invalinfo(args):
        # Show the status of invalidation requests.  A cf:// URI carrying a
        # request id shows just that request; otherwise every invalidation
        # of the distribution is listed.
        cf = CloudFront(Config())
        cfuris = Cmd._parse_args(args)
        requests = []
        for cfuri in cfuris:
            if cfuri.request_id():
                requests.append(str(cfuri))
            else:
                inval_list = cf.GetInvalList(cfuri)
                try:
                    for i in inval_list['inval_list'].info['InvalidationSummary']:
                        requests.append("/".join(["cf:/", cfuri.dist_id(), i["Id"]]))
                except:
                    # No invalidations for this distribution - skip it.
                    continue
        for req in requests:
            cfuri = S3Uri(req)
            inval_info = cf.GetInvalInfo(cfuri)
            st = inval_info['inval_status'].info
            pretty_output("URI", str(cfuri))
            pretty_output("Status", st['Status'])
            pretty_output("Created", st['CreateTime'])
            pretty_output("Nr of paths", len(st['InvalidationBatch']['Path']))
            pretty_output("Reference", st['InvalidationBatch']['CallerReference'])
            output("")
# vim:et:ts=4:sts=4:ai

294
fabfile/S3/Config.py

@ -1,294 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import logging
from logging import debug, info, warning, error
import re
import os
import sys
import Progress
from SortedDict import SortedDict
import httplib
import json
class Config(object):
_instance = None
_parsed_files = []
_doc = {}
access_key = ""
secret_key = ""
access_token = ""
host_base = "s3.amazonaws.com"
host_bucket = "%(bucket)s.s3.amazonaws.com"
simpledb_host = "sdb.amazonaws.com"
cloudfront_host = "cloudfront.amazonaws.com"
verbosity = logging.WARNING
progress_meter = True
progress_class = Progress.ProgressCR
send_chunk = 4096
recv_chunk = 4096
list_md5 = False
human_readable_sizes = False
extra_headers = SortedDict(ignore_case = True)
force = False
enable = None
get_continue = False
skip_existing = False
recursive = False
acl_public = None
acl_grants = []
acl_revokes = []
proxy_host = ""
proxy_port = 3128
encrypt = False
dry_run = False
add_encoding_exts = ""
preserve_attrs = True
preserve_attrs_list = [
'uname', # Verbose owner Name (e.g. 'root')
'uid', # Numeric user ID (e.g. 0)
'gname', # Group name (e.g. 'users')
'gid', # Numeric group ID (e.g. 100)
'atime', # Last access timestamp
'mtime', # Modification timestamp
'ctime', # Creation timestamp
'mode', # File mode (e.g. rwxr-xr-x = 755)
'md5', # File MD5 (if known)
#'acl', # Full ACL (not yet supported)
]
delete_removed = False
delete_after = False
delete_after_fetch = False
_doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
delay_updates = False
gpg_passphrase = ""
gpg_command = ""
gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
gpg_decrypt = "%(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
use_https = False
bucket_location = "US"
default_mime_type = "binary/octet-stream"
guess_mime_type = True
mime_type = ""
enable_multipart = True
multipart_chunk_size_mb = 15 # MB
# List of checks to be performed for 'sync'
sync_checks = ['size', 'md5'] # 'weak-timestamp'
# List of compiled REGEXPs
exclude = []
include = []
# Dict mapping compiled REGEXPs back to their textual form
debug_exclude = {}
debug_include = {}
encoding = "utf-8"
urlencoding_mode = "normal"
log_target_prefix = ""
reduced_redundancy = False
follow_symlinks = False
socket_timeout = 300
invalidate_on_cf = False
# joseprio: new flags for default index invalidation
invalidate_default_index_on_cf = False
invalidate_default_index_root_on_cf = True
website_index = "index.html"
website_error = ""
website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
additional_destinations = []
cache_file = ""
add_headers = ""
## Creating a singleton
def __new__(self, configfile = None):
if self._instance is None:
self._instance = object.__new__(self)
return self._instance
def __init__(self, configfile = None):
if configfile:
try:
self.read_config_file(configfile)
except IOError, e:
if 'AWS_CREDENTIAL_FILE' in os.environ:
self.env_config()
if len(self.access_key)==0:
self.role_config()
def role_config(self):
conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1)
try:
conn.request('GET', "/latest/meta-data/iam/security-credentials/")
resp = conn.getresponse()
files = resp.read()
if resp.status == 200 and len(files)>1:
conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files)
resp=conn.getresponse()
if resp.status == 200:
creds=json.load(resp)
Config().update_option('access_key', creds['AccessKeyId'].encode('ascii'))
Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii'))
Config().update_option('access_token', creds['Token'].encode('ascii'))
else:
raise IOError
else:
raise IOError
except:
raise
def role_refresh(self):
try:
self.role_config()
except:
warning("Could not refresh role")
def env_config(self):
cred_content = ""
try:
cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r')
cred_content = cred_file.read()
except IOError, e:
debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE']))
r_data = re.compile("^\s*(?P<orig_key>\w+)\s*=\s*(?P<value>.*)")
r_quotes = re.compile("^\"(.*)\"\s*$")
if len(cred_content)>0:
for line in cred_content.splitlines():
is_data = r_data.match(line)
is_data = r_data.match(line)
if is_data:
data = is_data.groupdict()
if r_quotes.match(data["value"]):
data["value"] = data["value"][1:-1]
if data["orig_key"]=="AWSAccessKeyId":
data["key"] = "access_key"
elif data["orig_key"]=="AWSSecretKey":
data["key"] = "secret_key"
else:
del data["key"]
if "key" in data:
Config().update_option(data["key"], data["value"])
if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
else:
print_value = data["value"]
debug("env_Config: %s->%s" % (data["key"], print_value))
def option_list(self):
retval = []
for option in dir(self):
## Skip attributes that start with underscore or are not string, int or bool
option_type = type(getattr(Config, option))
if option.startswith("_") or \
not (option_type in (
type("string"), # str
type(42), # int
type(True))): # bool
continue
retval.append(option)
return retval
def read_config_file(self, configfile):
cp = ConfigParser(configfile)
for option in self.option_list():
self.update_option(option, cp.get(option))
if cp.get('add_headers'):
for option in cp.get('add_headers').split(","):
(key, value) = option.split(':')
self.extra_headers[key.replace('_', '-').strip()] = value.strip()
self._parsed_files.append(configfile)
def dump_config(self, stream):
ConfigDumper(stream).dump("default", self)
def update_option(self, option, value):
if value is None:
return
#### Handle environment reference
if str(value).startswith("$"):
return self.update_option(option, os.getenv(str(value)[1:]))
#### Special treatment of some options
## verbosity must be known to "logging" module
if option == "verbosity":
try:
setattr(Config, "verbosity", logging._levelNames[value])
except KeyError:
error("Config: verbosity level '%s' is not valid" % value)
## allow yes/no, true/false, on/off and 1/0 for boolean options
elif type(getattr(Config, option)) is type(True): # bool
if str(value).lower() in ("true", "yes", "on", "1"):
setattr(Config, option, True)
elif str(value).lower() in ("false", "no", "off", "0"):
setattr(Config, option, False)
else:
error("Config: value of option '%s' must be Yes or No, not '%s'" % (option, value))
elif type(getattr(Config, option)) is type(42): # int
try:
setattr(Config, option, int(value))
except ValueError, e:
error("Config: value of option '%s' must be an integer, not '%s'" % (option, value))
else: # string
setattr(Config, option, value)
class ConfigParser(object):
    """Minimal INI-style parser for the s3cmd configuration file.

    Key/value pairs are collected into a plain dict (self.cfg); only the
    sections listed in 'sections' are read (all sections when the list is
    empty).  Values wrapped in double quotes are unquoted, and secret
    values are masked in debug output.
    """
    def __init__(self, file, sections = []):
        self.cfg = {}
        self.parse_file(file, sections)

    def parse_file(self, file, sections = []):
        """Parse 'file', storing matching key = value lines via __setitem__."""
        debug("ConfigParser: Reading file '%s'" % file)
        if type(sections) != type([]):
            sections = [sections]
        in_our_section = True
        r_comment = re.compile(r"^\s*#.*")
        r_empty = re.compile(r"^\s*$")
        r_section = re.compile(r"^\[([^\]]+)\]")
        r_data = re.compile(r"^\s*(?P<key>\w+)\s*=\s*(?P<value>.*)")
        r_quotes = re.compile(r"^\"(.*)\"\s*$")
        f = open(file, "r")
        try:
            for line in f:
                if r_comment.match(line) or r_empty.match(line):
                    continue
                is_section = r_section.match(line)
                if is_section:
                    section = is_section.groups()[0]
                    in_our_section = (section in sections) or (len(sections) == 0)
                    continue
                is_data = r_data.match(line)
                if is_data and in_our_section:
                    data = is_data.groupdict()
                    if r_quotes.match(data["value"]):
                        data["value"] = data["value"][1:-1]
                    self.__setitem__(data["key"], data["value"])
                    if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
                        # Mask secrets in the debug log.
                        print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
                    else:
                        print_value = data["value"]
                    debug("ConfigParser: %s->%s" % (data["key"], print_value))
                    continue
                warning("Ignoring invalid line in '%s': %s" % (file, line))
        finally:
            # BUG FIX: the file handle was previously never closed.
            f.close()

    def __getitem__(self, name):
        return self.cfg[name]

    def __setitem__(self, name, value):
        self.cfg[name] = value

    def get(self, name, default = None):
        """Return the stored value for 'name', or 'default'."""
        # 'in' instead of the Python2-only dict.has_key()
        if name in self.cfg:
            return self.cfg[name]
        return default
class ConfigDumper(object):
    """Write a Config-like object out in INI form to an open stream."""

    def __init__(self, stream):
        self.stream = stream

    def dump(self, section, config):
        """Emit '[section]' followed by one 'key = value' line per option."""
        out = self.stream.write
        out("[%s]\n" % section)
        for name in config.option_list():
            out("%s = %s\n" % (name, getattr(config, name)))
# vim:et:ts=4:sts=4:ai

71
fabfile/S3/ConnMan.py

@ -1,71 +0,0 @@
import httplib
from urlparse import urlparse
from threading import Semaphore
from logging import debug, info, warning, error
from Config import Config
from Exceptions import ParameterError
__all__ = [ "ConnMan" ]
class http_connection(object):
    """A pooled HTTP(S) connection plus the bookkeeping ConnMan needs.

    'counter' tracks how many times the connection has been handed out so
    over-used connections can be retired.
    """
    def __init__(self, id, hostname, ssl, cfg):
        self.hostname = hostname
        self.ssl = ssl
        self.id = id
        self.counter = 0
        # A configured proxy wins; otherwise choose plain vs TLS transport.
        if cfg.proxy_host != "":
            self.c = httplib.HTTPConnection(cfg.proxy_host, cfg.proxy_port)
        elif ssl:
            self.c = httplib.HTTPSConnection(hostname)
        else:
            self.c = httplib.HTTPConnection(hostname)
class ConnMan(object):
    # Pool of idle http_connection objects keyed by connection id,
    # guarded by a semaphore for thread safety.
    conn_pool_sem = Semaphore()
    conn_pool = {}
    conn_max_counter = 800  ## AWS closes connection after some ~90 requests

    @staticmethod
    def get(hostname, ssl = None):
        # Hand out a pooled connection for 'hostname', creating a new one
        # when the pool is empty.  'ssl' defaults to cfg.use_https.
        cfg = Config()
        if ssl == None:
            ssl = cfg.use_https
        conn = None
        if cfg.proxy_host != "":
            if ssl:
                raise ParameterError("use_ssl=True can't be used with proxy")
            conn_id = "proxy://%s:%s" % (cfg.proxy_host, cfg.proxy_port)
        else:
            conn_id = "http%s://%s" % (ssl and "s" or "", hostname)
        ConnMan.conn_pool_sem.acquire()
        if not ConnMan.conn_pool.has_key(conn_id):
            ConnMan.conn_pool[conn_id] = []
        if len(ConnMan.conn_pool[conn_id]):
            conn = ConnMan.conn_pool[conn_id].pop()
            debug("ConnMan.get(): re-using connection: %s#%d" % (conn.id, conn.counter))
        ConnMan.conn_pool_sem.release()
        if not conn:
            debug("ConnMan.get(): creating new connection: %s" % conn_id)
            conn = http_connection(conn_id, hostname, ssl, cfg)
            conn.c.connect()
        conn.counter += 1
        return conn

    @staticmethod
    def put(conn):
        # Return a connection to the pool.  Proxy connections and
        # over-used connections are closed instead of being pooled.
        if conn.id.startswith("proxy://"):
            conn.c.close()
            debug("ConnMan.put(): closing proxy connection (keep-alive not yet supported)")
            return
        if conn.counter >= ConnMan.conn_max_counter:
            conn.c.close()
            debug("ConnMan.put(): closing over-used connection")
            return
        ConnMan.conn_pool_sem.acquire()
        ConnMan.conn_pool[conn.id].append(conn)
        ConnMan.conn_pool_sem.release()
        debug("ConnMan.put(): connection put back to pool (%s#%d)" % (conn.id, conn.counter))

88
fabfile/S3/Exceptions.py

@ -1,88 +0,0 @@
## Amazon S3 manager - Exceptions library
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
from Utils import getTreeFromXml, unicodise, deunicodise
from logging import debug, info, warning, error
try:
import xml.etree.ElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
class S3Exception(Exception):
    """Base class for all s3cmd exceptions; stores the message as unicode."""
    def __init__(self, message = ""):
        self.message = unicodise(message)

    def __str__(self):
        ## Call unicode(self) instead of self.message because
        ## __unicode__() method could be overriden in subclasses!
        return deunicodise(unicode(self))

    def __unicode__(self):
        return self.message

    ## (Base)Exception.message has been deprecated in Python 2.6
    def _get_message(self):
        return self._message
    def _set_message(self, message):
        self._message = message
    message = property(_get_message, _set_message)
class S3Error (S3Exception):
    """Error response (non-2xx) from the S3 API.

    Parses the XML error document out of 'response' and exposes status,
    reason, code, message and resource attributes.
    """
    def __init__(self, response):
        self.status = response["status"]
        self.reason = response["reason"]
        # Defaults in case the response carries no XML error body.
        self.info = {
            "Code" : "",
            "Message" : "",
            "Resource" : ""
        }
        debug("S3Error: %s (%s)" % (self.status, self.reason))
        if response.has_key("headers"):
            for header in response["headers"]:
                debug("HttpHeader: %s: %s" % (header, response["headers"][header]))
        if response.has_key("data") and response["data"]:
            tree = getTreeFromXml(response["data"])
            error_node = tree
            # The <Error> element may be the root or nested deeper.
            if not error_node.tag == "Error":
                error_node = tree.find(".//Error")
            for child in error_node.getchildren():
                if child.text != "":
                    debug("ErrorXML: " + child.tag + ": " + repr(child.text))
                    self.info[child.tag] = child.text
        self.code = self.info["Code"]
        self.message = self.info["Message"]
        self.resource = self.info["Resource"]

    def __unicode__(self):
        retval = u"%d " % (self.status)
        retval += (u"(%s)" % (self.info.has_key("Code") and self.info["Code"] or self.reason))
        if self.info.has_key("Message"):
            retval += (u": %s" % self.info["Message"])
        return retval
class CloudFrontError(S3Error):
    """Error response from the CloudFront API; reuses S3Error's XML parsing."""
    pass
class S3UploadError(S3Exception):
    """Raised when an upload to S3 fails."""
    pass
class S3DownloadError(S3Exception):
    """Raised when a download from S3 fails."""
    pass
class S3RequestError(S3Exception):
    """Raised when an S3 request could not be sent."""
    pass
class S3ResponseError(S3Exception):
    """Raised when an S3 response could not be received or understood."""
    pass
class InvalidFileError(S3Exception):
    """Raised for unusable local files (unreadable, wrong type, ...)."""
    pass
class ParameterError(S3Exception):
    """Raised for invalid command-line parameters or URIs."""
    pass
# vim:et:ts=4:sts=4:ai

53
fabfile/S3/FileDict.py

@ -1,53 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
from SortedDict import SortedDict
import Utils
class FileDict(SortedDict):
    """SortedDict keyed by relative path with md5 and hardlink bookkeeping.

    by_md5 maps md5 -> set(relative_files); hardlinks maps
    dev -> inode -> {'md5': ..., 'relative_files': set()} so that files
    sharing an inode can reuse an already-computed md5.
    """
    def __init__(self, mapping = {}, ignore_case = True, **kwargs):
        # NOTE: the mutable 'mapping={}' default is kept for interface
        # compatibility; it is only read, never mutated here.
        SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs)
        self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}}
        self.by_md5 = dict() # {md5: set(relative_files)}

    def record_md5(self, relative_file, md5):
        """Remember that relative_file has the given md5."""
        if md5 not in self.by_md5:
            self.by_md5[md5] = set()
        self.by_md5[md5].add(relative_file)

    def find_md5_one(self, md5):
        """Return any one file recorded with this md5, or None."""
        try:
            return list(self.by_md5.get(md5, set()))[0]
        except IndexError:
            # BUG FIX: was a bare 'except:' that could hide unrelated errors.
            return None

    def get_md5(self, relative_file):
        """returns md5 if it can, or raises IOError if file is unreadable"""
        md5 = None
        if 'md5' in self[relative_file]:
            return self[relative_file]['md5']
        # Try to reuse the md5 of a hardlinked sibling before hashing.
        md5 = self.get_hardlink_md5(relative_file)
        if md5 is None:
            md5 = Utils.hash_file_md5(self[relative_file]['full_name'])
        self.record_md5(relative_file, md5)
        self[relative_file]['md5'] = md5
        return md5

    def record_hardlink(self, relative_file, dev, inode, md5):
        """Index relative_file and its md5 under its (dev, inode) pair."""
        if dev not in self.hardlinks:
            self.hardlinks[dev] = dict()
        if inode not in self.hardlinks[dev]:
            self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set())
        self.hardlinks[dev][inode]['relative_files'].add(relative_file)

    def get_hardlink_md5(self, relative_file):
        """Return the md5 recorded for this file's (dev, inode), or None."""
        md5 = None
        dev = self[relative_file]['dev']
        inode = self[relative_file]['inode']
        try:
            md5 = self.hardlinks[dev][inode]['md5']
        except KeyError:
            # BUG FIX: was a bare 'except:'; a missing (dev, inode) entry
            # simply means this is not a known hardlink.
            pass
        return md5

517
fabfile/S3/FileLists.py

@ -1,517 +0,0 @@
## Create and compare lists of files/objects
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
from S3 import S3
from Config import Config
from S3Uri import S3Uri
from FileDict import FileDict
from Utils import *
from Exceptions import ParameterError
from HashCache import HashCache
from logging import debug, info, warning, error
import os
import glob
import copy
__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"]
def _fswalk_follow_symlinks(path):
    '''
    Walk filesystem, following symbolic links (but without recursion), on python2.4 and later

    If a symlink directory loop is detected, emit a warning and skip.
    E.g.: dir1/dir2/sym-dir -> ../dir2
    '''
    assert os.path.isdir(path) # only designed for directory argument
    walkdirs = set([path])
    # First pass: collect symlinked directories that are safe to walk.
    for dirpath, dirnames, filenames in os.walk(path):
        handle_exclude_include_walk(dirpath, dirnames, [])
        real_dirpath = os.path.realpath(dirpath)
        for dirname in dirnames:
            current = os.path.join(dirpath, dirname)
            real_current = os.path.realpath(current)
            if os.path.islink(current):
                # A symlink resolving to its own ancestor would recurse
                # forever - warn and skip it.
                if (real_dirpath == real_current or
                    real_dirpath.startswith(real_current + os.path.sep)):
                    warning("Skipping recursively symlinked directory %s" % dirname)
                else:
                    walkdirs.add(current)
    # Second pass: walk the original tree plus each accepted symlink target.
    for walkdir in walkdirs:
        for dirpath, dirnames, filenames in os.walk(walkdir):
            handle_exclude_include_walk(dirpath, dirnames, [])
            yield (dirpath, dirnames, filenames)
def _fswalk_no_symlinks(path):
    """Walk the directory tree rooted at 'path' without following
    symlinks, applying the configured exclude/include filters to each
    level before yielding the usual (dirpath, dirnames, filenames)."""
    for root, subdirs, names in os.walk(path):
        handle_exclude_include_walk(root, subdirs, names)
        yield (root, subdirs, names)
def filter_exclude_include(src_list):
    """Split src_list according to --exclude/--include patterns.

    Returns (src_list, exclude_list): src_list is modified in place,
    with excluded entries moved into the returned exclude_list.  An
    --include match rescues a file that an --exclude pattern matched.
    """
    info(u"Applying --exclude/--include")
    cfg = Config()
    exclude_list = FileDict(ignore_case = False)
    for file in src_list.keys():
        debug(u"CHECK: %s" % file)
        excluded = False
        for r in cfg.exclude:
            if r.search(file):
                excluded = True
                debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
                break
        if excluded:
            ## No need to check for --include if not excluded
            for r in cfg.include:
                if r.search(file):
                    excluded = False
                    debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
                    break
        if excluded:
            ## Still excluded - ok, action it
            debug(u"EXCLUDE: %s" % file)
            exclude_list[file] = src_list[file]
            del(src_list[file])
            continue
        else:
            debug(u"PASS: %r" % (file))
    return src_list, exclude_list
def _exclude_include_walk_matched(cfg, path):
    """Return True when 'path' is excluded by cfg.exclude and not rescued
    by a matching cfg.include pattern.  Logs the matching pattern."""
    excluded = False
    for r in cfg.exclude:
        if r.search(path):
            excluded = True
            debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r]))
            break
    if excluded:
        ## No need to check for --include if not excluded
        for r in cfg.include:
            if r.search(path):
                excluded = False
                debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r]))
                break
    return excluded

def handle_exclude_include_walk(root, dirs, files):
    """Prune 'dirs' and 'files' in place (as used by os.walk) according
    to the configured --exclude/--include patterns.

    Pruning 'dirs' prevents os.walk from recursing into trees we already
    know we want to ignore.  The duplicated exclude/include logic of the
    original dir and file loops is factored into
    _exclude_include_walk_matched().
    """
    cfg = Config()
    # exclude dir matches in the current directory
    # this prevents us from recursing down trees we know we want to ignore
    for x in copy.copy(dirs):
        d = os.path.join(root, x, '')
        debug(u"CHECK: %r" % d)
        if _exclude_include_walk_matched(cfg, d):
            ## Still excluded - ok, action it
            debug(u"EXCLUDE: %r" % d)
            dirs.remove(x)
        else:
            debug(u"PASS: %r" % (d))

    # exclude file matches in the current directory
    for x in copy.copy(files):
        file = os.path.join(root, x)
        debug(u"CHECK: %r" % file)
        if _exclude_include_walk_matched(cfg, file):
            ## Still excluded - ok, action it
            debug(u"EXCLUDE: %s" % file)
            files.remove(x)
        else:
            debug(u"PASS: %r" % (file))
def fetch_local_list(args, recursive = None):
    """Build a FileDict of local files for the given path argument(s).

    Returns (local_list, single_file): single_file is True only when the
    user specified exactly one local FILE URI.  When cfg.cache_file is
    set, md5 results are cached keyed on (dev, inode, mtime, size).
    Raises ParameterError for non-file URIs or a directory without
    --recursive.
    """
    def _get_filelist_local(loc_list, local_uri, cache):
        info(u"Compiling list of local files...")
        if deunicodise(local_uri.basename()) == "-":
            # Pseudo-entry for stdin.
            loc_list["-"] = {
                'full_name_unicode' : '-',
                'full_name' : '-',
                'size' : -1,
                'mtime' : -1,
            }
            return loc_list, True
        if local_uri.isdir():
            local_base = deunicodise(local_uri.basename())
            local_path = deunicodise(local_uri.path())
            if cfg.follow_symlinks:
                filelist = _fswalk_follow_symlinks(local_path)
            else:
                filelist = _fswalk_no_symlinks(local_path)
            single_file = False
        else:
            local_base = ""
            local_path = deunicodise(local_uri.dirname())
            # Fake a one-entry walk result for the single file.
            filelist = [( local_path, [], [deunicodise(local_uri.basename())] )]
            single_file = True
        for root, dirs, files in filelist:
            rel_root = root.replace(local_path, local_base, 1)
            for f in files:
                full_name = os.path.join(root, f)
                if not os.path.isfile(full_name):
                    continue
                if os.path.islink(full_name):
                    if not cfg.follow_symlinks:
                        continue
                relative_file = unicodise(os.path.join(rel_root, f))
                if os.path.sep != "/":
                    # Convert non-unix dir separators to '/'
                    relative_file = "/".join(relative_file.split(os.path.sep))
                if cfg.urlencoding_mode == "normal":
                    relative_file = replace_nonprintables(relative_file)
                if relative_file.startswith('./'):
                    relative_file = relative_file[2:]
                sr = os.stat_result(os.lstat(full_name))
                loc_list[relative_file] = {
                    'full_name_unicode' : unicodise(full_name),
                    'full_name' : full_name,
                    'size' : sr.st_size,
                    'mtime' : sr.st_mtime,
                    'dev' : sr.st_dev,
                    'inode' : sr.st_ino,
                    'uid' : sr.st_uid,
                    'gid' : sr.st_gid,
                    'sr': sr # save it all, may need it in preserve_attrs_list
                    ## TODO: Possibly more to save here...
                }
                if 'md5' in cfg.sync_checks:
                    # Consult the (dev, inode, mtime, size) cache first to
                    # avoid re-hashing unchanged files.
                    md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size)
                    if md5 is None:
                        try:
                            md5 = loc_list.get_md5(relative_file) # this does the file I/O
                        except IOError:
                            continue
                        cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5)
                    loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5)
        return loc_list, single_file

    def _maintain_cache(cache, local_list):
        # Drop cache entries whose files no longer exist, then persist.
        if cfg.cache_file:
            cache.mark_all_for_purge()
            for i in local_list.keys():
                cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size'])
            cache.purge()
            cache.save(cfg.cache_file)

    cfg = Config()
    cache = HashCache()
    if cfg.cache_file:
        try:
            cache.load(cfg.cache_file)
        except IOError:
            info(u"No cache file found, creating it.")

    local_uris = []
    local_list = FileDict(ignore_case = False)
    single_file = False

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 'file':
            raise ParameterError("Expecting filename or directory instead of: %s" % arg)
        if uri.isdir() and not recursive:
            raise ParameterError("Use --recursive to upload a directory: %s" % arg)
        local_uris.append(uri)

    for uri in local_uris:
        list_for_uri, single_file = _get_filelist_local(local_list, uri, cache)

    ## Single file is True if and only if the user
    ## specified one local URI and that URI represents
    ## a FILE. Ie it is False if the URI was of a DIR
    ## and that dir contained only one FILE. That's not
    ## a case of single_file==True.
    if len(local_list) > 1:
        single_file = False

    _maintain_cache(cache, local_list)

    return local_list, single_file
def fetch_remote_list(args, require_attribs = False, recursive = None):
    """Build a FileDict of remote S3 objects for the given s3:// URI(s).

    In non-recursive mode, wildcards ('*', '?') trigger a prefix listing
    filtered with fnmatch; without wildcards a single object is appended
    (with a HEAD request per object when require_attribs is True).
    Raises ParameterError for non-s3 URIs or a bare bucket URI without
    --recursive.
    """
    def _get_filelist_remote(remote_uri, recursive = True):
        ## If remote_uri ends with '/' then all remote files will have
        ## the remote_uri prefix removed in the relative path.
        ## If, on the other hand, the remote_uri ends with something else
        ## (probably alphanumeric symbol) we'll use the last path part
        ## in the relative path.
        ##
        ## Complicated, eh? See an example:
        ## _get_filelist_remote("s3://bckt/abc/def") may yield:
        ## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} }
        ## _get_filelist_remote("s3://bckt/abc/def/") will yield:
        ## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} }
        ## Furthermore a prefix-magic can restrict the return list:
        ## _get_filelist_remote("s3://bckt/abc/def/x") yields:
        ## { 'xyz/blah.txt' : {} }
        info(u"Retrieving list of remote files for %s ..." % remote_uri)

        s3 = S3(Config())
        response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive)

        rem_base_original = rem_base = remote_uri.object()
        remote_uri_original = remote_uri
        if rem_base != '' and rem_base[-1] != '/':
            rem_base = rem_base[:rem_base.rfind('/')+1]
            remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base))
        rem_base_len = len(rem_base)
        rem_list = FileDict(ignore_case = False)
        break_now = False
        for object in response['list']:
            if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep:
                ## We asked for one file and we got that file :-)
                key = os.path.basename(object['Key'])
                object_uri_str = remote_uri_original.uri()
                break_now = True
                rem_list = FileDict(ignore_case = False)    ## Remove whatever has already been put to rem_list
            else:
                key = object['Key'][rem_base_len:]      ## Beware - this may be '' if object['Key']==rem_base !!
                object_uri_str = remote_uri.uri() + key
            rem_list[key] = {
                'size' : int(object['Size']),
                'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-(
                'md5' : object['ETag'][1:-1],
                'object_key' : object['Key'],
                'object_uri_str' : object_uri_str,
                'base_uri' : remote_uri,
                'dev' : None,
                'inode' : None,
            }
            md5 = object['ETag'][1:-1]
            rem_list.record_md5(key, md5)
            if break_now:
                break
        return rem_list

    cfg = Config()
    remote_uris = []
    remote_list = FileDict(ignore_case = False)

    if type(args) not in (list, tuple):
        args = [args]

    if recursive == None:
        recursive = cfg.recursive

    for arg in args:
        uri = S3Uri(arg)
        if not uri.type == 's3':
            raise ParameterError("Expecting S3 URI instead of '%s'" % arg)
        remote_uris.append(uri)

    if recursive:
        for uri in remote_uris:
            objectlist = _get_filelist_remote(uri)
            for key in objectlist:
                remote_list[key] = objectlist[key]
                remote_list.record_md5(key, objectlist.get_md5(key))
    else:
        for uri in remote_uris:
            uri_str = str(uri)
            ## Wildcards used in remote URI?
            ## If yes we'll need a bucket listing...
            if uri_str.find('*') > -1 or uri_str.find('?') > -1:
                first_wildcard = uri_str.find('*')
                first_questionmark = uri_str.find('?')
                if first_questionmark > -1 and first_questionmark < first_wildcard:
                    first_wildcard = first_questionmark
                prefix = uri_str[:first_wildcard]
                rest = uri_str[first_wildcard+1:]
                ## Only request recursive listing if the 'rest' of the URI,
                ## i.e. the part after first wildcard, contains '/'
                need_recursion = rest.find('/') > -1
                objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion)
                for key in objectlist:
                    ## Check whether the 'key' matches the requested wildcards
                    if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str):
                        remote_list[key] = objectlist[key]
            else:
                ## No wildcards - simply append the given URI to the list
                key = os.path.basename(uri.object())
                if not key:
                    raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri())
                remote_item = {
                    'base_uri': uri,
                    'object_uri_str': unicode(uri),
                    'object_key': uri.object()
                }
                if require_attribs:
                    response = S3(cfg).object_info(uri)
                    remote_item.update({
                        'size': int(response['headers']['content-length']),
                        'md5': response['headers']['etag'].strip('"\''),
                        'timestamp' : dateRFC822toUnix(response['headers']['date'])
                    })
                    # get md5 from header if it's present. We would have set that during upload
                    if response['headers'].has_key('x-amz-meta-s3cmd-attrs'):
                        attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs'])
                        if attrs.has_key('md5'):
                            remote_item.update({'md5': attrs['md5']})

                remote_list[key] = remote_item
    return remote_list
def parse_attrs_header(attrs_header):
    """Parse an 'x-amz-meta-s3cmd-attrs' header value into a dict.

    The header is formatted as "key1:val1/key2:val2/...".  Splitting on
    the first ':' only (rather than every ':') makes the parser robust
    against values that themselves contain colons; plain values parse
    exactly as before.
    """
    attrs = {}
    for attr in attrs_header.split("/"):
        key, val = attr.split(":", 1)
        attrs[key] = val
    return attrs
def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False):
    """Classify files for a sync between src_list and dst_list (FileDicts).

    Entries are *removed* from the input dicts as they are classified:
    - items left in src_list must be transferred,
    - items in the returned update_list are transferred after src_list,
    - items in copy_pairs are copied remotely (dst1 -> relative_file),
    - items left in dst_list do not exist on the source side (deletion
      candidates for the caller).

    Returns (src_list, dst_list, update_list, copy_pairs).

    NOTE(review): delay_updates is accepted but unused in this body -
    confirm whether callers rely on it.
    """
    def __direction_str(is_remote):
        # For log messages only.
        return is_remote and "remote" or "local"

    def _compare(src_list, dst_lst, src_remote, dst_remote, file):
        """Return True if src_list[file] matches dst_list[file], else False"""
        # NOTE(review): the dst_lst parameter is never used - the body reads
        # the enclosing dst_list instead. Harmless because callers pass the
        # same object, but worth confirming.
        attribs_match = True
        if not (src_list.has_key(file) and dst_list.has_key(file)):
            info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file)))
            return False

        ## check size first
        if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']:
            debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size']))
            attribs_match = False

        ## check md5
        compare_md5 = 'md5' in cfg.sync_checks
        # Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn"
        if compare_md5:
            if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0):
                compare_md5 = False
                info(u"disabled md5 check for %s" % file)
        if attribs_match and compare_md5:
            try:
                src_md5 = src_list.get_md5(file)
                dst_md5 = dst_list.get_md5(file)
            except (IOError,OSError), e:
                # md5 sum verification failed - ignore that file altogether
                debug(u"IGNR: %s (disappeared)" % (file))
                warning(u"%s: file disappeared, ignoring." % (file))
                # Re-raised so the caller can drop the file from both lists.
                raise

            if src_md5 != dst_md5:
                ## checksums are different.
                attribs_match = False
                debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5))

        return attribs_match

    # we don't support local->local sync, use 'rsync' or something like that instead ;-)
    assert(not(src_remote == False and dst_remote == False))

    info(u"Verifying attributes...")
    cfg = Config()
    ## Items left on src_list will be transferred
    ## Items left on update_list will be transferred after src_list
    ## Items left on copy_pairs will be copied from dst1 to dst2
    update_list = FileDict(ignore_case = False)
    ## Items left on dst_list will be deleted
    copy_pairs = []

    debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote)))

    for relative_file in src_list.keys():
        debug(u"CHECK: %s" % (relative_file))

        if dst_list.has_key(relative_file):
            ## Was --skip-existing requested?
            if cfg.skip_existing:
                debug(u"IGNR: %s (used --skip-existing)" % (relative_file))
                del(src_list[relative_file])
                del(dst_list[relative_file])
                continue

            try:
                same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file)
            except (IOError,OSError), e:
                # Local file vanished between listing and hashing: skip it.
                debug(u"IGNR: %s (disappeared)" % (relative_file))
                warning(u"%s: file disappeared, ignoring." % (relative_file))
                del(src_list[relative_file])
                del(dst_list[relative_file])
                continue

            if same_file:
                debug(u"IGNR: %s (transfer not needed)" % relative_file)
                del(src_list[relative_file])
                del(dst_list[relative_file])
            else:
                # look for matching file in src
                try:
                    md5 = src_list.get_md5(relative_file)
                except IOError:
                    md5 = None
                if md5 is not None and dst_list.by_md5.has_key(md5):
                    # Found one, we want to copy
                    dst1 = list(dst_list.by_md5[md5])[0]
                    debug(u"DST COPY src: %s -> %s" % (dst1, relative_file))
                    copy_pairs.append((src_list[relative_file], dst1, relative_file))
                    del(src_list[relative_file])
                    del(dst_list[relative_file])
                else:
                    # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
                    # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
                    dst_list.record_md5(relative_file, md5)
                    update_list[relative_file] = src_list[relative_file]
                    del src_list[relative_file]
                    del dst_list[relative_file]
        else:
            # dst doesn't have this file
            # look for matching file elsewhere in dst
            try:
                md5 = src_list.get_md5(relative_file)
            except IOError:
                md5 = None
            dst1 = dst_list.find_md5_one(md5)
            if dst1 is not None:
                # Found one, we want to copy
                debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file))
                copy_pairs.append((src_list[relative_file], dst1, relative_file))
                del(src_list[relative_file])
            else:
                # we don't have this file, and we don't have a copy of this file elsewhere. Get it.
                # record that we will get this file transferred to us (before all the copies), so if we come across it later again,
                # we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter).
                dst_list.record_md5(relative_file, md5)

    # Anything still in dst_list that is also queued for transfer is not a
    # deletion candidate; keep only true extras.
    for f in dst_list.keys():
        if src_list.has_key(f) or update_list.has_key(f):
            # leave only those not on src_list + update_list
            del dst_list[f]

    return src_list, dst_list, update_list, copy_pairs
# vim:et:ts=4:sts=4:ai

53
fabfile/S3/HashCache.py

@ -1,53 +0,0 @@
import cPickle as pickle
class HashCache(object):
    """Cache of file md5 sums keyed by (device, inode, mtime).

    Lets the sync code skip re-hashing files whose stat() signature has
    not changed.  Persisted to disk with pickle via save()/load().
    """

    def __init__(self):
        # dev -> inode -> mtime -> {'md5': ..., 'size': ..., ['purge': True]}
        self.inodes = dict()

    def add(self, dev, inode, mtime, size, md5):
        """Record md5/size for the (dev, inode, mtime) triple."""
        if dev not in self.inodes:
            self.inodes[dev] = dict()
        if inode not in self.inodes[dev]:
            self.inodes[dev][inode] = dict()
        self.inodes[dev][inode][mtime] = dict(md5=md5, size=size)

    def md5(self, dev, inode, mtime, size):
        """Return the cached md5 for the triple, or None on a miss or if
        the recorded size no longer matches."""
        try:
            entry = self.inodes[dev][inode][mtime]
        except KeyError:
            # Narrowed from a bare "except:": only a missing cache entry is
            # a miss; unexpected errors should propagate.
            return None
        if entry['size'] != size:
            return None
        return entry['md5']

    def mark_all_for_purge(self):
        """Flag every entry; purge() will drop those not unmarked since."""
        for d in self.inodes.keys():
            for i in self.inodes[d].keys():
                for c in self.inodes[d][i].keys():
                    self.inodes[d][i][c]['purge'] = True

    def unmark_for_purge(self, dev, inode, mtime, size):
        """Clear the purge flag for a still-present file."""
        d = self.inodes[dev][inode][mtime]
        if d['size'] == size and 'purge' in d:
            del self.inodes[dev][inode][mtime]['purge']

    def purge(self):
        """Drop every inode that still has a purge-flagged mtime entry."""
        # list() copies guard against mutating dicts while iterating them.
        for d in list(self.inodes.keys()):
            for i in list(self.inodes[d].keys()):
                for m in list(self.inodes[d][i].keys()):
                    if 'purge' in self.inodes[d][i][m]:
                        del self.inodes[d][i]
                        break

    def save(self, f):
        """Pickle the cache to file path *f* in a versioned envelope."""
        d = dict(inodes=self.inodes, version=1)
        # Binary mode: pickle output is binary data, not text.
        with open(f, 'wb') as fp:
            pickle.dump(d, fp)

    def load(self, f):
        """Load a cache written by save(); silently ignore unknown formats."""
        with open(f, 'rb') as fp:
            d = pickle.load(fp)
        if d.get('version') == 1 and 'inodes' in d:
            self.inodes = d['inodes']

137
fabfile/S3/MultiPart.py

@ -1,137 +0,0 @@
## Amazon S3 Multipart upload support
## Author: Jerome Leclanche <jerome.leclanche@gmail.com>
## License: GPL Version 2
import os
from stat import ST_SIZE
from logging import debug, info, warning, error
from Utils import getTextFromXml, formatSize, unicodise
from Exceptions import S3UploadError
class MultiPartUpload(object):
    """Driver for an S3 multipart upload: initiate, upload parts, complete or abort."""

    # S3 limits for a single part.
    MIN_CHUNK_SIZE_MB = 5 # 5MB
    MAX_CHUNK_SIZE_MB = 5120 # 5GB
    # NOTE(review): 42949672960 bytes is 40 GiB, not 5 TB as the comment
    # claims - confirm the intended limit.
    MAX_FILE_SIZE = 42949672960 # 5TB

    def __init__(self, s3, file, uri, headers_baseline = {}):
        """
        s3   -- S3 instance used to issue the requests
        file -- open file object to read data from (may be stdin)
        uri  -- destination S3Uri
        headers_baseline -- headers sent with the initiate request
        NOTE(review): mutable default argument - safe only while the dict
        is never mutated; verify before changing.
        """
        self.s3 = s3
        self.file = file
        self.uri = uri
        # part number -> ETag, filled in as parts complete.
        self.parts = {}
        self.headers_baseline = headers_baseline
        self.upload_id = self.initiate_multipart_upload()

    def initiate_multipart_upload(self):
        """
        Begin a multipart upload
        http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadInitiate.html
        Returns the UploadId issued by S3 (also stored on self).
        """
        request = self.s3.create_request("OBJECT_POST", uri = self.uri, headers = self.headers_baseline, extra = "?uploads")
        response = self.s3.send_request(request)
        data = response["data"]
        self.upload_id = getTextFromXml(data, "UploadId")
        return self.upload_id

    def upload_all_parts(self):
        """
        Execute a full multipart upload on a file
        Returns the seq/etag dict
        TODO use num_processes to thread it
        """
        if not self.upload_id:
            raise RuntimeError("Attempting to use a multipart upload that has not been initiated.")

        self.chunk_size = self.s3.config.multipart_chunk_size_mb * 1024 * 1024

        if self.file.name != "<stdin>":
            # Regular file: size known up front, so the part count can be
            # precomputed (Python 2 integer division is intentional).
            size_left = file_size = os.stat(self.file.name)[ST_SIZE]
            nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1)
            debug("MultiPart: Uploading %s in %d parts" % (self.file.name, nr_parts))
        else:
            debug("MultiPart: Uploading from %s" % (self.file.name))

        seq = 1
        if self.file.name != "<stdin>":
            while size_left > 0:
                offset = self.chunk_size * (seq - 1)
                current_chunk_size = min(file_size - offset, self.chunk_size)
                size_left -= current_chunk_size
                labels = {
                    'source' : unicodise(self.file.name),
                    'destination' : unicodise(self.uri.uri()),
                    'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
                }
                try:
                    self.upload_part(seq, offset, current_chunk_size, labels)
                except:
                    # Bare except is deliberate: any failure must abort the
                    # whole multipart upload before re-raising.
                    error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq))
                    self.abort_upload()
                    raise
                seq += 1
        else:
            # Streaming from stdin: read chunk-by-chunk until EOF.
            while True:
                buffer = self.file.read(self.chunk_size)
                offset = self.chunk_size * (seq - 1)
                current_chunk_size = len(buffer)
                labels = {
                    'source' : unicodise(self.file.name),
                    'destination' : unicodise(self.uri.uri()),
                    'extra' : "[part %d, %s]" % (seq, "%d%sB" % formatSize(current_chunk_size, human_readable = True))
                }
                if len(buffer) == 0: # EOF
                    break
                try:
                    self.upload_part(seq, offset, current_chunk_size, labels, buffer)
                except:
                    error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq))
                    self.abort_upload()
                    raise
                seq += 1

        debug("MultiPart: Upload finished: %d parts", seq - 1)

    def upload_part(self, seq, offset, chunk_size, labels, buffer = ''):
        """
        Upload a file chunk
        http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadUploadPart.html
        """
        # TODO implement Content-MD5
        debug("Uploading part %i of %r (%s bytes)" % (seq, self.upload_id, chunk_size))
        headers = { "content-length": chunk_size }
        query_string = "?partNumber=%i&uploadId=%s" % (seq, self.upload_id)
        request = self.s3.create_request("OBJECT_PUT", uri = self.uri, headers = headers, extra = query_string)
        response = self.s3.send_file(request, self.file, labels, buffer, offset = offset, chunk_size = chunk_size)
        # S3 returns the part's ETag; it is required to complete the upload.
        self.parts[seq] = response["headers"]["etag"]
        return response

    def complete_multipart_upload(self):
        """
        Finish a multipart upload
        http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadComplete.html
        """
        debug("MultiPart: Completing upload: %s" % self.upload_id)

        # Assemble the PartNumber/ETag manifest S3 expects in the body.
        parts_xml = []
        part_xml = "<Part><PartNumber>%i</PartNumber><ETag>%s</ETag></Part>"
        for seq, etag in self.parts.items():
            parts_xml.append(part_xml % (seq, etag))
        body = "<CompleteMultipartUpload>%s</CompleteMultipartUpload>" % ("".join(parts_xml))

        headers = { "content-length": len(body) }
        request = self.s3.create_request("OBJECT_POST", uri = self.uri, headers = headers, extra = "?uploadId=%s" % (self.upload_id))
        response = self.s3.send_request(request, body = body)
        return response

    def abort_upload(self):
        """
        Abort multipart upload
        http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadAbort.html
        """
        debug("MultiPart: Aborting upload: %s" % self.upload_id)
        request = self.s3.create_request("OBJECT_DELETE", uri = self.uri, extra = "?uploadId=%s" % (self.upload_id))
        response = self.s3.send_request(request)
        return response
# vim:et:ts=4:sts=4:ai

14
fabfile/S3/PkgInfo.py

@ -1,14 +0,0 @@
## Package metadata constants (this module is s3cmd's PkgInfo).
package = "s3cmd"
# Version string reported by the tool.
version = "1.5.0-alpha3"
url = "http://s3tools.org"
license = "GPL version 2"
short_description = "Command line tool for managing Amazon S3 and CloudFront services"
# Multi-line description; leading/trailing newlines are part of the value.
long_description = """
S3cmd lets you copy files from/to Amazon S3
(Simple Storage Service) using a simple to use
command line client. Supports rsync-like backup,
GPG encryption, and more. Also supports management
of Amazon's CloudFront content delivery network.
"""

173
fabfile/S3/Progress.py

@ -1,173 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import sys
import datetime
import time
import Utils
class Progress(object):
    """Plain progress reporter for transfers: prints cumulative percentage
    milestones ("5% 10% ...").  Subclasses override display() for nicer
    in-place output (ANSI / carriage-return variants)."""
    _stdout = sys.stdout
    _last_display = 0

    def __init__(self, labels, total_size):
        # labels: dict with 'source', 'destination' and 'extra' keys.
        self._stdout = sys.stdout
        self.new_file(labels, total_size)

    def new_file(self, labels, total_size):
        """Reset position counters and timers for a new file of total_size bytes."""
        self.labels = labels
        self.total_size = total_size
        # Set initial_position to something in the case we're not counting
        # from 0 (e.g. appending to a partially downloaded file) so the
        # speed is computed right.
        self.initial_position = 0
        self.current_position = self.initial_position
        self.time_start = datetime.datetime.now()
        self.time_last = self.time_start
        self.time_current = self.time_start
        self.display(new_file = True)

    def update(self, current_position = -1, delta_position = -1):
        """Advance to an absolute position, or by a delta, then redraw."""
        self.time_last = self.time_current
        self.time_current = datetime.datetime.now()
        if current_position > -1:
            self.current_position = current_position
        elif delta_position > -1:
            self.current_position += delta_position
        # else: no position change - just refresh the display
        self.display()

    def done(self, message):
        """Finish the display with *message*."""
        self.display(done_message = message)

    def output_labels(self):
        """Print the "source -> destination extra" transfer header line."""
        self._stdout.write(u"%(source)s -> %(destination)s %(extra)s\n" % self.labels)
        self._stdout.flush()

    def _display_needed(self):
        # Throttle redraws to at most one per second.
        if time.time() - self._last_display > 1:
            self._last_display = time.time()
            return True
        return False

    def display(self, new_file = False, done_message = None):
        """
        display(new_file = False[/True], done = False[/True])

        Override this method to provide a nicer output.
        """
        if new_file:
            self.output_labels()
            self.last_milestone = 0
            return

        if self.current_position == self.total_size:
            # Transfer complete: print the final size / time / speed line.
            print_size = Utils.formatSize(self.current_position, True)
            if print_size[1] != "": print_size[1] += "B"
            timedelta = self.time_current - self.time_start
            sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0
            # Guard against a zero-length interval (instantaneous transfer),
            # which previously raised ZeroDivisionError.
            if sec_elapsed > 0:
                print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True)
            else:
                print_speed = (0, "")
            self._stdout.write("100%% %s%s in %.2fs (%.2f %sB/s)\n" %
                (print_size[0], print_size[1], sec_elapsed, print_speed[0], print_speed[1]))
            self._stdout.flush()
            return

        # Milestone path.  Fixes two defects in the original:
        # - "selfself.current_position" (NameError typo),
        # - write("%d%% ", x) passed a stray second argument to write().
        # Floor division keeps integer math on both Python 2 and 3.
        rel_position = self.current_position * 100 // self.total_size
        if rel_position >= self.last_milestone:
            # Round down to the nearest 5% step.
            self.last_milestone = (int(rel_position) // 5) * 5
            self._stdout.write("%d%% " % self.last_milestone)
            self._stdout.flush()
            return
class ProgressANSI(Progress):
    """Progress reporter that repaints a single line in place using ANSI
    escape sequences (http://en.wikipedia.org/wiki/ANSI_escape_code)."""
    SCI = '\x1b['
    ANSI_hide_cursor = SCI + "?25l"
    ANSI_show_cursor = SCI + "?25h"
    ANSI_save_cursor_pos = SCI + "s"
    ANSI_restore_cursor_pos = SCI + "u"
    ANSI_move_cursor_to_column = SCI + "%uG"
    ANSI_erase_to_eol = SCI + "0K"
    ANSI_erase_current_line = SCI + "2K"

    def display(self, new_file = False, done_message = None):
        """
        display(new_file = False[/True], done_message = None)
        """
        writer = self._stdout
        if new_file:
            # Print the header and remember where the progress line starts.
            self.output_labels()
            writer.write(self.ANSI_save_cursor_pos)
            writer.flush()
            return

        # Throttle intermediate redraws; final (done_message) ones always go out.
        if not done_message and not self._display_needed():
            return

        delta = self.time_current - self.time_start
        sec_elapsed = delta.days * 86400 + delta.seconds + float(delta.microseconds) / 1000000.0
        if sec_elapsed > 0:
            print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True)
        else:
            print_speed = (0, "")

        # Jump back to the start of the progress line and overwrite it.
        writer.write(self.ANSI_restore_cursor_pos)
        writer.write(self.ANSI_erase_to_eol)
        fields = {
            "current" : str(self.current_position).rjust(len(str(self.total_size))),
            "total" : self.total_size,
            "percent" : self.total_size and (self.current_position * 100 / self.total_size) or 0,
            "elapsed" : sec_elapsed,
            "speed" : print_speed[0],
            "speed_coeff" : print_speed[1]
        }
        writer.write("%(current)s of %(total)s %(percent)3d%% in %(elapsed)ds %(speed).2f %(speed_coeff)sB/s" % fields)
        if done_message:
            writer.write(" %s\n" % done_message)
        writer.flush()
class ProgressCR(Progress):
    """Progress reporter that redraws one line by emitting a Carriage
    Return, just like other command-line progress bars do."""
    CR_char = chr(13)

    def display(self, new_file = False, done_message = None):
        """
        display(new_file = False[/True], done_message = None)
        """
        if new_file:
            self.output_labels()
            return

        # Throttle intermediate redraws; final (done_message) ones always go out.
        if not done_message and not self._display_needed():
            return

        delta = self.time_current - self.time_start
        sec_elapsed = delta.days * 86400 + delta.seconds + float(delta.microseconds) / 1000000.0
        if sec_elapsed > 0:
            speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True)
        else:
            speed = (0, "")

        fields = {
            "current" : str(self.current_position).rjust(len(str(self.total_size))),
            "total" : self.total_size,
            "percent" : self.total_size and (self.current_position * 100 / self.total_size) or 0,
            "elapsed" : sec_elapsed,
            "speed" : speed[0],
            "speed_coeff" : speed[1]
        }
        self._stdout.write(self.CR_char)
        self._stdout.write(" %(current)s of %(total)s %(percent)3d%% in %(elapsed)4ds %(speed)7.2f %(speed_coeff)sB/s" % fields)
        if done_message:
            self._stdout.write(" %s\n" % done_message)
        self._stdout.flush()

979
fabfile/S3/S3.py

@ -1,979 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import sys
import os, os.path
import time
import httplib
import logging
import mimetypes
import re
from logging import debug, info, warning, error
from stat import ST_SIZE
try:
from hashlib import md5
except ImportError:
from md5 import md5
from Utils import *
from SortedDict import SortedDict
from AccessLog import AccessLog
from ACL import ACL, GranteeLogDelivery
from BidirMap import BidirMap
from Config import Config
from Exceptions import *
from MultiPart import MultiPartUpload
from S3Uri import S3Uri
from ConnMan import ConnMan
## MIME-type detection.  Three mutually incompatible "magic" Python
## bindings exist in the wild; probe for each in turn and expose a uniform
## mime_magic(file) -> (content_type, encoding) interface.  Falls back to
## extension-based guessing when none is available.
try:
    import magic, gzip
    try:
        ## https://github.com/ahupp/python-magic
        magic_ = magic.Magic(mime=True)
        def mime_magic_file(file):
            return magic_.from_file(file)
        def mime_magic_buffer(buffer):
            return magic_.from_buffer(buffer)
    except TypeError:
        ## http://pypi.python.org/pypi/filemagic
        try:
            magic_ = magic.Magic(flags=magic.MAGIC_MIME)
            def mime_magic_file(file):
                return magic_.id_filename(file)
            def mime_magic_buffer(buffer):
                return magic_.id_buffer(buffer)
        except TypeError:
            ## file-5.11 built-in python bindings
            magic_ = magic.open(magic.MAGIC_MIME)
            magic_.load()
            def mime_magic_file(file):
                return magic_.file(file)
            def mime_magic_buffer(buffer):
                return magic_.buffer(buffer)
    except AttributeError:
        ## Older python-magic versions
        magic_ = magic.open(magic.MAGIC_MIME)
        magic_.load()
        def mime_magic_file(file):
            return magic_.file(file)
        def mime_magic_buffer(buffer):
            return magic_.buffer(buffer)

    def mime_magic(file):
        # For gzip files, peek at the decompressed head so the payload's
        # real type is reported alongside the 'gzip' encoding.
        type = mime_magic_file(file)
        if type != "application/x-gzip; charset=binary":
            return (type, None)
        else:
            return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip')

except ImportError, e:
    # python-magic is missing or unusable: warn once on first use, then
    # guess from the file extension via the mimetypes module.
    if str(e).find("magic") >= 0:
        magic_message = "Module python-magic is not available."
    else:
        magic_message = "Module python-magic can't be used (%s)." % e.message
    magic_message += " Guessing MIME types based on file extensions."
    magic_warned = False
    def mime_magic(file):
        global magic_warned
        if (not magic_warned):
            warning(magic_message)
            magic_warned = True
        return mimetypes.guess_type(file)

# Explicit empty export list: "from ... import *" exposes nothing.
__all__ = []
class S3Request(object):
    """A single signed S3 REST request: method + resource + headers + params.

    The AWS V2 signature is computed at construction time and refreshed by
    get_triplet() just before the request is actually sent.
    """
    def __init__(self, s3, method_string, resource, headers, params = None):
        """
        s3            -- owning S3 instance (supplies config and credentials)
        method_string -- HTTP verb, e.g. "GET" or "PUT"
        resource      -- dict with 'bucket' and 'uri' keys
        headers       -- mapping of HTTP headers (matched case-insensitively)
        params        -- optional dict of query-string parameters
        """
        self.s3 = s3
        self.headers = SortedDict(headers or {}, ignore_case = True)
        # Add in any extra headers from s3 config object
        if self.s3.config.extra_headers:
            self.headers.update(self.s3.config.extra_headers)
        # Temporary (role) credentials require the security-token header
        # and may need refreshing first.
        if len(self.s3.config.access_token) > 0:
            self.s3.config.role_refresh()
            self.headers['x-amz-security-token'] = self.s3.config.access_token
        self.resource = resource
        self.method_string = method_string
        # Fresh dict per request: the previous "params = {}" default was a
        # shared mutable object that could leak state between instances.
        self.params = params if params is not None else {}

        self.update_timestamp()
        self.sign()

    def update_timestamp(self):
        """Stamp the request with the current GMT time in x-amz-date."""
        if self.headers.has_key("date"):
            del(self.headers["date"])
        self.headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

    def format_param_str(self):
        """
        Format URL parameters from self.params and returns
        ?parm1=val1&parm2=val2 or an empty string if there
        are no parameters.  Output of this function should
        be appended directly to self.resource['uri']
        """
        param_str = ""
        for param in self.params:
            if self.params[param] not in (None, ""):
                param_str += "&%s=%s" % (param, self.params[param])
            else:
                # Valueless parameters (e.g. "?acl") keep just the name.
                param_str += "&%s" % param
        return param_str and "?" + param_str[1:]

    def sign(self):
        """Compute the AWS V2 string-to-sign and set the Authorization header."""
        h = self.method_string + "\n"
        h += self.headers.get("content-md5", "")+"\n"
        h += self.headers.get("content-type", "")+"\n"
        h += self.headers.get("date", "")+"\n"
        # All x-amz-* headers participate in the signature.
        for header in self.headers.keys():
            if header.startswith("x-amz-"):
                h += header+":"+str(self.headers[header])+"\n"
        if self.resource['bucket']:
            h += "/" + self.resource['bucket']
        h += self.resource['uri']
        debug("SignHeaders: " + repr(h))
        signature = sign_string(h)
        self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature

    def get_triplet(self):
        """Return (method, resource-with-query-string, headers), re-signed
        with a fresh timestamp."""
        self.update_timestamp()
        self.sign()
        resource = dict(self.resource)  ## take a copy
        resource['uri'] += self.format_param_str()
        return (self.method_string, resource, self.headers)
class S3(object):
    ## Bit values for HTTP methods; OR-ed with a target bit they form an
    ## operation code in "operations" below.
    http_methods = BidirMap(
        GET = 0x01,
        PUT = 0x02,
        HEAD = 0x04,
        DELETE = 0x08,
        POST = 0x10,
        MASK = 0x1F,
        )

    ## Bit values for the kind of resource an operation addresses.
    targets = BidirMap(
        SERVICE = 0x0100,
        BUCKET = 0x0200,
        OBJECT = 0x0400,
        MASK = 0x0700,
        )

    ## Operation codes = target | method.
    ## NOTE(review): "UNDFINED" is a pre-existing typo; it is a key of this
    ## map, so renaming it would be a behavior change.
    operations = BidirMap(
        UNDFINED = 0x0000,
        LIST_ALL_BUCKETS = targets["SERVICE"] | http_methods["GET"],
        BUCKET_CREATE = targets["BUCKET"] | http_methods["PUT"],
        BUCKET_LIST = targets["BUCKET"] | http_methods["GET"],
        BUCKET_DELETE = targets["BUCKET"] | http_methods["DELETE"],
        OBJECT_PUT = targets["OBJECT"] | http_methods["PUT"],
        OBJECT_GET = targets["OBJECT"] | http_methods["GET"],
        OBJECT_HEAD = targets["OBJECT"] | http_methods["HEAD"],
        OBJECT_DELETE = targets["OBJECT"] | http_methods["DELETE"],
        OBJECT_POST = targets["OBJECT"] | http_methods["POST"],
        )

    ## Templates for turning S3 error codes into user-friendly messages.
    codes = {
        "NoSuchBucket" : "Bucket '%s' does not exist",
        "AccessDenied" : "Access to bucket '%s' was denied",
        "BucketAlreadyExists" : "Bucket '%s' already exists",
        }

    ## S3 sometimes sends HTTP-307 response
    # (bucket name -> redirect hostname; populated by set_hostname())
    redir_map = {}

    ## Maximum attempts of re-issuing failed requests
    _max_retries = 5

    def __init__(self, config):
        # config: Config instance carrying credentials and options.
        self.config = config
def get_hostname(self, bucket):
    """Return the hostname to contact for *bucket*, honouring any
    remembered HTTP-307 redirect target."""
    if bucket and check_bucket_name_dns_conformity(bucket):
        # DNS-conforming names can use virtual-host style addressing;
        # prefer a redirect target if S3 gave us one earlier.
        try:
            host = self.redir_map[bucket]
        except KeyError:
            host = getHostnameFromBucket(bucket)
    else:
        host = self.config.host_base
    debug('get_hostname(%s): %s' % (bucket, host))
    return host
def set_hostname(self, bucket, redir_hostname):
    # Remember an S3 redirect target for this bucket (see redir_map).
    self.redir_map[bucket] = redir_hostname
def format_uri(self, resource):
    """Build the request URI: path-style for non-DNS-conforming buckets,
    absolute (with scheme and host) when going through a proxy."""
    bucket = resource['bucket']
    if bucket and not check_bucket_name_dns_conformity(bucket):
        uri = "/%s%s" % (bucket, resource['uri'])
    else:
        uri = resource['uri']
    if self.config.proxy_host != "":
        # Proxies require an absolute request URI.
        uri = "http://%s%s" % (self.get_hostname(bucket), uri)
    debug('format_uri(): ' + uri)
    return uri
## Commands / Actions
def list_all_buckets(self):
    """GET the service root; adds a 'list' key with all Bucket entries."""
    request = self.create_request("LIST_ALL_BUCKETS")
    response = self.send_request(request)
    response["list"] = getListFromXml(response["data"], "Bucket")
    return response
def bucket_list(self, bucket, prefix = None, recursive = None):
    """List a bucket, transparently following truncated (paginated)
    responses.  Returns the last raw response augmented with 'list'
    (all Contents entries) and 'common_prefixes' keys."""
    def _list_truncated(data):
        ## <IsTruncated> can either be "true" or "false" or be missing completely
        is_truncated = getTextFromXml(data, ".//IsTruncated") or "false"
        return is_truncated.lower() != "false"

    def _get_contents(data):
        return getListFromXml(data, "Contents")

    def _get_common_prefixes(data):
        return getListFromXml(data, "CommonPrefixes")

    uri_params = {}
    truncated = True
    # NOTE(review): "list" shadows the builtin; kept as-is here.
    list = []
    prefixes = []

    while truncated:
        response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params)
        current_list = _get_contents(response["data"])
        current_prefixes = _get_common_prefixes(response["data"])
        truncated = _list_truncated(response["data"])
        if truncated:
            # Continue after the last key (or, for delimited listings with
            # no keys, after the last common prefix).
            if current_list:
                uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"])
            else:
                uri_params['marker'] = self.urlencode_string(current_prefixes[-1]["Prefix"])
            debug("Listing continues after '%s'" % uri_params['marker'])

        list += current_list
        prefixes += current_prefixes

    response['list'] = list
    response['common_prefixes'] = prefixes
    return response
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = None):
    """Issue a single (possibly truncated) BUCKET_LIST request.

    uri_params may carry e.g. a 'marker' to continue a previous listing;
    it is updated in place with 'prefix'/'delimiter' as needed.  Returns
    the raw response; callers parse the XML themselves.
    """
    # Guard against the shared-mutable-default pitfall: the original
    # "uri_params = {}" default was mutated below, leaking 'prefix' and
    # 'delimiter' values between unrelated calls.
    if uri_params is None:
        uri_params = {}
    if prefix:
        uri_params['prefix'] = self.urlencode_string(prefix)
    if not self.config.recursive and not recursive:
        uri_params['delimiter'] = "/"
    request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params)
    response = self.send_request(request)
    #debug(response)
    return response
def bucket_create(self, bucket, bucket_location = None):
    """PUT a new bucket, optionally with a LocationConstraint body."""
    headers = SortedDict(ignore_case = True)
    body = ""
    if bucket_location and bucket_location.strip().upper() != "US":
        bucket_location = bucket_location.strip()
        # Legacy "EU" alias must stay upper-case; real region names
        # (e.g. "eu-west-1") are lower-case.
        if bucket_location.upper() == "EU":
            bucket_location = bucket_location.upper()
        else:
            bucket_location = bucket_location.lower()
        body = "<CreateBucketConfiguration><LocationConstraint>"
        body += bucket_location
        body += "</LocationConstraint></CreateBucketConfiguration>"
        debug("bucket_location: " + body)
        # Location-constrained buckets require DNS-conforming names.
        check_bucket_name(bucket, dns_strict = True)
    else:
        check_bucket_name(bucket, dns_strict = False)
    if self.config.acl_public:
        headers["x-amz-acl"] = "public-read"

    request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers)
    response = self.send_request(request, body)
    return response
def bucket_delete(self, bucket):
    """DELETE the named bucket."""
    request = self.create_request("BUCKET_DELETE", bucket = bucket)
    response = self.send_request(request)
    return response
def get_bucket_location(self, uri):
    """Return the bucket's region name, e.g. "us-east-1"."""
    request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?location")
    response = self.send_request(request)
    constraint = getTextFromXml(response['data'], "LocationConstraint")
    # S3 reports legacy aliases: empty/"US" means us-east-1, "EU" eu-west-1.
    if not constraint or constraint in [ "", "US" ]:
        return "us-east-1"
    if constraint == "EU":
        return "eu-west-1"
    return constraint
def bucket_info(self, uri):
    """Return a dict of bucket information."""
    # For now reports only "Location". One day perhaps more.
    response = {}
    response['bucket-location'] = self.get_bucket_location(uri)
    return response
def website_info(self, uri, bucket_location = None):
    """GET the bucket's ?website configuration.

    Returns the response augmented with 'index_document',
    'error_document' and 'website_endpoint' keys, or None when the
    bucket has no website configuration (S3 answers 404)."""
    headers = SortedDict(ignore_case = True)
    bucket = uri.bucket()
    body = ""

    request = self.create_request("BUCKET_LIST", bucket = bucket, extra="?website")
    try:
        response = self.send_request(request, body)
        response['index_document'] = getTextFromXml(response['data'], ".//IndexDocument//Suffix")
        response['error_document'] = getTextFromXml(response['data'], ".//ErrorDocument//Key")
        response['website_endpoint'] = self.config.website_endpoint % {
            "bucket" : uri.bucket(),
            "location" : self.get_bucket_location(uri)}
        return response
    except S3Error, e:
        # A 404 here just means "no website configuration" - not an error.
        if e.status == 404:
            debug("Could not get /?website - website probably not configured for this bucket")
            return None
        raise
def website_create(self, uri, bucket_location = None):
    """Enable static-website hosting on the bucket (PUT ?website) using
    config.website_index and (optionally) config.website_error."""
    headers = SortedDict(ignore_case = True)
    bucket = uri.bucket()
    # Assemble the WebsiteConfiguration XML payload piece by piece.
    fragments = ['<WebsiteConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">']
    fragments.append(' <IndexDocument>')
    fragments.append(' <Suffix>%s</Suffix>' % self.config.website_index)
    fragments.append(' </IndexDocument>')
    if self.config.website_error:
        fragments.append(' <ErrorDocument>')
        fragments.append(' <Key>%s</Key>' % self.config.website_error)
        fragments.append(' </ErrorDocument>')
    fragments.append('</WebsiteConfiguration>')
    body = "".join(fragments)

    request = self.create_request("BUCKET_CREATE", bucket = bucket, extra="?website")
    debug("About to send request '%s' with body '%s'" % (request, body))
    response = self.send_request(request, body)
    debug("Received response '%s'" % (response))

    return response
def website_delete(self, uri, bucket_location = None):
    """DELETE the bucket's ?website configuration (expects HTTP 204)."""
    headers = SortedDict(ignore_case = True)
    bucket = uri.bucket()
    body = ""

    request = self.create_request("BUCKET_DELETE", bucket = bucket, extra="?website")
    debug("About to send request '%s' with body '%s'" % (request, body))
    response = self.send_request(request, body)
    debug("Received response '%s'" % (response))

    if response['status'] != 204:
        raise S3ResponseError("Expected status 204: %s" % response)

    return response
def add_encoding(self, filename, content_type):
    """Decide whether a charset should be appended to *content_type*.

    True only when no charset is present yet, the feature is enabled via
    config.add_encoding_exts, and the file's (inner) extension matches.
    """
    if "charset=" in content_type:
        return False
    exts = self.config.add_encoding_exts.split(',')
    if exts[0] == '':
        # Empty config value means the feature is disabled.
        return False
    parts = filename.rsplit('.', 2)
    if len(parts) < 2:
        return False
    # For "name.css.gz" this inspects "css" - the pre-compression extension.
    return parts[1] in exts
def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
    """Upload a local file (or stdin when filename is "-") to *uri*.

    Chooses between a plain PUT and a multipart upload based on file
    size and config.enable_multipart.  Raises InvalidFileError for
    unreadable/non-regular files and ParameterError when stdin is used
    with multipart disabled."""
    # TODO TODO
    # Make it consistent with stream-oriented object_get()
    if uri.type != "s3":
        raise ValueError("Expected URI type 's3', got '%s'" % uri.type)

    if filename != "-" and not os.path.isfile(filename):
        raise InvalidFileError(u"%s is not a regular file" % unicodise(filename))
    try:
        if filename == "-":
            # Streaming upload: size is unknown up front.
            file = sys.stdin
            size = 0
        else:
            file = open(filename, "rb")
            size = os.stat(filename)[ST_SIZE]
    except (IOError, OSError), e:
        raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror))

    headers = SortedDict(ignore_case = True)
    if extra_headers:
        headers.update(extra_headers)

    ## MIME-type handling
    content_type = self.config.mime_type
    content_encoding = None
    if filename != "-" and not content_type and self.config.guess_mime_type:
        (content_type, content_encoding) = mime_magic(filename)
    if not content_type:
        content_type = self.config.default_mime_type
    if not content_encoding:
        content_encoding = self.config.encoding.upper()

    ## add charset to content type
    if self.add_encoding(filename, content_type) and content_encoding is not None:
        content_type = content_type + "; charset=" + content_encoding

    headers["content-type"] = content_type
    if content_encoding is not None:
        headers["content-encoding"] = content_encoding

    ## Other Amazon S3 attributes
    if self.config.acl_public:
        headers["x-amz-acl"] = "public-read"
    if self.config.reduced_redundancy:
        headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"

    ## Multipart decision
    multipart = False
    if not self.config.enable_multipart and filename == "-":
        raise ParameterError("Multi-part upload is required to upload from stdin")
    if self.config.enable_multipart:
        # Files above the chunk size, and all stdin uploads, go multipart.
        if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-":
            multipart = True
    if multipart:
        # Multipart requests are quite different... drop here
        return self.send_file_multipart(file, headers, uri, size)

    ## Not multipart...
    headers["content-length"] = size
    request = self.create_request("OBJECT_PUT", uri = uri, headers = headers)
    labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label }
    response = self.send_file(request, file, labels)
    return response
def object_get(self, uri, stream, start_position = 0, extra_label = ""):
    """Download *uri* into the open file object *stream*, resuming at
    start_position (used to continue partial downloads)."""
    if uri.type != "s3":
        raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
    request = self.create_request("OBJECT_GET", uri = uri)
    labels = { 'source' : unicodise(uri.uri()), 'destination' : unicodise(stream.name), 'extra' : extra_label }
    response = self.recv_file(request, stream, labels, start_position)
    return response
def object_delete(self, uri):
    """DELETE a single S3 object."""
    if uri.type != "s3":
        raise ValueError("Expected URI type 's3', got '%s'" % uri.type)
    request = self.create_request("OBJECT_DELETE", uri = uri)
    response = self.send_request(request)
    return response
def object_copy(self, src_uri, dst_uri, extra_headers = None):
    """Server-side copy of one S3 object to another S3 location."""
    if src_uri.type != "s3":
        raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type)
    if dst_uri.type != "s3":
        raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type)
    headers = SortedDict(ignore_case = True)
    headers['x-amz-copy-source'] = "/%s/%s" % (src_uri.bucket(), self.urlencode_string(src_uri.object()))
    ## TODO: For now COPY, later maybe add a switch?
    headers['x-amz-metadata-directive'] = "COPY"
    if self.config.acl_public:
        headers["x-amz-acl"] = "public-read"
    if self.config.reduced_redundancy:
        headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY"
    # NOTE(review): extra_headers is accepted but deliberately not applied
    # (the update below is commented out) - confirm before enabling.
    # if extra_headers:
    #     headers.update(extra_headers)
    request = self.create_request("OBJECT_PUT", uri = dst_uri, headers = headers)
    response = self.send_request(request)
    return response
def object_move(self, src_uri, dst_uri, extra_headers = None):
    """Emulate a move as copy-then-delete; the source is only deleted if
    the copy response confirms a CopyObjectResult."""
    response_copy = self.object_copy(src_uri, dst_uri, extra_headers)
    debug("Object %s copied to %s" % (src_uri, dst_uri))
    if getRootTagName(response_copy["data"]) == "CopyObjectResult":
        response_delete = self.object_delete(src_uri)
        debug("Object %s deleted" % src_uri)
    return response_copy
def object_info(self, uri):
    """HEAD an object; size, etag and metadata come back in the headers."""
    request = self.create_request("OBJECT_HEAD", uri = uri)
    response = self.send_request(request)
    return response
def get_acl(self, uri):
    """Fetch the ACL for an object (if the URI names one) or its bucket,
    returned as an ACL instance."""
    if uri.has_object():
        request = self.create_request("OBJECT_GET", uri = uri, extra = "?acl")
    else:
        request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?acl")

    response = self.send_request(request)
    acl = ACL(response['data'])
    return acl
def set_acl(self, uri, acl):
    """Upload 'acl' (serialised to its XML form) for the given bucket or object."""
    if uri.has_object():
        req = self.create_request("OBJECT_PUT", uri = uri, extra = "?acl")
    else:
        req = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?acl")
    acl_xml = str(acl)
    debug(u"set_acl(%s): acl-xml: %s" % (uri, acl_xml))
    return self.send_request(req, acl_xml)
def get_policy(self, uri):
    """Return the bucket policy document (raw JSON string) for 'uri'."""
    req = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?policy")
    return self.send_request(req)['data']
def set_policy(self, uri, policy):
    """Attach a bucket policy ('policy' is a raw JSON string) to the bucket."""
    headers = {}
    # TODO check policy is proper json string
    headers['content-type'] = 'application/json'
    request = self.create_request("BUCKET_CREATE", uri = uri,
                                  extra = "?policy", headers=headers)
    body = policy
    debug(u"set_policy(%s): policy-json: %s" % (uri, body))
    # NOTE(review): explicit sign() before sending -- the other methods here
    # rely on send_request() alone; confirm whether this is actually needed.
    request.sign()
    response = self.send_request(request, body=body)
    return response
def delete_policy(self, uri):
    """Remove the bucket policy from the bucket addressed by 'uri'."""
    debug(u"delete_policy(%s)" % uri)
    req = self.create_request("BUCKET_DELETE", uri = uri, extra = "?policy")
    return self.send_request(req)
def get_accesslog(self, uri):
    """Fetch the server-access-logging configuration of the bucket."""
    req = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging")
    return AccessLog(self.send_request(req)['data'])
def set_accesslog_acl(self, uri):
    """Grant the S3 log-delivery group the permissions it needs on 'uri'."""
    acl = self.get_acl(uri)
    debug("Current ACL(%s): %s" % (uri.uri(), str(acl)))
    # Log delivery must be able to read the ACL and write log objects.
    acl.appendGrantee(GranteeLogDelivery("READ_ACP"))
    acl.appendGrantee(GranteeLogDelivery("WRITE"))
    debug("Updated ACL(%s): %s" % (uri.uri(), str(acl)))
    self.set_acl(uri, acl)
def set_accesslog(self, uri, enable, log_target_prefix_uri = None, acl_public = False):
    """Enable or disable server access logging for a bucket.

    On 'InvalidTargetBucketForLogging' the target bucket is given the
    log-delivery ACL and the request is retried once.
    Returns a (accesslog, response) tuple.
    """
    request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?logging")
    accesslog = AccessLog()
    if enable:
        accesslog.enableLogging(log_target_prefix_uri)
        accesslog.setAclPublic(acl_public)
    else:
        accesslog.disableLogging()
    body = str(accesslog)
    debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body))
    try:
        response = self.send_request(request, body)
    except S3Error, e:
        if e.info['Code'] == "InvalidTargetBucketForLogging":
            # Target bucket not yet writable by the log-delivery group:
            # fix its ACL, then retry once.
            info("Setting up log-delivery ACL for target bucket.")
            self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket()))
            response = self.send_request(request, body)
        else:
            raise
    return accesslog, response
## Low level methods
def urlencode_string(self, string, urlencoding_mode = None):
    """Percent-encode 'string' for use in an S3 request path.

    urlencoding_mode is "verbatim" (no encoding at all), "fixbucket"
    (%-encode even non-printable characters), or None to fall back to
    the configured default.
    """
    if type(string) == unicode:
        string = string.encode("utf-8")
    if urlencoding_mode is None:
        urlencoding_mode = self.config.urlencoding_mode
    if urlencoding_mode == "verbatim":
        ## Don't do any pre-processing
        return string
    encoded = ""
    ## List of characters that must be escaped for S3
    ## Haven't found this in any official docs
    ## but my tests show it's more less correct.
    ## If you start getting InvalidSignature errors
    ## from S3 check the error headers returned
    ## from S3 to see whether the list hasn't
    ## changed.
    for c in string: # I'm not sure how to know in what encoding
                     # 'object' is. Apparently "type(object)==str"
                     # but the contents is a string of unicode
                     # bytes, e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k'
                     # Don't know what it will do on non-utf8
                     # systems.
                     #           [hope that sounds reassuring ;-)]
        o = ord(c)
        if (o < 0x20 or o == 0x7f):
            # Control characters: %-encode them in "fixbucket" mode,
            # otherwise substitute a printable placeholder and complain.
            if urlencoding_mode == "fixbucket":
                encoded += "%%%02X" % o
            else:
                error(u"Non-printable character 0x%02x in: %s" % (o, string))
                error(u"Please report it to s3tools-bugs@lists.sourceforge.net")
                encoded += replace_nonprintables(c)
        elif (o == 0x20 or  # Space and below
            o == 0x22 or    # "
            o == 0x23 or    # #
            o == 0x25 or    # % (escape character)
            o == 0x26 or    # &
            o == 0x2B or    # + (or it would become <space>)
            o == 0x3C or    # <
            o == 0x3E or    # >
            o == 0x3F or    # ?
            o == 0x60 or    # `
            o >= 123):      # { and above, including >= 128 for UTF-8
            encoded += "%%%02X" % o
        else:
            encoded += c
    debug("String '%s' encoded to '%s'" % (string, encoded))
    return encoded
def create_request(self, operation, uri = None, bucket = None, object = None, headers = None, extra = None, **params):
    """Build an (unsent) S3Request for 'operation'.

    The target may be given either as 'uri' OR as 'bucket'/'object',
    never both. 'extra' is appended verbatim to the resource path
    (e.g. "?acl"). Remaining keyword arguments become query parameters.
    """
    resource = { 'bucket' : None, 'uri' : "/" }
    if uri and (bucket or object):
        raise ValueError("Both 'uri' and either 'bucket' or 'object' parameters supplied")
    ## If URI is given use that instead of bucket/object parameters
    if uri:
        bucket = uri.bucket()
        object = uri.has_object() and uri.object() or None
    if bucket:
        resource['bucket'] = str(bucket)
        if object:
            resource['uri'] = "/" + self.urlencode_string(object)
    if extra:
        resource['uri'] += extra
    # Derive the HTTP verb (GET/PUT/DELETE/...) from the operation bitmask.
    method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"])
    request = S3Request(self, method_string, resource, headers, params)
    debug("CreateRequest: resource[uri]=" + resource['uri'])
    return request
def _fail_wait(self, retries):
    """Seconds to sleep before the next retry attempt."""
    # Wait a few seconds. The more it fails the more we wait.
    return (self._max_retries - retries + 1) * 3
def send_request(self, request, body = None, retries = _max_retries):
    """Send a prepared S3Request and return the parsed response dict.

    Retries (with back-off) on connection errors and HTTP 5xx, follows
    307 bucket redirects, raises S3Error for other non-2xx statuses.
    Response dict keys: status, reason, headers, data.
    """
    method_string, resource, headers = request.get_triplet()
    debug("Processing request, please wait...")
    if not headers.has_key('content-length'):
        headers['content-length'] = body and len(body) or 0
    try:
        # "Stringify" all headers
        for header in headers.keys():
            headers[header] = str(headers[header])
        conn = ConnMan.get(self.get_hostname(resource['bucket']))
        uri = self.format_uri(resource)
        debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or "")))
        conn.c.request(method_string, uri, body, headers)
        response = {}
        http_response = conn.c.getresponse()
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
        response["data"] = http_response.read()
        debug("Response: " + str(response))
        ConnMan.put(conn)
    except ParameterError, e:
        raise
    except Exception, e:
        # Any transport-level failure: back off and retry recursively.
        if retries:
            warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            return self.send_request(request, body, retries - 1)
        else:
            raise S3RequestError("Request failed for: %s" % resource['uri'])
    if response["status"] == 307:
        ## RedirectPermanent: remember the bucket's real endpoint and
        ## resend (with a fresh retry budget).
        redir_bucket = getTextFromXml(response['data'], ".//Bucket")
        redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
        self.set_hostname(redir_bucket, redir_hostname)
        warning("Redirected to: %s" % (redir_hostname))
        return self.send_request(request, body)
    if response["status"] >= 500:
        # AWS internal error -- worth retrying with back-off.
        e = S3Error(response)
        if retries:
            warning(u"Retrying failed request: %s" % resource['uri'])
            warning(unicode(e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            return self.send_request(request, body, retries - 1)
        else:
            raise e
    if response["status"] < 200 or response["status"] > 299:
        raise S3Error(response)
    return response
def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
    """Upload the contents of 'file' (or, if non-empty, 'buffer') as the
    body of 'request'.

    Data is streamed in config.send_chunk pieces while an MD5 is computed
    and later checked against the returned ETag. Failures are retried
    with an increasing 'throttle' delay; 307 redirects are followed.
    Returns the response dict extended with size/elapsed/speed.
    """
    method_string, resource, headers = request.get_triplet()
    size_left = size_total = headers.get("content-length")
    if self.config.progress_meter:
        progress = self.config.progress_class(labels, size_total)
    else:
        info("Sending file '%s', please wait..." % file.name)
    timestamp_start = time.time()
    try:
        conn = ConnMan.get(self.get_hostname(resource['bucket']))
        conn.c.putrequest(method_string, self.format_uri(resource))
        for header in headers.keys():
            conn.c.putheader(header, str(headers[header]))
        conn.c.endheaders()
    except ParameterError, e:
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            raise S3UploadError("Upload failed for: %s" % resource['uri'])
    if buffer == '':
        # Reading from the file object: position it at this part's offset.
        file.seek(offset)
    md5_hash = md5()
    try:
        while (size_left > 0):
            #debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left))
            if buffer == '':
                data = file.read(min(self.config.send_chunk, size_left))
            else:
                data = buffer
            md5_hash.update(data)
            conn.c.send(data)
            if self.config.progress_meter:
                progress.update(delta_position = len(data))
            size_left -= len(data)
            if throttle:
                time.sleep(throttle)
        md5_computed = md5_hash.hexdigest()
        response = {}
        http_response = conn.c.getresponse()
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
        response["data"] = http_response.read()
        response["size"] = size_total
        ConnMan.put(conn)
        debug(u"Response: %s" % response)
    except ParameterError, e:
        raise
    except Exception, e:
        if self.config.progress_meter:
            progress.done("failed")
        if retries:
            if retries < self._max_retries:
                # Failed mid-transfer at least once before: slow down.
                throttle = throttle and throttle * 5 or 0.01
            warning("Upload failed: %s (%s)" % (resource['uri'], e))
            warning("Retrying on lower speed (throttle=%0.2f)" % throttle)
            warning("Waiting %d sec..." % self._fail_wait(retries))
            time.sleep(self._fail_wait(retries))
            # Connection error -> same throttle value
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            debug("Giving up on '%s' %s" % (file.name, e))
            raise S3UploadError("Upload failed for: %s" % resource['uri'])
    timestamp_end = time.time()
    response["elapsed"] = timestamp_end - timestamp_start
    response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
    if self.config.progress_meter:
        ## Finalising the upload takes some time -> update() progress meter
        ## to correct the average speed. Otherwise people will complain that
        ## 'progress' and response["speed"] are inconsistent ;-)
        progress.update()
        progress.done("done")
    if response["status"] == 307:
        ## RedirectPermanent
        redir_bucket = getTextFromXml(response['data'], ".//Bucket")
        redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
        self.set_hostname(redir_bucket, redir_hostname)
        warning("Redirected to: %s" % (redir_hostname))
        return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)
    # S3 from time to time doesn't send ETag back in a response :-(
    # Force re-upload here.
    if not response['headers'].has_key('etag'):
        response['headers']['etag'] = ''
    if response["status"] < 200 or response["status"] > 299:
        try_retry = False
        if response["status"] >= 500:
            ## AWS internal error - retry
            try_retry = True
        elif response["status"] >= 400:
            err = S3Error(response)
            ## Retriable client error?
            if err.code in [ 'BadDigest', 'OperationAborted', 'TokenRefreshRequired', 'RequestTimeout' ]:
                try_retry = True
        if try_retry:
            if retries:
                warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response)))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
            else:
                warning("Too many failures. Giving up on '%s'" % (file.name))
                raise S3UploadError
        ## Non-recoverable error
        raise S3Error(response)
    debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"]))
    if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest():
        # Data corrupted in transit (or truncated) -- re-upload.
        warning("MD5 Sums don't match!")
        if retries:
            warning("Retrying upload of %s" % (file.name))
            return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size)
        else:
            warning("Too many failures. Giving up on '%s'" % (file.name))
            raise S3UploadError
    return response
def send_file_multipart(self, file, headers, uri, size):
    """Upload 'file' ('size' bytes) to 'uri' using S3 multipart upload.

    Delegates chunking to MultiPartUpload (which reads the chunk size
    from config itself) and returns the completion response extended
    with size/elapsed/speed.
    """
    # Fix: the old 'chunk_size' local computed here was never used --
    # MultiPartUpload derives the chunk size from config on its own.
    timestamp_start = time.time()
    upload = MultiPartUpload(self, file, uri, headers)
    upload.upload_all_parts()
    response = upload.complete_multipart_upload()
    timestamp_end = time.time()
    response["elapsed"] = timestamp_end - timestamp_start
    response["size"] = size
    response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
    return response
def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries):
method_string, resource, headers = request.get_triplet()
if self.config.progress_meter:
progress = self.config.progress_class(labels, 0)
else:
info("Receiving file '%s', please wait..." % stream.name)
timestamp_start = time.time()
try:
conn = ConnMan.get(self.get_hostname(resource['bucket']))
conn.c.putrequest(method_string, self.format_uri(resource))
for header in headers.keys():
conn.c.putheader(header, str(headers[header]))
if start_position > 0:
debug("Requesting Range: %d .. end" % start_position)
conn.c.putheader("Range", "bytes=%d-" % start_position)
conn.c.endheaders()
response = {}
http_response = conn.c.getresponse()
response["status"] = http_response.status
response["reason"] = http_response.reason
response["headers"] = convertTupleListToDict(http_response.getheaders())
debug("Response: %s" % response)
except ParameterError, e:
raise
except Exception, e:
if self.config.progress_meter:
progress.done("failed")
if retries:
warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
warning("Waiting %d sec..." % self._fail_wait(retries))
time.sleep(self._fail_wait(retries))
# Connection error -> same throttle value
return self.recv_file(request, stream, labels, start_position, retries - 1)
else:
raise S3DownloadError("Download failed for: %s" % resource['uri'])
if response["status"] == 307:
## RedirectPermanent
response['data'] = http_response.read()
redir_bucket = getTextFromXml(response['data'], ".//Bucket")
redir_hostname = getTextFromXml(response['data'], ".//Endpoint")
self.set_hostname(redir_bucket, redir_hostname)
warning("Redirected to: %s" % (redir_hostname))
return self.recv_file(request, stream, labels)
if response["status"] < 200 or response["status"] > 299:
raise S3Error(response)
if start_position == 0:
# Only compute MD5 on the fly if we're downloading from beginning
# Otherwise we'd get a nonsense.
md5_hash = md5()
size_left = int(response["headers"]["content-length"])
size_total = start_position + size_left
current_position = start_position
if self.config.progress_meter:
progress.total_size = size_total
progress.initial_position = current_position
progress.current_position = current_position
try:
while (current_position < size_total):
this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left
data = http_response.read(this_chunk)
if len(data) == 0:
raise S3Error("EOF from S3!")
stream.write(data)
if start_position == 0:
md5_hash.update(data)
current_position += len(data)
## Call progress meter from here...
if self.config.progress_meter:
progress.update(delta_position = len(data))
ConnMan.put(conn)
except Exception, e:
if self.config.progress_meter:
progress.done("failed")
if retries:
warning("Retrying failed request: %s (%s)" % (resource['uri'], e))
warning("Waiting %d sec..." % self._fail_wait(retries))
time.sleep(self._fail_wait(retries))
# Connection error -> same throttle value
return self.recv_file(request, stream, labels, current_position, retries - 1)
else:
raise S3DownloadError("Download failed for: %s" % resource['uri'])
stream.flush()
timestamp_end = time.time()
if self.config.progress_meter:
## The above stream.flush() may take some time -> update() progress meter
## to correct the average speed. Otherwise people will complain that
## 'progress' and response["speed"] are inconsistent ;-)
progress.update()
progress.done("done")
if start_position == 0:
# Only compute MD5 on the fly if we were downloading from the beginning
response["md5"] = md5_hash.hexdigest()
else:
# Otherwise try to compute MD5 of the output file
try:
response["md5"] = hash_file_md5(stream.name)
except IOError, e:
if e.errno != errno.ENOENT:
warning("Unable to open file: %s: %s" % (stream.name, e))
warning("Unable to verify MD5. Assume it matches.")
response["md5"] = response["headers"]["etag"]
response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0
response["elapsed"] = timestamp_end - timestamp_start
response["size"] = current_position
response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1)
if response["size"] != start_position + long(response["headers"]["content-length"]):
warning("Reported size (%s) does not match received size (%s)" % (
start_position + response["headers"]["content-length"], response["size"]))
debug("ReceiveFile: Computed MD5 = %s" % response["md5"])
if not response["md5match"]:
warning("MD5 signatures do not match: computed=%s, received=%s" % (
response["md5"], response["headers"]["etag"]))
return response
__all__.append("S3")
# vim:et:ts=4:sts=4:ai

223
fabfile/S3/S3Uri.py

@ -1,223 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import os
import re
import sys
from BidirMap import BidirMap
from logging import debug
import S3
from Utils import unicodise, check_bucket_name_dns_conformity
import Config
class S3Uri(object):
    """Polymorphic URI factory and base class.

    Instantiating S3Uri("...") returns an instance of whichever subclass
    (S3UriS3, S3UriS3FS, S3UriFile, S3UriCloudFront) first accepts the
    string; raises ValueError when none does.
    """
    type = None
    _subclasses = None

    def __new__(self, string):
        if not self._subclasses:
            ## Generate a list of all subclasses of S3Uri
            ## (done lazily, once, by scanning this module's namespace).
            self._subclasses = []
            dict = sys.modules[__name__].__dict__
            for something in dict:
                if type(dict[something]) is not type(self):
                    continue
                if issubclass(dict[something], self) and dict[something] != self:
                    self._subclasses.append(dict[something])
        # Try each subclass in turn; the first whose __init__ accepts the
        # string wins. ValueError means "not my scheme, keep looking".
        for subclass in self._subclasses:
            try:
                instance = object.__new__(subclass)
                instance.__init__(string)
                return instance
            except ValueError, e:
                continue
        raise ValueError("%s: not a recognized URI" % string)

    def __str__(self):
        return self.uri()

    def __unicode__(self):
        return self.uri()

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.__unicode__())

    def public_url(self):
        # Overridden only by subclasses that have an anonymous-HTTP form.
        raise ValueError("This S3 URI does not have Anonymous URL representation")

    def basename(self):
        return self.__unicode__().split("/")[-1]
class S3UriS3(S3Uri):
    """URIs of the form s3://bucket[/object]."""
    type = "s3"
    _re = re.compile("^s3://([^/]+)/?(.*)", re.IGNORECASE)

    def __init__(self, string):
        match = self._re.match(string)
        if not match:
            raise ValueError("%s: not a S3 URI" % string)
        groups = match.groups()
        self._bucket = groups[0]
        self._object = unicodise(groups[1])

    def bucket(self):
        return self._bucket

    def object(self):
        return self._object

    def has_bucket(self):
        return bool(self._bucket)

    def has_object(self):
        return bool(self._object)

    def uri(self):
        return "/".join(["s3:/", self._bucket, self._object])

    def is_dns_compatible(self):
        return check_bucket_name_dns_conformity(self._bucket)

    def public_url(self):
        # Virtual-host style URLs only work for DNS-compatible bucket names;
        # fall back to path-style otherwise.
        if self.is_dns_compatible():
            return "http://%s.%s/%s" % (self._bucket, Config.Config().host_base, self._object)
        else:
            return "http://%s/%s/%s" % (self._bucket, Config.Config().host_base, self._object)

    def host_name(self):
        if self.is_dns_compatible():
            return "%s.s3.amazonaws.com" % (self._bucket)
        else:
            return "s3.amazonaws.com"

    @staticmethod
    def compose_uri(bucket, object = ""):
        return "s3://%s/%s" % (bucket, object)

    @staticmethod
    def httpurl_to_s3uri(http_url):
        """Convert a path-style or virtual-host-style S3 HTTP URL to an S3Uri.

        NOTE(review): the regex result is not checked -- a URL that does
        not match at all would raise AttributeError on m.groups().
        """
        m = re.match("(https?://)?([^/]+)/?(.*)", http_url, re.IGNORECASE)
        hostname, object = m.groups()[1:]
        hostname = hostname.lower()
        if hostname == "s3.amazonaws.com":
            ## old-style url: http://s3.amazonaws.com/bucket/object
            if object.count("/") == 0:
                ## no object given
                bucket = object
                object = ""
            else:
                ## bucket/object
                bucket, object = object.split("/", 1)
        elif hostname.endswith(".s3.amazonaws.com"):
            ## new-style url: http://bucket.s3.amazonaws.com/object
            bucket = hostname[:-(len(".s3.amazonaws.com"))]
        else:
            raise ValueError("Unable to parse URL: %s" % http_url)
        return S3Uri("s3://%(bucket)s/%(object)s" % {
            'bucket' : bucket,
            'object' : object })
class S3UriS3FS(S3Uri):
    """URIs of the form s3fs://fsname/path -- an s3fs filesystem location."""
    type = "s3fs"
    _re = re.compile("^s3fs://([^/]*)/?(.*)", re.IGNORECASE)

    def __init__(self, string):
        match = self._re.match(string)
        if match is None:
            raise ValueError("%s: not a S3fs URI" % string)
        fsname, path = match.groups()
        self._fsname = fsname
        self._path = unicodise(path).split("/")

    def fsname(self):
        return self._fsname

    def path(self):
        return "/".join(self._path)

    def uri(self):
        return "/".join(["s3fs:/", self._fsname, self.path()])
class S3UriFile(S3Uri):
    """Local filesystem paths, optionally written as file:// URIs."""
    type = "file"
    _re = re.compile("^(\w+://)?(.*)")

    def __init__(self, string):
        scheme, path = self._re.match(string).groups()
        # Anything other than an explicit "file://" scheme (or none) is rejected.
        if scheme not in (None, "file://"):
            raise ValueError("%s: not a file:// URI" % string)
        self._path = unicodise(path).split("/")

    def path(self):
        return "/".join(self._path)

    def uri(self):
        return "/".join(["file:/", self.path()])

    def isdir(self):
        return os.path.isdir(self.path())

    def dirname(self):
        return os.path.dirname(self.path())
class S3UriCloudFront(S3Uri):
    """URIs of the form cf://dist_id[/request_id] addressing CloudFront."""
    type = "cf"
    _re = re.compile("^cf://([^/]*)/*(.*)", re.IGNORECASE)

    def __init__(self, string):
        match = self._re.match(string)
        if match is None:
            raise ValueError("%s: not a CloudFront URI" % string)
        dist_id, request_id = match.groups()
        self._dist_id = dist_id
        self._request_id = request_id != "/" and request_id or None

    def dist_id(self):
        return self._dist_id

    def request_id(self):
        return self._request_id

    def uri(self):
        composed = "cf://" + self.dist_id()
        if self.request_id():
            composed += "/" + self.request_id()
        return composed
if __name__ == "__main__":
    # Manual smoke test: exercise each URI subclass and print what the
    # polymorphic S3Uri() factory resolved it to.
    uri = S3Uri("s3://bucket/object")
    print "type() =", type(uri)
    print "uri =", uri
    print "uri.type=", uri.type
    print "bucket =", uri.bucket()
    print "object =", uri.object()
    print

    uri = S3Uri("s3://bucket")
    print "type() =", type(uri)
    print "uri =", uri
    print "uri.type=", uri.type
    print "bucket =", uri.bucket()
    print

    uri = S3Uri("s3fs://filesystem1/path/to/remote/file.txt")
    print "type() =", type(uri)
    print "uri =", uri
    print "uri.type=", uri.type
    print "path =", uri.path()
    print

    uri = S3Uri("/path/to/local/file.txt")
    print "type() =", type(uri)
    print "uri =", uri
    print "uri.type=", uri.type
    print "path =", uri.path()
    print

    uri = S3Uri("cf://1234567890ABCD/")
    print "type() =", type(uri)
    print "uri =", uri
    print "uri.type=", uri.type
    print "dist_id =", uri.dist_id()
    print

178
fabfile/S3/SimpleDB.py

@ -1,178 +0,0 @@
## Amazon SimpleDB library
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
"""
Low-level class for working with Amazon SimpleDB
"""
import time
import urllib
import base64
import hmac
import sha
import httplib
from logging import debug, info, warning, error
from Utils import convertTupleListToDict
from SortedDict import SortedDict
from Exceptions import *
class SimpleDB(object):
    """Thin wrappers around the Amazon SimpleDB REST API plus request
    signing (SignatureVersion 1)."""
    # API Version
    # See http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/
    Version = "2007-11-07"
    SignatureVersion = 1

    def __init__(self, config):
        # 'config' supplies credentials, proxy and host settings.
        self.config = config

    ## ------------------------------------------------
    ## Methods implementing SimpleDB API
    ## ------------------------------------------------

    def ListDomains(self, MaxNumberOfDomains = 100):
        '''
        Lists all domains associated with our Access Key. Returns
        domain names up to the limit set by MaxNumberOfDomains.
        '''
        parameters = SortedDict()
        parameters['MaxNumberOfDomains'] = MaxNumberOfDomains
        return self.send_request("ListDomains", DomainName = None, parameters = parameters)

    def CreateDomain(self, DomainName):
        """Create a new SimpleDB domain."""
        return self.send_request("CreateDomain", DomainName = DomainName)

    def DeleteDomain(self, DomainName):
        """Delete a SimpleDB domain (and everything in it)."""
        return self.send_request("DeleteDomain", DomainName = DomainName)

    def PutAttributes(self, DomainName, ItemName, Attributes):
        """Store attributes on an item.

        List values are expanded into multiple Attribute.N.Name/Value
        pairs; scalar values produce a single pair.
        """
        parameters = SortedDict()
        parameters['ItemName'] = ItemName
        seq = 0
        for attrib in Attributes:
            if type(Attributes[attrib]) == type(list()):
                for value in Attributes[attrib]:
                    parameters['Attribute.%d.Name' % seq] = attrib
                    parameters['Attribute.%d.Value' % seq] = unicode(value)
                    seq += 1
            else:
                parameters['Attribute.%d.Name' % seq] = attrib
                parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib])
                seq += 1
        ## TODO:
        ## - support for Attribute.N.Replace
        ## - support for multiple values for one attribute
        return self.send_request("PutAttributes", DomainName = DomainName, parameters = parameters)

    def GetAttributes(self, DomainName, ItemName, Attributes = []):
        """Fetch the named attributes of an item (all when 'Attributes' is empty).

        NOTE(review): mutable default argument -- harmless here because
        'Attributes' is only iterated, never mutated.
        """
        parameters = SortedDict()
        parameters['ItemName'] = ItemName
        seq = 0
        for attrib in Attributes:
            parameters['AttributeName.%d' % seq] = attrib
            seq += 1
        return self.send_request("GetAttributes", DomainName = DomainName, parameters = parameters)

    def DeleteAttributes(self, DomainName, ItemName, Attributes = {}):
        """
        Remove specified Attributes from ItemName.
        Attributes parameter can be either:
        - not specified, in which case the whole Item is removed
        - list, e.g. ['Attr1', 'Attr2'] in which case these parameters are removed
        - dict, e.g. {'Attr' : 'One', 'Attr' : 'Two'} in which case the
          specified values are removed from multi-value attributes.
        """
        parameters = SortedDict()
        parameters['ItemName'] = ItemName
        seq = 0
        for attrib in Attributes:
            parameters['Attribute.%d.Name' % seq] = attrib
            if type(Attributes) == type(dict()):
                parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib])
            seq += 1
        return self.send_request("DeleteAttributes", DomainName = DomainName, parameters = parameters)

    def Query(self, DomainName, QueryExpression = None, MaxNumberOfItems = None, NextToken = None):
        """Run a SimpleDB query; optional paging via MaxNumberOfItems/NextToken."""
        parameters = SortedDict()
        if QueryExpression:
            parameters['QueryExpression'] = QueryExpression
        if MaxNumberOfItems:
            parameters['MaxNumberOfItems'] = MaxNumberOfItems
        if NextToken:
            parameters['NextToken'] = NextToken
        return self.send_request("Query", DomainName = DomainName, parameters = parameters)
        ## Handle NextToken? Or maybe not - let the upper level do it

    ## ------------------------------------------------
    ## Low-level methods for handling SimpleDB requests
    ## ------------------------------------------------

    def send_request(self, *args, **kwargs):
        """Build, sign and execute one API call; raise S3Error on non-2xx."""
        request = self.create_request(*args, **kwargs)
        #debug("Request: %s" % repr(request))
        conn = self.get_connection()
        conn.request("GET", self.format_uri(request['uri_params']))
        http_response = conn.getresponse()
        response = {}
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = convertTupleListToDict(http_response.getheaders())
        response["data"] = http_response.read()
        conn.close()
        if response["status"] < 200 or response["status"] > 299:
            debug("Response: " + str(response))
            raise S3Error(response)
        return response

    def create_request(self, Action, DomainName, parameters = None):
        """Assemble the signed query-string parameters for 'Action'."""
        if not parameters:
            parameters = SortedDict()
        if len(self.config.access_token) > 0:
            self.config.refresh_role()
            # NOTE(review): the session token is stored under 'Signature',
            # which sign_request() overwrites below -- this looks like it
            # should be a dedicated token parameter; confirm against the
            # SimpleDB request-authentication docs before relying on
            # IAM-role credentials here.
            parameters['Signature']=self.config.access_token
        parameters['AWSAccessKeyId'] = self.config.access_key
        parameters['Version'] = self.Version
        parameters['SignatureVersion'] = self.SignatureVersion
        parameters['Action'] = Action
        parameters['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        if DomainName:
            parameters['DomainName'] = DomainName
        parameters['Signature'] = self.sign_request(parameters)
        parameters.keys_return_lowercase = False
        uri_params = urllib.urlencode(parameters)
        request = {}
        request['uri_params'] = uri_params
        request['parameters'] = parameters
        return request

    def sign_request(self, parameters):
        """SignatureVersion 1: HMAC-SHA over case-insensitively sorted
        key+value concatenation, base64-encoded."""
        h = ""
        parameters.keys_sort_lowercase = True
        parameters.keys_return_lowercase = False
        for key in parameters:
            h += "%s%s" % (key, parameters[key])
        #debug("SignRequest: %s" % h)
        return base64.encodestring(hmac.new(self.config.secret_key, h, sha).digest()).strip()

    def get_connection(self):
        # Through a proxy we must use plain HTTP to the proxy itself.
        if self.config.proxy_host != "":
            return httplib.HTTPConnection(self.config.proxy_host, self.config.proxy_port)
        else:
            if self.config.use_https:
                return httplib.HTTPSConnection(self.config.simpledb_host)
            else:
                return httplib.HTTPConnection(self.config.simpledb_host)

    def format_uri(self, uri_params):
        # Proxied requests need the absolute URI; direct ones just the path.
        if self.config.proxy_host != "":
            uri = "http://%s/?%s" % (self.config.simpledb_host, uri_params)
        else:
            uri = "/?%s" % uri_params
        #debug('format_uri(): ' + uri)
        return uri
# vim:et:ts=4:sts=4:ai

66
fabfile/S3/SortedDict.py

@ -1,66 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
from BidirMap import BidirMap
import Utils
class SortedDictIterator(object):
    """Iterator yielding pre-sorted keys one by one.

    Note: destructively pops from the 'keys' list it is given.
    """
    def __init__(self, sorted_dict, keys):
        self.sorted_dict = sorted_dict
        self.keys = keys

    def __iter__(self):
        # Fix: without __iter__ the object could not itself be used in a
        # for-loop / list() even though it implements the iterator protocol.
        return self

    def next(self):
        try:
            return self.keys.pop(0)
        except IndexError:
            raise StopIteration

    # Python 3 spells the iterator-advance method __next__; alias it for
    # forward compatibility (harmless extra attribute under Python 2).
    __next__ = next
class SortedDict(dict):
    """dict whose keys() and iteration order are sorted, optionally
    case-insensitively."""
    def __init__(self, mapping = {}, ignore_case = True, **kwargs):
        """
        WARNING: SortedDict() with ignore_case==True will
        drop entries differing only in capitalisation!
        Eg: SortedDict({'auckland':1, 'Auckland':2}).keys() => ['Auckland']
        With ignore_case==False it's all right
        """
        dict.__init__(self, mapping, **kwargs)
        self.ignore_case = ignore_case

    def keys(self):
        # Return all keys, sorted; when ignore_case is set the sort compares
        # lowercased keys but the original spellings are returned.
        keys = dict.keys(self)
        if self.ignore_case:
            # Translation map: lowercase form -> original key
            xlat_map = BidirMap()
            for key in keys:
                xlat_map[key.lower()] = key
            # Lowercase keys
            lc_keys = xlat_map.keys()
            lc_keys.sort()
            return [xlat_map[k] for k in lc_keys]
        else:
            keys.sort()
            return keys

    def __iter__(self):
        return SortedDictIterator(self, self.keys())
if __name__ == "__main__":
    # Manual smoke test: show sorted iteration with and without ignore_case.
    d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 }
    sd = SortedDict(d)
    print "Wanted: Action, america, Auckland, AWS,   [ignore case]"
    print "Got:   ",
    for key in sd:
        print "%s," % key,
    print "   [used: __iter__()]"
    d = SortedDict(d, ignore_case = False)
    print "Wanted: AWS, Action, Auckland, america,   [case sensitive]"
    print "Got:   ",
    for key in d.keys():
        print "%s," % key,
    print "   [used: keys()]"

462
fabfile/S3/Utils.py

@ -1,462 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2
import datetime
import os
import sys
import time
import re
import string
import random
import rfc822
import hmac
import base64
import errno
import urllib
from logging import debug, info, warning, error
import Config
import Exceptions
# hashlib backported to python 2.4 / 2.5 is not compatible with hmac!
if sys.version_info[0] == 2 and sys.version_info[1] < 6:
from md5 import md5
import sha as sha1
else:
from hashlib import md5, sha1
try:
import xml.etree.ElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
from xml.parsers.expat import ExpatError
__all__ = []
def parseNodes(nodes):
    """Convert a list of XML elements into a list of dicts (one per
    element) keyed by child tag name; nested elements recurse.
    """
    ## WARNING: Ignores text nodes from mixed xml/text.
    ## For instance <tag1>some text<tag2>other text</tag2></tag1>
    ## will be ignore "some text" node
    retval = []
    for node in nodes:
        retval_item = {}
        for child in node.getchildren():
            name = child.tag
            if child.getchildren():
                retval_item[name] = parseNodes([child])
            else:
                # NOTE(review): findtext(".//tag") returns the FIRST matching
                # descendant of 'node', so repeated child tags all receive the
                # first one's text -- confirm before changing to child.text.
                retval_item[name] = node.findtext(".//%s" % child.tag)
        retval.append(retval_item)
    return retval
__all__.append("parseNodes")
def stripNameSpace(xml):
    """
    stripNameSpace(xml) -- remove the top-level AWS namespace declaration.

    Returns (xml_without_xmlns, xmlns) where xmlns is None when the
    document carried no top-level namespace.
    """
    pattern = re.compile('^(<?[^>]+?>\s?)(<\w+) xmlns=[\'"](http://[^\'"]+)[\'"](.*)', re.MULTILINE)
    match = pattern.match(xml)
    if match:
        xmlns = match.groups()[2]
        xml = pattern.sub("\\1\\2\\4", xml)
    else:
        xmlns = None
    return xml, xmlns
__all__.append("stripNameSpace")
def getTreeFromXml(xml):
    """Parse 'xml' into an ElementTree root element; any stripped
    top-level namespace is stashed back into the 'xmlns' attribute.
    Raises ParameterError on unparseable input.
    """
    xml, xmlns = stripNameSpace(xml)
    try:
        tree = ET.fromstring(xml)
        if xmlns:
            tree.attrib['xmlns'] = xmlns
        return tree
    except ExpatError, e:
        error(e)
        raise Exceptions.ParameterError("Bucket contains invalid filenames. Please run: s3cmd fixbucket s3://your-bucket/")
__all__.append("getTreeFromXml")
def getListFromXml(xml, node):
    """Parse 'xml' and return a list of dicts, one per <node> element found."""
    matching = getTreeFromXml(xml).findall('.//%s' % (node))
    return parseNodes(matching)
__all__.append("getListFromXml")
def getDictFromTree(tree):
    """Recursively convert an ElementTree element into a dict keyed by
    child tag names.

    Leaf text becomes a string (empty text -> ""); repeated tags are
    collected into a list; nested elements recurse into sub-dicts.
    """
    ret_dict = {}
    # Fix: replaced deprecated Element.getchildren() with direct iteration
    # and dict.has_key() with the 'in' operator (both removed in Python 3);
    # behavior is unchanged.
    for child in tree:
        if len(child):
            ## Complex-type child. Recurse
            content = getDictFromTree(child)
        else:
            content = child.text
        if child.tag in ret_dict:
            # Repeated tag: promote the existing value to a list and append.
            if not type(ret_dict[child.tag]) == list:
                ret_dict[child.tag] = [ret_dict[child.tag]]
            ret_dict[child.tag].append(content or "")
        else:
            ret_dict[child.tag] = content or ""
    return ret_dict
__all__.append("getDictFromTree")
def getTextFromXml(xml, xpath):
    """Return the text of the element addressed by 'xpath'; when the root
    tag itself matches, return the root's text."""
    tree = getTreeFromXml(xml)
    if not tree.tag.endswith(xpath):
        return tree.findtext(xpath)
    return tree.text
__all__.append("getTextFromXml")
def getRootTagName(xml):
    """Return the tag name of the XML document's root element."""
    return getTreeFromXml(xml).tag
__all__.append("getRootTagName")
def xmlTextNode(tag_name, text):
    """Create a new <tag_name> Element whose text is 'text' (coerced to
    unicode)."""
    el = ET.Element(tag_name)
    el.text = unicode(text)
    return el
__all__.append("xmlTextNode")
def appendXmlTextNode(tag_name, text, parent):
    """
    Creates a new <tag_name> Node and sets
    its content to 'text'. Then appends the
    created Node to 'parent' element if given.
    Returns the newly created Node.
    """
    node = xmlTextNode(tag_name, text)
    parent.append(node)
    return node
__all__.append("appendXmlTextNode")
def dateS3toPython(date):
    """Parse an S3 ISO-8601 timestamp into a time.struct_time (GMT).
    Fractional seconds (or a bare 'Z') are normalized to '.000Z' first."""
    normalized = re.compile("(\.\d*)?Z").sub(".000Z", date)
    return time.strptime(normalized, "%Y-%m-%dT%H:%M:%S.000Z")
__all__.append("dateS3toPython")
def dateS3toUnix(date):
    """Convert an S3 timestamp string to Unix epoch seconds."""
    ## FIXME: This should be timezone-aware.
    ## Currently the argument to strptime() is GMT but mktime()
    ## treats it as "localtime". Anyway...
    return time.mktime(dateS3toPython(date))
__all__.append("dateS3toUnix")
def dateRFC822toPython(date):
    """Parse an RFC 822 date string into a time 9-tuple (or None on
    failure), via the Python 2 'rfc822' module."""
    return rfc822.parsedate(date)
__all__.append("dateRFC822toPython")
def dateRFC822toUnix(date):
    """Convert an RFC 822 date string to Unix epoch seconds (local time)."""
    return time.mktime(dateRFC822toPython(date))
__all__.append("dateRFC822toUnix")
def formatSize(size, human_readable = False, floating_point = False):
    """Return (size, coefficient). With human_readable, repeatedly divide
    by 1024 and report a 'k'/'M'/'G'/'T' coefficient; otherwise the
    coefficient is always ''."""
    size = floating_point and float(size) or int(size)
    if not human_readable:
        return (size, "")
    coeffs = ['k', 'M', 'G', 'T']
    coeff = ""
    while size > 2048:
        size /= 1024
        coeff = coeffs.pop(0)
    return (size, coeff)
__all__.append("formatSize")
def formatDateTime(s3timestamp):
    """Format an S3 timestamp as 'YYYY-MM-DD HH:MM'. When pytz is
    installed the GMT timestamp is converted to the $TZ timezone;
    otherwise a naive datetime (GMT) is formatted as-is."""
    try:
        import pytz
        timezone = pytz.timezone(os.environ.get('TZ', 'UTC'))
        tz = pytz.timezone('UTC')
        ## Can't unpack args and follow that with kwargs in python 2.5
        ## So we pass them all as kwargs
        params = zip(('year', 'month', 'day', 'hour', 'minute', 'second', 'tzinfo'),
                     dateS3toPython(s3timestamp)[0:6] + (tz,))
        params = dict(params)
        utc_dt = datetime.datetime(**params)
        dt_object = utc_dt.astimezone(timezone)
    except ImportError:
        dt_object = datetime.datetime(*dateS3toPython(s3timestamp)[0:6])
    return dt_object.strftime("%Y-%m-%d %H:%M")
__all__.append("formatDateTime")
def convertTupleListToDict(list):
    """Build a dict from a sequence of (key, value) pairs; on duplicate
    keys the last pair wins. Extra elements beyond index 1 are ignored."""
    return dict(((pair[0], pair[1]) for pair in list))
__all__.append("convertTupleListToDict")
# Alphabet used for random-suffix generation (alphanumerics only).
_rnd_chars = string.ascii_letters+string.digits
_rnd_chars_len = len(_rnd_chars)
def rndstr(len):
    """Return a random alphanumeric string of the requested length."""
    picked = []
    while len > 0:
        picked.append(_rnd_chars[random.randint(0, _rnd_chars_len-1)])
        len -= 1
    return "".join(picked)
__all__.append("rndstr")
def mktmpsomething(prefix, randchars, createfunc):
    """Create a uniquely-named temp resource via 'createfunc', retrying up
    to 5 times on EEXIST collisions. A restrictive umask (077) is set for
    the duration so the resource is private to the user. Returns the name."""
    old_umask = os.umask(0077)
    tries = 5
    while tries > 0:
        dirname = prefix + rndstr(randchars)
        try:
            createfunc(dirname)
            break
        except OSError, e:
            if e.errno != errno.EEXIST:
                # Unexpected failure: restore umask before propagating.
                os.umask(old_umask)
                raise
        tries -= 1
    os.umask(old_umask)
    return dirname
__all__.append("mktmpsomething")
def mktmpdir(prefix = "/tmp/tmpdir-", randchars = 10):
    """Create a private temporary directory with a random suffix."""
    return mktmpsomething(prefix, randchars, os.mkdir)
__all__.append("mktmpdir")
def mktmpfile(prefix = "/tmp/tmpfile-", randchars = 20):
    """Exclusively create a private temporary file with a random suffix."""
    # O_EXCL guarantees we never reuse an existing name.
    createfunc = lambda filename : os.close(os.open(filename, os.O_CREAT | os.O_EXCL))
    return mktmpsomething(prefix, randchars, createfunc)
__all__.append("mktmpfile")
def hash_file_md5(filename):
    """Return the hex MD5 digest of a file's contents, reading in 32 kB
    chunks so arbitrarily large files use constant memory."""
    h = md5()
    f = open(filename, "rb")
    # try/finally (not 'with') keeps Python 2.4 compatibility while still
    # guaranteeing the handle is closed if read()/update() raises.
    try:
        while True:
            # Hash 32kB chunks
            data = f.read(32*1024)
            if not data:
                break
            h.update(data)
    finally:
        f.close()
    return h.hexdigest()
__all__.append("hash_file_md5")
def mkdir_with_parents(dir_name):
    """
    mkdir_with_parents(dst_dir)
    Create directory 'dir_name' with all parent directories
    Returns True on success, False otherwise.
    """
    pathmembers = dir_name.split(os.sep)
    tmp_stack = []
    # Walk upward until an existing ancestor directory is found...
    while pathmembers and not os.path.isdir(os.sep.join(pathmembers)):
        tmp_stack.append(pathmembers.pop())
    # ...then create each missing component on the way back down.
    while tmp_stack:
        pathmembers.append(tmp_stack.pop())
        cur_dir = os.sep.join(pathmembers)
        try:
            debug("mkdir(%s)" % cur_dir)
            os.mkdir(cur_dir)
        except (OSError, IOError), e:
            warning("%s: can not make directory: %s" % (cur_dir, e.strerror))
            return False
        except Exception, e:
            warning("%s: %s" % (cur_dir, e))
            return False
    return True
__all__.append("mkdir_with_parents")
def unicodise(string, encoding = None, errors = "replace"):
    """
    Convert 'string' to Unicode or raise an exception.
    The configured encoding is used when none is given; already-unicode
    input is returned unchanged.
    """
    if not encoding:
        encoding = Config.Config().encoding
    if type(string) == unicode:
        return string
    debug("Unicodising %r using %s" % (string, encoding))
    try:
        return string.decode(encoding, errors)
    except UnicodeDecodeError:
        raise UnicodeDecodeError("Conversion to unicode failed: %r" % string)
__all__.append("unicodise")
def deunicodise(string, encoding = None, errors = "replace"):
    """
    Convert unicode 'string' to <type str>, by default replacing
    all invalid characters with '?' or raise an exception.
    Non-unicode input is simply str()-ed.
    """
    if not encoding:
        encoding = Config.Config().encoding
    if type(string) != unicode:
        return str(string)
    debug("DeUnicodising %r using %s" % (string, encoding))
    try:
        return string.encode(encoding, errors)
    except UnicodeEncodeError:
        raise UnicodeEncodeError("Conversion from unicode failed: %r" % string)
__all__.append("deunicodise")
def unicodise_safe(string, encoding = None):
    """
    Convert 'string' to Unicode according to current encoding
    and replace all invalid characters with '?'
    """
    # Round-trip through deunicodise forces undecodable bytes to U+FFFD,
    # which is then rewritten to a plain '?'.
    return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?')
__all__.append("unicodise_safe")
def replace_nonprintables(string):
    """
    replace_nonprintables(string)
    Replaces all non-printable characters 'ch' in 'string'
    where ord(ch) <= 26 with ^@, ^A, ... ^Z
    """
    pieces = []
    modified = 0
    for ch in string:
        code = ord(ch)
        if code <= 31:
            # Control characters map to caret notation: ^@ (0) ... ^_ (31)
            pieces.append("^" + chr(ord('@') + code))
            modified += 1
        elif code == 127:
            pieces.append("^?")
            modified += 1
        else:
            pieces.append(ch)
    new_string = "".join(pieces)
    if modified and Config.Config().urlencoding_mode != "fixbucket":
        warning("%d non-printable characters replaced in: %s" % (modified, new_string))
    return new_string
__all__.append("replace_nonprintables")
def sign_string(string_to_sign):
    """Sign a string with the configured secret key, returning the
    base64-encoded HMAC-SHA1 signature.
    Useful for REST authentication. See http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html
    """
    signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip()
    return signature
__all__.append("sign_string")
def sign_url(url_to_sign, expiry):
    """Sign a URL in s3://bucket/object form with the given expiry
    time. The object will be accessible via the signed URL until the
    AWS key and secret are revoked or the expiry time is reached, even
    if the object is otherwise private.
    See: http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html
    """
    # Delegate to sign_url_base with the S3Uri's parts split out.
    return sign_url_base(
        bucket = url_to_sign.bucket(),
        object = url_to_sign.object(),
        expiry = expiry
    )
__all__.append("sign_url")
def sign_url_base(**parms):
    """Shared implementation of sign_url methods. Takes a hash of 'bucket', 'object' and 'expiry' as args."""
    parms['expiry']=time_to_epoch(parms['expiry'])
    parms['access_key']=Config.Config().access_key
    debug("Expiry interpreted as epoch time %s", parms['expiry'])
    # Canonical string-to-sign per AWS REST query-string authentication.
    signtext = 'GET\n\n\n%(expiry)d\n/%(bucket)s/%(object)s' % parms
    debug("Signing plaintext: %r", signtext)
    parms['sig'] = urllib.quote_plus(sign_string(signtext))
    debug("Urlencoded signature: %s", parms['sig'])
    return "http://%(bucket)s.s3.amazonaws.com/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s" % parms
def time_to_epoch(t):
"""Convert time specified in a variety of forms into UNIX epoch time.
Accepts datetime.datetime, int, anything that has a strftime() method, and standard time 9-tuples
"""
if isinstance(t, int):
# Already an int
return t
elif isinstance(t, tuple) or isinstance(t, time.struct_time):
# Assume it's a time 9-tuple
return int(time.mktime(t))
elif hasattr(t, 'timetuple'):
# Looks like a datetime object or compatible
return int(time.mktime(ex.timetuple()))
elif hasattr(t, 'strftime'):
# Looks like the object supports standard srftime()
return int(t.strftime('%s'))
elif isinstance(t, str) or isinstance(t, unicode):
# See if it's a string representation of an epoch
try:
return int(t)
except ValueError:
# Try to parse it as a timestamp string
try:
return time.strptime(t)
except ValueError, ex:
# Will fall through
debug("Failed to parse date with strptime: %s", ex)
pass
raise Exceptions.ParameterError('Unable to convert %r to an epoch time. Pass an epoch time. Try `date -d \'now + 1 year\' +%%s` (shell) or time.mktime (Python).' % t)
def check_bucket_name(bucket, dns_strict = True):
    """Validate an S3 bucket name, raising Exceptions.ParameterError when
    invalid. With dns_strict (the default) the stricter DNS-compatible
    rules apply: lowercase charset, max 63 chars, no '-.'/'..' sequences,
    must start and end with a letter or digit. Returns True when valid."""
    if dns_strict:
        invalid = re.search("([^a-z0-9\.-])", bucket)
        if invalid:
            raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: lowercase us-ascii letters (a-z), digits (0-9), dot (.) and hyphen (-)." % (bucket, invalid.groups()[0]))
    else:
        invalid = re.search("([^A-Za-z0-9\._-])", bucket)
        if invalid:
            raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: us-ascii letters (a-z, A-Z), digits (0-9), dot (.), hyphen (-) and underscore (_)." % (bucket, invalid.groups()[0]))
    if len(bucket) < 3:
        raise Exceptions.ParameterError("Bucket name '%s' is too short (min 3 characters)" % bucket)
    if len(bucket) > 255:
        raise Exceptions.ParameterError("Bucket name '%s' is too long (max 255 characters)" % bucket)
    if dns_strict:
        if len(bucket) > 63:
            raise Exceptions.ParameterError("Bucket name '%s' is too long (max 63 characters)" % bucket)
        if re.search("-\.", bucket):
            raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '-.' for DNS compatibility" % bucket)
        if re.search("\.\.", bucket):
            raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '..' for DNS compatibility" % bucket)
        if not re.search("^[0-9a-z]", bucket):
            raise Exceptions.ParameterError("Bucket name '%s' must start with a letter or a digit" % bucket)
        if not re.search("[0-9a-z]$", bucket):
            raise Exceptions.ParameterError("Bucket name '%s' must end with a letter or a digit" % bucket)
    return True
__all__.append("check_bucket_name")
def check_bucket_name_dns_conformity(bucket):
    """Return True when 'bucket' passes the strict DNS naming rules,
    False otherwise (never raises)."""
    try:
        return check_bucket_name(bucket, dns_strict = True)
    except Exceptions.ParameterError:
        return False
__all__.append("check_bucket_name_dns_conformity")
def getBucketFromHostname(hostname):
    """
    bucket, success = getBucketFromHostname(hostname)
    Only works for hostnames derived from bucket names
    using Config.host_bucket pattern.
    Returns bucket name and a boolean success flag.
    """
    # Create RE pattern from Config.host_bucket
    pattern = Config.Config().host_bucket % { 'bucket' : '(?P<bucket>.*)' }
    m = re.match(pattern, hostname)
    if not m:
        # Not derived from a bucket name: hand the hostname back unchanged.
        return (hostname, False)
    return m.groups()[0], True
__all__.append("getBucketFromHostname")
def getHostnameFromBucket(bucket):
    """Expand the configured host_bucket template with the bucket name."""
    return Config.Config().host_bucket % { 'bucket' : bucket }
__all__.append("getHostnameFromBucket")
# vim:et:ts=4:sts=4:ai

0
fabfile/S3/__init__.py

560
fabfile/__init__.py

@ -1,560 +0,0 @@
# -*- coding: utf-8 -*-
import sys
import os
from os.path import dirname, abspath, join
from datetime import date
import json
import codecs
import shutil
import fnmatch
import re
import collections
from fabric.api import env, settings, hide, local, lcd
from fabric.decorators import task
from fabric.operations import prompt
from fabric.utils import puts, abort, warn
env.debug = False
#
# Set paths
#
# Layout: <sites>/<project>/{build,source,website,...}; the secrets and
# cdn checkouts are expected as siblings of the project directory.
env.project_path = dirname(dirname(abspath(__file__)))
env.sites_path = dirname(env.project_path)
env.build_path = join(env.project_path, 'build')
env.source_path = join(env.project_path, 'source')
#
# Read config.json and update vars
#
with open(join(env.project_path, 'config.json')) as fp:
    s = fp.read()
    # config.json may contain // and /* */ comments; strip them before
    # handing the text to the (strict) JSON parser.
    s = re.sub(r'//.*', '', s)
    s = re.sub(r'/\*.*?\*/', '', s, flags=re.DOTALL)
    CONFIG = json.loads(s, object_pairs_hook=collections.OrderedDict)
today = date.today()
CONFIG['date'] = today
CONFIG['year'] = today.year
# Path to cdn deployment
env.cdn_path = abspath(join(
    env.sites_path, 'cdn.knightlab.com', 'app', 'libs', CONFIG['name']))
# Path to s3cmd.cnf in secrets repository
env.s3cmd_cfg = join(env.sites_path, 'secrets', 's3cmd.cfg')
# Banner for the top of CSS and JS files
BANNER = """
/*
TimelineJS - ver. %(version)s - %(date)s
Copyright (c) 2012-%(year)s Northwestern University
a project of the Northwestern University Knight Lab, originally created by Zach Wise
https://github.com/NUKnightLab/TimelineJS
This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
""".lstrip()
def _check_path(path):
    """Check for the existence of a path; abort the fab run if missing."""
    if not os.path.exists(path):
        abort('Could not find %s.' % path)
def _clean(path):
    """Delete directory contents (the directory itself is kept); a plain
    file path is simply unlinked."""
    path = os.path.abspath(path)
    puts('clean: %s' % path)
    if os.path.exists(path):
        if os.path.isdir(path):
            for item in [join(path, x) for x in os.listdir(path)]:
                if os.path.isfile(item):
                    os.unlink(item)
                else:
                    shutil.rmtree(item)
        else:
            os.unlink(path)
def _find_file(file_name, cur_dir):
    """Find a file. Look first in cur_dir, then env.source_path"""
    file_path = os.path.abspath(join(cur_dir, file_name))
    if os.path.exists(file_path):
        return file_path
    # Fall back to a recursive search of the whole source tree.
    for dirpath, dirs, files in os.walk(env.source_path):
        if file_name in files:
            return os.path.join(dirpath, file_name)
    raise Exception('Could not find "%s" in %s' % (file_name, env.source_path))
def _match_files(src, regex):
    """Return relative filepaths matching regex in src"""
    re_match = re.compile(regex)
    for (dirpath, dirnames, filenames) in os.walk(src):
        rel_dir = _relpath(src, dirpath)
        # Hidden (dot) files are always skipped; the regex is matched
        # against the path relative to src, not the bare filename.
        for f in filter(lambda x: not x.startswith('.'), filenames):
            rel_path = join(rel_dir, f)
            if re_match.match(rel_path):
                yield rel_path
def _makedirs(path, isfile=False):
"""Make directories in path"""
if isfile:
path = dirname(path)
if not os.path.exists(path):
os.makedirs(path)
def _open_file(path, mode, encoding=''):
"""Open a file with character encoding detection"""
if mode.startswith('r'):
bytes = min(32, os.path.getsize(path))
with open(path, 'rb') as fd:
raw = fd.read()
if raw.startswith(codecs.BOM_UTF8):
encoding = 'utf-8-sig'
else:
encoding = 'utf-8'
return codecs.open(path, mode, encoding)
def _relpath(root_path, path):
"""Get relative path from root_path"""
if root_path == path:
return ''
return os.path.relpath(path, root_path)
#
# tagging
#
def _get_tags():
    """Get list of current tags from the repo (empty list when none)."""
    tags = os.popen('cd %(project_path)s;git tag' % env).read().strip()
    if tags:
        return [x.strip() for x in tags.split('\n')]
    return []
def _last_version_tag():
    """Get the last version tag (numerically highest), or None."""
    re_num = re.compile('[^0-9.]')
    # Strip non-numeric characters (e.g. a 'v' prefix) and sort each tag
    # as a list of ints so 0.10.0 orders after 0.9.0.
    tags = sorted([map(int, re_num.sub('', t).split('.')) for t in _get_tags()])
    if tags:
        return '.'.join(map(str, tags[-1]))
    return None
def _get_version_tag():
    """Get a new version tag from user; loops until a well-formed,
    not-yet-used major.minor.revision number is entered."""
    tags = _get_tags()
    puts('This project has the following tags:')
    puts(tags)
    while True:
        version = prompt("Enter a new version number: ").strip()
        if not re.match(r'^[0-9]+\.[0-9]+\.[0-9]+$', version):
            warn('Invalid version number, must be in the format:' \
                ' major.minor.revision')
        elif version in tags:
            warn('Invalid version number, tag already exists')
        else:
            break
    return version
def _render_templates(src_path, dst_path):
    """Render flask templates from src_path into dst_path.
    Files whose names start with '_' (partials) are skipped."""
    puts('render: %s >> %s' % (src_path, dst_path))
    from website import app
    from flask import g, request
    # compiled_includes accumulates across pages so shared includes are
    # only compiled once.
    compiled_includes = []
    for f in _match_files(src_path, '^[^_].*$'):
        with app.app.test_request_context():
            g.compile_includes = True
            g.compiled_includes = compiled_includes
            content = app.catch_all(f)
            compiled_includes = g.compiled_includes
        page_file = join(dst_path, f)
        puts(' %s' % page_file)
        _makedirs(page_file, isfile=True)
        with open(page_file, 'w') as fd:
            fd.write(content.encode('utf-8'))
#
# build steps
#
def banner(conf):
    """
    Place banner at top of js and css files in-place.

    Each entry in conf may be either a plain path string (file or
    directory relative to the project root, original behavior) or a
    dict with keys:
      src:      source file/directory
      regex:    regex matched against relative paths when src is a
                directory (default: css/js files)
      template: list of banner lines (default: BANNER); %(...)s
                placeholders are filled from CONFIG
    """
    def _do(file_path, banner_text):
        puts(' %s' % file_path)
        with _open_file(file_path, 'r+') as fd:
            s = fd.read()
            fd.seek(0)
            fd.write(banner_text+s)
    for r in conf:
        if isinstance(r, dict):
            src = join(env.project_path, r['src'])
            regex = r.get('regex', '.*\.(css|js)$')
            template = r.get('template')
            if template:
                banner_text = ('\n'.join(template) + '\n') % CONFIG
            else:
                banner_text = BANNER % CONFIG
        else:
            # Backward-compatible: plain path entry, default banner.
            src = join(env.project_path, r)
            regex = '.*\.(css|js)$'
            banner_text = BANNER % CONFIG
        puts('banner: %s' % src)
        if os.path.isdir(src):
            for f in _match_files(src, regex):
                _do(join(src, f), banner_text)
        else:
            _do(src, banner_text)
def concat(conf):
    """
    Concatenate files
    Each entry maps a list of 'src' paths to a single 'dst' file;
    source order is preserved.
    """
    for r in conf:
        dst = join(env.project_path, r['dst'])
        src = map(lambda x: join(env.project_path, x), r['src'])
        _makedirs(dst, isfile=True)
        # Shell out to cat for simplicity.
        local('cat %s > %s' % (' '.join(src), dst))
def copy(conf):
    """
    Copy files
    Each entry has 'src' and 'dst' (project-relative); when src is a
    directory an optional 'regex' filters which files are copied.
    """
    def _do(src_path, dst_path):
        puts(' %s' % src_path)
        _makedirs(dst_path, isfile=True)
        # copy2 preserves metadata (mtime etc.)
        shutil.copy2(src_path, dst_path)
    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        puts('copy: %s >> %s' % (src, dst))
        if os.path.isdir(src):
            regex = r['regex'] if 'regex' in r else '.*'
            for f in _match_files(src, regex):
                _do(join(src, f), join(dst, f))
        else:
            _do(src, dst)
def lessc(conf):
    """
    Compile LESS
    Requires the 'lessc' compiler on PATH; output files get a .css
    extension when src is a directory.
    """
    def _do(src_path, dst_path):
        _makedirs(dst_path, isfile=True)
        with hide('warnings'), settings(warn_only=True):
            result = local('lessc -x %s %s' % (src_path, dst_path))
        if result.failed:
            abort('Error running lessc on %s' % src_path)
    if not os.popen('which lessc').read().strip():
        abort('You must install the LESS compiler')
    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        if os.path.isdir(src):
            regex = r['regex'] if 'regex' in r else '.*'
            for f in _match_files(src, regex):
                (base, ext) = os.path.splitext(join(dst, f))
                _do(join(src, f), base+".css")
        else:
            _do(src, dst)
def minify(conf):
    """
    Minify javascript
    Optional per-entry keys: 'opt' (extra uglifyjs flags) and 'ext'
    (suffix inserted before the .js extension, e.g. '.min').
    """
    def _do(src_path, dst_path, opt):
        local('uglifyjs %s --output %s %s' % (opt, dst_path, src_path))
    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        puts('minify: %s >> %s' % (src, dst))
        opt = r['opt'] if ('opt' in r) else ''
        out_ext = r['ext'] if ('ext' in r) else ''
        if os.path.isdir(src):
            _makedirs(dst, isfile=False)
            for f in _match_files(src, '.*\.js'):
                (base, in_ext) = os.path.splitext(join(dst, f))
                _do(join(src, f), base+out_ext+in_ext, opt)
        else:
            _makedirs(dst, isfile=True)
            _do(src, dst, opt)
def process(conf):
    """
    Process codekit style imports
    Recursively inlines @codekit-prepend/-append referenced files into a
    single output file; each file is written at most once ('imported').
    """
    _re_prepend = re.compile(r'@codekit-prepend\s*[\'"](?P<file>.+)[\'"]\s*;')
    _re_append = re.compile(r'@codekit-append\s*[\'"](?P<file>.+)[\'"]\s*;')
    def _mark(f_out, path):
        # Separator comment so the concatenated output stays navigable.
        f_out.write("""
/* **********************************************
Begin %s
********************************************** */
""" % os.path.basename(path))
    def _do(f_out, path, imported):
        s = ''
        dirpath = dirname(path)
        with _open_file(path, 'r') as f_in:
            s = f_in.read()
        # Write out prepends
        for m in _re_prepend.finditer(s):
            file_path = _find_file(m.group('file'), dirpath)
            if not file_path in imported:
                puts(' prepend: %s' % file_path)
                imported.append(file_path)
                _do(f_out, file_path, imported)
        # Write out file
        _mark(f_out, os.path.basename(path))
        f_out.write(s+'\n')
        # Write out appends
        for m in _re_append.finditer(s):
            file_path = _find_file(m.group('file'), dirpath)
            if not file_path in imported:
                puts(' append: %s' % file_path)
                imported.append(file_path)
                _do(f_out, file_path, imported)
    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        puts('process: %s >> %s' % (src, dst))
        _makedirs(dst, isfile=True)
        with _open_file(dst, 'w', 'utf-8') as out_file:
            _do(out_file, src, [])
def usemin(conf):
    """
    Replaces usemin-style build blocks with a reference to a single file.
    Build blocks take the format:
    <!-- build:type path -->
    (references to unoptimized files go here)
    <!-- endbuild -->
    where:
    type = css | js
    path = reference to the optimized file
    Any leading backslashes will be stripped, but the path will otherwise
    by used as it appears within the opening build tag.
    """
    # Fixed typo: the pattern previously read (?P<type>\css|js); the stray
    # backslash was tolerated by Python 2 re but is an error on Python 3.7+.
    _re_build = re.compile(r"""
        <!--\s*build:(?P<type>css|js)\s+(?P<dest>\S+)\s*-->
        .*?
        <!--\s*endbuild\s*-->
        """,
        re.VERBOSE | re.DOTALL)
    def _sub(m):
        type = m.group('type')
        dest = m.group('dest').strip('\\')
        if type == 'css':
            return '<link rel="stylesheet" href="%s">' % dest
        elif type == 'js':
            return '<script type="text/javascript" src="%s"></script>' % dest
        else:
            # Unreachable given the pattern, kept for safety.
            warn('Unknown build block type (%s)' % type)
            return m.group(0)
    def _do(file_path):
        with _open_file(file_path, 'r+') as fd:
            s = fd.read()
            (new_s, n) = _re_build.subn(_sub, s)
            if n:
                puts(' (%d) %s' % (n, file_path))
                fd.seek(0)
                fd.write(new_s)
                fd.truncate()
    for r in conf:
        src = join(env.project_path, r)
        puts('usemin: %s' % src)
        if os.path.isdir(src):
            for f in _match_files(src, '.*\.html'):
                _do(join(src, f))
        else:
            _do(src)
#
# tasks
#
@task
def debug():
    """Setup debug settings: point deploys at the test bucket, pin the
    version to 0.0.0 and disable git tagging."""
    warn('DEBUG IS ON:')
    CONFIG['deploy']['bucket'] = 'test.knilab.com'
    CONFIG['version'] = '0.0.0'
    print 'deploy.bucket:', CONFIG['deploy']['bucket']
    print 'version:', CONFIG['version']
    print 'version tagging is OFF'
    print ''
    doit = prompt("Continue? (y/n): ").strip()
    if doit != 'y':
        abort('Stopped')
    env.debug = True
@task
def serve():
    """Run the local version of the documentation site (timeline.knightlab.com)"""
    # Runs the Flask dev server in the foreground.
    with lcd(join(env.project_path)):
        local('python website/app.py')
@task
def build():
    """Build version"""
    # Get build config
    if not 'build' in CONFIG:
        abort('Could not find "build" in config file')
    # Determine version (fall back to the newest existing git tag)
    if not 'version' in CONFIG:
        CONFIG['version'] = _last_version_tag()
    if not CONFIG['version']:
        abort('No available version tag')
    print 'Building version %(version)s...' % CONFIG
    # Clean build directory
    _clean(env.build_path)
    # Each key in CONFIG['build'] names a build-step function in this
    # module (concat, copy, lessc, minify, process, banner, ...).
    for key, param in CONFIG['build'].iteritems():
        getattr(sys.modules[__name__], key)(param)
@task
def stage():
    """
    Build version, copy to local cdn repository, tag last commit
    """
    if not 'stage' in CONFIG:
        abort('Could not find "stage" in config file')
    # Make sure cdn exists
    _check_path(dirname(env.cdn_path))
    # Ask user for a new version
    if not env.debug:
        CONFIG['version'] = _get_version_tag()
    build()
    cdn_path = join(env.cdn_path, CONFIG['version'])
    _clean(cdn_path)
    for r in CONFIG['stage']:
        copy([{"src": r['src'], "dst": cdn_path, "regex": r['regex']}])
    # Tag and push only outside debug mode.
    if not env.debug:
        with lcd(env.project_path):
            local('git tag %(version)s' % CONFIG)
            local('git push origin %(version)s' % CONFIG)
@task
def stage_latest():
    """
    Copy version to latest within local cdn repository
    """
    # Use the in-memory version when set, otherwise ask the user for an
    # existing tag.
    if 'version' in CONFIG:
        version = CONFIG['version']
    else:
        tags = _get_tags()
        puts('This project has the following tags:')
        puts(tags)
        while True:
            version = prompt("Which version to stage as 'latest'? ").strip()
            if not version in tags:
                warn('You must enter an existing version')
            else:
                break
    print 'stage_latest: %s' % version
    # Make sure version has been staged
    version_cdn_path = join(env.cdn_path, version)
    if not os.path.exists(version_cdn_path):
        abort("Version '%s' has not been staged" % version)
    # Stage version as latest
    latest_cdn_path = join(env.cdn_path, 'latest')
    _clean(latest_cdn_path)
    copy([{"src": version_cdn_path, "dst": latest_cdn_path}])
@task
def deploy():
    """Deploy to S3 bucket: render website templates, apply usemin, copy
    static assets and sync the result with the bundled s3cmd."""
    if not 'deploy' in CONFIG:
        abort('Could not find "deploy" in config file')
    # Make sure s3cmd.cnf exists
    _check_path(env.s3cmd_cfg)
    # Do we need to build anything here?!?
    #build()
    template_path = join(env.project_path, 'website', 'templates')
    deploy_path = join(env.project_path, 'build', 'website')
    _clean(deploy_path)
    # render templates and run usemin
    _render_templates(template_path, deploy_path)
    usemin([deploy_path])
    # copy static files
    copy([{
        "src": join(env.project_path, 'website', 'static'),
        "dst": join(deploy_path, 'static')
    }])
    # additional copy?
    if 'copy' in CONFIG['deploy']:
        copy(CONFIG['deploy']['copy'])
    # sync to S3 (public ACL; hidden files excluded; stale keys removed)
    with lcd(env.project_path):
        local('fabfile/s3cmd --config=%s sync' \
            ' --rexclude ".*/\.[^/]*$"' \
            ' --delete-removed --acl-public' \
            ' %s/ s3://%s/' \
            % (env.s3cmd_cfg, deploy_path, CONFIG['deploy']['bucket'])
        )

2116
fabfile/s3cmd

File diff suppressed because it is too large Load Diff

1
requirements.txt

@ -3,6 +3,7 @@ Flask==0.10.1
Jinja2==2.7 Jinja2==2.7
MarkupSafe==0.18 MarkupSafe==0.18
Werkzeug==0.9.3 Werkzeug==0.9.3
boto==2.13.3
itsdangerous==0.22 itsdangerous==0.22
paramiko==1.10.1 paramiko==1.10.1
ply==3.4 ply==3.4

Loading…
Cancel
Save