jywsn
11 years ago
24 changed files with 50 additions and 7117 deletions
@@ -0,0 +1,29 @@
from os.path import abspath, basename, dirname, join
import sys
from fabric.api import env

#
# Project-specific settings, alter as needed
#
# env.project_name = basename(dirname(__file__))
env.project_name = 'TimelineJS'

#
# Add paths
#
def add_paths(*args):
    """Make sure paths are in sys.path."""
    for p in args:
        if p not in sys.path:
            sys.path.append(p)

project_path = dirname(abspath(__file__))
repos_path = dirname(project_path)
fablib_path = join(repos_path, 'fablib')

add_paths(project_path, repos_path, fablib_path)

#
# Import from fablib
#
from fablib import *
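
For context (not part of the commit): once the fabfile above is on disk, the star-import exposes fablib's tasks to the fab command line, and env.project_name is available to them. A minimal sketch of a project-local task reading the same env settings, assuming Fabric 1.x (which provides fabric.api.task) is installed; the task name hello is hypothetical and not part of fablib:

# Hypothetical addition to the fabfile above, for illustration only.
from fabric.api import env, task

@task
def hello():
    """Print the configured project name (run as: fab hello)."""
    print env.project_name   # -> TimelineJS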
@@ -1,224 +0,0 @@
## Amazon S3 - Access Control List representation
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2

from Utils import getTreeFromXml

try:
    import xml.etree.ElementTree as ET
except ImportError:
    import elementtree.ElementTree as ET

class Grantee(object):
    ALL_USERS_URI = "http://acs.amazonaws.com/groups/global/AllUsers"
    LOG_DELIVERY_URI = "http://acs.amazonaws.com/groups/s3/LogDelivery"

    def __init__(self):
        self.xsi_type = None
        self.tag = None
        self.name = None
        self.display_name = None
        self.permission = None

    def __repr__(self):
        return 'Grantee("%(tag)s", "%(name)s", "%(permission)s")' % {
            "tag" : self.tag,
            "name" : self.name,
            "permission" : self.permission
        }

    def isAllUsers(self):
        return self.tag == "URI" and self.name == Grantee.ALL_USERS_URI

    def isAnonRead(self):
        return self.isAllUsers() and (self.permission == "READ" or self.permission == "FULL_CONTROL")

    def getElement(self):
        el = ET.Element("Grant")
        grantee = ET.SubElement(el, "Grantee", {
            'xmlns:xsi' : 'http://www.w3.org/2001/XMLSchema-instance',
            'xsi:type' : self.xsi_type
        })
        name = ET.SubElement(grantee, self.tag)
        name.text = self.name
        permission = ET.SubElement(el, "Permission")
        permission.text = self.permission
        return el

class GranteeAnonRead(Grantee):
    def __init__(self):
        Grantee.__init__(self)
        self.xsi_type = "Group"
        self.tag = "URI"
        self.name = Grantee.ALL_USERS_URI
        self.permission = "READ"

class GranteeLogDelivery(Grantee):
    def __init__(self, permission):
        """
        permission must be either READ_ACP or WRITE
        """
        Grantee.__init__(self)
        self.xsi_type = "Group"
        self.tag = "URI"
        self.name = Grantee.LOG_DELIVERY_URI
        self.permission = permission

class ACL(object):
    EMPTY_ACL = "<AccessControlPolicy><Owner><ID></ID></Owner><AccessControlList></AccessControlList></AccessControlPolicy>"

    def __init__(self, xml = None):
        if not xml:
            xml = ACL.EMPTY_ACL

        self.grantees = []
        self.owner_id = ""
        self.owner_nick = ""

        tree = getTreeFromXml(xml)
        self.parseOwner(tree)
        self.parseGrants(tree)

    def parseOwner(self, tree):
        self.owner_id = tree.findtext(".//Owner//ID")
        self.owner_nick = tree.findtext(".//Owner//DisplayName")

    def parseGrants(self, tree):
        for grant in tree.findall(".//Grant"):
            grantee = Grantee()
            g = grant.find(".//Grantee")
            grantee.xsi_type = g.attrib['{http://www.w3.org/2001/XMLSchema-instance}type']
            grantee.permission = grant.find('Permission').text
            for el in g:
                if el.tag == "DisplayName":
                    grantee.display_name = el.text
                else:
                    grantee.tag = el.tag
                    grantee.name = el.text
            self.grantees.append(grantee)

    def getGrantList(self):
        acl = []
        for grantee in self.grantees:
            if grantee.display_name:
                user = grantee.display_name
            elif grantee.isAllUsers():
                user = "*anon*"
            else:
                user = grantee.name
            acl.append({'grantee': user, 'permission': grantee.permission})
        return acl

    def getOwner(self):
        return { 'id' : self.owner_id, 'nick' : self.owner_nick }

    def isAnonRead(self):
        for grantee in self.grantees:
            if grantee.isAnonRead():
                return True
        return False

    def grantAnonRead(self):
        if not self.isAnonRead():
            self.appendGrantee(GranteeAnonRead())

    def revokeAnonRead(self):
        self.grantees = [g for g in self.grantees if not g.isAnonRead()]

    def appendGrantee(self, grantee):
        self.grantees.append(grantee)

    def hasGrant(self, name, permission):
        name = name.lower()
        permission = permission.upper()

        for grantee in self.grantees:
            if grantee.name.lower() == name:
                if grantee.permission == "FULL_CONTROL":
                    return True
                elif grantee.permission.upper() == permission:
                    return True

        return False

    def grant(self, name, permission):
        if self.hasGrant(name, permission):
            return

        name = name.lower()
        permission = permission.upper()

        if "ALL" == permission:
            permission = "FULL_CONTROL"

        if "FULL_CONTROL" == permission:
            self.revoke(name, "ALL")

        grantee = Grantee()
        grantee.name = name
        grantee.permission = permission

        if name.find('@') <= -1: # ultra lame attempt to differentiate email ids from canonical ids
            grantee.xsi_type = "CanonicalUser"
            grantee.tag = "ID"
        else:
            grantee.xsi_type = "AmazonCustomerByEmail"
            grantee.tag = "EmailAddress"

        self.appendGrantee(grantee)


    def revoke(self, name, permission):
        name = name.lower()
        permission = permission.upper()

        if "ALL" == permission:
            self.grantees = [g for g in self.grantees if not g.name.lower() == name]
        else:
            self.grantees = [g for g in self.grantees if not (g.name.lower() == name and g.permission.upper() == permission)]


    def __str__(self):
        tree = getTreeFromXml(ACL.EMPTY_ACL)
        tree.attrib['xmlns'] = "http://s3.amazonaws.com/doc/2006-03-01/"
        owner = tree.find(".//Owner//ID")
        owner.text = self.owner_id
        acl = tree.find(".//AccessControlList")
        for grantee in self.grantees:
            acl.append(grantee.getElement())
        return ET.tostring(tree)

if __name__ == "__main__":
    xml = """<?xml version="1.0" encoding="UTF-8"?>
<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<Owner>
    <ID>12345678901234567890</ID>
    <DisplayName>owner-nickname</DisplayName>
</Owner>
<AccessControlList>
    <Grant>
        <Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser">
            <ID>12345678901234567890</ID>
            <DisplayName>owner-nickname</DisplayName>
        </Grantee>
        <Permission>FULL_CONTROL</Permission>
    </Grant>
    <Grant>
        <Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group">
            <URI>http://acs.amazonaws.com/groups/global/AllUsers</URI>
        </Grantee>
        <Permission>READ</Permission>
    </Grant>
</AccessControlList>
</AccessControlPolicy>
"""
    acl = ACL(xml)
    print "Grants:", acl.getGrantList()
    acl.revokeAnonRead()
    print "Grants:", acl.getGrantList()
    acl.grantAnonRead()
    print "Grants:", acl.getGrantList()
    print acl

# vim:et:ts=4:sts=4:ai
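
For reference (not part of the diff): a minimal sketch of how ACL.grant() routes names to grantee types, assuming the module above is importable; the canonical ID and e-mail address are made up.

acl = ACL()                                  # starts from EMPTY_ACL
acl.grant("user@example.com", "READ")        # '@' present -> AmazonCustomerByEmail grantee
acl.grant("1234567890abcdef", "ALL")         # no '@' -> CanonicalUser, stored as FULL_CONTROL
acl.grantAnonRead()                          # adds the AllUsers READ grant
print acl.getGrantList()
# [{'grantee': 'user@example.com', 'permission': 'READ'},
#  {'grantee': '1234567890abcdef', 'permission': 'FULL_CONTROL'},
#  {'grantee': '*anon*', 'permission': 'READ'}]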
@@ -1,92 +0,0 @@
## Amazon S3 - Access Control List representation
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2

import S3Uri
from Exceptions import ParameterError
from Utils import getTreeFromXml
from ACL import GranteeAnonRead

try:
    import xml.etree.ElementTree as ET
except ImportError:
    import elementtree.ElementTree as ET

__all__ = []
class AccessLog(object):
    LOG_DISABLED = "<BucketLoggingStatus></BucketLoggingStatus>"
    LOG_TEMPLATE = "<LoggingEnabled><TargetBucket></TargetBucket><TargetPrefix></TargetPrefix></LoggingEnabled>"

    def __init__(self, xml = None):
        if not xml:
            xml = self.LOG_DISABLED
        self.tree = getTreeFromXml(xml)
        self.tree.attrib['xmlns'] = "http://doc.s3.amazonaws.com/2006-03-01"

    def isLoggingEnabled(self):
        return bool(self.tree.find(".//LoggingEnabled"))

    def disableLogging(self):
        el = self.tree.find(".//LoggingEnabled")
        if el:
            self.tree.remove(el)

    def enableLogging(self, target_prefix_uri):
        el = self.tree.find(".//LoggingEnabled")
        if not el:
            el = getTreeFromXml(self.LOG_TEMPLATE)
            self.tree.append(el)
        el.find(".//TargetBucket").text = target_prefix_uri.bucket()
        el.find(".//TargetPrefix").text = target_prefix_uri.object()

    def targetPrefix(self):
        if self.isLoggingEnabled():
            el = self.tree.find(".//LoggingEnabled")
            target_prefix = "s3://%s/%s" % (
                self.tree.find(".//LoggingEnabled//TargetBucket").text,
                self.tree.find(".//LoggingEnabled//TargetPrefix").text)
            return S3Uri.S3Uri(target_prefix)
        else:
            return ""

    def setAclPublic(self, acl_public):
        le = self.tree.find(".//LoggingEnabled")
        if not le:
            raise ParameterError("Logging not enabled, can't set default ACL for logs")
        tg = le.find(".//TargetGrants")
        if not acl_public:
            if not tg:
                ## All good, it's not been there
                return
            else:
                le.remove(tg)
        else: # acl_public == True
            anon_read = GranteeAnonRead().getElement()
            if not tg:
                tg = ET.SubElement(le, "TargetGrants")
            ## What if TargetGrants already exists? We should check if
            ## AnonRead is there before appending a new one. Later...
            tg.append(anon_read)

    def isAclPublic(self):
        raise NotImplementedError()

    def __str__(self):
        return ET.tostring(self.tree)
__all__.append("AccessLog")

if __name__ == "__main__":
    from S3Uri import S3Uri
    log = AccessLog()
    print log
    log.enableLogging(S3Uri("s3://targetbucket/prefix/log-"))
    print log
    log.setAclPublic(True)
    print log
    log.setAclPublic(False)
    print log
    log.disableLogging()
    print log

# vim:et:ts=4:sts=4:ai
@@ -1,42 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2

class BidirMap(object):
    def __init__(self, **map):
        self.k2v = {}
        self.v2k = {}
        for key in map:
            self.__setitem__(key, map[key])

    def __setitem__(self, key, value):
        if self.v2k.has_key(value):
            if self.v2k[value] != key:
                raise KeyError("Value '"+str(value)+"' already in use with key '"+str(self.v2k[value])+"'")
        try:
            del(self.v2k[self.k2v[key]])
        except KeyError:
            pass
        self.k2v[key] = value
        self.v2k[value] = key

    def __getitem__(self, key):
        return self.k2v[key]

    def __str__(self):
        return self.v2k.__str__()

    def getkey(self, value):
        return self.v2k[value]

    def getvalue(self, key):
        return self.k2v[key]

    def keys(self):
        return [key for key in self.k2v]

    def values(self):
        return [value for value in self.v2k]

# vim:et:ts=4:sts=4:ai
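
For reference (not part of the diff): a short sketch of the two-way lookup this class provides.

m = BidirMap(READ=1, WRITE=2)      # seed with keyword arguments
m['FULL_CONTROL'] = 3              # ordinary item assignment
print m['READ']                    # -> 1 (key to value)
print m.getkey(2)                  # -> 'WRITE' (value back to key)
m['READ'] = 5                      # re-mapping a key drops its old value
print m.getvalue('READ')           # -> 5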
@@ -1,773 +0,0 @@
## Amazon CloudFront support
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2

import sys
import time
import httplib
import random
from datetime import datetime
from logging import debug, info, warning, error

try:
    import xml.etree.ElementTree as ET
except ImportError:
    import elementtree.ElementTree as ET

from Config import Config
from Exceptions import *
from Utils import getTreeFromXml, appendXmlTextNode, getDictFromTree, dateS3toPython, sign_string, getBucketFromHostname, getHostnameFromBucket
from S3Uri import S3Uri, S3UriS3
from FileLists import fetch_remote_list

cloudfront_api_version = "2010-11-01"
cloudfront_resource = "/%(api_ver)s/distribution" % { 'api_ver' : cloudfront_api_version }

def output(message):
    sys.stdout.write(message + "\n")

def pretty_output(label, message):
    #label = ("%s " % label).ljust(20, ".")
    label = ("%s:" % label).ljust(15)
    output("%s %s" % (label, message))

class DistributionSummary(object):
    ## Example:
    ##
    ## <DistributionSummary>
    ##   <Id>1234567890ABC</Id>
    ##   <Status>Deployed</Status>
    ##   <LastModifiedTime>2009-01-16T11:49:02.189Z</LastModifiedTime>
    ##   <DomainName>blahblahblah.cloudfront.net</DomainName>
    ##   <S3Origin>
    ##     <DNSName>example.bucket.s3.amazonaws.com</DNSName>
    ##   </S3Origin>
    ##   <CNAME>cdn.example.com</CNAME>
    ##   <CNAME>img.example.com</CNAME>
    ##   <Comment>What Ever</Comment>
    ##   <Enabled>true</Enabled>
    ## </DistributionSummary>

    def __init__(self, tree):
        if tree.tag != "DistributionSummary":
            raise ValueError("Expected <DistributionSummary /> xml, got: <%s />" % tree.tag)
        self.parse(tree)

    def parse(self, tree):
        self.info = getDictFromTree(tree)
        self.info['Enabled'] = (self.info['Enabled'].lower() == "true")
        if self.info.has_key("CNAME") and type(self.info['CNAME']) != list:
            self.info['CNAME'] = [self.info['CNAME']]

    def uri(self):
        return S3Uri("cf://%s" % self.info['Id'])

class DistributionList(object):
    ## Example:
    ##
    ## <DistributionList xmlns="http://cloudfront.amazonaws.com/doc/2010-07-15/">
    ##   <Marker />
    ##   <MaxItems>100</MaxItems>
    ##   <IsTruncated>false</IsTruncated>
    ##   <DistributionSummary>
    ##   ... handled by DistributionSummary() class ...
    ##   </DistributionSummary>
    ## </DistributionList>

    def __init__(self, xml):
        tree = getTreeFromXml(xml)
        if tree.tag != "DistributionList":
            raise ValueError("Expected <DistributionList /> xml, got: <%s />" % tree.tag)
        self.parse(tree)

    def parse(self, tree):
        self.info = getDictFromTree(tree)
        ## Normalise some items
        self.info['IsTruncated'] = (self.info['IsTruncated'].lower() == "true")

        self.dist_summs = []
        for dist_summ in tree.findall(".//DistributionSummary"):
            self.dist_summs.append(DistributionSummary(dist_summ))

class Distribution(object):
    ## Example:
    ##
    ## <Distribution xmlns="http://cloudfront.amazonaws.com/doc/2010-07-15/">
    ##   <Id>1234567890ABC</Id>
    ##   <Status>InProgress</Status>
    ##   <LastModifiedTime>2009-01-16T13:07:11.319Z</LastModifiedTime>
    ##   <DomainName>blahblahblah.cloudfront.net</DomainName>
    ##   <DistributionConfig>
    ##   ... handled by DistributionConfig() class ...
    ##   </DistributionConfig>
    ## </Distribution>

    def __init__(self, xml):
        tree = getTreeFromXml(xml)
        if tree.tag != "Distribution":
            raise ValueError("Expected <Distribution /> xml, got: <%s />" % tree.tag)
        self.parse(tree)

    def parse(self, tree):
        self.info = getDictFromTree(tree)
        ## Normalise some items
        self.info['LastModifiedTime'] = dateS3toPython(self.info['LastModifiedTime'])

        self.info['DistributionConfig'] = DistributionConfig(tree = tree.find(".//DistributionConfig"))

    def uri(self):
        return S3Uri("cf://%s" % self.info['Id'])

class DistributionConfig(object):
    ## Example:
    ##
    ## <DistributionConfig>
    ##   <Origin>somebucket.s3.amazonaws.com</Origin>
    ##   <CallerReference>s3://somebucket/</CallerReference>
    ##   <Comment>http://somebucket.s3.amazonaws.com/</Comment>
    ##   <Enabled>true</Enabled>
    ##   <Logging>
    ##     <Bucket>bu.ck.et</Bucket>
    ##     <Prefix>/cf-somebucket/</Prefix>
    ##   </Logging>
    ## </DistributionConfig>

    EMPTY_CONFIG = "<DistributionConfig><S3Origin><DNSName/></S3Origin><CallerReference/><Enabled>true</Enabled></DistributionConfig>"
    xmlns = "http://cloudfront.amazonaws.com/doc/%(api_ver)s/" % { 'api_ver' : cloudfront_api_version }
    def __init__(self, xml = None, tree = None):
        if xml is None:
            xml = DistributionConfig.EMPTY_CONFIG

        if tree is None:
            tree = getTreeFromXml(xml)

        if tree.tag != "DistributionConfig":
            raise ValueError("Expected <DistributionConfig /> xml, got: <%s />" % tree.tag)
        self.parse(tree)

    def parse(self, tree):
        self.info = getDictFromTree(tree)
        self.info['Enabled'] = (self.info['Enabled'].lower() == "true")
        if not self.info.has_key("CNAME"):
            self.info['CNAME'] = []
        if type(self.info['CNAME']) != list:
            self.info['CNAME'] = [self.info['CNAME']]
        self.info['CNAME'] = [cname.lower() for cname in self.info['CNAME']]
        if not self.info.has_key("Comment"):
            self.info['Comment'] = ""
        if not self.info.has_key("DefaultRootObject"):
            self.info['DefaultRootObject'] = ""
        ## Figure out logging - complex node not parsed by getDictFromTree()
        logging_nodes = tree.findall(".//Logging")
        if logging_nodes:
            logging_dict = getDictFromTree(logging_nodes[0])
            logging_dict['Bucket'], success = getBucketFromHostname(logging_dict['Bucket'])
            if not success:
                warning("Logging to unparsable bucket name: %s" % logging_dict['Bucket'])
            self.info['Logging'] = S3UriS3("s3://%(Bucket)s/%(Prefix)s" % logging_dict)
        else:
            self.info['Logging'] = None

    def __str__(self):
        tree = ET.Element("DistributionConfig")
        tree.attrib['xmlns'] = DistributionConfig.xmlns

        ## Retain the order of the following calls!
        s3org = appendXmlTextNode("S3Origin", '', tree)
        appendXmlTextNode("DNSName", self.info['S3Origin']['DNSName'], s3org)
        appendXmlTextNode("CallerReference", self.info['CallerReference'], tree)
        for cname in self.info['CNAME']:
            appendXmlTextNode("CNAME", cname.lower(), tree)
        if self.info['Comment']:
            appendXmlTextNode("Comment", self.info['Comment'], tree)
        appendXmlTextNode("Enabled", str(self.info['Enabled']).lower(), tree)
        # don't create an empty DefaultRootObject element as it would result in a MalformedXML error
        if str(self.info['DefaultRootObject']):
            appendXmlTextNode("DefaultRootObject", str(self.info['DefaultRootObject']), tree)
        if self.info['Logging']:
            logging_el = ET.Element("Logging")
            appendXmlTextNode("Bucket", getHostnameFromBucket(self.info['Logging'].bucket()), logging_el)
            appendXmlTextNode("Prefix", self.info['Logging'].object(), logging_el)
            tree.append(logging_el)
        return ET.tostring(tree)

class Invalidation(object):
    ## Example:
    ##
    ## <Invalidation xmlns="http://cloudfront.amazonaws.com/doc/2010-11-01/">
    ##   <Id>id</Id>
    ##   <Status>status</Status>
    ##   <CreateTime>date</CreateTime>
    ##   <InvalidationBatch>
    ##     <Path>/image1.jpg</Path>
    ##     <Path>/image2.jpg</Path>
    ##     <Path>/videos/movie.flv</Path>
    ##     <CallerReference>my-batch</CallerReference>
    ##   </InvalidationBatch>
    ## </Invalidation>

    def __init__(self, xml):
        tree = getTreeFromXml(xml)
        if tree.tag != "Invalidation":
            raise ValueError("Expected <Invalidation /> xml, got: <%s />" % tree.tag)
        self.parse(tree)

    def parse(self, tree):
        self.info = getDictFromTree(tree)

    def __str__(self):
        return str(self.info)

class InvalidationList(object):
    ## Example:
    ##
    ## <InvalidationList>
    ##   <Marker/>
    ##   <NextMarker>Invalidation ID</NextMarker>
    ##   <MaxItems>2</MaxItems>
    ##   <IsTruncated>true</IsTruncated>
    ##   <InvalidationSummary>
    ##     <Id>[Second Invalidation ID]</Id>
    ##     <Status>Completed</Status>
    ##   </InvalidationSummary>
    ##   <InvalidationSummary>
    ##     <Id>[First Invalidation ID]</Id>
    ##     <Status>Completed</Status>
    ##   </InvalidationSummary>
    ## </InvalidationList>

    def __init__(self, xml):
        tree = getTreeFromXml(xml)
        if tree.tag != "InvalidationList":
            raise ValueError("Expected <InvalidationList /> xml, got: <%s />" % tree.tag)
        self.parse(tree)

    def parse(self, tree):
        self.info = getDictFromTree(tree)

    def __str__(self):
        return str(self.info)

class InvalidationBatch(object):
    ## Example:
    ##
    ## <InvalidationBatch>
    ##   <Path>/image1.jpg</Path>
    ##   <Path>/image2.jpg</Path>
    ##   <Path>/videos/movie.flv</Path>
    ##   <Path>/sound%20track.mp3</Path>
    ##   <CallerReference>my-batch</CallerReference>
    ## </InvalidationBatch>

    def __init__(self, reference = None, distribution = None, paths = []):
        if reference:
            self.reference = reference
        else:
            if not distribution:
                distribution="0"
            self.reference = "%s.%s.%s" % (distribution,
                datetime.strftime(datetime.now(),"%Y%m%d%H%M%S"),
                random.randint(1000,9999))
        self.paths = []
        self.add_objects(paths)

    def add_objects(self, paths):
        self.paths.extend(paths)

    def get_reference(self):
        return self.reference

    def __str__(self):
        tree = ET.Element("InvalidationBatch")

        for path in self.paths:
            if len(path) < 1 or path[0] != "/":
                path = "/" + path
            appendXmlTextNode("Path", path, tree)
        appendXmlTextNode("CallerReference", self.reference, tree)
        return ET.tostring(tree)

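For reference (not part of the diff): a sketch of what the batch serialisation above produces, assuming appendXmlTextNode from Utils is available; the reference string is made up.

batch = InvalidationBatch(reference="my-batch",
                          paths=["image1.jpg", "/videos/movie.flv"])
print str(batch)                   # paths get a leading "/" if missing
# <InvalidationBatch><Path>/image1.jpg</Path><Path>/videos/movie.flv</Path>
# <CallerReference>my-batch</CallerReference></InvalidationBatch>
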
class CloudFront(object):
    operations = {
        "CreateDist" : { 'method' : "POST", 'resource' : "" },
        "DeleteDist" : { 'method' : "DELETE", 'resource' : "/%(dist_id)s" },
        "GetList" : { 'method' : "GET", 'resource' : "" },
        "GetDistInfo" : { 'method' : "GET", 'resource' : "/%(dist_id)s" },
        "GetDistConfig" : { 'method' : "GET", 'resource' : "/%(dist_id)s/config" },
        "SetDistConfig" : { 'method' : "PUT", 'resource' : "/%(dist_id)s/config" },
        "Invalidate" : { 'method' : "POST", 'resource' : "/%(dist_id)s/invalidation" },
        "GetInvalList" : { 'method' : "GET", 'resource' : "/%(dist_id)s/invalidation" },
        "GetInvalInfo" : { 'method' : "GET", 'resource' : "/%(dist_id)s/invalidation/%(request_id)s" },
    }

    ## Maximum attempts of re-issuing failed requests
    _max_retries = 5
    dist_list = None

    def __init__(self, config):
        self.config = config

    ## --------------------------------------------------
    ## Methods implementing CloudFront API
    ## --------------------------------------------------

    def GetList(self):
        response = self.send_request("GetList")
        response['dist_list'] = DistributionList(response['data'])
        if response['dist_list'].info['IsTruncated']:
            raise NotImplementedError("List is truncated. Ask s3cmd author to add support.")
        ## TODO: handle Truncated
        return response

    def CreateDistribution(self, uri, cnames_add = [], comment = None, logging = None, default_root_object = None):
        dist_config = DistributionConfig()
        dist_config.info['Enabled'] = True
        dist_config.info['S3Origin']['DNSName'] = uri.host_name()
        dist_config.info['CallerReference'] = str(uri)
        dist_config.info['DefaultRootObject'] = default_root_object
        if comment == None:
            dist_config.info['Comment'] = uri.public_url()
        else:
            dist_config.info['Comment'] = comment
        for cname in cnames_add:
            if dist_config.info['CNAME'].count(cname) == 0:
                dist_config.info['CNAME'].append(cname)
        if logging:
            dist_config.info['Logging'] = S3UriS3(logging)
        request_body = str(dist_config)
        debug("CreateDistribution(): request_body: %s" % request_body)
        response = self.send_request("CreateDist", body = request_body)
        response['distribution'] = Distribution(response['data'])
        return response

    def ModifyDistribution(self, cfuri, cnames_add = [], cnames_remove = [],
                           comment = None, enabled = None, logging = None,
                           default_root_object = None):
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        # Get current dist status (enabled/disabled) and Etag
        info("Checking current status of %s" % cfuri)
        response = self.GetDistConfig(cfuri)
        dc = response['dist_config']
        if enabled != None:
            dc.info['Enabled'] = enabled
        if comment != None:
            dc.info['Comment'] = comment
        if default_root_object != None:
            dc.info['DefaultRootObject'] = default_root_object
        for cname in cnames_add:
            if dc.info['CNAME'].count(cname) == 0:
                dc.info['CNAME'].append(cname)
        for cname in cnames_remove:
            while dc.info['CNAME'].count(cname) > 0:
                dc.info['CNAME'].remove(cname)
        if logging != None:
            if logging == False:
                dc.info['Logging'] = False
            else:
                dc.info['Logging'] = S3UriS3(logging)
        response = self.SetDistConfig(cfuri, dc, response['headers']['etag'])
        return response

    def DeleteDistribution(self, cfuri):
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        # Get current dist status (enabled/disabled) and Etag
        info("Checking current status of %s" % cfuri)
        response = self.GetDistConfig(cfuri)
        if response['dist_config'].info['Enabled']:
            info("Distribution is ENABLED. Disabling first.")
            response['dist_config'].info['Enabled'] = False
            response = self.SetDistConfig(cfuri, response['dist_config'],
                                          response['headers']['etag'])
            warning("Waiting for Distribution to become disabled.")
            warning("This may take several minutes, please wait.")
            while True:
                response = self.GetDistInfo(cfuri)
                d = response['distribution']
                if d.info['Status'] == "Deployed" and d.info['Enabled'] == False:
                    info("Distribution is now disabled")
                    break
                warning("Still waiting...")
                time.sleep(10)
        headers = {}
        headers['if-match'] = response['headers']['etag']
        response = self.send_request("DeleteDist", dist_id = cfuri.dist_id(),
                                     headers = headers)
        return response

    def GetDistInfo(self, cfuri):
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        response = self.send_request("GetDistInfo", dist_id = cfuri.dist_id())
        response['distribution'] = Distribution(response['data'])
        return response

    def GetDistConfig(self, cfuri):
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        response = self.send_request("GetDistConfig", dist_id = cfuri.dist_id())
        response['dist_config'] = DistributionConfig(response['data'])
        return response

    def SetDistConfig(self, cfuri, dist_config, etag = None):
        if etag == None:
            debug("SetDistConfig(): Etag not set. Fetching it first.")
            etag = self.GetDistConfig(cfuri)['headers']['etag']
        debug("SetDistConfig(): Etag = %s" % etag)
        request_body = str(dist_config)
        debug("SetDistConfig(): request_body: %s" % request_body)
        headers = {}
        headers['if-match'] = etag
        response = self.send_request("SetDistConfig", dist_id = cfuri.dist_id(),
                                     body = request_body, headers = headers)
        return response

    def InvalidateObjects(self, uri, paths, default_index_file, invalidate_default_index_on_cf, invalidate_default_index_root_on_cf):
        # joseprio: if the user doesn't want to invalidate the default index
        # path, or if the user wants to invalidate the root of the default
        # index, we need to process those paths
        if default_index_file is not None and (not invalidate_default_index_on_cf or invalidate_default_index_root_on_cf):
            new_paths = []
            default_index_suffix = '/' + default_index_file
            for path in paths:
                if path.endswith(default_index_suffix) or path == default_index_file:
                    if invalidate_default_index_on_cf:
                        new_paths.append(path)
                    if invalidate_default_index_root_on_cf:
                        new_paths.append(path[:-len(default_index_file)])
                else:
                    new_paths.append(path)
            paths = new_paths

        # uri could be either cf:// or s3:// uri
        cfuri = self.get_dist_name_for_bucket(uri)
        if len(paths) > 999:
            try:
                tmp_filename = Utils.mktmpfile()
                f = open(tmp_filename, "w")
                f.write("\n".join(paths)+"\n")
                f.close()
                warning("Request to invalidate %d paths (max 999 supported)" % len(paths))
                warning("All the paths are now saved in: %s" % tmp_filename)
            except:
                pass
            raise ParameterError("Too many paths to invalidate")
        invalbatch = InvalidationBatch(distribution = cfuri.dist_id(), paths = paths)
        debug("InvalidateObjects(): request_body: %s" % invalbatch)
        response = self.send_request("Invalidate", dist_id = cfuri.dist_id(),
                                     body = str(invalbatch))
        response['dist_id'] = cfuri.dist_id()
        if response['status'] == 201:
            inval_info = Invalidation(response['data']).info
            response['request_id'] = inval_info['Id']
        debug("InvalidateObjects(): response: %s" % response)
        return response

    def GetInvalList(self, cfuri):
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        response = self.send_request("GetInvalList", dist_id = cfuri.dist_id())
        response['inval_list'] = InvalidationList(response['data'])
        return response

    def GetInvalInfo(self, cfuri):
        if cfuri.type != "cf":
            raise ValueError("Expected CFUri instead of: %s" % cfuri)
        if cfuri.request_id() is None:
            raise ValueError("Expected CFUri with Request ID")
        response = self.send_request("GetInvalInfo", dist_id = cfuri.dist_id(), request_id = cfuri.request_id())
        response['inval_status'] = Invalidation(response['data'])
        return response

    ## --------------------------------------------------
    ## Low-level methods for handling CloudFront requests
    ## --------------------------------------------------

    def send_request(self, op_name, dist_id = None, request_id = None, body = None, headers = {}, retries = _max_retries):
        operation = self.operations[op_name]
        if body:
            headers['content-type'] = 'text/plain'
        request = self.create_request(operation, dist_id, request_id, headers)
        conn = self.get_connection()
        debug("send_request(): %s %s" % (request['method'], request['resource']))
        conn.request(request['method'], request['resource'], body, request['headers'])
        http_response = conn.getresponse()
        response = {}
        response["status"] = http_response.status
        response["reason"] = http_response.reason
        response["headers"] = dict(http_response.getheaders())
        response["data"] = http_response.read()
        conn.close()

        debug("CloudFront: response: %r" % response)

        if response["status"] >= 500:
            e = CloudFrontError(response)
            if retries:
                warning(u"Retrying failed request: %s" % op_name)
                warning(unicode(e))
                warning("Waiting %d sec..." % self._fail_wait(retries))
                time.sleep(self._fail_wait(retries))
                return self.send_request(op_name, dist_id, body, retries - 1)
            else:
                raise e

        if response["status"] < 200 or response["status"] > 299:
            raise CloudFrontError(response)

        return response

    def create_request(self, operation, dist_id = None, request_id = None, headers = None):
        resource = cloudfront_resource + (
            operation['resource'] % { 'dist_id' : dist_id, 'request_id' : request_id })

        if not headers:
            headers = {}

        if headers.has_key("date"):
            if not headers.has_key("x-amz-date"):
                headers["x-amz-date"] = headers["date"]
            del(headers["date"])

        if not headers.has_key("x-amz-date"):
            headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())

        if len(self.config.access_token)>0:
            self.config.refresh_role()
            headers['x-amz-security-token']=self.config.access_token

        signature = self.sign_request(headers)
        headers["Authorization"] = "AWS "+self.config.access_key+":"+signature

        request = {}
        request['resource'] = resource
        request['headers'] = headers
        request['method'] = operation['method']

        return request

    def sign_request(self, headers):
        string_to_sign = headers['x-amz-date']
        signature = sign_string(string_to_sign)
        debug(u"CloudFront.sign_request('%s') = %s" % (string_to_sign, signature))
        return signature

    def get_connection(self):
        if self.config.proxy_host != "":
            raise ParameterError("CloudFront commands don't work from behind a HTTP proxy")
        return httplib.HTTPSConnection(self.config.cloudfront_host)

    def _fail_wait(self, retries):
        # Wait a few seconds. The more it fails the more we wait.
        return (self._max_retries - retries + 1) * 3

    def get_dist_name_for_bucket(self, uri):
        if (uri.type == "cf"):
            return uri
        if (uri.type != "s3"):
            raise ParameterError("CloudFront or S3 URI required instead of: %s" % arg)

        debug("_get_dist_name_for_bucket(%r)" % uri)
        if CloudFront.dist_list is None:
            response = self.GetList()
            CloudFront.dist_list = {}
            for d in response['dist_list'].dist_summs:
                if d.info.has_key("S3Origin"):
                    CloudFront.dist_list[getBucketFromHostname(d.info['S3Origin']['DNSName'])[0]] = d.uri()
                elif d.info.has_key("CustomOrigin"):
                    # Aral: This used to skip over distributions with CustomOrigin, however, we mustn't
                    # do this since S3 buckets that are set up as websites use custom origins.
                    # Thankfully, the custom origin URLs they use start with the URL of the
                    # S3 bucket. Here, we make use of this naming convention to support this use case.
                    distListIndex = getBucketFromHostname(d.info['CustomOrigin']['DNSName'])[0]
                    distListIndex = distListIndex[:len(uri.bucket())]
                    CloudFront.dist_list[distListIndex] = d.uri()
                else:
                    # Aral: I'm not sure when this condition will be reached, but keeping it in there.
                    continue
            debug("dist_list: %s" % CloudFront.dist_list)
        try:
            return CloudFront.dist_list[uri.bucket()]
        except Exception, e:
            debug(e)
            raise ParameterError("Unable to translate S3 URI to CloudFront distribution name: %s" % arg)

class Cmd(object):
    """
    Class that implements CloudFront commands
    """

    class Options(object):
        cf_cnames_add = []
        cf_cnames_remove = []
        cf_comment = None
        cf_enable = None
        cf_logging = None
        cf_default_root_object = None

        def option_list(self):
            return [opt for opt in dir(self) if opt.startswith("cf_")]

        def update_option(self, option, value):
            setattr(Cmd.options, option, value)

    options = Options()

    @staticmethod
    def _parse_args(args):
        cf = CloudFront(Config())
        cfuris = []
        for arg in args:
            uri = cf.get_dist_name_for_bucket(S3Uri(arg))
            cfuris.append(uri)
        return cfuris

    @staticmethod
    def info(args):
        cf = CloudFront(Config())
        if not args:
            response = cf.GetList()
            for d in response['dist_list'].dist_summs:
                if d.info.has_key("S3Origin"):
                    origin = S3UriS3.httpurl_to_s3uri(d.info['S3Origin']['DNSName'])
                elif d.info.has_key("CustomOrigin"):
                    origin = "http://%s/" % d.info['CustomOrigin']['DNSName']
                else:
                    origin = "<unknown>"
                pretty_output("Origin", origin)
                pretty_output("DistId", d.uri())
                pretty_output("DomainName", d.info['DomainName'])
                if d.info.has_key("CNAME"):
                    pretty_output("CNAMEs", ", ".join(d.info['CNAME']))
                pretty_output("Status", d.info['Status'])
                pretty_output("Enabled", d.info['Enabled'])
                output("")
        else:
            cfuris = Cmd._parse_args(args)
            for cfuri in cfuris:
                response = cf.GetDistInfo(cfuri)
                d = response['distribution']
                dc = d.info['DistributionConfig']
                if dc.info.has_key("S3Origin"):
                    origin = S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName'])
                elif dc.info.has_key("CustomOrigin"):
                    origin = "http://%s/" % dc.info['CustomOrigin']['DNSName']
                else:
                    origin = "<unknown>"
                pretty_output("Origin", origin)
                pretty_output("DistId", d.uri())
                pretty_output("DomainName", d.info['DomainName'])
                if dc.info.has_key("CNAME"):
                    pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
                pretty_output("Status", d.info['Status'])
                pretty_output("Comment", dc.info['Comment'])
                pretty_output("Enabled", dc.info['Enabled'])
                pretty_output("DfltRootObject", dc.info['DefaultRootObject'])
                pretty_output("Logging", dc.info['Logging'] or "Disabled")
                pretty_output("Etag", response['headers']['etag'])

    @staticmethod
    def create(args):
        cf = CloudFront(Config())
        buckets = []
        for arg in args:
            uri = S3Uri(arg)
            if uri.type != "s3":
                raise ParameterError("Bucket can only be created from a s3:// URI instead of: %s" % arg)
            if uri.object():
                raise ParameterError("Use s3:// URI with a bucket name only instead of: %s" % arg)
            if not uri.is_dns_compatible():
                raise ParameterError("CloudFront can only handle lowercase-named buckets.")
            buckets.append(uri)
        if not buckets:
            raise ParameterError("No valid bucket names found")
        for uri in buckets:
            info("Creating distribution from: %s" % uri)
            response = cf.CreateDistribution(uri, cnames_add = Cmd.options.cf_cnames_add,
                                             comment = Cmd.options.cf_comment,
                                             logging = Cmd.options.cf_logging,
                                             default_root_object = Cmd.options.cf_default_root_object)
            d = response['distribution']
            dc = d.info['DistributionConfig']
            output("Distribution created:")
            pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
            pretty_output("DistId", d.uri())
            pretty_output("DomainName", d.info['DomainName'])
            pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
            pretty_output("Comment", dc.info['Comment'])
            pretty_output("Status", d.info['Status'])
            pretty_output("Enabled", dc.info['Enabled'])
            pretty_output("DefaultRootObject", dc.info['DefaultRootObject'])
            pretty_output("Etag", response['headers']['etag'])

    @staticmethod
    def delete(args):
        cf = CloudFront(Config())
        cfuris = Cmd._parse_args(args)
        for cfuri in cfuris:
            response = cf.DeleteDistribution(cfuri)
            if response['status'] >= 400:
                error("Distribution %s could not be deleted: %s" % (cfuri, response['reason']))
            output("Distribution %s deleted" % cfuri)

    @staticmethod
    def modify(args):
        cf = CloudFront(Config())
        if len(args) > 1:
            raise ParameterError("Too many parameters. Modify one Distribution at a time.")
        try:
            cfuri = Cmd._parse_args(args)[0]
        except IndexError, e:
            raise ParameterError("No valid Distribution URI found.")
        response = cf.ModifyDistribution(cfuri,
                                         cnames_add = Cmd.options.cf_cnames_add,
                                         cnames_remove = Cmd.options.cf_cnames_remove,
                                         comment = Cmd.options.cf_comment,
                                         enabled = Cmd.options.cf_enable,
                                         logging = Cmd.options.cf_logging,
                                         default_root_object = Cmd.options.cf_default_root_object)
        if response['status'] >= 400:
            error("Distribution %s could not be modified: %s" % (cfuri, response['reason']))
        output("Distribution modified: %s" % cfuri)
        response = cf.GetDistInfo(cfuri)
        d = response['distribution']
        dc = d.info['DistributionConfig']
        pretty_output("Origin", S3UriS3.httpurl_to_s3uri(dc.info['S3Origin']['DNSName']))
        pretty_output("DistId", d.uri())
        pretty_output("DomainName", d.info['DomainName'])
        pretty_output("Status", d.info['Status'])
        pretty_output("CNAMEs", ", ".join(dc.info['CNAME']))
        pretty_output("Comment", dc.info['Comment'])
        pretty_output("Enabled", dc.info['Enabled'])
        pretty_output("DefaultRootObject", dc.info['DefaultRootObject'])
        pretty_output("Etag", response['headers']['etag'])

    @staticmethod
    def invalinfo(args):
        cf = CloudFront(Config())
        cfuris = Cmd._parse_args(args)
        requests = []
        for cfuri in cfuris:
            if cfuri.request_id():
                requests.append(str(cfuri))
            else:
                inval_list = cf.GetInvalList(cfuri)
                try:
                    for i in inval_list['inval_list'].info['InvalidationSummary']:
                        requests.append("/".join(["cf:/", cfuri.dist_id(), i["Id"]]))
                except:
                    continue
        for req in requests:
            cfuri = S3Uri(req)
            inval_info = cf.GetInvalInfo(cfuri)
            st = inval_info['inval_status'].info
            pretty_output("URI", str(cfuri))
            pretty_output("Status", st['Status'])
            pretty_output("Created", st['CreateTime'])
            pretty_output("Nr of paths", len(st['InvalidationBatch']['Path']))
            pretty_output("Reference", st['InvalidationBatch']['CallerReference'])
            output("")

# vim:et:ts=4:sts=4:ai
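
For reference (not part of the diff): send_request() retries any 5xx response with the linear back-off computed by _fail_wait(); with _max_retries = 5 the successive waits are (5 - retries + 1) * 3 = 3, 6, 9, 12 and 15 seconds, after which the CloudFrontError is raised.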
@@ -1,294 +0,0 @@
## Amazon S3 manager
## Author: Michal Ludvig <michal@logix.cz>
## http://www.logix.cz/michal
## License: GPL Version 2

import logging
from logging import debug, info, warning, error
import re
import os
import sys
import Progress
from SortedDict import SortedDict
import httplib
import json

class Config(object):
    _instance = None
    _parsed_files = []
    _doc = {}
    access_key = ""
    secret_key = ""
    access_token = ""
    host_base = "s3.amazonaws.com"
    host_bucket = "%(bucket)s.s3.amazonaws.com"
    simpledb_host = "sdb.amazonaws.com"
    cloudfront_host = "cloudfront.amazonaws.com"
    verbosity = logging.WARNING
    progress_meter = True
    progress_class = Progress.ProgressCR
    send_chunk = 4096
    recv_chunk = 4096
    list_md5 = False
    human_readable_sizes = False
    extra_headers = SortedDict(ignore_case = True)
    force = False
    enable = None
    get_continue = False
    skip_existing = False
    recursive = False
    acl_public = None
    acl_grants = []
    acl_revokes = []
    proxy_host = ""
    proxy_port = 3128
    encrypt = False
    dry_run = False
    add_encoding_exts = ""
    preserve_attrs = True
    preserve_attrs_list = [
        'uname',    # Verbose owner Name (e.g. 'root')
        'uid',      # Numeric user ID (e.g. 0)
        'gname',    # Group name (e.g. 'users')
        'gid',      # Numeric group ID (e.g. 100)
        'atime',    # Last access timestamp
        'mtime',    # Modification timestamp
        'ctime',    # Creation timestamp
        'mode',     # File mode (e.g. rwxr-xr-x = 755)
        'md5',      # File MD5 (if known)
        #'acl',     # Full ACL (not yet supported)
    ]
    delete_removed = False
    delete_after = False
    delete_after_fetch = False
    _doc['delete_removed'] = "[sync] Remove remote S3 objects when local file has been deleted"
    delay_updates = False
    gpg_passphrase = ""
    gpg_command = ""
    gpg_encrypt = "%(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
    gpg_decrypt = "%(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s"
    use_https = False
    bucket_location = "US"
    default_mime_type = "binary/octet-stream"
    guess_mime_type = True
    mime_type = ""
    enable_multipart = True
    multipart_chunk_size_mb = 15    # MB
    # List of checks to be performed for 'sync'
    sync_checks = ['size', 'md5']   # 'weak-timestamp'
    # List of compiled REGEXPs
    exclude = []
    include = []
    # Dict mapping compiled REGEXPs back to their textual form
    debug_exclude = {}
    debug_include = {}
    encoding = "utf-8"
    urlencoding_mode = "normal"
    log_target_prefix = ""
    reduced_redundancy = False
    follow_symlinks = False
    socket_timeout = 300
    invalidate_on_cf = False
    # joseprio: new flags for default index invalidation
    invalidate_default_index_on_cf = False
    invalidate_default_index_root_on_cf = True
    website_index = "index.html"
    website_error = ""
    website_endpoint = "http://%(bucket)s.s3-website-%(location)s.amazonaws.com/"
    additional_destinations = []
    cache_file = ""
    add_headers = ""

    ## Creating a singleton
    def __new__(self, configfile = None):
        if self._instance is None:
            self._instance = object.__new__(self)
        return self._instance

    def __init__(self, configfile = None):
        if configfile:
            try:
                self.read_config_file(configfile)
            except IOError, e:
                if 'AWS_CREDENTIAL_FILE' in os.environ:
                    self.env_config()
            if len(self.access_key)==0:
                self.role_config()

    def role_config(self):
        conn = httplib.HTTPConnection(host='169.254.169.254',timeout=0.1)
        try:
            conn.request('GET', "/latest/meta-data/iam/security-credentials/")
            resp = conn.getresponse()
            files = resp.read()
            if resp.status == 200 and len(files)>1:
                conn.request('GET', "/latest/meta-data/iam/security-credentials/%s"%files)
                resp=conn.getresponse()
                if resp.status == 200:
                    creds=json.load(resp)
                    Config().update_option('access_key', creds['AccessKeyId'].encode('ascii'))
                    Config().update_option('secret_key', creds['SecretAccessKey'].encode('ascii'))
                    Config().update_option('access_token', creds['Token'].encode('ascii'))
                else:
                    raise IOError
            else:
                raise IOError
        except:
            raise

    def role_refresh(self):
        try:
            self.role_config()
        except:
            warning("Could not refresh role")

    def env_config(self):
        cred_content = ""
        try:
            cred_file = open(os.environ['AWS_CREDENTIAL_FILE'],'r')
            cred_content = cred_file.read()
        except IOError, e:
            debug("Error %d accessing credentials file %s" % (e.errno,os.environ['AWS_CREDENTIAL_FILE']))
        r_data = re.compile("^\s*(?P<orig_key>\w+)\s*=\s*(?P<value>.*)")
        r_quotes = re.compile("^\"(.*)\"\s*$")
        if len(cred_content)>0:
            for line in cred_content.splitlines():
                is_data = r_data.match(line)
                is_data = r_data.match(line)
                if is_data:
                    data = is_data.groupdict()
                    if r_quotes.match(data["value"]):
                        data["value"] = data["value"][1:-1]
                    if data["orig_key"]=="AWSAccessKeyId":
                        data["key"] = "access_key"
                    elif data["orig_key"]=="AWSSecretKey":
                        data["key"] = "secret_key"
                    else:
                        del data["key"]
                    if "key" in data:
                        Config().update_option(data["key"], data["value"])
                        if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
                            print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
                        else:
                            print_value = data["value"]
                        debug("env_Config: %s->%s" % (data["key"], print_value))



    def option_list(self):
        retval = []
        for option in dir(self):
            ## Skip attributes that start with underscore or are not string, int or bool
            option_type = type(getattr(Config, option))
            if option.startswith("_") or \
               not (option_type in (
                    type("string"), # str
                    type(42),       # int
                    type(True))):   # bool
                continue
            retval.append(option)
        return retval

    def read_config_file(self, configfile):
        cp = ConfigParser(configfile)
        for option in self.option_list():
            self.update_option(option, cp.get(option))

        if cp.get('add_headers'):
            for option in cp.get('add_headers').split(","):
                (key, value) = option.split(':')
                self.extra_headers[key.replace('_', '-').strip()] = value.strip()

        self._parsed_files.append(configfile)

    def dump_config(self, stream):
        ConfigDumper(stream).dump("default", self)

    def update_option(self, option, value):
        if value is None:
            return
        #### Handle environment reference
        if str(value).startswith("$"):
            return self.update_option(option, os.getenv(str(value)[1:]))
        #### Special treatment of some options
        ## verbosity must be known to "logging" module
        if option == "verbosity":
            try:
                setattr(Config, "verbosity", logging._levelNames[value])
            except KeyError:
                error("Config: verbosity level '%s' is not valid" % value)
        ## allow yes/no, true/false, on/off and 1/0 for boolean options
        elif type(getattr(Config, option)) is type(True): # bool
            if str(value).lower() in ("true", "yes", "on", "1"):
                setattr(Config, option, True)
            elif str(value).lower() in ("false", "no", "off", "0"):
                setattr(Config, option, False)
            else:
                error("Config: value of option '%s' must be Yes or No, not '%s'" % (option, value))
        elif type(getattr(Config, option)) is type(42): # int
            try:
                setattr(Config, option, int(value))
            except ValueError, e:
                error("Config: value of option '%s' must be an integer, not '%s'" % (option, value))
        else: # string
            setattr(Config, option, value)

class ConfigParser(object):
    def __init__(self, file, sections = []):
        self.cfg = {}
        self.parse_file(file, sections)

    def parse_file(self, file, sections = []):
        debug("ConfigParser: Reading file '%s'" % file)
        if type(sections) != type([]):
            sections = [sections]
        in_our_section = True
        f = open(file, "r")
        r_comment = re.compile("^\s*#.*")
        r_empty = re.compile("^\s*$")
        r_section = re.compile("^\[([^\]]+)\]")
        r_data = re.compile("^\s*(?P<key>\w+)\s*=\s*(?P<value>.*)")
        r_quotes = re.compile("^\"(.*)\"\s*$")
        for line in f:
            if r_comment.match(line) or r_empty.match(line):
                continue
            is_section = r_section.match(line)
            if is_section:
                section = is_section.groups()[0]
                in_our_section = (section in sections) or (len(sections) == 0)
                continue
            is_data = r_data.match(line)
            if is_data and in_our_section:
                data = is_data.groupdict()
                if r_quotes.match(data["value"]):
                    data["value"] = data["value"][1:-1]
                self.__setitem__(data["key"], data["value"])
                if data["key"] in ("access_key", "secret_key", "gpg_passphrase"):
                    print_value = (data["value"][:2]+"...%d_chars..."+data["value"][-1:]) % (len(data["value"]) - 3)
                else:
                    print_value = data["value"]
                debug("ConfigParser: %s->%s" % (data["key"], print_value))
                continue
            warning("Ignoring invalid line in '%s': %s" % (file, line))

    def __getitem__(self, name):
        return self.cfg[name]

    def __setitem__(self, name, value):
        self.cfg[name] = value

    def get(self, name, default = None):
        if self.cfg.has_key(name):
            return self.cfg[name]
        return default

class ConfigDumper(object):
    def __init__(self, stream):
        self.stream = stream

    def dump(self, section, config):
        self.stream.write("[%s]\n" % section)
        for option in config.option_list():
            self.stream.write("%s = %s\n" % (option, getattr(config, option)))

# vim:et:ts=4:sts=4:ai
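
For reference (not part of the diff): Config is a singleton built in __new__, so every call returns the same instance, and update_option() coerces string values to the declared option types. A short sketch, assuming the module's imports (Progress, SortedDict) resolve:

c1 = Config()                        # no configfile -> no file/role lookup
c2 = Config()
print c1 is c2                       # -> True (singleton)
c1.update_option('recursive', 'yes')
print c2.recursive                   # -> True (yes/no coerced to bool)
c1.update_option('proxy_port', '8080')
print c2.proxy_port                  # -> 8080 (string coerced to int)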
@@ -1,71 +0,0 @@
import httplib
from urlparse import urlparse
from threading import Semaphore
from logging import debug, info, warning, error

from Config import Config
from Exceptions import ParameterError

__all__ = [ "ConnMan" ]

class http_connection(object):
    def __init__(self, id, hostname, ssl, cfg):
        self.hostname = hostname
        self.ssl = ssl
        self.id = id
        self.counter = 0
        if cfg.proxy_host != "":
            self.c = httplib.HTTPConnection(cfg.proxy_host, cfg.proxy_port)
        elif not ssl:
            self.c = httplib.HTTPConnection(hostname)
        else:
            self.c = httplib.HTTPSConnection(hostname)

class ConnMan(object):
    conn_pool_sem = Semaphore()
    conn_pool = {}
    conn_max_counter = 800    ## AWS closes connection after some ~90 requests

    @staticmethod
    def get(hostname, ssl = None):
        cfg = Config()
        if ssl == None:
            ssl = cfg.use_https
        conn = None
        if cfg.proxy_host != "":
            if ssl:
                raise ParameterError("use_ssl=True can't be used with proxy")
            conn_id = "proxy://%s:%s" % (cfg.proxy_host, cfg.proxy_port)
        else:
            conn_id = "http%s://%s" % (ssl and "s" or "", hostname)
        ConnMan.conn_pool_sem.acquire()
        if not ConnMan.conn_pool.has_key(conn_id):
            ConnMan.conn_pool[conn_id] = []
        if len(ConnMan.conn_pool[conn_id]):
            conn = ConnMan.conn_pool[conn_id].pop()
            debug("ConnMan.get(): re-using connection: %s#%d" % (conn.id, conn.counter))
        ConnMan.conn_pool_sem.release()
        if not conn:
            debug("ConnMan.get(): creating new connection: %s" % conn_id)
            conn = http_connection(conn_id, hostname, ssl, cfg)
            conn.c.connect()
        conn.counter += 1
        return conn

    @staticmethod
    def put(conn):
        if conn.id.startswith("proxy://"):
            conn.c.close()
            debug("ConnMan.put(): closing proxy connection (keep-alive not yet supported)")
            return

        if conn.counter >= ConnMan.conn_max_counter:
            conn.c.close()
            debug("ConnMan.put(): closing over-used connection")
            return

        ConnMan.conn_pool_sem.acquire()
        ConnMan.conn_pool[conn.id].append(conn)
        ConnMan.conn_pool_sem.release()
        debug("ConnMan.put(): connection put back to pool (%s#%d)" % (conn.id, conn.counter))
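
For reference (not part of the diff): the intended pooling pattern is get a connection, issue a request on its underlying httplib object, then hand it back; the hostname below is illustrative.

conn = ConnMan.get("s3.amazonaws.com")    # new or pooled connection
conn.c.request("GET", "/")                # plain httplib request on conn.c
resp = conn.c.getresponse()
resp.read()                               # drain the response before reuse
ConnMan.put(conn)                         # back into the pool for the next caller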
@ -1,88 +0,0 @@
|
||||
## Amazon S3 manager - Exceptions library |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
from Utils import getTreeFromXml, unicodise, deunicodise |
||||
from logging import debug, info, warning, error |
||||
|
||||
try: |
||||
import xml.etree.ElementTree as ET |
||||
except ImportError: |
||||
import elementtree.ElementTree as ET |
||||
|
||||
class S3Exception(Exception): |
||||
def __init__(self, message = ""): |
||||
self.message = unicodise(message) |
||||
|
||||
def __str__(self): |
||||
## Call unicode(self) instead of self.message because |
||||
## __unicode__() method could be overridden in subclasses! |
||||
return deunicodise(unicode(self)) |
||||
|
||||
def __unicode__(self): |
||||
return self.message |
||||
|
||||
## (Base)Exception.message has been deprecated in Python 2.6 |
||||
def _get_message(self): |
||||
return self._message |
||||
def _set_message(self, message): |
||||
self._message = message |
||||
message = property(_get_message, _set_message) |
||||
|
||||
|
||||
class S3Error (S3Exception): |
||||
def __init__(self, response): |
||||
self.status = response["status"] |
||||
self.reason = response["reason"] |
||||
self.info = { |
||||
"Code" : "", |
||||
"Message" : "", |
||||
"Resource" : "" |
||||
} |
||||
debug("S3Error: %s (%s)" % (self.status, self.reason)) |
||||
if response.has_key("headers"): |
||||
for header in response["headers"]: |
||||
debug("HttpHeader: %s: %s" % (header, response["headers"][header])) |
||||
if response.has_key("data") and response["data"]: |
||||
tree = getTreeFromXml(response["data"]) |
||||
error_node = tree |
||||
if not error_node.tag == "Error": |
||||
error_node = tree.find(".//Error") |
||||
for child in error_node.getchildren(): |
||||
if child.text != "": |
||||
debug("ErrorXML: " + child.tag + ": " + repr(child.text)) |
||||
self.info[child.tag] = child.text |
||||
self.code = self.info["Code"] |
||||
self.message = self.info["Message"] |
||||
self.resource = self.info["Resource"] |
||||
|
||||
def __unicode__(self): |
||||
retval = u"%d " % (self.status) |
||||
retval += (u"(%s)" % (self.info.has_key("Code") and self.info["Code"] or self.reason)) |
||||
if self.info.has_key("Message"): |
||||
retval += (u": %s" % self.info["Message"]) |
||||
return retval |
||||
|
||||
class CloudFrontError(S3Error): |
||||
pass |
||||
|
||||
class S3UploadError(S3Exception): |
||||
pass |
||||
|
||||
class S3DownloadError(S3Exception): |
||||
pass |
||||
|
||||
class S3RequestError(S3Exception): |
||||
pass |
||||
|
||||
class S3ResponseError(S3Exception): |
||||
pass |
||||
|
||||
class InvalidFileError(S3Exception): |
||||
pass |
||||
|
||||
class ParameterError(S3Exception): |
||||
pass |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,53 +0,0 @@
|
||||
## Amazon S3 manager |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
from SortedDict import SortedDict |
||||
import Utils |
||||
|
||||
class FileDict(SortedDict): |
||||
def __init__(self, mapping = {}, ignore_case = True, **kwargs): |
||||
SortedDict.__init__(self, mapping = mapping, ignore_case = ignore_case, **kwargs) |
||||
self.hardlinks = dict() # { dev: { inode : {'md5':, 'relative_files':}}} |
||||
self.by_md5 = dict() # {md5: set(relative_files)} |
||||
|
||||
def record_md5(self, relative_file, md5): |
||||
if md5 not in self.by_md5: |
||||
self.by_md5[md5] = set() |
||||
self.by_md5[md5].add(relative_file) |
||||
|
||||
def find_md5_one(self, md5): |
||||
try: |
||||
return list(self.by_md5.get(md5, set()))[0] |
||||
except: |
||||
return None |
||||
|
||||
def get_md5(self, relative_file): |
||||
"""returns md5 if it can, or raises IOError if file is unreadable""" |
||||
md5 = None |
||||
if 'md5' in self[relative_file]: |
||||
return self[relative_file]['md5'] |
||||
md5 = self.get_hardlink_md5(relative_file) |
||||
if md5 is None: |
||||
md5 = Utils.hash_file_md5(self[relative_file]['full_name']) |
||||
self.record_md5(relative_file, md5) |
||||
self[relative_file]['md5'] = md5 |
||||
return md5 |
||||
|
||||
def record_hardlink(self, relative_file, dev, inode, md5): |
||||
if dev not in self.hardlinks: |
||||
self.hardlinks[dev] = dict() |
||||
if inode not in self.hardlinks[dev]: |
||||
self.hardlinks[dev][inode] = dict(md5=md5, relative_files=set()) |
||||
self.hardlinks[dev][inode]['relative_files'].add(relative_file) |
||||
|
||||
def get_hardlink_md5(self, relative_file): |
||||
md5 = None |
||||
dev = self[relative_file]['dev'] |
||||
inode = self[relative_file]['inode'] |
||||
try: |
||||
md5 = self.hardlinks[dev][inode]['md5'] |
||||
except: |
||||
pass |
||||
return md5 |
@ -1,517 +0,0 @@
|
||||
## Create and compare lists of files/objects |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
from S3 import S3 |
||||
from Config import Config |
||||
from S3Uri import S3Uri |
||||
from FileDict import FileDict |
||||
from Utils import * |
||||
from Exceptions import ParameterError |
||||
from HashCache import HashCache |
||||
|
||||
from logging import debug, info, warning, error |
||||
|
||||
import os |
||||
import glob |
||||
import copy |
||||
|
||||
__all__ = ["fetch_local_list", "fetch_remote_list", "compare_filelists", "filter_exclude_include", "parse_attrs_header"] |
||||
|
||||
def _fswalk_follow_symlinks(path): |
||||
''' |
||||
Walk filesystem, following symbolic links (but without recursion), on python2.4 and later |
||||
|
||||
If a symlink directory loop is detected, emit a warning and skip. |
||||
E.g.: dir1/dir2/sym-dir -> ../dir2 |
||||
''' |
||||
assert os.path.isdir(path) # only designed for directory argument |
||||
walkdirs = set([path]) |
||||
for dirpath, dirnames, filenames in os.walk(path): |
||||
handle_exclude_include_walk(dirpath, dirnames, []) |
||||
real_dirpath = os.path.realpath(dirpath) |
||||
for dirname in dirnames: |
||||
current = os.path.join(dirpath, dirname) |
||||
real_current = os.path.realpath(current) |
||||
if os.path.islink(current): |
||||
if (real_dirpath == real_current or |
||||
real_dirpath.startswith(real_current + os.path.sep)): |
||||
warning("Skipping recursively symlinked directory %s" % dirname) |
||||
else: |
||||
walkdirs.add(current) |
||||
for walkdir in walkdirs: |
||||
for dirpath, dirnames, filenames in os.walk(walkdir): |
||||
handle_exclude_include_walk(dirpath, dirnames, []) |
||||
yield (dirpath, dirnames, filenames) |
||||
|
||||
def _fswalk_no_symlinks(path): |
||||
''' |
||||
Directory tree generator |
||||
|
||||
path (str) is the root of the directory tree to walk |
||||
''' |
||||
for dirpath, dirnames, filenames in os.walk(path): |
||||
handle_exclude_include_walk(dirpath, dirnames, filenames) |
||||
yield (dirpath, dirnames, filenames) |
||||
|
||||
def filter_exclude_include(src_list): |
||||
info(u"Applying --exclude/--include") |
||||
cfg = Config() |
||||
exclude_list = FileDict(ignore_case = False) |
||||
for file in src_list.keys(): |
||||
debug(u"CHECK: %s" % file) |
||||
excluded = False |
||||
for r in cfg.exclude: |
||||
if r.search(file): |
||||
excluded = True |
||||
debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) |
||||
break |
||||
if excluded: |
||||
## No need to check for --include if not excluded |
||||
for r in cfg.include: |
||||
if r.search(file): |
||||
excluded = False |
||||
debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) |
||||
break |
||||
if excluded: |
||||
## Still excluded - ok, action it |
||||
debug(u"EXCLUDE: %s" % file) |
||||
exclude_list[file] = src_list[file] |
||||
del(src_list[file]) |
||||
continue |
||||
else: |
||||
debug(u"PASS: %r" % (file)) |
||||
return src_list, exclude_list |
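## Worked example of the exclude/include precedence above (an editor's |
## illustration, not part of the original module): with --exclude '*.log' |
## --include 'keep/*.log' (glob patterns that end up compiled into the |
## cfg.exclude / cfg.include lists), "debug.log" matches the exclude pattern |
## and is not re-included, while "keep/run.log" matches the exclude pattern |
## first and the include pattern afterwards, so only "debug.log" lands in |
## exclude_list. |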
||||
|
||||
def handle_exclude_include_walk(root, dirs, files): |
||||
cfg = Config() |
||||
copydirs = copy.copy(dirs) |
||||
copyfiles = copy.copy(files) |
||||
|
||||
# exclude dir matches in the current directory |
||||
# this prevents us from recursing down trees we know we want to ignore |
||||
for x in copydirs: |
||||
d = os.path.join(root, x, '') |
||||
debug(u"CHECK: %r" % d) |
||||
excluded = False |
||||
for r in cfg.exclude: |
||||
if r.search(d): |
||||
excluded = True |
||||
debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) |
||||
break |
||||
if excluded: |
||||
## No need to check for --include if not excluded |
||||
for r in cfg.include: |
||||
if r.search(d): |
||||
excluded = False |
||||
debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) |
||||
break |
||||
if excluded: |
||||
## Still excluded - ok, action it |
||||
debug(u"EXCLUDE: %r" % d) |
||||
dirs.remove(x) |
||||
continue |
||||
else: |
||||
debug(u"PASS: %r" % (d)) |
||||
|
||||
# exclude file matches in the current directory |
||||
for x in copyfiles: |
||||
file = os.path.join(root, x) |
||||
debug(u"CHECK: %r" % file) |
||||
excluded = False |
||||
for r in cfg.exclude: |
||||
if r.search(file): |
||||
excluded = True |
||||
debug(u"EXCL-MATCH: '%s'" % (cfg.debug_exclude[r])) |
||||
break |
||||
if excluded: |
||||
## No need to check for --include if not excluded |
||||
for r in cfg.include: |
||||
if r.search(file): |
||||
excluded = False |
||||
debug(u"INCL-MATCH: '%s'" % (cfg.debug_include[r])) |
||||
break |
||||
if excluded: |
||||
## Still excluded - ok, action it |
||||
debug(u"EXCLUDE: %s" % file) |
||||
files.remove(x) |
||||
continue |
||||
else: |
||||
debug(u"PASS: %r" % (file)) |
||||
|
||||
def fetch_local_list(args, recursive = None): |
||||
def _get_filelist_local(loc_list, local_uri, cache): |
||||
info(u"Compiling list of local files...") |
||||
|
||||
if deunicodise(local_uri.basename()) == "-": |
||||
loc_list["-"] = { |
||||
'full_name_unicode' : '-', |
||||
'full_name' : '-', |
||||
'size' : -1, |
||||
'mtime' : -1, |
||||
} |
||||
return loc_list, True |
||||
if local_uri.isdir(): |
||||
local_base = deunicodise(local_uri.basename()) |
||||
local_path = deunicodise(local_uri.path()) |
||||
if cfg.follow_symlinks: |
||||
filelist = _fswalk_follow_symlinks(local_path) |
||||
else: |
||||
filelist = _fswalk_no_symlinks(local_path) |
||||
single_file = False |
||||
else: |
||||
local_base = "" |
||||
local_path = deunicodise(local_uri.dirname()) |
||||
filelist = [( local_path, [], [deunicodise(local_uri.basename())] )] |
||||
single_file = True |
||||
for root, dirs, files in filelist: |
||||
rel_root = root.replace(local_path, local_base, 1) |
||||
for f in files: |
||||
full_name = os.path.join(root, f) |
||||
if not os.path.isfile(full_name): |
||||
continue |
||||
if os.path.islink(full_name): |
||||
if not cfg.follow_symlinks: |
||||
continue |
||||
relative_file = unicodise(os.path.join(rel_root, f)) |
||||
if os.path.sep != "/": |
||||
# Convert non-unix dir separators to '/' |
||||
relative_file = "/".join(relative_file.split(os.path.sep)) |
||||
if cfg.urlencoding_mode == "normal": |
||||
relative_file = replace_nonprintables(relative_file) |
||||
if relative_file.startswith('./'): |
||||
relative_file = relative_file[2:] |
||||
sr = os.stat_result(os.lstat(full_name)) |
||||
loc_list[relative_file] = { |
||||
'full_name_unicode' : unicodise(full_name), |
||||
'full_name' : full_name, |
||||
'size' : sr.st_size, |
||||
'mtime' : sr.st_mtime, |
||||
'dev' : sr.st_dev, |
||||
'inode' : sr.st_ino, |
||||
'uid' : sr.st_uid, |
||||
'gid' : sr.st_gid, |
||||
'sr': sr # save it all, may need it in preserve_attrs_list |
||||
## TODO: Possibly more to save here... |
||||
} |
||||
if 'md5' in cfg.sync_checks: |
||||
md5 = cache.md5(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size) |
||||
if md5 is None: |
||||
try: |
||||
md5 = loc_list.get_md5(relative_file) # this does the file I/O |
||||
except IOError: |
||||
continue |
||||
cache.add(sr.st_dev, sr.st_ino, sr.st_mtime, sr.st_size, md5) |
||||
loc_list.record_hardlink(relative_file, sr.st_dev, sr.st_ino, md5) |
||||
return loc_list, single_file |
||||
|
||||
def _maintain_cache(cache, local_list): |
||||
if cfg.cache_file: |
||||
cache.mark_all_for_purge() |
||||
for i in local_list.keys(): |
||||
cache.unmark_for_purge(local_list[i]['dev'], local_list[i]['inode'], local_list[i]['mtime'], local_list[i]['size']) |
||||
cache.purge() |
||||
cache.save(cfg.cache_file) |
||||
|
||||
cfg = Config() |
||||
|
||||
cache = HashCache() |
||||
if cfg.cache_file: |
||||
try: |
||||
cache.load(cfg.cache_file) |
||||
except IOError: |
||||
info(u"No cache file found, creating it.") |
||||
|
||||
local_uris = [] |
||||
local_list = FileDict(ignore_case = False) |
||||
single_file = False |
||||
|
||||
if type(args) not in (list, tuple): |
||||
args = [args] |
||||
|
||||
if recursive == None: |
||||
recursive = cfg.recursive |
||||
|
||||
for arg in args: |
||||
uri = S3Uri(arg) |
||||
if not uri.type == 'file': |
||||
raise ParameterError("Expecting filename or directory instead of: %s" % arg) |
||||
if uri.isdir() and not recursive: |
||||
raise ParameterError("Use --recursive to upload a directory: %s" % arg) |
||||
local_uris.append(uri) |
||||
|
||||
for uri in local_uris: |
||||
list_for_uri, single_file = _get_filelist_local(local_list, uri, cache) |
||||
|
||||
## single_file is True if and only if the user specified exactly one |
## local URI and that URI refers to a FILE. It is False if the URI |
## was a DIR, even if that directory contained only one FILE - that |
## is not a case of single_file == True. |
||||
if len(local_list) > 1: |
||||
single_file = False |
||||
|
||||
_maintain_cache(cache, local_list) |
||||
|
||||
return local_list, single_file |
||||
|
||||
def fetch_remote_list(args, require_attribs = False, recursive = None): |
||||
def _get_filelist_remote(remote_uri, recursive = True): |
||||
## If remote_uri ends with '/' then all remote files will have |
||||
## the remote_uri prefix removed in the relative path. |
||||
## If, on the other hand, the remote_uri ends with something else |
||||
## (probably alphanumeric symbol) we'll use the last path part |
||||
## in the relative path. |
||||
## |
||||
## Complicated, eh? See an example: |
||||
## _get_filelist_remote("s3://bckt/abc/def") may yield: |
||||
## { 'def/file1.jpg' : {}, 'def/xyz/blah.txt' : {} } |
||||
## _get_filelist_remote("s3://bckt/abc/def/") will yield: |
||||
## { 'file1.jpg' : {}, 'xyz/blah.txt' : {} } |
||||
## Furthermore a prefix-magic can restrict the return list: |
||||
## _get_filelist_remote("s3://bckt/abc/def/x") yields: |
||||
## { 'xyz/blah.txt' : {} } |
||||
|
||||
info(u"Retrieving list of remote files for %s ..." % remote_uri) |
||||
|
||||
s3 = S3(Config()) |
||||
response = s3.bucket_list(remote_uri.bucket(), prefix = remote_uri.object(), recursive = recursive) |
||||
|
||||
rem_base_original = rem_base = remote_uri.object() |
||||
remote_uri_original = remote_uri |
||||
if rem_base != '' and rem_base[-1] != '/': |
||||
rem_base = rem_base[:rem_base.rfind('/')+1] |
||||
remote_uri = S3Uri("s3://%s/%s" % (remote_uri.bucket(), rem_base)) |
||||
rem_base_len = len(rem_base) |
||||
rem_list = FileDict(ignore_case = False) |
||||
break_now = False |
||||
for object in response['list']: |
||||
if object['Key'] == rem_base_original and object['Key'][-1] != os.path.sep: |
||||
## We asked for one file and we got that file :-) |
||||
key = os.path.basename(object['Key']) |
||||
object_uri_str = remote_uri_original.uri() |
||||
break_now = True |
||||
rem_list = FileDict(ignore_case = False) ## Remove whatever has already been put to rem_list |
||||
else: |
||||
key = object['Key'][rem_base_len:] ## Beware - this may be '' if object['Key']==rem_base !! |
||||
object_uri_str = remote_uri.uri() + key |
||||
rem_list[key] = { |
||||
'size' : int(object['Size']), |
||||
'timestamp' : dateS3toUnix(object['LastModified']), ## Sadly it's upload time, not our lastmod time :-( |
||||
'md5' : object['ETag'][1:-1], |
||||
'object_key' : object['Key'], |
||||
'object_uri_str' : object_uri_str, |
||||
'base_uri' : remote_uri, |
||||
'dev' : None, |
||||
'inode' : None, |
||||
} |
||||
md5 = object['ETag'][1:-1] |
||||
rem_list.record_md5(key, md5) |
||||
if break_now: |
||||
break |
||||
return rem_list |
||||
|
||||
cfg = Config() |
||||
remote_uris = [] |
||||
remote_list = FileDict(ignore_case = False) |
||||
|
||||
if type(args) not in (list, tuple): |
||||
args = [args] |
||||
|
||||
if recursive == None: |
||||
recursive = cfg.recursive |
||||
|
||||
for arg in args: |
||||
uri = S3Uri(arg) |
||||
if not uri.type == 's3': |
||||
raise ParameterError("Expecting S3 URI instead of '%s'" % arg) |
||||
remote_uris.append(uri) |
||||
|
||||
if recursive: |
||||
for uri in remote_uris: |
||||
objectlist = _get_filelist_remote(uri) |
||||
for key in objectlist: |
||||
remote_list[key] = objectlist[key] |
||||
remote_list.record_md5(key, objectlist.get_md5(key)) |
||||
else: |
||||
for uri in remote_uris: |
||||
uri_str = str(uri) |
||||
## Wildcards used in remote URI? |
||||
## If yes we'll need a bucket listing... |
||||
if uri_str.find('*') > -1 or uri_str.find('?') > -1: |
||||
first_wildcard = uri_str.find('*') |
||||
first_questionmark = uri_str.find('?') |
||||
if first_questionmark > -1 and first_questionmark < first_wildcard: |
||||
first_wildcard = first_questionmark |
||||
prefix = uri_str[:first_wildcard] |
||||
rest = uri_str[first_wildcard+1:] |
||||
## Only request recursive listing if the 'rest' of the URI, |
||||
## i.e. the part after first wildcard, contains '/' |
||||
need_recursion = rest.find('/') > -1 |
||||
objectlist = _get_filelist_remote(S3Uri(prefix), recursive = need_recursion) |
||||
for key in objectlist: |
||||
## Check whether the 'key' matches the requested wildcards |
||||
if glob.fnmatch.fnmatch(objectlist[key]['object_uri_str'], uri_str): |
||||
remote_list[key] = objectlist[key] |
||||
else: |
||||
## No wildcards - simply append the given URI to the list |
||||
key = os.path.basename(uri.object()) |
||||
if not key: |
||||
raise ParameterError(u"Expecting S3 URI with a filename or --recursive: %s" % uri.uri()) |
||||
remote_item = { |
||||
'base_uri': uri, |
||||
'object_uri_str': unicode(uri), |
||||
'object_key': uri.object() |
||||
} |
||||
if require_attribs: |
||||
response = S3(cfg).object_info(uri) |
||||
remote_item.update({ |
||||
'size': int(response['headers']['content-length']), |
||||
'md5': response['headers']['etag'].strip('"\''), |
||||
'timestamp' : dateRFC822toUnix(response['headers']['date']) |
||||
}) |
||||
# get md5 from header if it's present. We would have set that during upload |
||||
if response['headers'].has_key('x-amz-meta-s3cmd-attrs'): |
||||
attrs = parse_attrs_header(response['headers']['x-amz-meta-s3cmd-attrs']) |
||||
if attrs.has_key('md5'): |
||||
remote_item.update({'md5': attrs['md5']}) |
||||
|
||||
remote_list[key] = remote_item |
||||
return remote_list |
||||
|
||||
def parse_attrs_header(attrs_header): |
||||
attrs = {} |
||||
for attr in attrs_header.split("/"): |
||||
key, val = attr.split(":") |
||||
attrs[key] = val |
||||
return attrs |
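## Illustrative round-trip for the helper above (an editor's sketch, not part |
## of the original module; the attribute values are hypothetical, but the |
## '/'-separated "key:value" layout matches the x-amz-meta-s3cmd-attrs header |
## this parses): |
## |
##   parse_attrs_header("uid:1000/gid:1000/mode:33188/md5:d41d8cd98f00b204e9800998ecf8427e") |
##   -> {'uid': '1000', 'gid': '1000', 'mode': '33188', 'md5': 'd41d8cd98f00b204e9800998ecf8427e'} |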
||||
|
||||
|
||||
def compare_filelists(src_list, dst_list, src_remote, dst_remote, delay_updates = False): |
||||
def __direction_str(is_remote): |
||||
return is_remote and "remote" or "local" |
||||
|
||||
def _compare(src_list, dst_lst, src_remote, dst_remote, file): |
||||
"""Return True if src_list[file] matches dst_list[file], else False""" |
||||
attribs_match = True |
||||
if not (src_list.has_key(file) and dst_list.has_key(file)): |
||||
info(u"%s: does not exist in one side or the other: src_list=%s, dst_list=%s" % (file, src_list.has_key(file), dst_list.has_key(file))) |
||||
return False |
||||
|
||||
## check size first |
||||
if 'size' in cfg.sync_checks and dst_list[file]['size'] != src_list[file]['size']: |
||||
debug(u"xfer: %s (size mismatch: src=%s dst=%s)" % (file, src_list[file]['size'], dst_list[file]['size'])) |
||||
attribs_match = False |
||||
|
||||
## check md5 |
||||
compare_md5 = 'md5' in cfg.sync_checks |
||||
# Multipart-uploaded files don't have a valid md5 sum - it ends with "...-nn" |
||||
if compare_md5: |
||||
if (src_remote == True and src_list[file]['md5'].find("-") >= 0) or (dst_remote == True and dst_list[file]['md5'].find("-") >= 0): |
||||
compare_md5 = False |
||||
info(u"disabled md5 check for %s" % file) |
||||
if attribs_match and compare_md5: |
||||
try: |
||||
src_md5 = src_list.get_md5(file) |
||||
dst_md5 = dst_list.get_md5(file) |
||||
except (IOError,OSError), e: |
||||
# md5 sum verification failed - ignore that file altogether |
||||
debug(u"IGNR: %s (disappeared)" % (file)) |
||||
warning(u"%s: file disappeared, ignoring." % (file)) |
||||
raise |
||||
|
||||
if src_md5 != dst_md5: |
||||
## checksums are different. |
||||
attribs_match = False |
||||
debug(u"XFER: %s (md5 mismatch: src=%s dst=%s)" % (file, src_md5, dst_md5)) |
||||
|
||||
return attribs_match |
||||
|
||||
# we don't support local->local sync, use 'rsync' or something like that instead ;-) |
||||
assert(not(src_remote == False and dst_remote == False)) |
||||
|
||||
info(u"Verifying attributes...") |
||||
cfg = Config() |
||||
## Items left on src_list will be transferred |
||||
## Items left on update_list will be transferred after src_list |
||||
## Items left on copy_pairs will be copied from dst1 to dst2 |
||||
update_list = FileDict(ignore_case = False) |
||||
## Items left on dst_list will be deleted |
||||
copy_pairs = [] |
||||
|
||||
debug("Comparing filelists (direction: %s -> %s)" % (__direction_str(src_remote), __direction_str(dst_remote))) |
||||
|
||||
for relative_file in src_list.keys(): |
||||
debug(u"CHECK: %s" % (relative_file)) |
||||
|
||||
if dst_list.has_key(relative_file): |
||||
## Was --skip-existing requested? |
||||
if cfg.skip_existing: |
||||
debug(u"IGNR: %s (used --skip-existing)" % (relative_file)) |
||||
del(src_list[relative_file]) |
||||
del(dst_list[relative_file]) |
||||
continue |
||||
|
||||
try: |
||||
same_file = _compare(src_list, dst_list, src_remote, dst_remote, relative_file) |
||||
except (IOError,OSError), e: |
||||
debug(u"IGNR: %s (disappeared)" % (relative_file)) |
||||
warning(u"%s: file disappeared, ignoring." % (relative_file)) |
||||
del(src_list[relative_file]) |
||||
del(dst_list[relative_file]) |
||||
continue |
||||
|
||||
if same_file: |
||||
debug(u"IGNR: %s (transfer not needed)" % relative_file) |
||||
del(src_list[relative_file]) |
||||
del(dst_list[relative_file]) |
||||
|
||||
else: |
||||
# look for matching file in src |
||||
try: |
||||
md5 = src_list.get_md5(relative_file) |
||||
except IOError: |
||||
md5 = None |
||||
if md5 is not None and dst_list.by_md5.has_key(md5): |
||||
# Found one, we want to copy |
||||
dst1 = list(dst_list.by_md5[md5])[0] |
||||
debug(u"DST COPY src: %s -> %s" % (dst1, relative_file)) |
||||
copy_pairs.append((src_list[relative_file], dst1, relative_file)) |
||||
del(src_list[relative_file]) |
||||
del(dst_list[relative_file]) |
||||
else: |
||||
# record that we will get this file transferred to us (before all the copies), so if we come across it later again, |
||||
# we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter). |
||||
dst_list.record_md5(relative_file, md5) |
||||
update_list[relative_file] = src_list[relative_file] |
||||
del src_list[relative_file] |
||||
del dst_list[relative_file] |
||||
|
||||
else: |
||||
# dst doesn't have this file |
||||
# look for matching file elsewhere in dst |
||||
try: |
||||
md5 = src_list.get_md5(relative_file) |
||||
except IOError: |
||||
md5 = None |
||||
dst1 = dst_list.find_md5_one(md5) |
||||
if dst1 is not None: |
||||
# Found one, we want to copy |
||||
debug(u"DST COPY dst: %s -> %s" % (dst1, relative_file)) |
||||
copy_pairs.append((src_list[relative_file], dst1, relative_file)) |
||||
del(src_list[relative_file]) |
||||
else: |
||||
# we don't have this file, and we don't have a copy of this file elsewhere. Get it. |
||||
# record that we will get this file transferred to us (before all the copies), so if we come across it later again, |
||||
# we can copy from _this_ copy (e.g. we only upload it once, and copy thereafter). |
||||
dst_list.record_md5(relative_file, md5) |
||||
|
||||
for f in dst_list.keys(): |
||||
if src_list.has_key(f) or update_list.has_key(f): |
||||
# leave only those not on src_list + update_list |
||||
del dst_list[f] |
||||
|
||||
return src_list, dst_list, update_list, copy_pairs |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,53 +0,0 @@
|
||||
import cPickle as pickle |
||||
|
||||
class HashCache(object): |
||||
def __init__(self): |
||||
self.inodes = dict() |
||||
|
||||
def add(self, dev, inode, mtime, size, md5): |
||||
if dev not in self.inodes: |
||||
self.inodes[dev] = dict() |
||||
if inode not in self.inodes[dev]: |
||||
self.inodes[dev][inode] = dict() |
||||
self.inodes[dev][inode][mtime] = dict(md5=md5, size=size) |
||||
|
||||
def md5(self, dev, inode, mtime, size): |
||||
try: |
||||
d = self.inodes[dev][inode][mtime] |
||||
if d['size'] != size: |
||||
return None |
||||
except: |
||||
return None |
||||
return d['md5'] |
||||
|
||||
def mark_all_for_purge(self): |
||||
for d in self.inodes.keys(): |
||||
for i in self.inodes[d].keys(): |
||||
for c in self.inodes[d][i].keys(): |
||||
self.inodes[d][i][c]['purge'] = True |
||||
|
||||
def unmark_for_purge(self, dev, inode, mtime, size): |
||||
d = self.inodes[dev][inode][mtime] |
||||
if d['size'] == size and 'purge' in d: |
||||
del self.inodes[dev][inode][mtime]['purge'] |
||||
|
||||
def purge(self): |
||||
for d in self.inodes.keys(): |
||||
for i in self.inodes[d].keys(): |
||||
for m in self.inodes[d][i].keys(): |
||||
if 'purge' in self.inodes[d][i][m]: |
||||
del self.inodes[d][i] |
||||
break |
||||
|
||||
def save(self, f): |
||||
d = dict(inodes=self.inodes, version=1) |
||||
f = open(f, 'w') |
||||
p = pickle.dump(d, f) |
||||
f.close() |
||||
|
||||
def load(self, f): |
||||
f = open(f, 'r') |
||||
d = pickle.load(f) |
||||
f.close() |
||||
if d.get('version') == 1 and 'inodes' in d: |
||||
self.inodes = d['inodes'] |
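A minimal usage sketch of the cache above (an editor's illustration, not part of the original module; the device, inode, mtime and size values are made up):

    from HashCache import HashCache

    cache = HashCache()
    cache.add(dev=2049, inode=131073, mtime=1370000000, size=1024,
              md5="d41d8cd98f00b204e9800998ecf8427e")
    cache.md5(2049, 131073, 1370000000, 1024)   # -> the cached md5 string
    cache.md5(2049, 131073, 1370000000, 4096)   # -> None, the recorded size no longer matches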
@ -1,137 +0,0 @@
|
||||
## Amazon S3 Multipart upload support |
||||
## Author: Jerome Leclanche <jerome.leclanche@gmail.com> |
||||
## License: GPL Version 2 |
||||
|
||||
import os |
||||
from stat import ST_SIZE |
||||
from logging import debug, info, warning, error |
||||
from Utils import getTextFromXml, formatSize, unicodise |
||||
from Exceptions import S3UploadError |
||||
|
||||
class MultiPartUpload(object): |
||||
|
||||
MIN_CHUNK_SIZE_MB = 5 # 5MB |
||||
MAX_CHUNK_SIZE_MB = 5120 # 5GB |
||||
MAX_FILE_SIZE = 42949672960 # 5TB |
||||
|
||||
def __init__(self, s3, file, uri, headers_baseline = {}): |
||||
self.s3 = s3 |
||||
self.file = file |
||||
self.uri = uri |
||||
self.parts = {} |
||||
self.headers_baseline = headers_baseline |
||||
self.upload_id = self.initiate_multipart_upload() |
||||
|
||||
def initiate_multipart_upload(self): |
||||
""" |
||||
Begin a multipart upload |
||||
http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadInitiate.html |
||||
""" |
||||
request = self.s3.create_request("OBJECT_POST", uri = self.uri, headers = self.headers_baseline, extra = "?uploads") |
||||
response = self.s3.send_request(request) |
||||
data = response["data"] |
||||
self.upload_id = getTextFromXml(data, "UploadId") |
||||
return self.upload_id |
||||
|
||||
def upload_all_parts(self): |
||||
""" |
||||
Execute a full multipart upload on a file |
||||
Returns the seq/etag dict |
||||
TODO use num_processes to thread it |
||||
""" |
||||
if not self.upload_id: |
||||
raise RuntimeError("Attempting to use a multipart upload that has not been initiated.") |
||||
|
||||
self.chunk_size = self.s3.config.multipart_chunk_size_mb * 1024 * 1024 |
||||
|
||||
if self.file.name != "<stdin>": |
||||
size_left = file_size = os.stat(self.file.name)[ST_SIZE] |
||||
nr_parts = file_size / self.chunk_size + (file_size % self.chunk_size and 1) |
||||
debug("MultiPart: Uploading %s in %d parts" % (self.file.name, nr_parts)) |
||||
else: |
||||
debug("MultiPart: Uploading from %s" % (self.file.name)) |
||||
|
||||
seq = 1 |
||||
if self.file.name != "<stdin>": |
||||
while size_left > 0: |
||||
offset = self.chunk_size * (seq - 1) |
||||
current_chunk_size = min(file_size - offset, self.chunk_size) |
||||
size_left -= current_chunk_size |
||||
labels = { |
||||
'source' : unicodise(self.file.name), |
||||
'destination' : unicodise(self.uri.uri()), |
||||
'extra' : "[part %d of %d, %s]" % (seq, nr_parts, "%d%sB" % formatSize(current_chunk_size, human_readable = True)) |
||||
} |
||||
try: |
||||
self.upload_part(seq, offset, current_chunk_size, labels) |
||||
except: |
||||
error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq)) |
||||
self.abort_upload() |
||||
raise |
||||
seq += 1 |
||||
else: |
||||
while True: |
||||
buffer = self.file.read(self.chunk_size) |
||||
offset = self.chunk_size * (seq - 1) |
||||
current_chunk_size = len(buffer) |
||||
labels = { |
||||
'source' : unicodise(self.file.name), |
||||
'destination' : unicodise(self.uri.uri()), |
||||
'extra' : "[part %d, %s]" % (seq, "%d%sB" % formatSize(current_chunk_size, human_readable = True)) |
||||
} |
||||
if len(buffer) == 0: # EOF |
||||
break |
||||
try: |
||||
self.upload_part(seq, offset, current_chunk_size, labels, buffer) |
||||
except: |
||||
error(u"Upload of '%s' part %d failed. Aborting multipart upload." % (self.file.name, seq)) |
||||
self.abort_upload() |
||||
raise |
||||
seq += 1 |
||||
|
||||
debug("MultiPart: Upload finished: %d parts", seq - 1) |
||||
|
||||
def upload_part(self, seq, offset, chunk_size, labels, buffer = ''): |
||||
""" |
||||
Upload a file chunk |
||||
http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadUploadPart.html |
||||
""" |
||||
# TODO implement Content-MD5 |
||||
debug("Uploading part %i of %r (%s bytes)" % (seq, self.upload_id, chunk_size)) |
||||
headers = { "content-length": chunk_size } |
||||
query_string = "?partNumber=%i&uploadId=%s" % (seq, self.upload_id) |
||||
request = self.s3.create_request("OBJECT_PUT", uri = self.uri, headers = headers, extra = query_string) |
||||
response = self.s3.send_file(request, self.file, labels, buffer, offset = offset, chunk_size = chunk_size) |
||||
self.parts[seq] = response["headers"]["etag"] |
||||
return response |
||||
|
||||
def complete_multipart_upload(self): |
||||
""" |
||||
Finish a multipart upload |
||||
http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadComplete.html |
||||
""" |
||||
debug("MultiPart: Completing upload: %s" % self.upload_id) |
||||
|
||||
parts_xml = [] |
||||
part_xml = "<Part><PartNumber>%i</PartNumber><ETag>%s</ETag></Part>" |
||||
for seq, etag in self.parts.items(): |
||||
parts_xml.append(part_xml % (seq, etag)) |
||||
body = "<CompleteMultipartUpload>%s</CompleteMultipartUpload>" % ("".join(parts_xml)) |
||||
|
||||
headers = { "content-length": len(body) } |
||||
request = self.s3.create_request("OBJECT_POST", uri = self.uri, headers = headers, extra = "?uploadId=%s" % (self.upload_id)) |
||||
response = self.s3.send_request(request, body = body) |
||||
|
||||
return response |
||||
|
||||
def abort_upload(self): |
||||
""" |
||||
Abort multipart upload |
||||
http://docs.amazonwebservices.com/AmazonS3/latest/API/index.html?mpUploadAbort.html |
||||
""" |
||||
debug("MultiPart: Aborting upload: %s" % self.upload_id) |
||||
request = self.s3.create_request("OBJECT_DELETE", uri = self.uri, extra = "?uploadId=%s" % (self.upload_id)) |
||||
response = self.s3.send_request(request) |
||||
return response |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
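The intended call sequence for the class above, as an editor's sketch (not part of the original module; `s3`, `src_file` and `dst_uri` stand in for a configured S3 instance, an open file object and an S3Uri):

    upload = MultiPartUpload(s3, src_file, dst_uri)   # __init__ POSTs ?uploads and stores the UploadId
    upload.upload_all_parts()                         # PUTs each ?partNumber=N&uploadId=... chunk
    upload.complete_multipart_upload()                # POSTs the <CompleteMultipartUpload> body

If any part fails, upload_all_parts() aborts the whole upload via abort_upload() and re-raises the error.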
@ -1,14 +0,0 @@
|
||||
package = "s3cmd" |
||||
version = "1.5.0-alpha3" |
||||
url = "http://s3tools.org" |
||||
license = "GPL version 2" |
||||
short_description = "Command line tool for managing Amazon S3 and CloudFront services" |
||||
long_description = """ |
||||
S3cmd lets you copy files from/to Amazon S3 |
||||
(Simple Storage Service) using a simple to use |
||||
command line client. Supports rsync-like backup, |
||||
GPG encryption, and more. Also supports management |
||||
of Amazon's CloudFront content delivery network. |
||||
""" |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,173 +0,0 @@
|
||||
## Amazon S3 manager |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
import sys |
||||
import datetime |
||||
import time |
||||
import Utils |
||||
|
||||
class Progress(object): |
||||
_stdout = sys.stdout |
||||
_last_display = 0 |
||||
|
||||
def __init__(self, labels, total_size): |
||||
self._stdout = sys.stdout |
||||
self.new_file(labels, total_size) |
||||
|
||||
def new_file(self, labels, total_size): |
||||
self.labels = labels |
||||
self.total_size = total_size |
||||
# Set initial_position to something in the |
||||
# case we're not counting from 0. For instance |
||||
# when appending to a partially downloaded file. |
||||
# Setting initial_position will let the speed |
||||
# be computed right. |
||||
self.initial_position = 0 |
||||
self.current_position = self.initial_position |
||||
self.time_start = datetime.datetime.now() |
||||
self.time_last = self.time_start |
||||
self.time_current = self.time_start |
||||
|
||||
self.display(new_file = True) |
||||
|
||||
def update(self, current_position = -1, delta_position = -1): |
||||
self.time_last = self.time_current |
||||
self.time_current = datetime.datetime.now() |
||||
if current_position > -1: |
||||
self.current_position = current_position |
||||
elif delta_position > -1: |
||||
self.current_position += delta_position |
||||
#else: |
||||
# no update, just call display() |
||||
self.display() |
||||
|
||||
def done(self, message): |
||||
self.display(done_message = message) |
||||
|
||||
def output_labels(self): |
||||
self._stdout.write(u"%(source)s -> %(destination)s %(extra)s\n" % self.labels) |
||||
self._stdout.flush() |
||||
|
||||
def _display_needed(self): |
||||
# We only need to update the display every so often. |
||||
if time.time() - self._last_display > 1: |
||||
self._last_display = time.time() |
||||
return True |
||||
return False |
||||
|
||||
def display(self, new_file = False, done_message = None): |
||||
""" |
||||
display(new_file = False[/True], done_message = None) |
||||
|
||||
Override this method to provide a nicer output. |
||||
""" |
||||
if new_file: |
||||
self.output_labels() |
||||
self.last_milestone = 0 |
||||
return |
||||
|
||||
if self.current_position == self.total_size: |
||||
print_size = Utils.formatSize(self.current_position, True) |
||||
if print_size[1] != "": print_size[1] += "B" |
||||
timedelta = self.time_current - self.time_start |
||||
sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0 |
||||
print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True) |
||||
self._stdout.write("100%% %s%s in %.2fs (%.2f %sB/s)\n" % |
||||
(print_size[0], print_size[1], sec_elapsed, print_speed[0], print_speed[1])) |
||||
self._stdout.flush() |
||||
return |
||||
|
||||
rel_position = self.current_position * 100 / self.total_size |
if rel_position >= self.last_milestone: |
self.last_milestone = (int(rel_position) / 5) * 5 |
self._stdout.write("%d%% " % self.last_milestone) |
||||
self._stdout.flush() |
||||
return |
||||
|
||||
class ProgressANSI(Progress): |
||||
## http://en.wikipedia.org/wiki/ANSI_escape_code |
||||
SCI = '\x1b[' |
||||
ANSI_hide_cursor = SCI + "?25l" |
||||
ANSI_show_cursor = SCI + "?25h" |
||||
ANSI_save_cursor_pos = SCI + "s" |
||||
ANSI_restore_cursor_pos = SCI + "u" |
||||
ANSI_move_cursor_to_column = SCI + "%uG" |
||||
ANSI_erase_to_eol = SCI + "0K" |
||||
ANSI_erase_current_line = SCI + "2K" |
||||
|
||||
def display(self, new_file = False, done_message = None): |
||||
""" |
||||
display(new_file = False[/True], done_message = None) |
||||
""" |
||||
if new_file: |
||||
self.output_labels() |
||||
self._stdout.write(self.ANSI_save_cursor_pos) |
||||
self._stdout.flush() |
||||
return |
||||
|
||||
# Only display progress every so often |
||||
if not (new_file or done_message) and not self._display_needed(): |
||||
return |
||||
|
||||
timedelta = self.time_current - self.time_start |
||||
sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0 |
||||
if (sec_elapsed > 0): |
||||
print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True) |
||||
else: |
||||
print_speed = (0, "") |
||||
self._stdout.write(self.ANSI_restore_cursor_pos) |
||||
self._stdout.write(self.ANSI_erase_to_eol) |
||||
self._stdout.write("%(current)s of %(total)s %(percent)3d%% in %(elapsed)ds %(speed).2f %(speed_coeff)sB/s" % { |
||||
"current" : str(self.current_position).rjust(len(str(self.total_size))), |
||||
"total" : self.total_size, |
||||
"percent" : self.total_size and (self.current_position * 100 / self.total_size) or 0, |
||||
"elapsed" : sec_elapsed, |
||||
"speed" : print_speed[0], |
||||
"speed_coeff" : print_speed[1] |
||||
}) |
||||
|
||||
if done_message: |
||||
self._stdout.write(" %s\n" % done_message) |
||||
|
||||
self._stdout.flush() |
||||
|
||||
class ProgressCR(Progress): |
||||
## Uses CR char (Carriage Return) just like other progress bars do. |
||||
CR_char = chr(13) |
||||
|
||||
def display(self, new_file = False, done_message = None): |
||||
""" |
||||
display(new_file = False[/True], done_message = None) |
||||
""" |
||||
if new_file: |
||||
self.output_labels() |
||||
return |
||||
|
||||
# Only display progress every so often |
||||
if not (new_file or done_message) and not self._display_needed(): |
||||
return |
||||
|
||||
timedelta = self.time_current - self.time_start |
||||
sec_elapsed = timedelta.days * 86400 + timedelta.seconds + float(timedelta.microseconds)/1000000.0 |
||||
if (sec_elapsed > 0): |
||||
print_speed = Utils.formatSize((self.current_position - self.initial_position) / sec_elapsed, True, True) |
||||
else: |
||||
print_speed = (0, "") |
||||
self._stdout.write(self.CR_char) |
||||
output = " %(current)s of %(total)s %(percent)3d%% in %(elapsed)4ds %(speed)7.2f %(speed_coeff)sB/s" % { |
||||
"current" : str(self.current_position).rjust(len(str(self.total_size))), |
||||
"total" : self.total_size, |
||||
"percent" : self.total_size and (self.current_position * 100 / self.total_size) or 0, |
||||
"elapsed" : sec_elapsed, |
||||
"speed" : print_speed[0], |
||||
"speed_coeff" : print_speed[1] |
||||
} |
||||
self._stdout.write(output) |
||||
if done_message: |
||||
self._stdout.write(" %s\n" % done_message) |
||||
|
||||
self._stdout.flush() |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,979 +0,0 @@
|
||||
## Amazon S3 manager |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
import sys |
||||
import os, os.path |
||||
import time |
||||
import httplib |
||||
import logging |
||||
import mimetypes |
||||
import re |
||||
from logging import debug, info, warning, error |
||||
from stat import ST_SIZE |
||||
|
||||
try: |
||||
from hashlib import md5 |
||||
except ImportError: |
||||
from md5 import md5 |
||||
|
||||
from Utils import * |
||||
from SortedDict import SortedDict |
||||
from AccessLog import AccessLog |
||||
from ACL import ACL, GranteeLogDelivery |
||||
from BidirMap import BidirMap |
||||
from Config import Config |
||||
from Exceptions import * |
||||
from MultiPart import MultiPartUpload |
||||
from S3Uri import S3Uri |
||||
from ConnMan import ConnMan |
||||
|
||||
try: |
||||
import magic, gzip |
||||
try: |
||||
## https://github.com/ahupp/python-magic |
||||
magic_ = magic.Magic(mime=True) |
||||
def mime_magic_file(file): |
||||
return magic_.from_file(file) |
||||
def mime_magic_buffer(buffer): |
||||
return magic_.from_buffer(buffer) |
||||
except TypeError: |
||||
## http://pypi.python.org/pypi/filemagic |
||||
try: |
||||
magic_ = magic.Magic(flags=magic.MAGIC_MIME) |
||||
def mime_magic_file(file): |
||||
return magic_.id_filename(file) |
||||
def mime_magic_buffer(buffer): |
||||
return magic_.id_buffer(buffer) |
||||
except TypeError: |
||||
## file-5.11 built-in python bindings |
||||
magic_ = magic.open(magic.MAGIC_MIME) |
||||
magic_.load() |
||||
def mime_magic_file(file): |
||||
return magic_.file(file) |
||||
def mime_magic_buffer(buffer): |
||||
return magic_.buffer(buffer) |
||||
|
||||
except AttributeError: |
||||
## Older python-magic versions |
||||
magic_ = magic.open(magic.MAGIC_MIME) |
||||
magic_.load() |
||||
def mime_magic_file(file): |
||||
return magic_.file(file) |
||||
def mime_magic_buffer(buffer): |
||||
return magic_.buffer(buffer) |
||||
|
||||
def mime_magic(file): |
||||
type = mime_magic_file(file) |
||||
if type != "application/x-gzip; charset=binary": |
||||
return (type, None) |
||||
else: |
||||
return (mime_magic_buffer(gzip.open(file).read(8192)), 'gzip') |
||||
|
||||
except ImportError, e: |
||||
if str(e).find("magic") >= 0: |
||||
magic_message = "Module python-magic is not available." |
||||
else: |
||||
magic_message = "Module python-magic can't be used (%s)." % e.message |
||||
magic_message += " Guessing MIME types based on file extensions." |
||||
magic_warned = False |
||||
def mime_magic(file): |
||||
global magic_warned |
||||
if (not magic_warned): |
||||
warning(magic_message) |
||||
magic_warned = True |
||||
return mimetypes.guess_type(file) |
||||
|
||||
__all__ = [] |
||||
class S3Request(object): |
||||
def __init__(self, s3, method_string, resource, headers, params = {}): |
||||
self.s3 = s3 |
||||
self.headers = SortedDict(headers or {}, ignore_case = True) |
||||
# Add in any extra headers from s3 config object |
||||
if self.s3.config.extra_headers: |
||||
self.headers.update(self.s3.config.extra_headers) |
||||
if len(self.s3.config.access_token)>0: |
||||
self.s3.config.role_refresh() |
||||
self.headers['x-amz-security-token']=self.s3.config.access_token |
||||
self.resource = resource |
||||
self.method_string = method_string |
||||
self.params = params |
||||
|
||||
self.update_timestamp() |
||||
self.sign() |
||||
|
||||
def update_timestamp(self): |
||||
if self.headers.has_key("date"): |
||||
del(self.headers["date"]) |
||||
self.headers["x-amz-date"] = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()) |
||||
|
||||
def format_param_str(self): |
||||
""" |
||||
Format URL parameters from self.params and returns |
||||
?parm1=val1&parm2=val2 or an empty string if there |
||||
are no parameters. Output of this function should |
||||
be appended directly to self.resource['uri'] |
||||
""" |
||||
param_str = "" |
||||
for param in self.params: |
||||
if self.params[param] not in (None, ""): |
||||
param_str += "&%s=%s" % (param, self.params[param]) |
||||
else: |
||||
param_str += "&%s" % param |
||||
return param_str and "?" + param_str[1:] |
||||
|
||||
def sign(self): |
||||
h = self.method_string + "\n" |
||||
h += self.headers.get("content-md5", "")+"\n" |
||||
h += self.headers.get("content-type", "")+"\n" |
||||
h += self.headers.get("date", "")+"\n" |
||||
for header in self.headers.keys(): |
||||
if header.startswith("x-amz-"): |
||||
h += header+":"+str(self.headers[header])+"\n" |
||||
if self.resource['bucket']: |
||||
h += "/" + self.resource['bucket'] |
||||
h += self.resource['uri'] |
||||
debug("SignHeaders: " + repr(h)) |
||||
signature = sign_string(h) |
||||
|
||||
self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature |
||||
|
||||
def get_triplet(self): |
||||
self.update_timestamp() |
||||
self.sign() |
||||
resource = dict(self.resource) ## take a copy |
||||
resource['uri'] += self.format_param_str() |
||||
return (self.method_string, resource, self.headers) |
||||
|
||||
class S3(object): |
||||
http_methods = BidirMap( |
||||
GET = 0x01, |
||||
PUT = 0x02, |
||||
HEAD = 0x04, |
||||
DELETE = 0x08, |
||||
POST = 0x10, |
||||
MASK = 0x1F, |
||||
) |
||||
|
||||
targets = BidirMap( |
||||
SERVICE = 0x0100, |
||||
BUCKET = 0x0200, |
||||
OBJECT = 0x0400, |
||||
MASK = 0x0700, |
||||
) |
||||
|
||||
operations = BidirMap( |
||||
UNDFINED = 0x0000, |
||||
LIST_ALL_BUCKETS = targets["SERVICE"] | http_methods["GET"], |
||||
BUCKET_CREATE = targets["BUCKET"] | http_methods["PUT"], |
||||
BUCKET_LIST = targets["BUCKET"] | http_methods["GET"], |
||||
BUCKET_DELETE = targets["BUCKET"] | http_methods["DELETE"], |
||||
OBJECT_PUT = targets["OBJECT"] | http_methods["PUT"], |
||||
OBJECT_GET = targets["OBJECT"] | http_methods["GET"], |
||||
OBJECT_HEAD = targets["OBJECT"] | http_methods["HEAD"], |
||||
OBJECT_DELETE = targets["OBJECT"] | http_methods["DELETE"], |
||||
OBJECT_POST = targets["OBJECT"] | http_methods["POST"], |
||||
) |
||||
|
||||
codes = { |
||||
"NoSuchBucket" : "Bucket '%s' does not exist", |
||||
"AccessDenied" : "Access to bucket '%s' was denied", |
||||
"BucketAlreadyExists" : "Bucket '%s' already exists", |
||||
} |
||||
|
||||
## S3 sometimes sends HTTP-307 response |
||||
redir_map = {} |
||||
|
||||
## Maximum attempts of re-issuing failed requests |
||||
_max_retries = 5 |
||||
|
||||
def __init__(self, config): |
||||
self.config = config |
||||
|
||||
def get_hostname(self, bucket): |
||||
if bucket and check_bucket_name_dns_conformity(bucket): |
||||
if self.redir_map.has_key(bucket): |
||||
host = self.redir_map[bucket] |
||||
else: |
||||
host = getHostnameFromBucket(bucket) |
||||
else: |
||||
host = self.config.host_base |
||||
debug('get_hostname(%s): %s' % (bucket, host)) |
||||
return host |
||||
|
||||
def set_hostname(self, bucket, redir_hostname): |
||||
self.redir_map[bucket] = redir_hostname |
||||
|
||||
def format_uri(self, resource): |
||||
if resource['bucket'] and not check_bucket_name_dns_conformity(resource['bucket']): |
||||
uri = "/%s%s" % (resource['bucket'], resource['uri']) |
||||
else: |
||||
uri = resource['uri'] |
||||
if self.config.proxy_host != "": |
||||
uri = "http://%s%s" % (self.get_hostname(resource['bucket']), uri) |
||||
debug('format_uri(): ' + uri) |
||||
return uri |
||||
|
||||
## Commands / Actions |
||||
def list_all_buckets(self): |
||||
request = self.create_request("LIST_ALL_BUCKETS") |
||||
response = self.send_request(request) |
||||
response["list"] = getListFromXml(response["data"], "Bucket") |
||||
return response |
||||
|
||||
def bucket_list(self, bucket, prefix = None, recursive = None): |
||||
def _list_truncated(data): |
||||
## <IsTruncated> can either be "true" or "false" or be missing completely |
||||
is_truncated = getTextFromXml(data, ".//IsTruncated") or "false" |
||||
return is_truncated.lower() != "false" |
||||
|
||||
def _get_contents(data): |
||||
return getListFromXml(data, "Contents") |
||||
|
||||
def _get_common_prefixes(data): |
||||
return getListFromXml(data, "CommonPrefixes") |
||||
|
||||
uri_params = {} |
||||
truncated = True |
||||
list = [] |
||||
prefixes = [] |
||||
|
||||
while truncated: |
||||
response = self.bucket_list_noparse(bucket, prefix, recursive, uri_params) |
||||
current_list = _get_contents(response["data"]) |
||||
current_prefixes = _get_common_prefixes(response["data"]) |
||||
truncated = _list_truncated(response["data"]) |
||||
if truncated: |
||||
if current_list: |
||||
uri_params['marker'] = self.urlencode_string(current_list[-1]["Key"]) |
||||
else: |
||||
uri_params['marker'] = self.urlencode_string(current_prefixes[-1]["Prefix"]) |
||||
debug("Listing continues after '%s'" % uri_params['marker']) |
||||
|
||||
list += current_list |
||||
prefixes += current_prefixes |
||||
|
||||
response['list'] = list |
||||
response['common_prefixes'] = prefixes |
||||
return response |
||||
|
||||
def bucket_list_noparse(self, bucket, prefix = None, recursive = None, uri_params = {}): |
||||
if prefix: |
||||
uri_params['prefix'] = self.urlencode_string(prefix) |
||||
if not self.config.recursive and not recursive: |
||||
uri_params['delimiter'] = "/" |
||||
request = self.create_request("BUCKET_LIST", bucket = bucket, **uri_params) |
||||
response = self.send_request(request) |
||||
#debug(response) |
||||
return response |
||||
|
||||
def bucket_create(self, bucket, bucket_location = None): |
||||
headers = SortedDict(ignore_case = True) |
||||
body = "" |
||||
if bucket_location and bucket_location.strip().upper() != "US": |
||||
bucket_location = bucket_location.strip() |
||||
if bucket_location.upper() == "EU": |
||||
bucket_location = bucket_location.upper() |
||||
else: |
||||
bucket_location = bucket_location.lower() |
||||
body = "<CreateBucketConfiguration><LocationConstraint>" |
||||
body += bucket_location |
||||
body += "</LocationConstraint></CreateBucketConfiguration>" |
||||
debug("bucket_location: " + body) |
||||
check_bucket_name(bucket, dns_strict = True) |
||||
else: |
||||
check_bucket_name(bucket, dns_strict = False) |
||||
if self.config.acl_public: |
||||
headers["x-amz-acl"] = "public-read" |
||||
request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers) |
||||
response = self.send_request(request, body) |
||||
return response |
||||
|
||||
def bucket_delete(self, bucket): |
||||
request = self.create_request("BUCKET_DELETE", bucket = bucket) |
||||
response = self.send_request(request) |
||||
return response |
||||
|
||||
def get_bucket_location(self, uri): |
||||
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?location") |
||||
response = self.send_request(request) |
||||
location = getTextFromXml(response['data'], "LocationConstraint") |
||||
if not location or location in [ "", "US" ]: |
||||
location = "us-east-1" |
||||
elif location == "EU": |
||||
location = "eu-west-1" |
||||
return location |
||||
|
||||
def bucket_info(self, uri): |
||||
# For now reports only "Location". One day perhaps more. |
||||
response = {} |
||||
response['bucket-location'] = self.get_bucket_location(uri) |
||||
return response |
||||
|
||||
def website_info(self, uri, bucket_location = None): |
||||
headers = SortedDict(ignore_case = True) |
||||
bucket = uri.bucket() |
||||
body = "" |
||||
|
||||
request = self.create_request("BUCKET_LIST", bucket = bucket, extra="?website") |
||||
try: |
||||
response = self.send_request(request, body) |
||||
response['index_document'] = getTextFromXml(response['data'], ".//IndexDocument//Suffix") |
||||
response['error_document'] = getTextFromXml(response['data'], ".//ErrorDocument//Key") |
||||
response['website_endpoint'] = self.config.website_endpoint % { |
||||
"bucket" : uri.bucket(), |
||||
"location" : self.get_bucket_location(uri)} |
||||
return response |
||||
except S3Error, e: |
||||
if e.status == 404: |
||||
debug("Could not get /?website - website probably not configured for this bucket") |
||||
return None |
||||
raise |
||||
|
||||
def website_create(self, uri, bucket_location = None): |
||||
headers = SortedDict(ignore_case = True) |
||||
bucket = uri.bucket() |
||||
body = '<WebsiteConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">' |
||||
body += ' <IndexDocument>' |
||||
body += (' <Suffix>%s</Suffix>' % self.config.website_index) |
||||
body += ' </IndexDocument>' |
||||
if self.config.website_error: |
||||
body += ' <ErrorDocument>' |
||||
body += (' <Key>%s</Key>' % self.config.website_error) |
||||
body += ' </ErrorDocument>' |
||||
body += '</WebsiteConfiguration>' |
||||
|
||||
request = self.create_request("BUCKET_CREATE", bucket = bucket, extra="?website") |
||||
debug("About to send request '%s' with body '%s'" % (request, body)) |
||||
response = self.send_request(request, body) |
||||
debug("Received response '%s'" % (response)) |
||||
|
||||
return response |
||||
|
||||
def website_delete(self, uri, bucket_location = None): |
||||
headers = SortedDict(ignore_case = True) |
||||
bucket = uri.bucket() |
||||
body = "" |
||||
|
||||
request = self.create_request("BUCKET_DELETE", bucket = bucket, extra="?website") |
||||
debug("About to send request '%s' with body '%s'" % (request, body)) |
||||
response = self.send_request(request, body) |
||||
debug("Received response '%s'" % (response)) |
||||
|
||||
if response['status'] != 204: |
||||
raise S3ResponseError("Expected status 204: %s" % response) |
||||
|
||||
return response |
||||
|
||||
def add_encoding(self, filename, content_type): |
||||
if content_type.find("charset=") != -1: |
||||
return False |
||||
exts = self.config.add_encoding_exts.split(',') |
||||
if exts[0]=='': |
||||
return False |
||||
parts = filename.rsplit('.',2) |
||||
if len(parts) < 2: |
||||
return False |
||||
ext = parts[1] |
||||
if ext in exts: |
||||
return True |
||||
else: |
||||
return False |
||||
|
||||
def object_put(self, filename, uri, extra_headers = None, extra_label = ""): |
||||
# TODO TODO |
||||
# Make it consistent with stream-oriented object_get() |
||||
if uri.type != "s3": |
||||
raise ValueError("Expected URI type 's3', got '%s'" % uri.type) |
||||
|
||||
if filename != "-" and not os.path.isfile(filename): |
||||
raise InvalidFileError(u"%s is not a regular file" % unicodise(filename)) |
||||
try: |
||||
if filename == "-": |
||||
file = sys.stdin |
||||
size = 0 |
||||
else: |
||||
file = open(filename, "rb") |
||||
size = os.stat(filename)[ST_SIZE] |
||||
except (IOError, OSError), e: |
||||
raise InvalidFileError(u"%s: %s" % (unicodise(filename), e.strerror)) |
||||
|
||||
headers = SortedDict(ignore_case = True) |
||||
if extra_headers: |
||||
headers.update(extra_headers) |
||||
|
||||
## MIME-type handling |
||||
content_type = self.config.mime_type |
||||
content_encoding = None |
||||
if filename != "-" and not content_type and self.config.guess_mime_type: |
||||
(content_type, content_encoding) = mime_magic(filename) |
||||
if not content_type: |
||||
content_type = self.config.default_mime_type |
||||
if not content_encoding: |
||||
content_encoding = self.config.encoding.upper() |
||||
|
||||
## add charset to content type |
||||
if self.add_encoding(filename, content_type) and content_encoding is not None: |
||||
content_type = content_type + "; charset=" + content_encoding |
||||
|
||||
headers["content-type"] = content_type |
||||
if content_encoding is not None: |
||||
headers["content-encoding"] = content_encoding |
||||
|
||||
## Other Amazon S3 attributes |
||||
if self.config.acl_public: |
||||
headers["x-amz-acl"] = "public-read" |
||||
if self.config.reduced_redundancy: |
||||
headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY" |
||||
|
||||
## Multipart decision |
||||
multipart = False |
||||
if not self.config.enable_multipart and filename == "-": |
||||
raise ParameterError("Multi-part upload is required to upload from stdin") |
||||
if self.config.enable_multipart: |
||||
if size > self.config.multipart_chunk_size_mb * 1024 * 1024 or filename == "-": |
||||
multipart = True |
||||
if multipart: |
||||
# Multipart requests are quite different... drop here |
||||
return self.send_file_multipart(file, headers, uri, size) |
||||
|
||||
## Not multipart... |
||||
headers["content-length"] = size |
||||
request = self.create_request("OBJECT_PUT", uri = uri, headers = headers) |
||||
labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label } |
||||
response = self.send_file(request, file, labels) |
||||
return response |
||||
|
||||
def object_get(self, uri, stream, start_position = 0, extra_label = ""): |
||||
if uri.type != "s3": |
||||
raise ValueError("Expected URI type 's3', got '%s'" % uri.type) |
||||
request = self.create_request("OBJECT_GET", uri = uri) |
||||
labels = { 'source' : unicodise(uri.uri()), 'destination' : unicodise(stream.name), 'extra' : extra_label } |
||||
response = self.recv_file(request, stream, labels, start_position) |
||||
return response |
||||
|
||||
def object_delete(self, uri): |
||||
if uri.type != "s3": |
||||
raise ValueError("Expected URI type 's3', got '%s'" % uri.type) |
||||
request = self.create_request("OBJECT_DELETE", uri = uri) |
||||
response = self.send_request(request) |
||||
return response |
||||
|
||||
def object_copy(self, src_uri, dst_uri, extra_headers = None): |
||||
if src_uri.type != "s3": |
||||
raise ValueError("Expected URI type 's3', got '%s'" % src_uri.type) |
||||
if dst_uri.type != "s3": |
||||
raise ValueError("Expected URI type 's3', got '%s'" % dst_uri.type) |
||||
headers = SortedDict(ignore_case = True) |
||||
headers['x-amz-copy-source'] = "/%s/%s" % (src_uri.bucket(), self.urlencode_string(src_uri.object())) |
||||
## TODO: For now COPY, later maybe add a switch? |
||||
headers['x-amz-metadata-directive'] = "COPY" |
||||
if self.config.acl_public: |
||||
headers["x-amz-acl"] = "public-read" |
||||
if self.config.reduced_redundancy: |
||||
headers["x-amz-storage-class"] = "REDUCED_REDUNDANCY" |
||||
# if extra_headers: |
||||
# headers.update(extra_headers) |
||||
request = self.create_request("OBJECT_PUT", uri = dst_uri, headers = headers) |
||||
response = self.send_request(request) |
||||
return response |
||||
|
||||
def object_move(self, src_uri, dst_uri, extra_headers = None): |
||||
response_copy = self.object_copy(src_uri, dst_uri, extra_headers) |
||||
debug("Object %s copied to %s" % (src_uri, dst_uri)) |
||||
if getRootTagName(response_copy["data"]) == "CopyObjectResult": |
||||
response_delete = self.object_delete(src_uri) |
||||
debug("Object %s deleted" % src_uri) |
||||
return response_copy |
||||
|
||||
def object_info(self, uri): |
||||
request = self.create_request("OBJECT_HEAD", uri = uri) |
||||
response = self.send_request(request) |
||||
return response |
||||
|
||||
def get_acl(self, uri): |
||||
if uri.has_object(): |
||||
request = self.create_request("OBJECT_GET", uri = uri, extra = "?acl") |
||||
else: |
||||
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?acl") |
||||
|
||||
response = self.send_request(request) |
||||
acl = ACL(response['data']) |
||||
return acl |
||||
|
||||
def set_acl(self, uri, acl): |
||||
if uri.has_object(): |
||||
request = self.create_request("OBJECT_PUT", uri = uri, extra = "?acl") |
||||
else: |
||||
request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?acl") |
||||
|
||||
body = str(acl) |
||||
debug(u"set_acl(%s): acl-xml: %s" % (uri, body)) |
||||
response = self.send_request(request, body) |
||||
return response |
||||
|
||||
def get_policy(self, uri): |
||||
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?policy") |
||||
response = self.send_request(request) |
||||
return response['data'] |
||||
|
||||
def set_policy(self, uri, policy): |
||||
headers = {} |
||||
# TODO check policy is proper json string |
||||
headers['content-type'] = 'application/json' |
||||
request = self.create_request("BUCKET_CREATE", uri = uri, |
||||
extra = "?policy", headers=headers) |
||||
body = policy |
||||
debug(u"set_policy(%s): policy-json: %s" % (uri, body)) |
||||
request.sign() |
||||
response = self.send_request(request, body=body) |
||||
return response |
||||
|
||||
def delete_policy(self, uri): |
||||
request = self.create_request("BUCKET_DELETE", uri = uri, extra = "?policy") |
||||
debug(u"delete_policy(%s)" % uri) |
||||
response = self.send_request(request) |
||||
return response |
||||
|
||||
def get_accesslog(self, uri): |
||||
request = self.create_request("BUCKET_LIST", bucket = uri.bucket(), extra = "?logging") |
||||
response = self.send_request(request) |
||||
accesslog = AccessLog(response['data']) |
||||
return accesslog |
||||
|
||||
def set_accesslog_acl(self, uri): |
||||
acl = self.get_acl(uri) |
||||
debug("Current ACL(%s): %s" % (uri.uri(), str(acl))) |
||||
acl.appendGrantee(GranteeLogDelivery("READ_ACP")) |
||||
acl.appendGrantee(GranteeLogDelivery("WRITE")) |
||||
debug("Updated ACL(%s): %s" % (uri.uri(), str(acl))) |
||||
self.set_acl(uri, acl) |
||||
|
||||
def set_accesslog(self, uri, enable, log_target_prefix_uri = None, acl_public = False): |
||||
request = self.create_request("BUCKET_CREATE", bucket = uri.bucket(), extra = "?logging") |
||||
accesslog = AccessLog() |
||||
if enable: |
||||
accesslog.enableLogging(log_target_prefix_uri) |
||||
accesslog.setAclPublic(acl_public) |
||||
else: |
||||
accesslog.disableLogging() |
||||
body = str(accesslog) |
||||
debug(u"set_accesslog(%s): accesslog-xml: %s" % (uri, body)) |
||||
try: |
||||
response = self.send_request(request, body) |
||||
except S3Error, e: |
||||
if e.info['Code'] == "InvalidTargetBucketForLogging": |
||||
info("Setting up log-delivery ACL for target bucket.") |
||||
self.set_accesslog_acl(S3Uri("s3://%s" % log_target_prefix_uri.bucket())) |
||||
response = self.send_request(request, body) |
||||
else: |
||||
raise |
||||
return accesslog, response |
||||
|
||||
## Low level methods |
||||
def urlencode_string(self, string, urlencoding_mode = None): |
||||
if type(string) == unicode: |
||||
string = string.encode("utf-8") |
||||
|
||||
if urlencoding_mode is None: |
||||
urlencoding_mode = self.config.urlencoding_mode |
||||
|
||||
if urlencoding_mode == "verbatim": |
||||
## Don't do any pre-processing |
||||
return string |
||||
|
||||
encoded = "" |
||||
## List of characters that must be escaped for S3 |
||||
## Haven't found this in any official docs |
||||
## but my tests show it's more or less correct. |
||||
## If you start getting InvalidSignature errors |
||||
## from S3 check the error headers returned |
||||
## from S3 to see whether the list hasn't |
||||
## changed. |
||||
for c in string: # It's not obvious what encoding 'string' |
||||
# arrives in here. Apparently "type(string) == str", |
||||
# but its contents are raw UTF-8 bytes, |
||||
# e.g. '\xc4\x8d\xc5\xafr\xc3\xa1k'. |
||||
# Behaviour on non-UTF-8 systems |
||||
# is untested. |
||||
# [hope that sounds reassuring ;-)] |
||||
o = ord(c) |
||||
if (o < 0x20 or o == 0x7f): |
||||
if urlencoding_mode == "fixbucket": |
||||
encoded += "%%%02X" % o |
||||
else: |
||||
error(u"Non-printable character 0x%02x in: %s" % (o, string)) |
||||
error(u"Please report it to s3tools-bugs@lists.sourceforge.net") |
||||
encoded += replace_nonprintables(c) |
||||
elif (o == 0x20 or # Space and below |
||||
o == 0x22 or # " |
||||
o == 0x23 or # # |
||||
o == 0x25 or # % (escape character) |
||||
o == 0x26 or # & |
||||
o == 0x2B or # + (or it would become <space>) |
||||
o == 0x3C or # < |
||||
o == 0x3E or # > |
||||
o == 0x3F or # ? |
||||
o == 0x60 or # ` |
||||
o >= 123): # { and above, including >= 128 for UTF-8 |
||||
encoded += "%%%02X" % o |
||||
else: |
||||
encoded += c |
||||
debug("String '%s' encoded to '%s'" % (string, encoded)) |
||||
return encoded |
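# Rough illustration (any mode other than "verbatim"): |
#   urlencode_string("my file+1.txt")  ->  "my%20file%2B1.txt" |
# Note that "/" is not escaped, so object keys keep their path |
# structure in the request URI. |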
||||
|
||||
def create_request(self, operation, uri = None, bucket = None, object = None, headers = None, extra = None, **params): |
||||
resource = { 'bucket' : None, 'uri' : "/" } |
||||
|
||||
if uri and (bucket or object): |
||||
raise ValueError("Both 'uri' and either 'bucket' or 'object' parameters supplied") |
||||
## If URI is given use that instead of bucket/object parameters |
||||
if uri: |
||||
bucket = uri.bucket() |
||||
object = uri.has_object() and uri.object() or None |
||||
|
||||
if bucket: |
||||
resource['bucket'] = str(bucket) |
||||
if object: |
||||
resource['uri'] = "/" + self.urlencode_string(object) |
||||
if extra: |
||||
resource['uri'] += extra |
||||
|
||||
method_string = S3.http_methods.getkey(S3.operations[operation] & S3.http_methods["MASK"]) |
||||
|
||||
request = S3Request(self, method_string, resource, headers, params) |
||||
|
||||
debug("CreateRequest: resource[uri]=" + resource['uri']) |
||||
return request |
||||
|
||||
def _fail_wait(self, retries): |
||||
# Wait a few seconds. The more it fails the more we wait. |
||||
return (self._max_retries - retries + 1) * 3 |
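# Illustration: with _max_retries = 5 the successive waits are |
# 3, 6, 9, 12 and 15 seconds - a simple linear back-off. |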
||||
|
||||
def send_request(self, request, body = None, retries = _max_retries): |
||||
method_string, resource, headers = request.get_triplet() |
||||
debug("Processing request, please wait...") |
||||
if not headers.has_key('content-length'): |
||||
headers['content-length'] = body and len(body) or 0 |
||||
try: |
||||
# "Stringify" all headers |
||||
for header in headers.keys(): |
||||
headers[header] = str(headers[header]) |
||||
conn = ConnMan.get(self.get_hostname(resource['bucket'])) |
||||
uri = self.format_uri(resource) |
||||
debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(body or ""))) |
||||
conn.c.request(method_string, uri, body, headers) |
||||
response = {} |
||||
http_response = conn.c.getresponse() |
||||
response["status"] = http_response.status |
||||
response["reason"] = http_response.reason |
||||
response["headers"] = convertTupleListToDict(http_response.getheaders()) |
||||
response["data"] = http_response.read() |
||||
debug("Response: " + str(response)) |
||||
ConnMan.put(conn) |
||||
except ParameterError, e: |
||||
raise |
||||
except Exception, e: |
||||
if retries: |
||||
warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
return self.send_request(request, body, retries - 1) |
||||
else: |
||||
raise S3RequestError("Request failed for: %s" % resource['uri']) |
||||
|
||||
if response["status"] == 307: |
||||
## TemporaryRedirect |
||||
redir_bucket = getTextFromXml(response['data'], ".//Bucket") |
||||
redir_hostname = getTextFromXml(response['data'], ".//Endpoint") |
||||
self.set_hostname(redir_bucket, redir_hostname) |
||||
warning("Redirected to: %s" % (redir_hostname)) |
||||
return self.send_request(request, body) |
||||
|
||||
if response["status"] >= 500: |
||||
e = S3Error(response) |
||||
if retries: |
||||
warning(u"Retrying failed request: %s" % resource['uri']) |
||||
warning(unicode(e)) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
return self.send_request(request, body, retries - 1) |
||||
else: |
||||
raise e |
||||
|
||||
if response["status"] < 200 or response["status"] > 299: |
||||
raise S3Error(response) |
||||
|
||||
return response |
||||
|
||||
def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1): |
||||
method_string, resource, headers = request.get_triplet() |
||||
size_left = size_total = headers.get("content-length") |
||||
if self.config.progress_meter: |
||||
progress = self.config.progress_class(labels, size_total) |
||||
else: |
||||
info("Sending file '%s', please wait..." % file.name) |
||||
timestamp_start = time.time() |
||||
try: |
||||
conn = ConnMan.get(self.get_hostname(resource['bucket'])) |
||||
conn.c.putrequest(method_string, self.format_uri(resource)) |
||||
for header in headers.keys(): |
||||
conn.c.putheader(header, str(headers[header])) |
||||
conn.c.endheaders() |
||||
except ParameterError, e: |
||||
raise |
||||
except Exception, e: |
||||
if self.config.progress_meter: |
||||
progress.done("failed") |
||||
if retries: |
||||
warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
# Connection error -> same throttle value |
||||
return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) |
||||
else: |
||||
raise S3UploadError("Upload failed for: %s" % resource['uri']) |
||||
if buffer == '': |
||||
file.seek(offset) |
||||
md5_hash = md5() |
||||
try: |
||||
while (size_left > 0): |
||||
#debug("SendFile: Reading up to %d bytes from '%s' - remaining bytes: %s" % (self.config.send_chunk, file.name, size_left)) |
||||
if buffer == '': |
||||
data = file.read(min(self.config.send_chunk, size_left)) |
||||
else: |
||||
data = buffer |
||||
md5_hash.update(data) |
||||
conn.c.send(data) |
||||
if self.config.progress_meter: |
||||
progress.update(delta_position = len(data)) |
||||
size_left -= len(data) |
||||
if throttle: |
||||
time.sleep(throttle) |
||||
md5_computed = md5_hash.hexdigest() |
||||
response = {} |
||||
http_response = conn.c.getresponse() |
||||
response["status"] = http_response.status |
||||
response["reason"] = http_response.reason |
||||
response["headers"] = convertTupleListToDict(http_response.getheaders()) |
||||
response["data"] = http_response.read() |
||||
response["size"] = size_total |
||||
ConnMan.put(conn) |
||||
debug(u"Response: %s" % response) |
||||
except ParameterError, e: |
||||
raise |
||||
except Exception, e: |
||||
if self.config.progress_meter: |
||||
progress.done("failed") |
||||
if retries: |
||||
if retries < self._max_retries: |
||||
throttle = throttle and throttle * 5 or 0.01 |
||||
warning("Upload failed: %s (%s)" % (resource['uri'], e)) |
||||
warning("Retrying on lower speed (throttle=%0.2f)" % throttle) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
# Connection error -> same throttle value |
||||
return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) |
||||
else: |
||||
debug("Giving up on '%s' %s" % (file.name, e)) |
||||
raise S3UploadError("Upload failed for: %s" % resource['uri']) |
||||
|
||||
timestamp_end = time.time() |
||||
response["elapsed"] = timestamp_end - timestamp_start |
||||
response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1) |
||||
|
||||
if self.config.progress_meter: |
||||
## Finalising the upload takes some time -> update() progress meter |
||||
## to correct the average speed. Otherwise people will complain that |
||||
## 'progress' and response["speed"] are inconsistent ;-) |
||||
progress.update() |
||||
progress.done("done") |
||||
|
||||
if response["status"] == 307: |
||||
## TemporaryRedirect |
||||
redir_bucket = getTextFromXml(response['data'], ".//Bucket") |
||||
redir_hostname = getTextFromXml(response['data'], ".//Endpoint") |
||||
self.set_hostname(redir_bucket, redir_hostname) |
||||
warning("Redirected to: %s" % (redir_hostname)) |
||||
return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size) |
||||
|
||||
# S3 from time to time doesn't send ETag back in a response :-( |
||||
# Force re-upload here. |
||||
if not response['headers'].has_key('etag'): |
||||
response['headers']['etag'] = '' |
||||
|
||||
if response["status"] < 200 or response["status"] > 299: |
||||
try_retry = False |
||||
if response["status"] >= 500: |
||||
## AWS internal error - retry |
||||
try_retry = True |
||||
elif response["status"] >= 400: |
||||
err = S3Error(response) |
||||
## Retriable client error? |
||||
if err.code in [ 'BadDigest', 'OperationAborted', 'TokenRefreshRequired', 'RequestTimeout' ]: |
||||
try_retry = True |
||||
|
||||
if try_retry: |
||||
if retries: |
||||
warning("Upload failed: %s (%s)" % (resource['uri'], S3Error(response))) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) |
||||
else: |
||||
warning("Too many failures. Giving up on '%s'" % (file.name)) |
||||
raise S3UploadError |
||||
|
||||
## Non-recoverable error |
||||
raise S3Error(response) |
||||
|
||||
debug("MD5 sums: computed=%s, received=%s" % (md5_computed, response["headers"]["etag"])) |
||||
if response["headers"]["etag"].strip('"\'') != md5_hash.hexdigest(): |
||||
warning("MD5 Sums don't match!") |
||||
if retries: |
||||
warning("Retrying upload of %s" % (file.name)) |
||||
return self.send_file(request, file, labels, buffer, throttle, retries - 1, offset, chunk_size) |
||||
else: |
||||
warning("Too many failures. Giving up on '%s'" % (file.name)) |
||||
raise S3UploadError |
||||
|
||||
return response |
||||
|
||||
def send_file_multipart(self, file, headers, uri, size): |
||||
chunk_size = self.config.multipart_chunk_size_mb * 1024 * 1024 |
||||
timestamp_start = time.time() |
||||
upload = MultiPartUpload(self, file, uri, headers) |
||||
upload.upload_all_parts() |
||||
response = upload.complete_multipart_upload() |
||||
timestamp_end = time.time() |
||||
response["elapsed"] = timestamp_end - timestamp_start |
||||
response["size"] = size |
||||
response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1) |
||||
return response |
||||
|
||||
def recv_file(self, request, stream, labels, start_position = 0, retries = _max_retries): |
||||
method_string, resource, headers = request.get_triplet() |
||||
if self.config.progress_meter: |
||||
progress = self.config.progress_class(labels, 0) |
||||
else: |
||||
info("Receiving file '%s', please wait..." % stream.name) |
||||
timestamp_start = time.time() |
||||
try: |
||||
conn = ConnMan.get(self.get_hostname(resource['bucket'])) |
||||
conn.c.putrequest(method_string, self.format_uri(resource)) |
||||
for header in headers.keys(): |
||||
conn.c.putheader(header, str(headers[header])) |
||||
if start_position > 0: |
||||
debug("Requesting Range: %d .. end" % start_position) |
||||
conn.c.putheader("Range", "bytes=%d-" % start_position) |
||||
conn.c.endheaders() |
||||
response = {} |
||||
http_response = conn.c.getresponse() |
||||
response["status"] = http_response.status |
||||
response["reason"] = http_response.reason |
||||
response["headers"] = convertTupleListToDict(http_response.getheaders()) |
||||
debug("Response: %s" % response) |
||||
except ParameterError, e: |
||||
raise |
||||
except Exception, e: |
||||
if self.config.progress_meter: |
||||
progress.done("failed") |
||||
if retries: |
||||
warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
# Connection error -> same throttle value |
||||
return self.recv_file(request, stream, labels, start_position, retries - 1) |
||||
else: |
||||
raise S3DownloadError("Download failed for: %s" % resource['uri']) |
||||
|
||||
if response["status"] == 307: |
||||
## TemporaryRedirect |
||||
response['data'] = http_response.read() |
||||
redir_bucket = getTextFromXml(response['data'], ".//Bucket") |
||||
redir_hostname = getTextFromXml(response['data'], ".//Endpoint") |
||||
self.set_hostname(redir_bucket, redir_hostname) |
||||
warning("Redirected to: %s" % (redir_hostname)) |
||||
return self.recv_file(request, stream, labels) |
||||
|
||||
if response["status"] < 200 or response["status"] > 299: |
||||
raise S3Error(response) |
||||
|
||||
if start_position == 0: |
||||
# Only compute MD5 on the fly if we're downloading from the beginning. |
||||
# Otherwise the digest would be meaningless. |
||||
md5_hash = md5() |
||||
size_left = int(response["headers"]["content-length"]) |
||||
size_total = start_position + size_left |
||||
current_position = start_position |
||||
|
||||
if self.config.progress_meter: |
||||
progress.total_size = size_total |
||||
progress.initial_position = current_position |
||||
progress.current_position = current_position |
||||
|
||||
try: |
||||
while (current_position < size_total): |
||||
this_chunk = size_left > self.config.recv_chunk and self.config.recv_chunk or size_left |
||||
data = http_response.read(this_chunk) |
||||
if len(data) == 0: |
||||
raise S3Error("EOF from S3!") |
||||
|
||||
stream.write(data) |
||||
if start_position == 0: |
||||
md5_hash.update(data) |
||||
current_position += len(data) |
||||
## Call progress meter from here... |
||||
if self.config.progress_meter: |
||||
progress.update(delta_position = len(data)) |
||||
ConnMan.put(conn) |
||||
except Exception, e: |
||||
if self.config.progress_meter: |
||||
progress.done("failed") |
||||
if retries: |
||||
warning("Retrying failed request: %s (%s)" % (resource['uri'], e)) |
||||
warning("Waiting %d sec..." % self._fail_wait(retries)) |
||||
time.sleep(self._fail_wait(retries)) |
||||
# Connection error -> same throttle value |
||||
return self.recv_file(request, stream, labels, current_position, retries - 1) |
||||
else: |
||||
raise S3DownloadError("Download failed for: %s" % resource['uri']) |
||||
|
||||
stream.flush() |
||||
timestamp_end = time.time() |
||||
|
||||
if self.config.progress_meter: |
||||
## The above stream.flush() may take some time -> update() progress meter |
||||
## to correct the average speed. Otherwise people will complain that |
||||
## 'progress' and response["speed"] are inconsistent ;-) |
||||
progress.update() |
||||
progress.done("done") |
||||
|
||||
if start_position == 0: |
||||
# Only compute MD5 on the fly if we were downloading from the beginning |
||||
response["md5"] = md5_hash.hexdigest() |
||||
else: |
||||
# Otherwise try to compute MD5 of the output file |
||||
try: |
||||
response["md5"] = hash_file_md5(stream.name) |
||||
except IOError, e: |
||||
if e.errno != errno.ENOENT: |
||||
warning("Unable to open file: %s: %s" % (stream.name, e)) |
||||
warning("Unable to verify MD5. Assume it matches.") |
||||
response["md5"] = response["headers"]["etag"] |
||||
|
||||
response["md5match"] = response["headers"]["etag"].find(response["md5"]) >= 0 |
||||
response["elapsed"] = timestamp_end - timestamp_start |
||||
response["size"] = current_position |
||||
response["speed"] = response["elapsed"] and float(response["size"]) / response["elapsed"] or float(-1) |
||||
if response["size"] != start_position + long(response["headers"]["content-length"]): |
||||
warning("Reported size (%s) does not match received size (%s)" % ( |
||||
start_position + long(response["headers"]["content-length"]), response["size"])) |
||||
debug("ReceiveFile: Computed MD5 = %s" % response["md5"]) |
||||
if not response["md5match"]: |
||||
warning("MD5 signatures do not match: computed=%s, received=%s" % ( |
||||
response["md5"], response["headers"]["etag"])) |
||||
return response |
||||
__all__.append("S3") |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,223 +0,0 @@
|
||||
## Amazon S3 manager |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
import os |
||||
import re |
||||
import sys |
||||
from BidirMap import BidirMap |
||||
from logging import debug |
||||
import S3 |
||||
from Utils import unicodise, check_bucket_name_dns_conformity |
||||
import Config |
||||
|
||||
class S3Uri(object): |
||||
type = None |
||||
_subclasses = None |
||||
|
||||
def __new__(self, string): |
||||
if not self._subclasses: |
||||
## Generate a list of all subclasses of S3Uri |
||||
self._subclasses = [] |
||||
dict = sys.modules[__name__].__dict__ |
||||
for something in dict: |
||||
if type(dict[something]) is not type(self): |
||||
continue |
||||
if issubclass(dict[something], self) and dict[something] != self: |
||||
self._subclasses.append(dict[something]) |
||||
for subclass in self._subclasses: |
||||
try: |
||||
instance = object.__new__(subclass) |
||||
instance.__init__(string) |
||||
return instance |
||||
except ValueError, e: |
||||
continue |
||||
raise ValueError("%s: not a recognized URI" % string) |
||||
|
||||
def __str__(self): |
||||
return self.uri() |
||||
|
||||
def __unicode__(self): |
||||
return self.uri() |
||||
|
||||
def __repr__(self): |
||||
return "<%s: %s>" % (self.__class__.__name__, self.__unicode__()) |
||||
|
||||
def public_url(self): |
||||
raise ValueError("This S3 URI does not have Anonymous URL representation") |
||||
|
||||
def basename(self): |
||||
return self.__unicode__().split("/")[-1] |
||||
|
||||
class S3UriS3(S3Uri): |
||||
type = "s3" |
||||
_re = re.compile("^s3://([^/]+)/?(.*)", re.IGNORECASE) |
||||
def __init__(self, string): |
||||
match = self._re.match(string) |
||||
if not match: |
||||
raise ValueError("%s: not a S3 URI" % string) |
||||
groups = match.groups() |
||||
self._bucket = groups[0] |
||||
self._object = unicodise(groups[1]) |
||||
|
||||
def bucket(self): |
||||
return self._bucket |
||||
|
||||
def object(self): |
||||
return self._object |
||||
|
||||
def has_bucket(self): |
||||
return bool(self._bucket) |
||||
|
||||
def has_object(self): |
||||
return bool(self._object) |
||||
|
||||
def uri(self): |
||||
return "/".join(["s3:/", self._bucket, self._object]) |
||||
|
||||
def is_dns_compatible(self): |
||||
return check_bucket_name_dns_conformity(self._bucket) |
||||
|
||||
def public_url(self): |
||||
if self.is_dns_compatible(): |
||||
return "http://%s.%s/%s" % (self._bucket, Config.Config().host_base, self._object) |
||||
else: |
||||
return "http://%s/%s/%s" % (self._bucket, Config.Config().host_base, self._object) |
||||
|
||||
def host_name(self): |
||||
if self.is_dns_compatible(): |
||||
return "%s.s3.amazonaws.com" % (self._bucket) |
||||
else: |
||||
return "s3.amazonaws.com" |
||||
|
||||
@staticmethod |
||||
def compose_uri(bucket, object = ""): |
||||
return "s3://%s/%s" % (bucket, object) |
||||
|
||||
@staticmethod |
||||
def httpurl_to_s3uri(http_url): |
||||
m=re.match("(https?://)?([^/]+)/?(.*)", http_url, re.IGNORECASE) |
||||
hostname, object = m.groups()[1:] |
||||
hostname = hostname.lower() |
||||
if hostname == "s3.amazonaws.com": |
||||
## old-style url: http://s3.amazonaws.com/bucket/object |
||||
if object.count("/") == 0: |
||||
## no object given |
||||
bucket = object |
||||
object = "" |
||||
else: |
||||
## bucket/object |
||||
bucket, object = object.split("/", 1) |
||||
elif hostname.endswith(".s3.amazonaws.com"): |
||||
## new-style url: http://bucket.s3.amazonaws.com/object |
||||
bucket = hostname[:-(len(".s3.amazonaws.com"))] |
||||
else: |
||||
raise ValueError("Unable to parse URL: %s" % http_url) |
||||
return S3Uri("s3://%(bucket)s/%(object)s" % { |
||||
'bucket' : bucket, |
||||
'object' : object }) |
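# Hypothetical examples of the conversion: |
#   httpurl_to_s3uri("http://s3.amazonaws.com/mybucket/some/key") |
#       -> S3Uri("s3://mybucket/some/key") |
#   httpurl_to_s3uri("http://mybucket.s3.amazonaws.com/some/key") |
#       -> S3Uri("s3://mybucket/some/key") |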
||||
|
||||
class S3UriS3FS(S3Uri): |
||||
type = "s3fs" |
||||
_re = re.compile("^s3fs://([^/]*)/?(.*)", re.IGNORECASE) |
||||
def __init__(self, string): |
||||
match = self._re.match(string) |
||||
if not match: |
||||
raise ValueError("%s: not a S3fs URI" % string) |
||||
groups = match.groups() |
||||
self._fsname = groups[0] |
||||
self._path = unicodise(groups[1]).split("/") |
||||
|
||||
def fsname(self): |
||||
return self._fsname |
||||
|
||||
def path(self): |
||||
return "/".join(self._path) |
||||
|
||||
def uri(self): |
||||
return "/".join(["s3fs:/", self._fsname, self.path()]) |
||||
|
||||
class S3UriFile(S3Uri): |
||||
type = "file" |
||||
_re = re.compile("^(\w+://)?(.*)") |
||||
def __init__(self, string): |
||||
match = self._re.match(string) |
||||
groups = match.groups() |
||||
if groups[0] not in (None, "file://"): |
||||
raise ValueError("%s: not a file:// URI" % string) |
||||
self._path = unicodise(groups[1]).split("/") |
||||
|
||||
def path(self): |
||||
return "/".join(self._path) |
||||
|
||||
def uri(self): |
||||
return "/".join(["file:/", self.path()]) |
||||
|
||||
def isdir(self): |
||||
return os.path.isdir(self.path()) |
||||
|
||||
def dirname(self): |
||||
return os.path.dirname(self.path()) |
||||
|
||||
class S3UriCloudFront(S3Uri): |
||||
type = "cf" |
||||
_re = re.compile("^cf://([^/]*)/*(.*)", re.IGNORECASE) |
||||
def __init__(self, string): |
||||
match = self._re.match(string) |
||||
if not match: |
||||
raise ValueError("%s: not a CloudFront URI" % string) |
||||
groups = match.groups() |
||||
self._dist_id = groups[0] |
||||
self._request_id = groups[1] != "/" and groups[1] or None |
||||
|
||||
def dist_id(self): |
||||
return self._dist_id |
||||
|
||||
def request_id(self): |
||||
return self._request_id |
||||
|
||||
def uri(self): |
||||
uri = "cf://" + self.dist_id() |
||||
if self.request_id(): |
||||
uri += "/" + self.request_id() |
||||
return uri |
||||
|
||||
if __name__ == "__main__": |
||||
uri = S3Uri("s3://bucket/object") |
||||
print "type() =", type(uri) |
||||
print "uri =", uri |
||||
print "uri.type=", uri.type |
||||
print "bucket =", uri.bucket() |
||||
print "object =", uri.object() |
||||
print |
||||
|
||||
uri = S3Uri("s3://bucket") |
||||
print "type() =", type(uri) |
||||
print "uri =", uri |
||||
print "uri.type=", uri.type |
||||
print "bucket =", uri.bucket() |
||||
print |
||||
|
||||
uri = S3Uri("s3fs://filesystem1/path/to/remote/file.txt") |
||||
print "type() =", type(uri) |
||||
print "uri =", uri |
||||
print "uri.type=", uri.type |
||||
print "path =", uri.path() |
||||
print |
||||
|
||||
uri = S3Uri("/path/to/local/file.txt") |
||||
print "type() =", type(uri) |
||||
print "uri =", uri |
||||
print "uri.type=", uri.type |
||||
print "path =", uri.path() |
||||
print |
||||
|
||||
uri = S3Uri("cf://1234567890ABCD/") |
||||
print "type() =", type(uri) |
||||
print "uri =", uri |
||||
print "uri.type=", uri.type |
||||
print "dist_id =", uri.dist_id() |
||||
print |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,178 +0,0 @@
|
||||
## Amazon SimpleDB library |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
""" |
||||
Low-level class for working with Amazon SimpleDB |
||||
""" |
||||
|
||||
import time |
||||
import urllib |
||||
import base64 |
||||
import hmac |
||||
import sha |
||||
import httplib |
||||
from logging import debug, info, warning, error |
||||
|
||||
from Utils import convertTupleListToDict |
||||
from SortedDict import SortedDict |
||||
from Exceptions import * |
||||
|
||||
class SimpleDB(object): |
||||
# API Version |
||||
# See http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/ |
||||
Version = "2007-11-07" |
||||
SignatureVersion = 1 |
||||
|
||||
def __init__(self, config): |
||||
self.config = config |
||||
|
||||
## ------------------------------------------------ |
||||
## Methods implementing SimpleDB API |
||||
## ------------------------------------------------ |
||||
|
||||
def ListDomains(self, MaxNumberOfDomains = 100): |
||||
''' |
||||
Lists all domains associated with our Access Key. Returns |
||||
domain names up to the limit set by MaxNumberOfDomains. |
||||
''' |
||||
parameters = SortedDict() |
||||
parameters['MaxNumberOfDomains'] = MaxNumberOfDomains |
||||
return self.send_request("ListDomains", DomainName = None, parameters = parameters) |
||||
|
||||
def CreateDomain(self, DomainName): |
||||
return self.send_request("CreateDomain", DomainName = DomainName) |
||||
|
||||
def DeleteDomain(self, DomainName): |
||||
return self.send_request("DeleteDomain", DomainName = DomainName) |
||||
|
||||
def PutAttributes(self, DomainName, ItemName, Attributes): |
||||
parameters = SortedDict() |
||||
parameters['ItemName'] = ItemName |
||||
seq = 0 |
||||
for attrib in Attributes: |
||||
if type(Attributes[attrib]) == type(list()): |
||||
for value in Attributes[attrib]: |
||||
parameters['Attribute.%d.Name' % seq] = attrib |
||||
parameters['Attribute.%d.Value' % seq] = unicode(value) |
||||
seq += 1 |
||||
else: |
||||
parameters['Attribute.%d.Name' % seq] = attrib |
||||
parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib]) |
||||
seq += 1 |
||||
## TODO: |
||||
## - support for Attribute.N.Replace |
||||
## - support for multiple values for one attribute |
||||
return self.send_request("PutAttributes", DomainName = DomainName, parameters = parameters) |
||||
|
||||
def GetAttributes(self, DomainName, ItemName, Attributes = []): |
||||
parameters = SortedDict() |
||||
parameters['ItemName'] = ItemName |
||||
seq = 0 |
||||
for attrib in Attributes: |
||||
parameters['AttributeName.%d' % seq] = attrib |
||||
seq += 1 |
||||
return self.send_request("GetAttributes", DomainName = DomainName, parameters = parameters) |
||||
|
||||
def DeleteAttributes(self, DomainName, ItemName, Attributes = {}): |
||||
""" |
||||
Remove specified Attributes from ItemName. |
||||
Attributes parameter can be either: |
||||
- not specified, in which case the whole Item is removed |
||||
- list, e.g. ['Attr1', 'Attr2'] in which case these parameters are removed |
||||
- dict, e.g. {'Attr1' : 'One', 'Attr2' : 'Two'} in which case the |
||||
specified values are removed from multi-value attributes. |
||||
""" |
||||
parameters = SortedDict() |
||||
parameters['ItemName'] = ItemName |
||||
seq = 0 |
||||
for attrib in Attributes: |
||||
parameters['Attribute.%d.Name' % seq] = attrib |
||||
if type(Attributes) == type(dict()): |
||||
parameters['Attribute.%d.Value' % seq] = unicode(Attributes[attrib]) |
||||
seq += 1 |
||||
return self.send_request("DeleteAttributes", DomainName = DomainName, parameters = parameters) |
||||
|
||||
def Query(self, DomainName, QueryExpression = None, MaxNumberOfItems = None, NextToken = None): |
||||
parameters = SortedDict() |
||||
if QueryExpression: |
||||
parameters['QueryExpression'] = QueryExpression |
||||
if MaxNumberOfItems: |
||||
parameters['MaxNumberOfItems'] = MaxNumberOfItems |
||||
if NextToken: |
||||
parameters['NextToken'] = NextToken |
||||
return self.send_request("Query", DomainName = DomainName, parameters = parameters) |
||||
## Handle NextToken? Or maybe not - let the upper level do it |
||||
|
||||
## ------------------------------------------------ |
||||
## Low-level methods for handling SimpleDB requests |
||||
## ------------------------------------------------ |
||||
|
||||
def send_request(self, *args, **kwargs): |
||||
request = self.create_request(*args, **kwargs) |
||||
#debug("Request: %s" % repr(request)) |
||||
conn = self.get_connection() |
||||
conn.request("GET", self.format_uri(request['uri_params'])) |
||||
http_response = conn.getresponse() |
||||
response = {} |
||||
response["status"] = http_response.status |
||||
response["reason"] = http_response.reason |
||||
response["headers"] = convertTupleListToDict(http_response.getheaders()) |
||||
response["data"] = http_response.read() |
||||
conn.close() |
||||
|
||||
if response["status"] < 200 or response["status"] > 299: |
||||
debug("Response: " + str(response)) |
||||
raise S3Error(response) |
||||
|
||||
return response |
||||
|
||||
def create_request(self, Action, DomainName, parameters = None): |
||||
if not parameters: |
||||
parameters = SortedDict() |
||||
if len(self.config.access_token) > 0: |
||||
self.config.refresh_role() |
||||
parameters['Signature']=self.config.access_token |
||||
parameters['AWSAccessKeyId'] = self.config.access_key |
||||
parameters['Version'] = self.Version |
||||
parameters['SignatureVersion'] = self.SignatureVersion |
||||
parameters['Action'] = Action |
||||
parameters['Timestamp'] = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) |
||||
if DomainName: |
||||
parameters['DomainName'] = DomainName |
||||
parameters['Signature'] = self.sign_request(parameters) |
||||
parameters.keys_return_lowercase = False |
||||
uri_params = urllib.urlencode(parameters) |
||||
request = {} |
||||
request['uri_params'] = uri_params |
||||
request['parameters'] = parameters |
||||
return request |
||||
|
||||
def sign_request(self, parameters): |
||||
h = "" |
||||
parameters.keys_sort_lowercase = True |
||||
parameters.keys_return_lowercase = False |
||||
for key in parameters: |
||||
h += "%s%s" % (key, parameters[key]) |
||||
#debug("SignRequest: %s" % h) |
||||
return base64.encodestring(hmac.new(self.config.secret_key, h, sha).digest()).strip() |
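# SignatureVersion 1 in a nutshell: concatenate each parameter name |
# and value (names sorted case-insensitively), HMAC-SHA1 the result |
# with the secret key and base64-encode it. For example the |
# parameters {'Action': 'ListDomains', 'Version': '2007-11-07'} |
# sign the string "ActionListDomainsVersion2007-11-07". |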
||||
|
||||
def get_connection(self): |
||||
if self.config.proxy_host != "": |
||||
return httplib.HTTPConnection(self.config.proxy_host, self.config.proxy_port) |
||||
else: |
||||
if self.config.use_https: |
||||
return httplib.HTTPSConnection(self.config.simpledb_host) |
||||
else: |
||||
return httplib.HTTPConnection(self.config.simpledb_host) |
||||
|
||||
def format_uri(self, uri_params): |
||||
if self.config.proxy_host != "": |
||||
uri = "http://%s/?%s" % (self.config.simpledb_host, uri_params) |
||||
else: |
||||
uri = "/?%s" % uri_params |
||||
#debug('format_uri(): ' + uri) |
||||
return uri |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,66 +0,0 @@
|
||||
## Amazon S3 manager |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
from BidirMap import BidirMap |
||||
import Utils |
||||
|
||||
class SortedDictIterator(object): |
||||
def __init__(self, sorted_dict, keys): |
||||
self.sorted_dict = sorted_dict |
||||
self.keys = keys |
||||
|
||||
def next(self): |
||||
try: |
||||
return self.keys.pop(0) |
||||
except IndexError: |
||||
raise StopIteration |
||||
|
||||
class SortedDict(dict): |
||||
def __init__(self, mapping = {}, ignore_case = True, **kwargs): |
||||
""" |
||||
WARNING: SortedDict() with ignore_case==True will |
||||
drop entries differing only in capitalisation! |
||||
Eg: SortedDict({'auckland':1, 'Auckland':2}).keys() => ['Auckland'] |
||||
With ignore_case==False it's all right |
||||
""" |
||||
dict.__init__(self, mapping, **kwargs) |
||||
self.ignore_case = ignore_case |
||||
|
||||
def keys(self): |
||||
keys = dict.keys(self) |
||||
if self.ignore_case: |
||||
# Translation map |
||||
xlat_map = BidirMap() |
||||
for key in keys: |
||||
xlat_map[key.lower()] = key |
||||
# Lowercase keys |
||||
lc_keys = xlat_map.keys() |
||||
lc_keys.sort() |
||||
return [xlat_map[k] for k in lc_keys] |
||||
else: |
||||
keys.sort() |
||||
return keys |
||||
|
||||
def __iter__(self): |
||||
return SortedDictIterator(self, self.keys()) |
||||
|
||||
|
||||
|
||||
if __name__ == "__main__": |
||||
d = { 'AWS' : 1, 'Action' : 2, 'america' : 3, 'Auckland' : 4, 'America' : 5 } |
||||
sd = SortedDict(d) |
||||
print "Wanted: Action, america, Auckland, AWS, [ignore case]" |
||||
print "Got: ", |
||||
for key in sd: |
||||
print "%s," % key, |
||||
print " [used: __iter__()]" |
||||
d = SortedDict(d, ignore_case = False) |
||||
print "Wanted: AWS, Action, Auckland, america, [case sensitive]" |
||||
print "Got: ", |
||||
for key in d.keys(): |
||||
print "%s," % key, |
||||
print " [used: keys()]" |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,462 +0,0 @@
|
||||
## Amazon S3 manager |
||||
## Author: Michal Ludvig <michal@logix.cz> |
||||
## http://www.logix.cz/michal |
||||
## License: GPL Version 2 |
||||
|
||||
import datetime |
||||
import os |
||||
import sys |
||||
import time |
||||
import re |
||||
import string |
||||
import random |
||||
import rfc822 |
||||
import hmac |
||||
import base64 |
||||
import errno |
||||
import urllib |
||||
|
||||
from logging import debug, info, warning, error |
||||
|
||||
|
||||
import Config |
||||
import Exceptions |
||||
|
||||
# hashlib backported to python 2.4 / 2.5 is not compatible with hmac! |
||||
if sys.version_info[0] == 2 and sys.version_info[1] < 6: |
||||
from md5 import md5 |
||||
import sha as sha1 |
||||
else: |
||||
from hashlib import md5, sha1 |
||||
|
||||
try: |
||||
import xml.etree.ElementTree as ET |
||||
except ImportError: |
||||
import elementtree.ElementTree as ET |
||||
from xml.parsers.expat import ExpatError |
||||
|
||||
__all__ = [] |
||||
def parseNodes(nodes): |
||||
## WARNING: Ignores text nodes from mixed xml/text. |
||||
## For instance <tag1>some text<tag2>other text</tag2></tag1> |
||||
## will have its "some text" node ignored |
||||
retval = [] |
||||
for node in nodes: |
||||
retval_item = {} |
||||
for child in node.getchildren(): |
||||
name = child.tag |
||||
if child.getchildren(): |
||||
retval_item[name] = parseNodes([child]) |
||||
else: |
||||
retval_item[name] = node.findtext(".//%s" % child.tag) |
||||
retval.append(retval_item) |
||||
return retval |
||||
__all__.append("parseNodes") |
||||
|
||||
def stripNameSpace(xml): |
||||
""" |
||||
stripNameSpace(xml) -- remove top-level AWS namespace |
||||
""" |
||||
r = re.compile('^(<?[^>]+?>\s?)(<\w+) xmlns=[\'"](http://[^\'"]+)[\'"](.*)', re.MULTILINE) |
||||
if r.match(xml): |
||||
xmlns = r.match(xml).groups()[2] |
||||
xml = r.sub("\\1\\2\\4", xml) |
||||
else: |
||||
xmlns = None |
||||
return xml, xmlns |
||||
__all__.append("stripNameSpace") |
||||
|
||||
def getTreeFromXml(xml): |
||||
xml, xmlns = stripNameSpace(xml) |
||||
try: |
||||
tree = ET.fromstring(xml) |
||||
if xmlns: |
||||
tree.attrib['xmlns'] = xmlns |
||||
return tree |
||||
except ExpatError, e: |
||||
error(e) |
||||
raise Exceptions.ParameterError("Bucket contains invalid filenames. Please run: s3cmd fixbucket s3://your-bucket/") |
||||
__all__.append("getTreeFromXml") |
||||
|
||||
def getListFromXml(xml, node): |
||||
tree = getTreeFromXml(xml) |
||||
nodes = tree.findall('.//%s' % (node)) |
||||
return parseNodes(nodes) |
||||
__all__.append("getListFromXml") |
||||
|
||||
def getDictFromTree(tree): |
||||
ret_dict = {} |
||||
for child in tree.getchildren(): |
||||
if child.getchildren(): |
||||
## Complex-type child. Recurse |
||||
content = getDictFromTree(child) |
||||
else: |
||||
content = child.text |
||||
if ret_dict.has_key(child.tag): |
||||
if not type(ret_dict[child.tag]) == list: |
||||
ret_dict[child.tag] = [ret_dict[child.tag]] |
||||
ret_dict[child.tag].append(content or "") |
||||
else: |
||||
ret_dict[child.tag] = content or "" |
||||
return ret_dict |
||||
__all__.append("getDictFromTree") |
||||
|
||||
def getTextFromXml(xml, xpath): |
||||
tree = getTreeFromXml(xml) |
||||
if tree.tag.endswith(xpath): |
||||
return tree.text |
||||
else: |
||||
return tree.findtext(xpath) |
||||
__all__.append("getTextFromXml") |
||||
|
||||
def getRootTagName(xml): |
||||
tree = getTreeFromXml(xml) |
||||
return tree.tag |
||||
__all__.append("getRootTagName") |
||||
|
||||
def xmlTextNode(tag_name, text): |
||||
el = ET.Element(tag_name) |
||||
el.text = unicode(text) |
||||
return el |
||||
__all__.append("xmlTextNode") |
||||
|
||||
def appendXmlTextNode(tag_name, text, parent): |
||||
""" |
||||
Creates a new <tag_name> Node and sets |
||||
its content to 'text'. Then appends the |
||||
created Node to the given 'parent' element. |
||||
Returns the newly created Node. |
||||
""" |
||||
el = xmlTextNode(tag_name, text) |
||||
parent.append(el) |
||||
return el |
||||
__all__.append("appendXmlTextNode") |
||||
|
||||
def dateS3toPython(date): |
||||
date = re.compile("(\.\d*)?Z").sub(".000Z", date) |
||||
return time.strptime(date, "%Y-%m-%dT%H:%M:%S.000Z") |
||||
__all__.append("dateS3toPython") |
||||
|
||||
def dateS3toUnix(date): |
||||
## FIXME: This should be timezone-aware. |
||||
## Currently the argument to strptime() is GMT but mktime() |
||||
## treats it as "localtime". Anyway... |
||||
return time.mktime(dateS3toPython(date)) |
||||
__all__.append("dateS3toUnix") |
||||
|
||||
def dateRFC822toPython(date): |
||||
return rfc822.parsedate(date) |
||||
__all__.append("dateRFC822toPython") |
||||
|
||||
def dateRFC822toUnix(date): |
||||
return time.mktime(dateRFC822toPython(date)) |
||||
__all__.append("dateRFC822toUnix") |
||||
|
||||
def formatSize(size, human_readable = False, floating_point = False): |
||||
size = floating_point and float(size) or int(size) |
||||
if human_readable: |
||||
coeffs = ['k', 'M', 'G', 'T'] |
||||
coeff = "" |
||||
while size > 2048: |
||||
size /= 1024 |
||||
coeff = coeffs.pop(0) |
||||
return (size, coeff) |
||||
else: |
||||
return (size, "") |
||||
__all__.append("formatSize") |
||||
|
||||
def formatDateTime(s3timestamp): |
||||
try: |
||||
import pytz |
||||
timezone = pytz.timezone(os.environ.get('TZ', 'UTC')) |
||||
tz = pytz.timezone('UTC') |
||||
## Can't unpack args and follow that with kwargs in python 2.5 |
||||
## So we pass them all as kwargs |
||||
params = zip(('year', 'month', 'day', 'hour', 'minute', 'second', 'tzinfo'), |
||||
dateS3toPython(s3timestamp)[0:6] + (tz,)) |
||||
params = dict(params) |
||||
utc_dt = datetime.datetime(**params) |
||||
dt_object = utc_dt.astimezone(timezone) |
||||
except ImportError: |
||||
dt_object = datetime.datetime(*dateS3toPython(s3timestamp)[0:6]) |
||||
return dt_object.strftime("%Y-%m-%d %H:%M") |
||||
__all__.append("formatDateTime") |
||||
|
||||
def convertTupleListToDict(list): |
||||
retval = {} |
||||
for tuple in list: |
||||
retval[tuple[0]] = tuple[1] |
||||
return retval |
||||
__all__.append("convertTupleListToDict") |
||||
|
||||
_rnd_chars = string.ascii_letters+string.digits |
||||
_rnd_chars_len = len(_rnd_chars) |
||||
def rndstr(len): |
||||
retval = "" |
||||
while len > 0: |
||||
retval += _rnd_chars[random.randint(0, _rnd_chars_len-1)] |
||||
len -= 1 |
||||
return retval |
||||
__all__.append("rndstr") |
||||
|
||||
def mktmpsomething(prefix, randchars, createfunc): |
||||
old_umask = os.umask(0077) |
||||
tries = 5 |
||||
while tries > 0: |
||||
dirname = prefix + rndstr(randchars) |
||||
try: |
||||
createfunc(dirname) |
||||
break |
||||
except OSError, e: |
||||
if e.errno != errno.EEXIST: |
||||
os.umask(old_umask) |
||||
raise |
||||
tries -= 1 |
||||
|
||||
os.umask(old_umask) |
||||
return dirname |
||||
__all__.append("mktmpsomething") |
||||
|
||||
def mktmpdir(prefix = "/tmp/tmpdir-", randchars = 10): |
||||
return mktmpsomething(prefix, randchars, os.mkdir) |
||||
__all__.append("mktmpdir") |
||||
|
||||
def mktmpfile(prefix = "/tmp/tmpfile-", randchars = 20): |
||||
createfunc = lambda filename : os.close(os.open(filename, os.O_CREAT | os.O_EXCL)) |
||||
return mktmpsomething(prefix, randchars, createfunc) |
||||
__all__.append("mktmpfile") |
||||
|
||||
def hash_file_md5(filename): |
||||
h = md5() |
||||
f = open(filename, "rb") |
||||
while True: |
||||
# Hash 32kB chunks |
||||
data = f.read(32*1024) |
||||
if not data: |
||||
break |
||||
h.update(data) |
||||
f.close() |
||||
return h.hexdigest() |
||||
__all__.append("hash_file_md5") |
||||
|
||||
def mkdir_with_parents(dir_name): |
||||
""" |
||||
mkdir_with_parents(dst_dir) |
||||
|
||||
Create directory 'dir_name' with all parent directories |
||||
|
||||
Returns True on success, False otherwise. |
||||
""" |
||||
pathmembers = dir_name.split(os.sep) |
||||
tmp_stack = [] |
||||
while pathmembers and not os.path.isdir(os.sep.join(pathmembers)): |
||||
tmp_stack.append(pathmembers.pop()) |
||||
while tmp_stack: |
||||
pathmembers.append(tmp_stack.pop()) |
||||
cur_dir = os.sep.join(pathmembers) |
||||
try: |
||||
debug("mkdir(%s)" % cur_dir) |
||||
os.mkdir(cur_dir) |
||||
except (OSError, IOError), e: |
||||
warning("%s: can not make directory: %s" % (cur_dir, e.strerror)) |
||||
return False |
||||
except Exception, e: |
||||
warning("%s: %s" % (cur_dir, e)) |
||||
return False |
||||
return True |
||||
__all__.append("mkdir_with_parents") |
||||
|
||||
def unicodise(string, encoding = None, errors = "replace"): |
||||
""" |
||||
Convert 'string' to Unicode or raise an exception. |
||||
""" |
||||
|
||||
if not encoding: |
||||
encoding = Config.Config().encoding |
||||
|
||||
if type(string) == unicode: |
||||
return string |
||||
debug("Unicodising %r using %s" % (string, encoding)) |
||||
try: |
||||
return string.decode(encoding, errors) |
||||
except UnicodeDecodeError: |
||||
raise UnicodeDecodeError(encoding, string, 0, len(string), "Conversion to unicode failed") |
||||
__all__.append("unicodise") |
||||
|
||||
def deunicodise(string, encoding = None, errors = "replace"): |
||||
""" |
||||
Convert unicode 'string' to <type str>, by default replacing |
||||
all invalid characters with '?' or raise an exception. |
||||
""" |
||||
|
||||
if not encoding: |
||||
encoding = Config.Config().encoding |
||||
|
||||
if type(string) != unicode: |
||||
return str(string) |
||||
debug("DeUnicodising %r using %s" % (string, encoding)) |
||||
try: |
||||
return string.encode(encoding, errors) |
||||
except UnicodeEncodeError: |
||||
raise UnicodeEncodeError(encoding, string, 0, len(string), "Conversion from unicode failed") |
||||
__all__.append("deunicodise") |
||||
|
||||
def unicodise_safe(string, encoding = None): |
||||
""" |
||||
Convert 'string' to Unicode according to current encoding |
||||
and replace all invalid characters with '?' |
||||
""" |
||||
|
||||
return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?') |
||||
__all__.append("unicodise_safe") |
||||
|
||||
def replace_nonprintables(string): |
||||
""" |
||||
replace_nonprintables(string) |
||||
|
||||
Replaces all non-printable characters 'ch' in 'string' |
||||
where ord(ch) <= 26 with ^@, ^A, ... ^Z |
||||
""" |
||||
new_string = "" |
||||
modified = 0 |
||||
for c in string: |
||||
o = ord(c) |
||||
if (o <= 31): |
||||
new_string += "^" + chr(ord('@') + o) |
||||
modified += 1 |
||||
elif (o == 127): |
||||
new_string += "^?" |
||||
modified += 1 |
||||
else: |
||||
new_string += c |
||||
if modified and Config.Config().urlencoding_mode != "fixbucket": |
||||
warning("%d non-printable characters replaced in: %s" % (modified, new_string)) |
||||
return new_string |
||||
__all__.append("replace_nonprintables") |
||||
|
||||
def sign_string(string_to_sign): |
||||
"""Sign a string with the secret key, returning base64 encoded results. |
||||
The secret key from the current Config is always used. |
||||
|
||||
Useful for REST authentication. See http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html |
||||
""" |
||||
signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip() |
||||
return signature |
||||
__all__.append("sign_string") |
||||
|
||||
def sign_url(url_to_sign, expiry): |
||||
"""Sign a URL in s3://bucket/object form with the given expiry |
||||
time. The object will be accessible via the signed URL until the |
||||
AWS key and secret are revoked or the expiry time is reached, even |
||||
if the object is otherwise private. |
||||
|
||||
See: http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html |
||||
""" |
||||
return sign_url_base( |
||||
bucket = url_to_sign.bucket(), |
||||
object = url_to_sign.object(), |
||||
expiry = expiry |
||||
) |
||||
__all__.append("sign_url") |
||||
|
||||
def sign_url_base(**parms): |
||||
"""Shared implementation of sign_url methods. Takes a hash of 'bucket', 'object' and 'expiry' as args.""" |
||||
parms['expiry']=time_to_epoch(parms['expiry']) |
||||
parms['access_key']=Config.Config().access_key |
||||
debug("Expiry interpreted as epoch time %s", parms['expiry']) |
||||
signtext = 'GET\n\n\n%(expiry)d\n/%(bucket)s/%(object)s' % parms |
||||
debug("Signing plaintext: %r", signtext) |
||||
parms['sig'] = urllib.quote_plus(sign_string(signtext)) |
||||
debug("Urlencoded signature: %s", parms['sig']) |
||||
return "http://%(bucket)s.s3.amazonaws.com/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s" % parms |
||||
|
||||
def time_to_epoch(t): |
||||
"""Convert time specified in a variety of forms into UNIX epoch time. |
||||
Accepts datetime.datetime, int, anything that has a strftime() method, and standard time 9-tuples |
||||
""" |
||||
if isinstance(t, int): |
||||
# Already an int |
||||
return t |
||||
elif isinstance(t, tuple) or isinstance(t, time.struct_time): |
||||
# Assume it's a time 9-tuple |
||||
return int(time.mktime(t)) |
||||
elif hasattr(t, 'timetuple'): |
||||
# Looks like a datetime object or compatible |
||||
return int(time.mktime(t.timetuple())) |
||||
elif hasattr(t, 'strftime'): |
||||
# Looks like the object supports the standard strftime() |
||||
return int(t.strftime('%s')) |
||||
elif isinstance(t, str) or isinstance(t, unicode): |
||||
# See if it's a string representation of an epoch |
||||
try: |
||||
return int(t) |
||||
except ValueError: |
||||
# Try to parse it as a timestamp string |
||||
try: |
||||
return int(time.mktime(time.strptime(t))) |
||||
except ValueError, ex: |
||||
# Will fall through |
||||
debug("Failed to parse date with strptime: %s", ex) |
||||
pass |
||||
raise Exceptions.ParameterError('Unable to convert %r to an epoch time. Pass an epoch time. Try `date -d \'now + 1 year\' +%%s` (shell) or time.mktime (Python).' % t) |
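# Illustration of the accepted forms: |
#   time_to_epoch(1234567890)    -> 1234567890 |
#   time_to_epoch("1234567890")  -> 1234567890 |
#   time_to_epoch(datetime.datetime(2014, 1, 1)) -> the corresponding |
#       epoch via mktime(), i.e. interpreted as local time |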
||||
|
||||
|
||||
def check_bucket_name(bucket, dns_strict = True): |
||||
if dns_strict: |
||||
invalid = re.search("([^a-z0-9\.-])", bucket) |
||||
if invalid: |
||||
raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: lowercase us-ascii letters (a-z), digits (0-9), dot (.) and hyphen (-)." % (bucket, invalid.groups()[0])) |
||||
else: |
||||
invalid = re.search("([^A-Za-z0-9\._-])", bucket) |
||||
if invalid: |
||||
raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: us-ascii letters (a-z, A-Z), digits (0-9), dot (.), hyphen (-) and underscore (_)." % (bucket, invalid.groups()[0])) |
||||
|
||||
if len(bucket) < 3: |
||||
raise Exceptions.ParameterError("Bucket name '%s' is too short (min 3 characters)" % bucket) |
||||
if len(bucket) > 255: |
||||
raise Exceptions.ParameterError("Bucket name '%s' is too long (max 255 characters)" % bucket) |
||||
if dns_strict: |
||||
if len(bucket) > 63: |
||||
raise Exceptions.ParameterError("Bucket name '%s' is too long (max 63 characters)" % bucket) |
||||
if re.search("-\.", bucket): |
||||
raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '-.' for DNS compatibility" % bucket) |
||||
if re.search("\.\.", bucket): |
||||
raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '..' for DNS compatibility" % bucket) |
||||
if not re.search("^[0-9a-z]", bucket): |
||||
raise Exceptions.ParameterError("Bucket name '%s' must start with a letter or a digit" % bucket) |
||||
if not re.search("[0-9a-z]$", bucket): |
||||
raise Exceptions.ParameterError("Bucket name '%s' must end with a letter or a digit" % bucket) |
||||
return True |
||||
__all__.append("check_bucket_name") |
||||
|
||||
def check_bucket_name_dns_conformity(bucket): |
||||
try: |
||||
return check_bucket_name(bucket, dns_strict = True) |
||||
except Exceptions.ParameterError: |
||||
return False |
||||
__all__.append("check_bucket_name_dns_conformity") |
||||
|
||||
def getBucketFromHostname(hostname): |
||||
""" |
||||
bucket, success = getBucketFromHostname(hostname) |
||||
|
||||
Only works for hostnames derived from bucket names |
||||
using Config.host_bucket pattern. |
||||
|
||||
Returns bucket name and a boolean success flag. |
||||
""" |
||||
|
||||
# Create RE pattern from Config.host_bucket |
||||
pattern = Config.Config().host_bucket % { 'bucket' : '(?P<bucket>.*)' } |
||||
m = re.match(pattern, hostname) |
||||
if not m: |
||||
return (hostname, False) |
||||
return m.groups()[0], True |
||||
__all__.append("getBucketFromHostname") |
||||
|
||||
def getHostnameFromBucket(bucket): |
||||
return Config.Config().host_bucket % { 'bucket' : bucket } |
||||
__all__.append("getHostnameFromBucket") |
||||
|
||||
# vim:et:ts=4:sts=4:ai |
@ -1,560 +0,0 @@
|
||||
# -*- coding: utf-8 -*- |
||||
import sys |
||||
import os |
||||
from os.path import dirname, abspath, join |
||||
from datetime import date |
||||
import json |
||||
import codecs |
||||
import shutil |
||||
import fnmatch |
||||
import re |
||||
import collections |
||||
from fabric.api import env, settings, hide, local, lcd |
||||
from fabric.decorators import task |
||||
from fabric.operations import prompt |
||||
from fabric.utils import puts, abort, warn |
||||
|
||||
env.debug = False |
||||
|
||||
# |
||||
# Set paths |
||||
# |
||||
env.project_path = dirname(dirname(abspath(__file__))) |
||||
env.sites_path = dirname(env.project_path) |
||||
env.build_path = join(env.project_path, 'build') |
||||
env.source_path = join(env.project_path, 'source') |
||||
|
||||
# |
||||
# Read config.json and update vars |
||||
# |
||||
with open(join(env.project_path, 'config.json')) as fp: |
||||
s = fp.read() |
||||
s = re.sub(r'//.*', '', s) |
||||
s = re.sub(r'/\*.*?\*/', '', s, flags=re.DOTALL) |
||||
CONFIG = json.loads(s, object_pairs_hook=collections.OrderedDict) |
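# The (comment-stripped) config.json is expected to provide at least |
# "name" (used for the cdn path below) and "version" (used in the |
# banner); hypothetical example: |
#   { |
#       "name": "TimelineJS",  // JS-style comments are stripped above |
#       "version": "2.0.0"     /* before the JSON is parsed */ |
#   } |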
||||
|
||||
today = date.today() |
||||
CONFIG['date'] = today |
||||
CONFIG['year'] = today.year |
||||
|
||||
# Path to cdn deployment |
||||
env.cdn_path = abspath(join( |
||||
env.sites_path, 'cdn.knightlab.com', 'app', 'libs', CONFIG['name'])) |
||||
|
||||
# Path to s3cmd.cnf in secrets repository |
||||
env.s3cmd_cfg = join(env.sites_path, 'secrets', 's3cmd.cfg') |
||||
|
||||
# Banner for the top of CSS and JS files |
||||
BANNER = """ |
||||
/* |
||||
TimelineJS - ver. %(version)s - %(date)s |
||||
Copyright (c) 2012-%(year)s Northwestern University |
||||
a project of the Northwestern University Knight Lab, originally created by Zach Wise |
||||
https://github.com/NUKnightLab/TimelineJS |
||||
This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. |
||||
If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
||||
*/ |
||||
""".lstrip() |


def _check_path(path):
    """Check for the existence of a path"""
    if not os.path.exists(path):
        abort('Could not find %s.' % path)


def _clean(path):
    """Delete directory contents"""
    path = os.path.abspath(path)
    puts('clean: %s' % path)

    if os.path.exists(path):
        if os.path.isdir(path):
            for item in [join(path, x) for x in os.listdir(path)]:
                if os.path.isfile(item):
                    os.unlink(item)
                else:
                    shutil.rmtree(item)
        else:
            os.unlink(path)


def _find_file(file_name, cur_dir):
    """Find a file. Look first in cur_dir, then env.source_path"""
    file_path = os.path.abspath(join(cur_dir, file_name))
    if os.path.exists(file_path):
        return file_path
    for dirpath, dirs, files in os.walk(env.source_path):
        if file_name in files:
            return os.path.join(dirpath, file_name)
    raise Exception('Could not find "%s" in %s' % (file_name, env.source_path))


def _match_files(src, regex):
    """Return relative filepaths matching regex in src"""
    re_match = re.compile(regex)

    for (dirpath, dirnames, filenames) in os.walk(src):
        rel_dir = _relpath(src, dirpath)

        for f in filter(lambda x: not x.startswith('.'), filenames):
            rel_path = join(rel_dir, f)
            if re_match.match(rel_path):
                yield rel_path
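
# Editor's illustration (not part of the original file), with hypothetical paths:
#
#   list(_match_files('/sites/TimelineJS/source', '.*\.js$'))
#       -> ['js/timeline.js', 'js/locale/en.js', ...]
#
# Paths are yielded relative to src, and hidden dot-files are skipped.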


def _makedirs(path, isfile=False):
    """Make directories in path"""
    if isfile:
        path = dirname(path)
    if not os.path.exists(path):
        os.makedirs(path)


def _open_file(path, mode, encoding=''):
    """Open a file with character encoding detection"""
    if mode.startswith('r'):
        # Peek at just the first few bytes to detect a UTF-8 BOM
        bytes = min(32, os.path.getsize(path))

        with open(path, 'rb') as fd:
            raw = fd.read(bytes)
        if raw.startswith(codecs.BOM_UTF8):
            encoding = 'utf-8-sig'
        else:
            encoding = 'utf-8'

    return codecs.open(path, mode, encoding)


def _relpath(root_path, path):
    """Get relative path from root_path"""
    if root_path == path:
        return ''
    return os.path.relpath(path, root_path)

#
# tagging
#

def _get_tags():
    """Get list of current tags from the repo"""
    tags = os.popen('cd %(project_path)s;git tag' % env).read().strip()
    if tags:
        return [x.strip() for x in tags.split('\n')]
    return []


def _last_version_tag():
    """Get the last version tag"""
    re_num = re.compile('[^0-9.]')

    tags = sorted([map(int, re_num.sub('', t).split('.')) for t in _get_tags()])
    if tags:
        return '.'.join(map(str, tags[-1]))
    return None
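
# Editor's note (not part of the original file): the numeric sort above avoids
# the lexicographic trap where "2.9.0" would sort after "2.10.0" as a string:
#
#   tags:      ["2.9.0", "2.10.0", "2.33.1"]
#   as lists:  [[2, 9, 0], [2, 10, 0], [2, 33, 1]]
#   sorted()[-1] -> [2, 33, 1] -> "2.33.1"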


def _get_version_tag():
    """Get a new version tag from user"""
    tags = _get_tags()
    puts('This project has the following tags:')
    puts(tags)

    while True:
        version = prompt("Enter a new version number: ").strip()

        if not re.match(r'^[0-9]+\.[0-9]+\.[0-9]+$', version):
            warn('Invalid version number, must be in the format:' \
                ' major.minor.revision')
        elif version in tags:
            warn('Invalid version number, tag already exists')
        else:
            break

    return version


def _render_templates(src_path, dst_path):
    """Render flask templates"""
    puts('render: %s >> %s' % (src_path, dst_path))
    from website import app
    from flask import g, request

    compiled_includes = []

    for f in _match_files(src_path, '^[^_].*$'):
        with app.app.test_request_context():
            g.compile_includes = True
            g.compiled_includes = compiled_includes
            content = app.catch_all(f)
            compiled_includes = g.compiled_includes

        page_file = join(dst_path, f)
        puts(' %s' % page_file)
        _makedirs(page_file, isfile=True)
        with open(page_file, 'w') as fd:
            fd.write(content.encode('utf-8'))


#
# build steps
#

def banner(conf):
    """
    Place banner at top of js and css files in-place.
    """
    _banner_text = BANNER % CONFIG

    def _do(file_path):
        puts(' %s' % file_path)
        with _open_file(file_path, 'r+') as fd:
            s = fd.read()
            fd.seek(0)
            fd.write(_banner_text+s)

    for r in conf:
        src = join(env.project_path, r)
        puts('banner: %s' % src)
        if os.path.isdir(src):
            for f in _match_files(src, '.*\.(css|js)$'):
                _do(join(src, f))
        else:
            _do(src)


def concat(conf):
    """
    Concatenate files
    """
    for r in conf:
        dst = join(env.project_path, r['dst'])
        src = map(lambda x: join(env.project_path, x), r['src'])
        _makedirs(dst, isfile=True)
        local('cat %s > %s' % (' '.join(src), dst))


def copy(conf):
    """
    Copy files
    """
    def _do(src_path, dst_path):
        puts(' %s' % src_path)
        _makedirs(dst_path, isfile=True)
        shutil.copy2(src_path, dst_path)

    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        puts('copy: %s >> %s' % (src, dst))
        if os.path.isdir(src):
            regex = r['regex'] if 'regex' in r else '.*'
            for f in _match_files(src, regex):
                _do(join(src, f), join(dst, f))
        else:
            _do(src, dst)


def lessc(conf):
    """
    Compile LESS
    """
    def _do(src_path, dst_path):
        _makedirs(dst_path, isfile=True)
        with hide('warnings'), settings(warn_only=True):
            result = local('lessc -x %s %s' % (src_path, dst_path))
        if result.failed:
            abort('Error running lessc on %s' % src_path)

    if not os.popen('which lessc').read().strip():
        abort('You must install the LESS compiler')

    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])

        if os.path.isdir(src):
            regex = r['regex'] if 'regex' in r else '.*'
            for f in _match_files(src, regex):
                (base, ext) = os.path.splitext(join(dst, f))
                _do(join(src, f), base+".css")
        else:
            _do(src, dst)


def minify(conf):
    """
    Minify javascript
    """
    def _do(src_path, dst_path, opt):
        local('uglifyjs %s --output %s %s' % (opt, dst_path, src_path))

    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        puts('minify: %s >> %s' % (src, dst))

        opt = r['opt'] if ('opt' in r) else ''
        out_ext = r['ext'] if ('ext' in r) else ''

        if os.path.isdir(src):
            _makedirs(dst, isfile=False)
            for f in _match_files(src, '.*\.js'):
                (base, in_ext) = os.path.splitext(join(dst, f))
                _do(join(src, f), base+out_ext+in_ext, opt)
        else:
            _makedirs(dst, isfile=True)
            _do(src, dst, opt)


def process(conf):
    """
    Process codekit style imports
    """
    _re_prepend = re.compile(r'@codekit-prepend\s*[\'"](?P<file>.+)[\'"]\s*;')
    _re_append = re.compile(r'@codekit-append\s*[\'"](?P<file>.+)[\'"]\s*;')

    def _mark(f_out, path):
        f_out.write("""
/* **********************************************
Begin %s
********************************************** */

""" % os.path.basename(path))

    def _do(f_out, path, imported):
        s = ''
        dirpath = dirname(path)
        with _open_file(path, 'r') as f_in:
            s = f_in.read()

        # Write out prepends
        for m in _re_prepend.finditer(s):
            file_path = _find_file(m.group('file'), dirpath)
            if not file_path in imported:
                puts(' prepend: %s' % file_path)
                imported.append(file_path)
                _do(f_out, file_path, imported)

        # Write out file
        _mark(f_out, os.path.basename(path))
        f_out.write(s+'\n')

        # Write out appends
        for m in _re_append.finditer(s):
            file_path = _find_file(m.group('file'), dirpath)
            if not file_path in imported:
                puts(' append: %s' % file_path)
                imported.append(file_path)
                _do(f_out, file_path, imported)

    for r in conf:
        src = join(env.project_path, r['src'])
        dst = join(env.project_path, r['dst'])
        puts('process: %s >> %s' % (src, dst))

        _makedirs(dst, isfile=True)
        with _open_file(dst, 'w', 'utf-8') as out_file:
            _do(out_file, src, [])


def usemin(conf):
    """
    Replaces usemin-style build blocks with a reference to a single file.

    Build blocks take the format:

        <!-- build:type path -->
        (references to unoptimized files go here)
        <!-- endbuild -->

    where:
        type = css | js
        path = reference to the optimized file

    Any leading backslashes will be stripped, but the path will otherwise
    be used as it appears within the opening build tag.
    """
    _re_build = re.compile(r"""
        <!--\s*build:(?P<type>css|js)\s+(?P<dest>\S+)\s*-->
        .*?
        <!--\s*endbuild\s*-->
        """,
        re.VERBOSE | re.DOTALL)

    def _sub(m):
        type = m.group('type')
        dest = m.group('dest').strip('\\')

        if type == 'css':
            return '<link rel="stylesheet" href="%s">' % dest
        elif type == 'js':
            return '<script type="text/javascript" src="%s"></script>' % dest
        else:
            warn('Unknown build block type (%s)' % type)
            return m.group(0)

    def _do(file_path):
        with _open_file(file_path, 'r+') as fd:
            s = fd.read()
            (new_s, n) = _re_build.subn(_sub, s)
            if n:
                puts(' (%d) %s' % (n, file_path))
                fd.seek(0)
                fd.write(new_s)
                fd.truncate()

    for r in conf:
        src = join(env.project_path, r)
        puts('usemin: %s' % src)

        if os.path.isdir(src):
            for f in _match_files(src, '.*\.html'):
                _do(join(src, f))
        else:
            _do(src)


#
# tasks
#

@task
def debug():
    """Setup debug settings"""
    warn('DEBUG IS ON:')
    CONFIG['deploy']['bucket'] = 'test.knilab.com'
    CONFIG['version'] = '0.0.0'

    print 'deploy.bucket:', CONFIG['deploy']['bucket']
    print 'version:', CONFIG['version']
    print 'version tagging is OFF'
    print ''

    doit = prompt("Continue? (y/n): ").strip()
    if doit != 'y':
        abort('Stopped')

    env.debug = True


@task
def serve():
    """Run the local version of the documentation site (timeline.knightlab.com)"""
    with lcd(join(env.project_path)):
        local('python website/app.py')


@task
def build():
    """Build version"""
    # Get build config
    if not 'build' in CONFIG:
        abort('Could not find "build" in config file')

    # Determine version
    if not 'version' in CONFIG:
        CONFIG['version'] = _last_version_tag()
    if not CONFIG['version']:
        abort('No available version tag')

    print 'Building version %(version)s...' % CONFIG

    # Clean build directory
    _clean(env.build_path)

    for key, param in CONFIG['build'].iteritems():
        getattr(sys.modules[__name__], key)(param)


@task
def stage():
    """
    Build version, copy to local cdn repository, tag last commit
    """
    if not 'stage' in CONFIG:
        abort('Could not find "stage" in config file')

    # Make sure cdn exists
    _check_path(dirname(env.cdn_path))

    # Ask user for a new version
    if not env.debug:
        CONFIG['version'] = _get_version_tag()

    build()

    cdn_path = join(env.cdn_path, CONFIG['version'])

    _clean(cdn_path)

    for r in CONFIG['stage']:
        copy([{"src": r['src'], "dst": cdn_path, "regex": r['regex']}])

    if not env.debug:
        with lcd(env.project_path):
            local('git tag %(version)s' % CONFIG)
            local('git push origin %(version)s' % CONFIG)


@task
def stage_latest():
    """
    Copy version to latest within local cdn repository
    """
    if 'version' in CONFIG:
        version = CONFIG['version']
    else:
        tags = _get_tags()
        puts('This project has the following tags:')
        puts(tags)

        while True:
            version = prompt("Which version to stage as 'latest'? ").strip()
            if not version in tags:
                warn('You must enter an existing version')
            else:
                break

    print 'stage_latest: %s' % version

    # Make sure version has been staged
    version_cdn_path = join(env.cdn_path, version)
    if not os.path.exists(version_cdn_path):
        abort("Version '%s' has not been staged" % version)

    # Stage version as latest
    latest_cdn_path = join(env.cdn_path, 'latest')
    _clean(latest_cdn_path)
    copy([{"src": version_cdn_path, "dst": latest_cdn_path}])


@task
def deploy():
    """Deploy to S3 bucket"""
    if not 'deploy' in CONFIG:
        abort('Could not find "deploy" in config file')

    # Make sure s3cmd.cfg exists
    _check_path(env.s3cmd_cfg)

    # Do we need to build anything here?!?
    #build()

    template_path = join(env.project_path, 'website', 'templates')
    deploy_path = join(env.project_path, 'build', 'website')

    _clean(deploy_path)

    # render templates and run usemin
    _render_templates(template_path, deploy_path)
    usemin([deploy_path])

    # copy static files
    copy([{
        "src": join(env.project_path, 'website', 'static'),
        "dst": join(deploy_path, 'static')
    }])

    # additional copy?
    if 'copy' in CONFIG['deploy']:
        copy(CONFIG['deploy']['copy'])

    # sync to S3
    with lcd(env.project_path):
        local('fabfile/s3cmd --config=%s sync' \
            ' --rexclude ".*/\.[^/]*$"' \
            ' --delete-removed --acl-public' \
            ' %s/ s3://%s/' \
            % (env.s3cmd_cfg, deploy_path, CONFIG['deploy']['bucket'])
        )
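
    # Editor's illustration (not part of the original file): with a hypothetical
    # bucket name "example-bucket", the call above expands to roughly
    #
    #   fabfile/s3cmd --config=<sites>/secrets/s3cmd.cfg sync \
    #       --rexclude ".*/\.[^/]*$" --delete-removed --acl-public \
    #       <project>/build/website/ s3://example-bucket/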