Beautifully crafted timelines that are easy and intuitive to use. http://timeline.knightlab.com/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
463 lines
15 KiB
463 lines
15 KiB
11 years ago
|
## Amazon S3 manager
|
||
|
## Author: Michal Ludvig <michal@logix.cz>
|
||
|
## http://www.logix.cz/michal
|
||
|
## License: GPL Version 2
|
||
|
|
||
|
import datetime
|
||
|
import os
|
||
|
import sys
|
||
|
import time
|
||
|
import re
|
||
|
import string
|
||
|
import random
|
||
|
import rfc822
|
||
|
import hmac
|
||
|
import base64
|
||
|
import errno
|
||
|
import urllib
|
||
|
|
||
|
from logging import debug, info, warning, error
|
||
|
|
||
|
|
||
|
import Config
|
||
|
import Exceptions
|
||
|
|
||
|
# hashlib backported to python 2.4 / 2.5 is not compatible with hmac!
|
||
|
if sys.version_info[0] == 2 and sys.version_info[1] < 6:
|
||
|
from md5 import md5
|
||
|
import sha as sha1
|
||
|
else:
|
||
|
from hashlib import md5, sha1
|
||
|
|
||
|
try:
|
||
|
import xml.etree.ElementTree as ET
|
||
|
except ImportError:
|
||
|
import elementtree.ElementTree as ET
|
||
|
from xml.parsers.expat import ExpatError
|
||
|
|
||
|
__all__ = []
|
||
|
def parseNodes(nodes):
|
||
|
## WARNING: Ignores text nodes from mixed xml/text.
|
||
|
## For instance <tag1>some text<tag2>other text</tag2></tag1>
|
||
|
## will be ignore "some text" node
|
||
|
retval = []
|
||
|
for node in nodes:
|
||
|
retval_item = {}
|
||
|
for child in node.getchildren():
|
||
|
name = child.tag
|
||
|
if child.getchildren():
|
||
|
retval_item[name] = parseNodes([child])
|
||
|
else:
|
||
|
retval_item[name] = node.findtext(".//%s" % child.tag)
|
||
|
retval.append(retval_item)
|
||
|
return retval
|
||
|
__all__.append("parseNodes")
|
||
|
|
||
|
def stripNameSpace(xml):
|
||
|
"""
|
||
|
removeNameSpace(xml) -- remove top-level AWS namespace
|
||
|
"""
|
||
|
r = re.compile('^(<?[^>]+?>\s?)(<\w+) xmlns=[\'"](http://[^\'"]+)[\'"](.*)', re.MULTILINE)
|
||
|
if r.match(xml):
|
||
|
xmlns = r.match(xml).groups()[2]
|
||
|
xml = r.sub("\\1\\2\\4", xml)
|
||
|
else:
|
||
|
xmlns = None
|
||
|
return xml, xmlns
|
||
|
__all__.append("stripNameSpace")
|
||
|
|
||
|
def getTreeFromXml(xml):
|
||
|
xml, xmlns = stripNameSpace(xml)
|
||
|
try:
|
||
|
tree = ET.fromstring(xml)
|
||
|
if xmlns:
|
||
|
tree.attrib['xmlns'] = xmlns
|
||
|
return tree
|
||
|
except ExpatError, e:
|
||
|
error(e)
|
||
|
raise Exceptions.ParameterError("Bucket contains invalid filenames. Please run: s3cmd fixbucket s3://your-bucket/")
|
||
|
__all__.append("getTreeFromXml")
|
||
|
|
||
|
def getListFromXml(xml, node):
|
||
|
tree = getTreeFromXml(xml)
|
||
|
nodes = tree.findall('.//%s' % (node))
|
||
|
return parseNodes(nodes)
|
||
|
__all__.append("getListFromXml")
|
||
|
|
||
|
def getDictFromTree(tree):
|
||
|
ret_dict = {}
|
||
|
for child in tree.getchildren():
|
||
|
if child.getchildren():
|
||
|
## Complex-type child. Recurse
|
||
|
content = getDictFromTree(child)
|
||
|
else:
|
||
|
content = child.text
|
||
|
if ret_dict.has_key(child.tag):
|
||
|
if not type(ret_dict[child.tag]) == list:
|
||
|
ret_dict[child.tag] = [ret_dict[child.tag]]
|
||
|
ret_dict[child.tag].append(content or "")
|
||
|
else:
|
||
|
ret_dict[child.tag] = content or ""
|
||
|
return ret_dict
|
||
|
__all__.append("getDictFromTree")
|
||
|
|
||
|
def getTextFromXml(xml, xpath):
|
||
|
tree = getTreeFromXml(xml)
|
||
|
if tree.tag.endswith(xpath):
|
||
|
return tree.text
|
||
|
else:
|
||
|
return tree.findtext(xpath)
|
||
|
__all__.append("getTextFromXml")
|
||
|
|
||
|
def getRootTagName(xml):
|
||
|
tree = getTreeFromXml(xml)
|
||
|
return tree.tag
|
||
|
__all__.append("getRootTagName")
|
||
|
|
||
|
def xmlTextNode(tag_name, text):
|
||
|
el = ET.Element(tag_name)
|
||
|
el.text = unicode(text)
|
||
|
return el
|
||
|
__all__.append("xmlTextNode")
|
||
|
|
||
|
def appendXmlTextNode(tag_name, text, parent):
|
||
|
"""
|
||
|
Creates a new <tag_name> Node and sets
|
||
|
its content to 'text'. Then appends the
|
||
|
created Node to 'parent' element if given.
|
||
|
Returns the newly created Node.
|
||
|
"""
|
||
|
el = xmlTextNode(tag_name, text)
|
||
|
parent.append(el)
|
||
|
return el
|
||
|
__all__.append("appendXmlTextNode")
|
||
|
|
||
|
def dateS3toPython(date):
|
||
|
date = re.compile("(\.\d*)?Z").sub(".000Z", date)
|
||
|
return time.strptime(date, "%Y-%m-%dT%H:%M:%S.000Z")
|
||
|
__all__.append("dateS3toPython")
|
||
|
|
||
|
def dateS3toUnix(date):
|
||
|
## FIXME: This should be timezone-aware.
|
||
|
## Currently the argument to strptime() is GMT but mktime()
|
||
|
## treats it as "localtime". Anyway...
|
||
|
return time.mktime(dateS3toPython(date))
|
||
|
__all__.append("dateS3toUnix")
|
||
|
|
||
|
def dateRFC822toPython(date):
|
||
|
return rfc822.parsedate(date)
|
||
|
__all__.append("dateRFC822toPython")
|
||
|
|
||
|
def dateRFC822toUnix(date):
|
||
|
return time.mktime(dateRFC822toPython(date))
|
||
|
__all__.append("dateRFC822toUnix")
|
||
|
|
||
|
def formatSize(size, human_readable = False, floating_point = False):
|
||
|
size = floating_point and float(size) or int(size)
|
||
|
if human_readable:
|
||
|
coeffs = ['k', 'M', 'G', 'T']
|
||
|
coeff = ""
|
||
|
while size > 2048:
|
||
|
size /= 1024
|
||
|
coeff = coeffs.pop(0)
|
||
|
return (size, coeff)
|
||
|
else:
|
||
|
return (size, "")
|
||
|
__all__.append("formatSize")
|
||
|
|
||
|
def formatDateTime(s3timestamp):
|
||
|
try:
|
||
|
import pytz
|
||
|
timezone = pytz.timezone(os.environ.get('TZ', 'UTC'))
|
||
|
tz = pytz.timezone('UTC')
|
||
|
## Can't unpack args and follow that with kwargs in python 2.5
|
||
|
## So we pass them all as kwargs
|
||
|
params = zip(('year', 'month', 'day', 'hour', 'minute', 'second', 'tzinfo'),
|
||
|
dateS3toPython(s3timestamp)[0:6] + (tz,))
|
||
|
params = dict(params)
|
||
|
utc_dt = datetime.datetime(**params)
|
||
|
dt_object = utc_dt.astimezone(timezone)
|
||
|
except ImportError:
|
||
|
dt_object = datetime.datetime(*dateS3toPython(s3timestamp)[0:6])
|
||
|
return dt_object.strftime("%Y-%m-%d %H:%M")
|
||
|
__all__.append("formatDateTime")
|
||
|
|
||
|
def convertTupleListToDict(list):
|
||
|
retval = {}
|
||
|
for tuple in list:
|
||
|
retval[tuple[0]] = tuple[1]
|
||
|
return retval
|
||
|
__all__.append("convertTupleListToDict")
|
||
|
|
||
|
_rnd_chars = string.ascii_letters+string.digits
|
||
|
_rnd_chars_len = len(_rnd_chars)
|
||
|
def rndstr(len):
|
||
|
retval = ""
|
||
|
while len > 0:
|
||
|
retval += _rnd_chars[random.randint(0, _rnd_chars_len-1)]
|
||
|
len -= 1
|
||
|
return retval
|
||
|
__all__.append("rndstr")
|
||
|
|
||
|
def mktmpsomething(prefix, randchars, createfunc):
|
||
|
old_umask = os.umask(0077)
|
||
|
tries = 5
|
||
|
while tries > 0:
|
||
|
dirname = prefix + rndstr(randchars)
|
||
|
try:
|
||
|
createfunc(dirname)
|
||
|
break
|
||
|
except OSError, e:
|
||
|
if e.errno != errno.EEXIST:
|
||
|
os.umask(old_umask)
|
||
|
raise
|
||
|
tries -= 1
|
||
|
|
||
|
os.umask(old_umask)
|
||
|
return dirname
|
||
|
__all__.append("mktmpsomething")
|
||
|
|
||
|
def mktmpdir(prefix = "/tmp/tmpdir-", randchars = 10):
|
||
|
return mktmpsomething(prefix, randchars, os.mkdir)
|
||
|
__all__.append("mktmpdir")
|
||
|
|
||
|
def mktmpfile(prefix = "/tmp/tmpfile-", randchars = 20):
|
||
|
createfunc = lambda filename : os.close(os.open(filename, os.O_CREAT | os.O_EXCL))
|
||
|
return mktmpsomething(prefix, randchars, createfunc)
|
||
|
__all__.append("mktmpfile")
|
||
|
|
||
|
def hash_file_md5(filename):
|
||
|
h = md5()
|
||
|
f = open(filename, "rb")
|
||
|
while True:
|
||
|
# Hash 32kB chunks
|
||
|
data = f.read(32*1024)
|
||
|
if not data:
|
||
|
break
|
||
|
h.update(data)
|
||
|
f.close()
|
||
|
return h.hexdigest()
|
||
|
__all__.append("hash_file_md5")
|
||
|
|
||
|
def mkdir_with_parents(dir_name):
|
||
|
"""
|
||
|
mkdir_with_parents(dst_dir)
|
||
|
|
||
|
Create directory 'dir_name' with all parent directories
|
||
|
|
||
|
Returns True on success, False otherwise.
|
||
|
"""
|
||
|
pathmembers = dir_name.split(os.sep)
|
||
|
tmp_stack = []
|
||
|
while pathmembers and not os.path.isdir(os.sep.join(pathmembers)):
|
||
|
tmp_stack.append(pathmembers.pop())
|
||
|
while tmp_stack:
|
||
|
pathmembers.append(tmp_stack.pop())
|
||
|
cur_dir = os.sep.join(pathmembers)
|
||
|
try:
|
||
|
debug("mkdir(%s)" % cur_dir)
|
||
|
os.mkdir(cur_dir)
|
||
|
except (OSError, IOError), e:
|
||
|
warning("%s: can not make directory: %s" % (cur_dir, e.strerror))
|
||
|
return False
|
||
|
except Exception, e:
|
||
|
warning("%s: %s" % (cur_dir, e))
|
||
|
return False
|
||
|
return True
|
||
|
__all__.append("mkdir_with_parents")
|
||
|
|
||
|
def unicodise(string, encoding = None, errors = "replace"):
|
||
|
"""
|
||
|
Convert 'string' to Unicode or raise an exception.
|
||
|
"""
|
||
|
|
||
|
if not encoding:
|
||
|
encoding = Config.Config().encoding
|
||
|
|
||
|
if type(string) == unicode:
|
||
|
return string
|
||
|
debug("Unicodising %r using %s" % (string, encoding))
|
||
|
try:
|
||
|
return string.decode(encoding, errors)
|
||
|
except UnicodeDecodeError:
|
||
|
raise UnicodeDecodeError("Conversion to unicode failed: %r" % string)
|
||
|
__all__.append("unicodise")
|
||
|
|
||
|
def deunicodise(string, encoding = None, errors = "replace"):
|
||
|
"""
|
||
|
Convert unicode 'string' to <type str>, by default replacing
|
||
|
all invalid characters with '?' or raise an exception.
|
||
|
"""
|
||
|
|
||
|
if not encoding:
|
||
|
encoding = Config.Config().encoding
|
||
|
|
||
|
if type(string) != unicode:
|
||
|
return str(string)
|
||
|
debug("DeUnicodising %r using %s" % (string, encoding))
|
||
|
try:
|
||
|
return string.encode(encoding, errors)
|
||
|
except UnicodeEncodeError:
|
||
|
raise UnicodeEncodeError("Conversion from unicode failed: %r" % string)
|
||
|
__all__.append("deunicodise")
|
||
|
|
||
|
def unicodise_safe(string, encoding = None):
|
||
|
"""
|
||
|
Convert 'string' to Unicode according to current encoding
|
||
|
and replace all invalid characters with '?'
|
||
|
"""
|
||
|
|
||
|
return unicodise(deunicodise(string, encoding), encoding).replace(u'\ufffd', '?')
|
||
|
__all__.append("unicodise_safe")
|
||
|
|
||
|
def replace_nonprintables(string):
|
||
|
"""
|
||
|
replace_nonprintables(string)
|
||
|
|
||
|
Replaces all non-printable characters 'ch' in 'string'
|
||
|
where ord(ch) <= 26 with ^@, ^A, ... ^Z
|
||
|
"""
|
||
|
new_string = ""
|
||
|
modified = 0
|
||
|
for c in string:
|
||
|
o = ord(c)
|
||
|
if (o <= 31):
|
||
|
new_string += "^" + chr(ord('@') + o)
|
||
|
modified += 1
|
||
|
elif (o == 127):
|
||
|
new_string += "^?"
|
||
|
modified += 1
|
||
|
else:
|
||
|
new_string += c
|
||
|
if modified and Config.Config().urlencoding_mode != "fixbucket":
|
||
|
warning("%d non-printable characters replaced in: %s" % (modified, new_string))
|
||
|
return new_string
|
||
|
__all__.append("replace_nonprintables")
|
||
|
|
||
|
def sign_string(string_to_sign):
|
||
|
"""Sign a string with the secret key, returning base64 encoded results.
|
||
|
By default the configured secret key is used, but may be overridden as
|
||
|
an argument.
|
||
|
|
||
|
Useful for REST authentication. See http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html
|
||
|
"""
|
||
|
signature = base64.encodestring(hmac.new(Config.Config().secret_key, string_to_sign, sha1).digest()).strip()
|
||
|
return signature
|
||
|
__all__.append("sign_string")
|
||
|
|
||
|
def sign_url(url_to_sign, expiry):
|
||
|
"""Sign a URL in s3://bucket/object form with the given expiry
|
||
|
time. The object will be accessible via the signed URL until the
|
||
|
AWS key and secret are revoked or the expiry time is reached, even
|
||
|
if the object is otherwise private.
|
||
|
|
||
|
See: http://s3.amazonaws.com/doc/s3-developer-guide/RESTAuthentication.html
|
||
|
"""
|
||
|
return sign_url_base(
|
||
|
bucket = url_to_sign.bucket(),
|
||
|
object = url_to_sign.object(),
|
||
|
expiry = expiry
|
||
|
)
|
||
|
__all__.append("sign_url")
|
||
|
|
||
|
def sign_url_base(**parms):
|
||
|
"""Shared implementation of sign_url methods. Takes a hash of 'bucket', 'object' and 'expiry' as args."""
|
||
|
parms['expiry']=time_to_epoch(parms['expiry'])
|
||
|
parms['access_key']=Config.Config().access_key
|
||
|
debug("Expiry interpreted as epoch time %s", parms['expiry'])
|
||
|
signtext = 'GET\n\n\n%(expiry)d\n/%(bucket)s/%(object)s' % parms
|
||
|
debug("Signing plaintext: %r", signtext)
|
||
|
parms['sig'] = urllib.quote_plus(sign_string(signtext))
|
||
|
debug("Urlencoded signature: %s", parms['sig'])
|
||
|
return "http://%(bucket)s.s3.amazonaws.com/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s" % parms
|
||
|
|
||
|
def time_to_epoch(t):
|
||
|
"""Convert time specified in a variety of forms into UNIX epoch time.
|
||
|
Accepts datetime.datetime, int, anything that has a strftime() method, and standard time 9-tuples
|
||
|
"""
|
||
|
if isinstance(t, int):
|
||
|
# Already an int
|
||
|
return t
|
||
|
elif isinstance(t, tuple) or isinstance(t, time.struct_time):
|
||
|
# Assume it's a time 9-tuple
|
||
|
return int(time.mktime(t))
|
||
|
elif hasattr(t, 'timetuple'):
|
||
|
# Looks like a datetime object or compatible
|
||
|
return int(time.mktime(ex.timetuple()))
|
||
|
elif hasattr(t, 'strftime'):
|
||
|
# Looks like the object supports standard srftime()
|
||
|
return int(t.strftime('%s'))
|
||
|
elif isinstance(t, str) or isinstance(t, unicode):
|
||
|
# See if it's a string representation of an epoch
|
||
|
try:
|
||
|
return int(t)
|
||
|
except ValueError:
|
||
|
# Try to parse it as a timestamp string
|
||
|
try:
|
||
|
return time.strptime(t)
|
||
|
except ValueError, ex:
|
||
|
# Will fall through
|
||
|
debug("Failed to parse date with strptime: %s", ex)
|
||
|
pass
|
||
|
raise Exceptions.ParameterError('Unable to convert %r to an epoch time. Pass an epoch time. Try `date -d \'now + 1 year\' +%%s` (shell) or time.mktime (Python).' % t)
|
||
|
|
||
|
|
||
|
def check_bucket_name(bucket, dns_strict = True):
|
||
|
if dns_strict:
|
||
|
invalid = re.search("([^a-z0-9\.-])", bucket)
|
||
|
if invalid:
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: lowercase us-ascii letters (a-z), digits (0-9), dot (.) and hyphen (-)." % (bucket, invalid.groups()[0]))
|
||
|
else:
|
||
|
invalid = re.search("([^A-Za-z0-9\._-])", bucket)
|
||
|
if invalid:
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' contains disallowed character '%s'. The only supported ones are: us-ascii letters (a-z, A-Z), digits (0-9), dot (.), hyphen (-) and underscore (_)." % (bucket, invalid.groups()[0]))
|
||
|
|
||
|
if len(bucket) < 3:
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' is too short (min 3 characters)" % bucket)
|
||
|
if len(bucket) > 255:
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' is too long (max 255 characters)" % bucket)
|
||
|
if dns_strict:
|
||
|
if len(bucket) > 63:
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' is too long (max 63 characters)" % bucket)
|
||
|
if re.search("-\.", bucket):
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '-.' for DNS compatibility" % bucket)
|
||
|
if re.search("\.\.", bucket):
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' must not contain sequence '..' for DNS compatibility" % bucket)
|
||
|
if not re.search("^[0-9a-z]", bucket):
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' must start with a letter or a digit" % bucket)
|
||
|
if not re.search("[0-9a-z]$", bucket):
|
||
|
raise Exceptions.ParameterError("Bucket name '%s' must end with a letter or a digit" % bucket)
|
||
|
return True
|
||
|
__all__.append("check_bucket_name")
|
||
|
|
||
|
def check_bucket_name_dns_conformity(bucket):
|
||
|
try:
|
||
|
return check_bucket_name(bucket, dns_strict = True)
|
||
|
except Exceptions.ParameterError:
|
||
|
return False
|
||
|
__all__.append("check_bucket_name_dns_conformity")
|
||
|
|
||
|
def getBucketFromHostname(hostname):
|
||
|
"""
|
||
|
bucket, success = getBucketFromHostname(hostname)
|
||
|
|
||
|
Only works for hostnames derived from bucket names
|
||
|
using Config.host_bucket pattern.
|
||
|
|
||
|
Returns bucket name and a boolean success flag.
|
||
|
"""
|
||
|
|
||
|
# Create RE pattern from Config.host_bucket
|
||
|
pattern = Config.Config().host_bucket % { 'bucket' : '(?P<bucket>.*)' }
|
||
|
m = re.match(pattern, hostname)
|
||
|
if not m:
|
||
|
return (hostname, False)
|
||
|
return m.groups()[0], True
|
||
|
__all__.append("getBucketFromHostname")
|
||
|
|
||
|
def getHostnameFromBucket(bucket):
|
||
|
return Config.Config().host_bucket % { 'bucket' : bucket }
|
||
|
__all__.append("getHostnameFromBucket")
|
||
|
|
||
|
# vim:et:ts=4:sts=4:ai
|