v4 signing #419

Merged: 13 commits, Dec 15, 2014
1 change: 1 addition & 0 deletions S3/Config.py
@@ -79,6 +79,7 @@ class Config(object):
use_https = False
bucket_location = "US"
default_mime_type = "binary/octet-stream"
default_region = 'us-east-1'
guess_mime_type = True
use_mime_magic = True
mime_type = ""
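
The new default_region value acts as a fallback for buckets whose real region has not been discovered yet. A minimal sketch of that lookup pattern, mirroring the S3Request.region_map usage further down in this diff (the bucket names are hypothetical):

region_map = {}                      # bucket name -> region learned from S3
default_region = 'us-east-1'         # the new Config default added above

def region_for(bucket):
    # Use the discovered region if we have one, otherwise the configured default.
    return region_map.get(bucket, default_region)

region_map['logs-eu'] = 'eu-west-1'  # hypothetical bucket
assert region_for('logs-eu') == 'eu-west-1'
assert region_for('never-seen-bucket') == 'us-east-1'
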
92 changes: 91 additions & 1 deletion S3/Crypto.py
@@ -9,13 +9,20 @@
import base64

import Config
from logging import debug

import os
import datetime
import requests
import urllib

# hashlib backported to python 2.4 / 2.5 is not compatible with hmac!
if sys.version_info[0] == 2 and sys.version_info[1] < 6:
from md5 import md5
import sha as sha1
from Crypto.Hash import SHA256 as sha256
else:
from hashlib import md5, sha1
from hashlib import md5, sha1, sha256

__all__ = []

@@ -58,3 +65,86 @@ def sign_url_base_v2(**parms):
debug("Urlencoded signature: %s", parms['sig'])
return "http://%(bucket)s.%(host_base)s/%(object)s?AWSAccessKeyId=%(access_key)s&Expires=%(expiry)d&Signature=%(sig)s" % parms

def sign(key, msg):
return hmac.new(key, msg.encode('utf-8'), sha256).digest()

def getSignatureKey(key, dateStamp, regionName, serviceName):
kDate = sign(('AWS4' + key).encode('utf-8'), dateStamp)
kRegion = sign(kDate, regionName)
kService = sign(kRegion, serviceName)
kSigning = sign(kService, 'aws4_request')
return kSigning

def sign_string_v4(method='GET', host='', canonical_uri='/', params={}, region='us-east-1', cur_headers={}, body=''):
service = 's3'

cfg = Config.Config()
access_key = cfg.access_key
secret_key = cfg.secret_key

t = datetime.datetime.utcnow()
amzdate = t.strftime('%Y%m%dT%H%M%SZ')
datestamp = t.strftime('%Y%m%d')

canonical_querystring = '&'.join(['%s=%s' % (urllib.quote_plus(p), quote_param(params[p])) for p in sorted(params.keys())])

splits = canonical_uri.split('?')

canonical_uri = quote_param(splits[0], quote_backslashes=False)
canonical_querystring += '&'.join([('%s' if '=' in qs else '%s=') % qs for qs in splits[1:]])

if type(body) == type(sha256('')):
payload_hash = body.hexdigest()
else:
payload_hash = sha256(body).hexdigest()

canonical_headers = 'host:' + host + '\n' + 'x-amz-content-sha256:' + payload_hash + '\n' + 'x-amz-date:' + amzdate + '\n'
signed_headers = 'host;x-amz-content-sha256;x-amz-date'

for header in cur_headers.keys():
# avoid duplicate headers and previous Authorization
if header == 'Authorization' or header in signed_headers.split(';'):
continue
canonical_headers += header.strip() + ':' + str(cur_headers[header]).strip() + '\n'
signed_headers += ';' + header.strip()

# sort headers
canonical_headers = '\n'.join(sorted(canonical_headers.split())) + '\n'
signed_headers = ';'.join(sorted(signed_headers.split(';')))

canonical_request = method + '\n' + canonical_uri + '\n' + canonical_querystring + '\n' + canonical_headers + '\n' + signed_headers + '\n' + payload_hash
debug('Canonical Request:\n%s\n----------------------' % canonical_request)

algorithm = 'AWS4-HMAC-SHA256'
credential_scope = datestamp + '/' + region + '/' + service + '/' + 'aws4_request'
string_to_sign = algorithm + '\n' + amzdate + '\n' + credential_scope + '\n' + sha256(canonical_request).hexdigest()
signing_key = getSignatureKey(secret_key, datestamp, region, service)
signature = hmac.new(signing_key, (string_to_sign).encode('utf-8'), sha256).hexdigest()
authorization_header = algorithm + ' ' + 'Credential=' + access_key + '/' + credential_scope + ',' + 'SignedHeaders=' + signed_headers + ',' + 'Signature=' + signature
headers = dict(cur_headers.items() + {'x-amz-date':amzdate, 'Authorization':authorization_header, 'x-amz-content-sha256': payload_hash}.items())
debug("signature-v4 headers: %s" % headers)
return headers

def quote_param(param, quote_backslashes=True):
# As stated by Amazon the '/' in the filename should stay unquoted and %20 should be used for space instead of '+'
quoted = urllib.quote_plus(urllib.unquote_plus(param)).replace('+', '%20')
if not quote_backslashes:
quoted = quoted.replace('%2F', '/')
return quoted

def checksum_sha256(filename, offset=0, size=None):
canonical_uri = urllib.quote_plus(filename).replace('%2F', '/')
try:
hash = sha256()
except:
# fallback to Crypto SHA256 module
hash = sha256.new()
with open(filename,'rb') as f:
if size is None:
for chunk in iter(lambda: f.read(8192), b''):
hash.update(chunk)
else:
f.seek(offset)
chunk = f.read(size)
hash.update(chunk)
return hash
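
For reference, the key derivation implemented by sign() and getSignatureKey() above follows the standard AWS Signature Version 4 chain: HMAC-SHA256 is applied successively to the date stamp, region, service and the literal string 'aws4_request', starting from 'AWS4' + secret key, and the result signs the string-to-sign built in sign_string_v4(). A self-contained sketch (Python 3 syntax; the credential and date values are made up for illustration):

import hashlib
import hmac

def hmac_sha256(key, msg):
    # key is bytes, msg is str; return the raw digest used as the key of the next step
    return hmac.new(key, msg.encode('utf-8'), hashlib.sha256).digest()

def derive_signing_key(secret_key, datestamp, region, service):
    # Same chain as getSignatureKey() above
    k_date = hmac_sha256(('AWS4' + secret_key).encode('utf-8'), datestamp)
    k_region = hmac_sha256(k_date, region)
    k_service = hmac_sha256(k_region, service)
    return hmac_sha256(k_service, 'aws4_request')

# Hypothetical values for illustration only
signing_key = derive_signing_key('EXAMPLESECRETKEY', '20141215', 'us-east-1', 's3')

# The string-to-sign has the same layout as in sign_string_v4():
# algorithm \n amzdate \n credential_scope \n sha256(canonical_request)
string_to_sign = '\n'.join([
    'AWS4-HMAC-SHA256',
    '20141215T000000Z',
    '20141215/us-east-1/s3/aws4_request',
    hashlib.sha256(b'').hexdigest(),   # empty canonical request, just for the demo
])
signature = hmac.new(signing_key, string_to_sign.encode('utf-8'),
                     hashlib.sha256).hexdigest()
print(signature)
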
81 changes: 71 additions & 10 deletions S3/S3.py
@@ -33,7 +33,7 @@
from MultiPart import MultiPartUpload
from S3Uri import S3Uri
from ConnMan import ConnMan
from Crypto import sign_string_v2
from Crypto import sign_string_v2, sign_string_v4, checksum_sha256

try:
import magic
@@ -102,6 +102,8 @@ def _mime_magic(file):

__all__ = []
class S3Request(object):
region_map = {}

def __init__(self, s3, method_string, resource, headers, body, params = {}):
self.s3 = s3
self.headers = SortedDict(headers or {}, ignore_case = True)
@@ -150,10 +152,23 @@ def sign(self):
if self.resource['bucket']:
h += "/" + self.resource['bucket']
h += self.resource['uri']
debug("SignHeaders: " + repr(h))
signature = sign_string_v2(h)

self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature
if self.resource['bucket'] is None or not check_bucket_name_dns_conformity(self.resource['bucket']):
# in case of bad DNS name due to bucket name v2 will be used
# this way we can still use capital letters in bucket names for the older regions
debug("Using signature v2")
debug("SignHeaders: " + repr(h))
signature = sign_string_v2(h)
self.headers["Authorization"] = "AWS "+self.s3.config.access_key+":"+signature
else:
debug("Using signature v4")
self.headers = sign_string_v4(self.method_string,
self.s3.get_hostname(self.resource['bucket']),
self.resource['uri'],
self.params,
S3Request.region_map.get(self.resource['bucket'], Config().default_region),
self.headers,
self.body)

def get_triplet(self):
self.update_timestamp()
@@ -252,6 +267,7 @@ def _get_contents(data):
def _get_common_prefixes(data):
return getListFromXml(data, "CommonPrefixes")


uri_params = uri_params.copy()
truncated = True
list = []
@@ -304,6 +320,7 @@ def bucket_create(self, bucket, bucket_location = None):
check_bucket_name(bucket, dns_strict = False)
if self.config.acl_public:
headers["x-amz-acl"] = "public-read"

request = self.create_request("BUCKET_CREATE", bucket = bucket, headers = headers, body = body)
response = self.send_request(request)
return response
@@ -537,7 +554,7 @@ def object_put(self, filename, uri, extra_headers = None, extra_label = ""):
% (remote_size, size, uri))

headers["content-length"] = size
request = self.create_request("OBJECT_PUT", uri = uri, headers = headers)
request = self.create_request("OBJECT_PUT", uri = uri, headers = headers, body=checksum_sha256(filename))
labels = { 'source' : unicodise(filename), 'destination' : unicodise(uri.uri()), 'extra' : extra_label }
response = self.send_file(request, file, labels)
return response
@@ -828,13 +845,9 @@ def _fail_wait(self, retries):

def send_request(self, request, retries = _max_retries):
method_string, resource, headers = request.get_triplet()

debug("Processing request, please wait...")
if not headers.has_key('content-length'):
headers['content-length'] = request.body and len(request.body) or 0
try:
# "Stringify" all headers
for header in headers.keys():
headers[header] = str(headers[header])
conn = ConnMan.get(self.get_hostname(resource['bucket']))
uri = self.format_uri(resource)
debug("Sending request method_string=%r, uri=%r, headers=%r, body=(%i bytes)" % (method_string, uri, headers, len(request.body or "")))
@@ -863,6 +876,20 @@ def send_request(self, request, retries = _max_retries):
else:
raise S3RequestError("Request failed for: %s" % resource['uri'])

if response["status"] == 400:
if 'data' in response and len(response['data']) > 0 and getTextFromXml(response['data'], 'Code') == 'AuthorizationHeaderMalformed':
region = getTextFromXml(response['data'], 'Region')
else:
s3_uri = S3Uri('s3://' + request.resource['bucket'])
region = self.get_bucket_location(s3_uri)
if region is not None:
S3Request.region_map[request.resource['bucket']] = region
warning('Forwarding request to %s' % region)
return self.send_request(request)
else:
warning('Could not determine bucket location. Please consider using --region parameter.')
sys.exit(2)

if response["status"] == 307:
## RedirectPermanent
redir_bucket = getTextFromXml(response['data'], ".//Bucket")

def send_file(self, request, file, labels, buffer = '', throttle = 0, retries = _max_retries, offset = 0, chunk_size = -1):
method_string, resource, headers = request.get_triplet()
if S3Request.region_map.get(request.resource['bucket'], None) is None:
debug('asking for bucket location')
print('asking for bucket location')
s3_uri = S3Uri('s3://' + request.resource['bucket'])
region = self.get_bucket_location(s3_uri)
if region is not None:
S3Request.region_map[request.resource['bucket']] = region

size_left = size_total = headers.get("content-length")
if self.config.progress_meter:
progress = self.config.progress_class(labels, size_total)
else:
info("Sending file '%s', please wait..." % file.name)
timestamp_start = time.time()

sha256_hash = checksum_sha256(file.name, offset, size_total)
request.body = sha256_hash
method_string, resource, headers = request.get_triplet()
try:
conn = ConnMan.get(self.get_hostname(resource['bucket']))
conn.c.putrequest(method_string, self.format_uri(resource))
@@ -981,6 +1020,17 @@ def send_file(request, file, labels, buffer = '', throttle = 0, retries =
warning("Redirected to: %s" % (redir_hostname))
return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)

if response["status"] == 400:
if 'data' in response and len(response['data']) > 0 and getTextFromXml(response['data'], 'Code') == 'AuthorizationHeaderMalformed':
region = getTextFromXml(response['data'], 'Region')
else:
s3_uri = S3Uri('s3://' + request.resource['bucket'])
region = self.get_bucket_location(s3_uri)
if region is not None:
S3Request.region_map[request.resource['bucket']] = region
warning('Forwarding request to %s' % region)
return self.send_file(request, file, labels, buffer, offset = offset, chunk_size = chunk_size)

# S3 from time to time doesn't send ETag back in a response :-(
# Force re-upload here.
if not response['headers'].has_key('etag'):
@@ -1084,6 +1134,17 @@ def recv_file(self, request, stream, labels, start_position = 0, retries = _max_
warning("Redirected to: %s" % (redir_hostname))
return self.recv_file(request, stream, labels)

if response["status"] == 400:
if 'data' in response and len(response['data']) > 0 and getTextFromXml(response['data'], 'Code') == 'AuthorizationHeaderMalformed':
region = getTextFromXml(response['data'], 'Region')
else:
s3_uri = S3Uri('s3://' + request.resource['bucket'])
region = self.get_bucket_location(s3_uri)
if region is not None:
S3Request.region_map[request.resource['bucket']] = region
warning('Forwarding request to %s' % region)
return self.recv_file(request, stream, labels)

if response["status"] < 200 or response["status"] > 299:
raise S3Error(response)

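The three 400 handlers added above (in send_request, send_file and recv_file) share one idea: when S3 rejects a v4-signed request with AuthorizationHeaderMalformed, the error body names the region the bucket actually lives in, so the client caches it in S3Request.region_map and re-sends the request, which then gets signed with the correct credential scope. A condensed, illustrative sketch only; resend and lookup_location are hypothetical callables standing in for the real methods:

import xml.etree.ElementTree as ET

def region_from_error(xml_body):
    # Pull <Region> out of an AuthorizationHeaderMalformed error body, if present.
    try:
        elem = ET.fromstring(xml_body).find('Region')
        return elem.text if elem is not None else None
    except ET.ParseError:
        return None

def retry_with_correct_region(response, bucket, region_map, resend, lookup_location):
    # Condensed version of the 400 handling added in this diff (hypothetical helpers).
    if response['status'] != 400:
        return None
    region = region_from_error(response['data']) if response.get('data') else None
    if region is None:
        region = lookup_location(bucket)          # e.g. a GET ?location request
    if region is None:
        raise RuntimeError('Could not determine bucket location; consider --region')
    region_map[bucket] = region                   # remembered for the next signature
    return resend()
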
7 changes: 7 additions & 0 deletions S3/Utils.py
@@ -37,6 +37,13 @@
import Config
import Exceptions

# hashlib backported to python 2.4 / 2.5 is not compatible with hmac!
if sys.version_info[0] == 2 and sys.version_info[1] < 6:
from md5 import md5
import sha as sha1
else:
from hashlib import md5, sha1

try:
import xml.etree.ElementTree as ET
except ImportError:
2 changes: 2 additions & 0 deletions s3cmd
@@ -1777,6 +1777,7 @@ def run_configure(config_file, args):
options = [
("access_key", "Access Key", "Access key and Secret key are your identifiers for Amazon S3. Leave them empty for using the env variables."),
("secret_key", "Secret Key"),
("default_region", "Default Region"),
("gpg_passphrase", "Encryption password", "Encryption password is used to protect your files from reading\nby unauthorized persons while in transfer to S3"),
("gpg_command", "Path to GPG program"),
("use_https", "Use HTTPS protocol", "When using secure HTTPS protocol all communication with Amazon S3\nservers is protected from 3rd party eavesdropping. This method is\nslower than plain HTTP and can't be used if you're behind a proxy"),
@@ -2222,6 +2223,7 @@ def main():
optparser.add_option("-F", "--follow-symlinks", dest="follow_symlinks", action="store_true", default=False, help="Follow symbolic links as if they are regular files")
optparser.add_option( "--cache-file", dest="cache_file", action="store", default="", metavar="FILE", help="Cache FILE containing local source MD5 values")
optparser.add_option("-q", "--quiet", dest="quiet", action="store_true", default=False, help="Silence output on stdout")
optparser.add_option( "--region", dest="default_region", action="store", help="Override the default region")

optparser.set_usage(optparser.usage + " COMMAND [parameters]")
optparser.set_description('S3cmd is a tool for managing objects in '+
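
The new --region option stores into the same default_region slot that the Config change above introduces, so a command-line value overrides the configured default. A standalone sketch of that precedence (not s3cmd's actual wiring; the region values are examples):

from optparse import OptionParser

parser = OptionParser()
parser.add_option('--region', dest='default_region', action='store',
                  help='Override the default region')
(options, args) = parser.parse_args(['--region', 'eu-west-1'])  # hypothetical invocation

config = {'default_region': 'us-east-1'}   # value that would come from the config file
if options.default_region:
    config['default_region'] = options.default_region

print(config['default_region'])            # -> eu-west-1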