Skip to content

DNM support client crypto multipart upload #157

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
95 changes: 86 additions & 9 deletions examples/custom_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import oss2
from oss2.crypto import BaseCryptoProvider
from oss2.utils import b64encode_as_string, b64decode_from_string, to_bytes
from oss2.headers import *

from Crypto.Cipher import PKCS1_OAEP
from Crypto.PublicKey import RSA
Expand Down Expand Up @@ -36,7 +37,7 @@ def get_key():
def get_start():
return 'fake_start'

def __init__(self, key=None, start=None):
def __init__(self, key=None, start=None, count=None):
pass

def encrypt(self, raw):
Expand Down Expand Up @@ -74,22 +75,44 @@ def __init__(self, cipher=FakeCrypto):
self.private_key = self.public_key


def build_header(self, headers=None):
def build_header(self, headers=None, multipart_context=None):
if not isinstance(headers, CaseInsensitiveDict):
headers = CaseInsensitiveDict(headers)

if 'content-md5' in headers:
headers['x-oss-meta-unencrypted-content-md5'] = headers['content-md5']
headers[OSS_CLIENT_SIDE_ENCRYPTION_UNENCRYPTED_CONTENT_MD5] = headers['content-md5']
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

之前的逻辑是:用户如果传入 MD5,后端会进行校验。现在如果传入的是未加密数据的 MD5,后端的逻辑会怎么处理?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

后端对此不做任何处理,这里把MD5去掉的原因就是防止后端发现加密数据和明文数据的MD5不一样而报错,可以考虑重新计算加密数据的MD5填入。

del headers['content-md5']

if 'content-length' in headers:
headers['x-oss-meta-unencrypted-content-length'] = headers['content-length']
headers[OSS_CLIENT_SIDE_ENCRYPTION_UNENCRYPTED_CONTENT_LENGTH] = headers['content-length']
del headers['content-length']

headers['x-oss-meta-oss-crypto-key'] = b64encode_as_string(self.public_key.encrypt(self.plain_key))
headers['x-oss-meta-oss-crypto-start'] = b64encode_as_string(self.public_key.encrypt(to_bytes(str(self.plain_start))))
headers['x-oss-meta-oss-cek-alg'] = self.cipher.ALGORITHM
headers['x-oss-meta-oss-wrap-alg'] = 'custom'
headers[OSS_CLIENT_SIDE_ENCRYPTION_KEY] = b64encode_as_string(self.public_key.encrypt(self.plain_key))
headers[OSS_CLIENT_SIDE_ENCRYPTION_START] = b64encode_as_string(self.public_key.encrypt(to_bytes(str(self.plain_start))))
headers[OSS_CLIENT_SIDE_ENCRYPTION_CEK_ALG] = self.cipher.ALGORITHM
headers[OSS_CLIENT_SIDE_ENCRYPTION_WRAP_ALG] = 'custom'

# multipart file build header
if multipart_context:
headers[OSS_CLIENT_SIDE_ENCRYPTION_DATA_SIZE] = str(multipart_context.data_size)
headers[OSS_CLIENT_SIDE_ENCRYPTION_PART_SIZE] = str(multipart_context.part_size)

self.plain_key = None
self.plain_start = None

return headers

def build_header_for_upload_part(self, headers=None):
if not isinstance(headers, CaseInsensitiveDict):
headers = CaseInsensitiveDict(headers)

if 'content-md5' in headers:
headers[OSS_CLIENT_SIDE_ENCRYPTION_UNENCRYPTED_CONTENT_MD5] = headers['content-md5']
del headers['content-md5']

if 'content-length' in headers:
headers[OSS_CLIENT_SIDE_ENCRYPTION_UNENCRYPTED_CONTENT_LENGTH] = headers['content-length']
del headers['content-length']

self.plain_key = None
self.plain_start = None
Expand All @@ -110,6 +133,12 @@ def decrypt_oss_meta_data(self, headers, key, conv=lambda x:x):
except:
return None

def decrypt_from_str(self, key, value, conv=lambda x:x):
    """Decrypt a base64-encoded string with this provider's private key.

    :param key: not read by this implementation. NOTE(review): presumably
        the name of the metadata entry, kept so the signature matches what
        callers pass -- confirm against the callers.
    :param value: base64 text; it is decoded with ``b64decode_from_string``
        and the result is handed to ``self.private_key.decrypt``.
    :param conv: callable applied to the decrypted value before it is
        returned; the default returns it unchanged.
    :return: ``conv(decrypted_value)``, or ``None`` when decoding,
        decryption or conversion raises.
    """
    try:
        return conv(self.private_key.decrypt(b64decode_from_string(value)))
    except Exception:
        # Best effort: an undecodable or undecryptable value yields None.
        # Only Exception is caught so that KeyboardInterrupt and SystemExit
        # still propagate instead of being silently turned into None.
        return None



# 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。
Expand Down Expand Up @@ -162,4 +191,52 @@ def decrypt_oss_meta_data(self, headers, key, conv=lambda x:x):
with open(filename, 'rb') as fileobj:
assert fileobj.read() == content

os.remove(filename)
os.remove(filename)

# Multipart upload example.
# Three equally sized parts, each filled with a distinct byte value so the
# downloaded object can be verified part by part.
part_size = 100 * 1024
part_a = b'a' * part_size
part_b = b'b' * part_size
part_c = b'c' * part_size
multi_content = [part_a, part_b, part_c]
data_size = part_size * len(multi_content)
multi_key = "test_crypto_multipart"

# Initialise the multipart upload; the result carries the upload id and the
# crypto context that is passed to every upload_part call below.
res = bucket.init_multipart_upload(multi_key, data_size, part_size)
upload_id = res.upload_id
crypto_multipart_context = res.crypto_multipart_context

# Upload the parts (part numbers start at 1).
parts = []
for part_number, part_data in enumerate(multi_content, start=1):
    result = bucket.upload_part(multi_key, upload_id, part_number, part_data,
                                crypto_multipart_context)
    parts.append(oss2.models.PartInfo(part_number, result.etag,
                                      size=part_size, part_crc=result.crc))

## If the upload is interrupted and crypto_multipart_context is lost,
## it can be recovered with list_parts:
#for i in range(2):
#    result = bucket.upload_part(multi_key, upload_id, i+1, multi_content[i], crypto_multipart_context)
#    parts.append(oss2.models.PartInfo(i+1, result.etag, size = part_size, part_crc = result.crc))
#
#res = bucket.list_parts(multi_key, upload_id)
#crypto_multipart_context_new = res.crypto_multipart_context
#
#result = bucket.upload_part(multi_key, upload_id, 3, multi_content[2], crypto_multipart_context_new)
#parts.append(oss2.models.PartInfo(3, result.etag, size = part_size, part_crc = result.crc))

# Complete the upload.
result = bucket.complete_multipart_upload(multi_key, upload_id, parts)

# Download the whole object.
result = bucket.get_object(multi_key)

# Verify: join the streamed chunks once, then compare each part-sized slice
# with the data that was uploaded for that part.
content_got = b''.join(result)
for index, expected in enumerate(multi_content):
    begin = index * part_size
    assert content_got[begin:begin + part_size] == expected
128 changes: 119 additions & 9 deletions examples/object_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import oss2
from oss2.crypto import LocalRsaProvider, AliKMSProvider

# 以下代码展示了客户端文件加密上传下载的用法,如下载文件、上传文件等,注意在客户端加密的条件下,oss暂不支持文件分片上传下载操作
# 以下代码展示了客户端文件加密上传下载的用法,如下载文件、上传文件等。


# 首先初始化AccessKeyId、AccessKeySecret、Endpoint等信息。
Expand All @@ -30,11 +30,9 @@
content = b'a' * 1024 * 1024
filename = 'download.txt'


# 创建Bucket对象,可以进行客户端数据加密(用户端RSA),此模式下只提供对象整体上传下载操作
# 创建Bucket对象,可以进行客户端数据加密(用户端RSA)
bucket = oss2.CryptoBucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name, crypto_provider=LocalRsaProvider())

key1 = 'motto-copy.txt'

# 上传文件
bucket.put_object(key, content, headers={'content-length': str(1024 * 1024)})
Expand Down Expand Up @@ -62,12 +60,67 @@

os.remove(filename)

# 下载部分文件
result = bucket.get_object(key, byte_range=(32,1024))

# 创建Bucket对象,可以进行客户端数据加密(使用阿里云KMS),此模式下只提供对象整体上传下载操作
bucket = oss2.CryptoBucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name,
crypto_provider=AliKMSProvider(access_key_id, access_key_secret, region, cmk, '1234'))
#验证一下
content_got = b''
for chunk in result:
content_got +=chunk
assert content_got == content[32:1025]


"""
分片上传
"""
# 初始化上传分片
part_a = b'a' * 1024 * 100
part_b = b'b' * 1024 * 100
part_c = b'c' * 1024 * 100
multi_content = [part_a, part_b, part_c]

parts = []
data_size = 100 * 1024 * 3
part_size = 100 * 1024
multi_key = "test_crypto_multipart"

res = bucket.init_multipart_upload(multi_key, data_size, part_size)
upload_id = res.upload_id
crypto_multipart_context = res.crypto_multipart_context;

# 分片上传
for i in range(3):
result = bucket.upload_part(multi_key, upload_id, i+1, multi_content[i], crypto_multipart_context)
parts.append(oss2.models.PartInfo(i+1, result.etag, size = part_size, part_crc = result.crc))

## 分片上传时,若意外中断丢失crypto_multipart_context, 利用list_parts找回。
#for i in range(2):
# result = bucket.upload_part(multi_key, upload_id, i+1, multi_content[i], crypto_multipart_context)
# parts.append(oss2.models.PartInfo(i+1, result.etag, size = part_size, part_crc = result.crc))
#
#res = bucket.list_parts(multi_key, upload_id)
#crypto_multipart_context_new = res.crypto_multipart_context
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里的示例能不能加上提示:提醒用户在上传中断后,通过 list_parts 取回上一次的 context 时,再校验一下其内容是否符合预期?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

好的,可以加上一些文字提示。由于现在对part的合法性判断全部移到服务端做了,所以用户是否校验符合预期已经不重要了,因为就算不符合,下一次上传时服务端也会报错。

#
#result = bucket.upload_part(multi_key, upload_id, 3, multi_content[2], crypto_multipart_context_new)
#parts.append(oss2.models.PartInfo(3, result.etag, size = part_size, part_crc = result.crc))

# 完成上传
result = bucket.complete_multipart_upload(multi_key, upload_id, parts)

# 下载全部文件
result = bucket.get_object(multi_key)

key1 = 'motto-copy.txt'
# 验证一下
content_got = b''
for chunk in result:
content_got += chunk
assert content_got[0:102400] == part_a
assert content_got[102400:204800] == part_b
assert content_got[204800:307200] == part_c

# 创建Bucket对象,可以进行客户端数据加密(使用阿里云KMS)
bucket = oss2.CryptoBucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name,
crypto_provider=AliKMSProvider(access_key_id, access_key_secret, region, cmk))

# 上传文件
bucket.put_object(key, content, headers={'content-length': str(1024 * 1024)})
Expand All @@ -93,4 +146,61 @@
with open(filename, 'rb') as fileobj:
assert fileobj.read() == content

os.remove(filename)
os.remove(filename)

# Download part of the object.
result = bucket.get_object(key, byte_range=(32, 1024))

# Verify: the expected slice is content[32:1025], i.e. both ends of
# byte_range are treated as inclusive.
content_got = b''.join(result)
assert content_got == content[32:1025]

# Multipart upload example.
# Three equally sized parts, each filled with a distinct byte value so the
# downloaded object can be verified part by part.
part_size = 100 * 1024
part_a = b'a' * part_size
part_b = b'b' * part_size
part_c = b'c' * part_size
multi_content = [part_a, part_b, part_c]
data_size = part_size * len(multi_content)
multi_key = "test_crypto_multipart"

# Initialise the multipart upload; the result carries the upload id and the
# crypto context that is passed to every upload_part call below.
res = bucket.init_multipart_upload(multi_key, data_size, part_size)
upload_id = res.upload_id
crypto_multipart_context = res.crypto_multipart_context

# Upload the parts (part numbers start at 1).
parts = []
for part_number, part_data in enumerate(multi_content, start=1):
    result = bucket.upload_part(multi_key, upload_id, part_number, part_data,
                                crypto_multipart_context)
    parts.append(oss2.models.PartInfo(part_number, result.etag,
                                      size=part_size, part_crc=result.crc))

## If the upload is interrupted and crypto_multipart_context is lost,
## it can be recovered with list_parts:
#for i in range(2):
#    result = bucket.upload_part(multi_key, upload_id, i+1, multi_content[i], crypto_multipart_context)
#    parts.append(oss2.models.PartInfo(i+1, result.etag, size = part_size, part_crc = result.crc))
#
#res = bucket.list_parts(multi_key, upload_id)
#crypto_multipart_context_new = res.crypto_multipart_context
#
#result = bucket.upload_part(multi_key, upload_id, 3, multi_content[2], crypto_multipart_context_new)
#parts.append(oss2.models.PartInfo(3, result.etag, size = part_size, part_crc = result.crc))

# Complete the upload.
result = bucket.complete_multipart_upload(multi_key, upload_id, parts)

# Download the whole object.
result = bucket.get_object(multi_key)

# Verify: join the streamed chunks once, then compare each part-sized slice
# with the data that was uploaded for that part.
content_got = b''.join(result)
for index, expected in enumerate(multi_content):
    begin = index * part_size
    assert content_got[begin:begin + part_size] == expected
Loading