Commit f405d21

Authored by bladams, Nick Zaccardi, and alex-lee

Allow Azure storage to be used with an SAS URL (#28)

Co-authored-by: Nick Zaccardi <nick.zaccardi@level12.io>
Co-authored-by: Alex Lee <alex.lee@level12.io>
1 parent 468fec8 commit f405d21

File tree

10 files changed: +176 -61 lines changed

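This commit adds a second way to construct the Azure backend: instead of an account name, key, and bucket, a pre-signed container SAS URL can be passed. A rough usage sketch based on the diffs below; the credential, container, and URL values are placeholders, not real ones:

from keg_storage import backends

# Existing style: account credentials; the backend can also mint SAS URLs itself.
storage = backends.AzureStorage(account='myaccount', key='account-key', bucket='mycontainer')

# New style: a container-level SAS URL; blob access works, but the backend cannot
# generate further SAS URLs (those methods raise ValueError instead).
storage = backends.AzureStorage(
    sas_container_url='https://myaccount.blob.core.windows.net/mycontainer?sp=rwdl&sig=...',
)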

keg_storage/backends/azure.py

+58 -9

@@ -3,13 +3,15 @@
 import typing
 import urllib.parse
 from datetime import datetime
+from typing import ClassVar, List, Optional

 import arrow
 from azure.storage.blob import (
     BlobServiceClient,
     BlobBlock,
     ContainerClient,
     generate_blob_sas,
+    generate_container_sas,
 )
 from azure.storage.blob._models import BlobPrefix

@@ -47,19 +49,26 @@ class AzureWriter(AzureFile):
     2. There is no separate call to instantiate the upload. The first call to put_block will
        create the blob.
     """
-    def __init__(self, path: str, mode: base.FileMode, container_client: ContainerClient,
-                 chunk_size: int = None):
-        max_block_size = 100 * 1024 * 1024
+
+    max_block_size: ClassVar[int] = 100 * 1024 * 1024
+
+    def __init__(
+        self,
+        path: str,
+        mode: base.FileMode,
+        container_client: ContainerClient,
+        chunk_size: int = max_block_size,
+    ):
         if chunk_size is not None:
             # chunk_size cannot be larger than max_block_size due to API restrictions
-            chunk_size = min(chunk_size, max_block_size)
+            chunk_size = min(chunk_size, self.max_block_size)
         super().__init__(
             path=path,
             mode=mode,
             container_client=container_client,
             chunk_size=chunk_size,
         )
-        self.blocks = []
+        self.blocks: List[BlobBlock] = []

     def _gen_block_id(self) -> str:
         """
@@ -151,21 +160,43 @@ def read(self, size: int) -> bytes:


 class AzureStorage(base.StorageBackend):
-    def __init__(self, account: str, key: str, bucket: str, name: str = 'azure'):
-        super().__init__()
-        self.name = name
+    account_url: Optional[str]
+
+    def __init__(
+        self,
+        account: Optional[str] = None,
+        key: Optional[str] = None,
+        bucket: Optional[str] = None,
+        sas_container_url: Optional[str] = None,
+        name: str = 'azure',
+    ):
+        super().__init__(name)
         self.account = account
         self.key = key
         self.bucket = bucket
-        self.account_url = 'https://{}.blob.core.windows.net'.format(self.account)
+
+        if account and key and bucket:
+            self.account_url = 'https://{}.blob.core.windows.net'.format(self.account)
+            self.container_url = None
+        elif sas_container_url:
+            self.account_url = None
+            self.container_url = sas_container_url
+        else:
+            raise ValueError('Must provide either sas_container_url or account, key and bucket')

     def _create_service_client(self):
+        if self.account_url is None:
+            raise ValueError('Unable to construct a service client from a container SAS URL')
         return BlobServiceClient(
             account_url=self.account_url,
             credential=self.key,
         )

     def _create_container_client(self):
+        if self.container_url:
+            # Constructed using an SAS URL
+            return ContainerClient.from_container_url(self.container_url)
+
         service_client = self._create_service_client()
         return service_client.get_container_client(self.bucket)

@@ -256,6 +287,8 @@ def create_download_url(self, path: str, expire: typing.Union[arrow.Arrow, datet
     def _create_sas_url(self, path: str, sas_permissions: str,
                         expire: typing.Union[arrow.Arrow, datetime],
                         ip: typing.Optional[str] = None):
+        if not self.account_url:
+            raise ValueError('Cannot create a SAS URL without account credentials')
         path = self._clean_path(path)
         expire = expire.datetime if isinstance(expire, arrow.Arrow) else expire
         token = generate_blob_sas(
@@ -269,3 +302,19 @@ def _create_sas_url(self, path: str, sas_permissions: str,
         )
         url = urllib.parse.urljoin(self.account_url, '{}/{}'.format(self.bucket, path))
         return '{}?{}'.format(url, token)
+
+    def create_container_url(self, expire: typing.Union[arrow.Arrow, datetime],
+                             ip: typing.Optional[str] = None):
+        if not self.account_url:
+            raise ValueError('Cannot create a SAS URL without account credentials')
+        expire = expire.datetime if isinstance(expire, arrow.Arrow) else expire
+        token = generate_container_sas(
+            account_name=self.account,
+            container_name=self.bucket,
+            account_key=self.key,
+            permission='rwdl',
+            expiry=expire,
+            ip=ip
+        )
+        url = urllib.parse.urljoin(self.account_url, self.bucket)
+        return '{}?{}'.format(url, token)
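As a usage note, the new create_container_url is only available when the backend was built from account credentials; when constructed from a SAS URL it raises the ValueError shown above. A minimal sketch, assuming placeholder account values and an arbitrary expiry:

import arrow
from keg_storage import backends

storage = backends.AzureStorage(account='myaccount', key='account-key', bucket='mycontainer')

# Returns '<account_url>/<bucket>?<token>' with an 'rwdl' container SAS token,
# optionally restricted to a caller IP.
url = storage.create_container_url(expire=arrow.utcnow().shift(hours=1), ip='203.0.113.10')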

keg_storage/backends/base.py

+3 -5

@@ -23,7 +23,7 @@ def __str__(self):
         return f'{s}b'

     @classmethod
-    def as_mode(cls, obj):
+    def as_mode(cls, obj) -> "FileMode":
         if isinstance(obj, cls):
             return obj
         if not isinstance(obj, str):
@@ -103,10 +103,8 @@ def __exit__(self, exc_type, exc_val, exc_tb):


 class StorageBackend:
-    name = None
-
-    def __init__(self, *args, **kwargs):
-        pass
+    def __init__(self, name: str):
+        self.name = name

     def list(self, path: str) -> typing.List[ListEntry]:
         """

keg_storage/backends/s3.py

+1 -2

@@ -181,8 +181,7 @@ def __init__(
         aws_profile=None,
         name='s3'
     ):
-        super().__init__()
-        self.name = name
+        super().__init__(name)
         self.bucket = bucket
         self.session = boto3.session.Session(
             aws_access_key_id=aws_access_key_id,

keg_storage/backends/sftp.py

+1 -2

@@ -54,14 +54,13 @@ def __init__(
         look_for_keys=False,
         name='sftp'
     ):
-        super().__init__()
+        super().__init__(name)
         self.host = host
         self.username = username
         self.key_filename = key_filename
         self.known_hosts_fpath = known_hosts_fpath
         self.allow_agent = allow_agent
         self.look_for_keys = look_for_keys
-        self.name = name

     def create_client(self):
         client = SSHClient()

keg_storage/tests/test_backend_azure.py

+76 -8

@@ -1,5 +1,6 @@
 import base64
 import datetime
+import re
 import string
 import urllib.parse as urlparse
 from io import BytesIO
@@ -62,17 +63,19 @@ def test_list_leading_slashes(self, m_client):

     def test_open_read_write(self, m_client):
         storage = self.create_storage()
-        with pytest.raises(NotImplementedError) as exc:
-            storage.open('foo', base.FileMode.read | base.FileMode.write)
-        assert str(exc.value) == 'Read+write mode not supported by the Azure backend'
+        with pytest.raises(
+            NotImplementedError,
+            match=re.escape("Read+write mode not supported by the Azure backend"),
+        ):
+            storage.open("foo", base.FileMode.read | base.FileMode.write)

     def test_open_bad_mode(self, m_client):
         storage = self.create_storage()
-        with pytest.raises(ValueError) as exc:
-            storage.open('foo', base.FileMode(0))
-        assert (
-            str(exc.value) == 'Unsupported mode. Accepted modes are FileMode.read or FileMode.write'
-        )
+        with pytest.raises(
+            ValueError,
+            match=re.escape("Unsupported mode. Accepted modes are FileMode.read or FileMode.write"),
+        ):
+            storage.open("foo", base.FileMode(0))

     def test_read_operations(self, m_client):
         storage = self.create_storage()
@@ -273,3 +276,68 @@ def test_download_url(self, m_client, expire):
         assert qs['sp'] == ['r']
         assert qs['sig']
         assert qs['sip'] == ['127.0.0.1']
+
+    def test_construct_from_sas_url(self, m_client):
+        storage = backends.AzureStorage(
+            **{"sas_container_url": "https://foo.blob.core.windows.net/test?sp=rwdl"}
+        )
+        assert storage.account_url is None
+        assert storage.container_url == "https://foo.blob.core.windows.net/test?sp=rwdl"
+
+        with pytest.raises(
+            ValueError, match="Unable to construct a service client from a container SAS URL"
+        ):
+            storage._create_service_client()
+
+    def test_construct_neither_sas_url_nor_account_info(self, m_client):
+        with pytest.raises(
+            ValueError, match="Must provide either sas_container_url or account, key and bucket"
+        ):
+            backends.AzureStorage(
+                account="foo", bucket="test",
+            )
+
+    def test_sas_create_container_url(self, m_client):
+        storage = backends.AzureStorage(
+            **{"sas_container_url": "https://foo.blob.core.windows.net/test?sp=rwdl"}
+        )
+
+        with pytest.raises(ValueError, match="Cannot create a SAS URL without account credentials"):
+            storage.create_container_url(arrow.get(2019, 1, 2, 3, 4, 5))
+
+    def test_sas_create_upload_url(self, m_client):
+        storage = backends.AzureStorage(
+            **{"sas_container_url": "https://foo.blob.core.windows.net/test?sp=rwdl"}
+        )
+
+        with pytest.raises(ValueError, match="Cannot create a SAS URL without account credentials"):
+            storage.create_upload_url("foo/bar.txt", arrow.get(2019, 1, 2, 3, 4, 5))
+
+    @pytest.mark.parametrize('expire', [
+        arrow.get(2019, 1, 2, 3, 4, 5),
+        datetime.datetime(2019, 1, 2, 3, 4, 5)
+    ])
+    def test_create_container_url(self, m_client, expire):
+        storage = self.create_storage()
+        url = storage.create_container_url(expire=expire)
+        parsed = urlparse.urlparse(url)
+        assert parsed.netloc == 'foo.blob.core.windows.net'
+        assert parsed.path == '/test'
+        qs = urlparse.parse_qs(parsed.query)
+
+        assert qs['se'] == ['2019-01-02T03:04:05Z']
+        assert qs['sp'] == ['rwdl']
+        assert qs['sig']
+        assert 'sip' not in qs
+
+        # with IP restriction
+        url = storage.create_container_url(expire=expire, ip='127.0.0.1')
+        parsed = urlparse.urlparse(url)
+        assert parsed.netloc == 'foo.blob.core.windows.net'
+        assert parsed.path == '/test'
+        qs = urlparse.parse_qs(parsed.query)
+
+        assert qs['se'] == ['2019-01-02T03:04:05Z']
+        assert qs['sp'] == ['rwdl']
+        assert qs['sig']
+        assert qs['sip'] == ['127.0.0.1']

keg_storage/tests/test_backend_s3.py

+13 -11

@@ -1,5 +1,6 @@
 import datetime
 import io
+import re
 from unittest import mock

 import arrow
@@ -166,16 +167,17 @@ def test_open_write(self, m_boto):
         assert isinstance(result, backends.s3.S3Writer)

     def test_open_read_write(self, m_boto):
-        s3 = backends.S3Storage('bucket', aws_region='us-east-1')
-        with pytest.raises(NotImplementedError) as exc:
-            s3.open('foo/bar', FileMode.read | FileMode.write)
-        assert str(exc.value) == 'Read+write mode not supported by the S3 backend'
-
-        with pytest.raises(ValueError) as exc:
-            s3.open('foo/bar', FileMode(0))
-        assert (
-            str(exc.value) == 'Unsupported mode. Accepted modes are FileMode.read or FileMode.write'
-        )
+        s3 = backends.S3Storage("bucket", aws_region="us-east-1")
+        with pytest.raises(
+            NotImplementedError, match=re.escape("Read+write mode not supported by the S3 backend")
+        ):
+            s3.open("foo/bar", FileMode.read | FileMode.write)
+
+        with pytest.raises(
+            ValueError,
+            match=re.escape("Unsupported mode. Accepted modes are FileMode.read or FileMode.write"),
+        ):
+            s3.open("foo/bar", FileMode(0))

     def test_read_operations(self, m_boto):
         s3 = backends.S3Storage('bucket', aws_region='us-east-1')
@@ -200,7 +202,7 @@ def test_read_not_found(self, m_boto):
         with pytest.raises(FileNotFoundInStorageError) as exc:
             s3.open('foo/bar', FileMode.read)

-        assert str(exc.value.filename) == 'foo/bar'
+        assert exc.value.filename == 'foo/bar'
         assert str(exc.value.storage_type) == 'S3Storage'

     def test_write_operations(self, m_boto):

keg_storage/tests/test_backend_sftp.py

+4 -6

@@ -94,9 +94,8 @@ def test_read_operations(self, sftp, m_sftp, m_log):
     @sftp_mocked()
     def test_read_not_permitted(self, sftp, m_sftp, m_log):
         with sftp.open('/tmp/foo.txt', FileMode.write) as file:
-            with pytest.raises(IOError) as exc:
+            with pytest.raises(IOError, match="File not opened for reading"):
                 file.read(1)
-            assert str(exc.value) == 'File not opened for reading'

     @sftp_mocked()
     def test_write_operations(self, sftp, m_sftp, m_log):
@@ -109,7 +108,6 @@ def test_write_operations(self, sftp, m_sftp, m_log):

     @sftp_mocked()
     def test_write_not_permitted(self, sftp, m_sftp, m_log):
-        with sftp.open('/tmp/foo.txt', FileMode.read) as file:
-            with pytest.raises(IOError) as exc:
-                file.write(b'')
-            assert str(exc.value) == 'File not opened for writing'
+        with sftp.open("/tmp/foo.txt", FileMode.read) as file:
+            with pytest.raises(IOError, match="File not opened for writing"):
+                file.write(b"")

keg_storage/tests/test_backends.py

+14 -13

@@ -1,6 +1,7 @@
 import io
 import os
 import pathlib
+import re

 import arrow
 import click
@@ -34,7 +35,7 @@ def close(self):

 class FakeBackend(keg_storage.StorageBackend):
     def __init__(self, base_dir: pathlib.Path):
-        super().__init__()
+        super().__init__("fake")
         self.base_dir = base_dir

     def list(self, path: str):
@@ -62,13 +63,12 @@ def delete(self, path):
 class TestStorageBackend:

     def test_methods_not_implemented(self):
-
-        interface = keg_storage.StorageBackend()
+        interface = keg_storage.StorageBackend("incomplete")

         cases = {
-            interface.list: ('path',),
-            interface.delete: ('path',),
-            interface.open: ('path', FileMode.read),
+            interface.list: ("path",),
+            interface.delete: ("path",),
+            interface.open: ("path", FileMode.read),
         }

         for method, args in cases.items():
@@ -259,12 +259,13 @@ def test_str(self):
     def test_as_mode(self):
         assert FileMode.as_mode(FileMode.read) == FileMode.read

-        assert FileMode.as_mode('r') == FileMode.read
-        assert FileMode.as_mode('w') == FileMode.write
-        assert FileMode.as_mode('rw') == FileMode.read | FileMode.write
-        assert FileMode.as_mode('wr') == FileMode.read | FileMode.write
-        assert FileMode.as_mode('rb') == FileMode.read
+        assert FileMode.as_mode("r") == FileMode.read
+        assert FileMode.as_mode("w") == FileMode.write
+        assert FileMode.as_mode("rw") == FileMode.read | FileMode.write
+        assert FileMode.as_mode("wr") == FileMode.read | FileMode.write
+        assert FileMode.as_mode("rb") == FileMode.read

-        with pytest.raises(ValueError) as exc:
+        with pytest.raises(
+            ValueError, match=re.escape("as_mode() accepts only FileMode or str arguments")
+        ):
             FileMode.as_mode(1)
-        assert str(exc.value) == 'as_mode() accepts only FileMode or str arguments'