Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Storage][Blob][Bug]Support parsing blob url with '/' in blob name #12619

Merged
merged 5 commits into from
Aug 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 14 additions & 6 deletions sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
try:
from urllib.parse import urlparse, quote, unquote
except ImportError:
from urlparse import urlparse # type: ignore
from urllib2 import quote, unquote # type: ignore
from urlparse import urlparse # type: ignore
from urllib2 import quote, unquote # type: ignore

import six
from azure.core.tracing.decorator import distributed_trace
Expand Down Expand Up @@ -180,7 +180,7 @@ def _format_url(self, hostname):
@classmethod
def from_blob_url(cls, blob_url, credential=None, snapshot=None, **kwargs):
# type: (str, Optional[Any], Optional[Union[str, Dict[str, Any]]], Any) -> BlobClient
"""Create BlobClient from a blob url.
"""Create BlobClient from a blob url. This doesn't support customized blob url with '/' in blob name.

:param str blob_url:
The full endpoint URL to the Blob, including SAS token and snapshot if used. This could be
Expand Down Expand Up @@ -209,10 +209,18 @@ def from_blob_url(cls, blob_url, credential=None, snapshot=None, **kwargs):
if not parsed_url.netloc:
raise ValueError("Invalid URL: {}".format(blob_url))

path_blob = parsed_url.path.lstrip('/').split('/')
account_path = ""
if len(path_blob) > 2:
account_path = "/" + "/".join(path_blob[:-2])
if ".core." in parsed_url.netloc:
# .core. is indicating non-customized url. Blob name with directory info can also be parsed.
path_blob = parsed_url.path.lstrip('/').split('/', 1)
elif "localhost" in parsed_url.netloc or "127.0.0.1" in parsed_url.netloc:
path_blob = parsed_url.path.lstrip('/').split('/', 2)
account_path += path_blob[0]
else:
# for customized url. blob name that has directory info cannot be parsed.
path_blob = parsed_url.path.lstrip('/').split('/')
if len(path_blob) > 2:
account_path = "/" + "/".join(path_blob[:-2])
account_url = "{}://{}{}?{}".format(
parsed_url.scheme,
parsed_url.netloc.rstrip('/'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,17 @@ def __init__(
raise ValueError("Invalid service: {}".format(service))
service_name = service.split('-')[0]
account = parsed_url.netloc.split(".{}.core.".format(service_name))

self.account_name = account[0] if len(account) > 1 else None
secondary_hostname = None
if not self.account_name and parsed_url.netloc.startswith("localhost") \
or parsed_url.netloc.startswith("127.0.0.1"):
self.account_name = parsed_url.path.strip("/")

self.credential = format_shared_key_credential(account, credential)
self.credential = _format_shared_key_credential(self.account_name, credential)
if self.scheme.lower() != "https" and hasattr(self.credential, "get_token"):
raise ValueError("Token credential is only supported with HTTPS.")

secondary_hostname = None
if hasattr(self.credential, "account_name"):
self.account_name = self.credential.account_name
secondary_hostname = "{}-secondary.{}.{}".format(
Expand Down Expand Up @@ -326,11 +331,11 @@ def __exit__(self, *args): # pylint: disable=arguments-differ
pass


def format_shared_key_credential(account, credential):
def _format_shared_key_credential(account_name, credential):
if isinstance(credential, six.string_types):
if len(account) < 2:
if not account_name:
raise ValueError("Unable to determine account name for shared key credential.")
credential = {"account_name": account[0], "account_key": credential}
credential = {"account_name": account_name, "account_key": credential}
if isinstance(credential, dict):
if "account_name" not in credential:
raise ValueError("Shared key credential missing 'account_name")
Expand Down
26 changes: 26 additions & 0 deletions sdk/storage/azure-storage-blob/tests/test_blob_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,7 @@ def test_create_service_with_cstr_succeeds_if_sec_with_prim(self, resource_group
self.assertTrue(service.primary_endpoint.startswith('https://www.mydomain.com/'))
self.assertTrue(service.secondary_endpoint.startswith('https://www-sec.mydomain.com/'))


def test_create_service_with_custom_account_endpoint_path(self):
account_name = "blobstorage"
account_key = "blobkey"
Expand Down Expand Up @@ -438,6 +439,31 @@ def test_create_service_with_custom_account_endpoint_path(self):
self.assertEqual(service.primary_hostname, 'local-machine:11002/custom/account/path')
self.assertEqual(service.url, 'http://local-machine:11002/custom/account/path/foo/bar?snapshot=baz')

def test_create_blob_client_with_sub_directory_path_in_blob_name(self):
blob_url = "https://testaccount.blob.core.windows.net/containername/dir1/sub000/2010_Unit150_Ivan097_img0003.jpg"
blob_client = BlobClient.from_blob_url(blob_url)
self.assertEqual(blob_client.container_name, "containername")
self.assertEqual(blob_client.blob_name, "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg")

blob_emulator_url = 'http://127.0.0.1:1000/devstoreaccount1/containername/dir1/sub000/2010_Unit150_Ivan097_img0003.jpg'
blob_client = BlobClient.from_blob_url(blob_emulator_url)
self.assertEqual(blob_client.container_name, "containername")
self.assertEqual(blob_client.blob_name, "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg")

def test_create_client_for_emulator(self):
container_client = ContainerClient(
account_url='http://127.0.0.1:1000/devstoreaccount1',
container_name='newcontainer',
credential='Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hoping this is not real :P

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I bet we'll get email to remove it soon :P


self.assertEqual(container_client.container_name, "newcontainer")
self.assertEqual(container_client.account_name, "devstoreaccount1")

ContainerClient.from_container_url('http://127.0.0.1:1000/devstoreaccount1/newcontainer')
self.assertEqual(container_client.container_name, "newcontainer")
self.assertEqual(container_client.account_name, "devstoreaccount1")


@GlobalStorageAccountPreparer()
def test_request_callback_signed_header(self, resource_group, location, storage_account, storage_account_key):
# Arrange
Expand Down
11 changes: 11 additions & 0 deletions sdk/storage/azure-storage-blob/tests/test_blob_client_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,17 @@ def test_create_service_with_custom_account_endpoint_path(self):
self.assertEqual(service.primary_hostname, 'local-machine:11002/custom/account/path')
self.assertEqual(service.url, 'http://local-machine:11002/custom/account/path/foo/bar?snapshot=baz')

def test_create_blob_client_with_sub_directory_path_in_blob_name(self):
blob_url = "https://testaccount.blob.core.windows.net/containername/dir1/sub000/2010_Unit150_Ivan097_img0003.jpg"
blob_client = BlobClient.from_blob_url(blob_url)
self.assertEqual(blob_client.container_name, "containername")
self.assertEqual(blob_client.blob_name, "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg")

blob_emulator_url = 'http://127.0.0.1:1000/devstoreaccount1/containername/dir1/sub000/2010_Unit150_Ivan097_img0003.jpg'
blob_client = BlobClient.from_blob_url(blob_emulator_url)
self.assertEqual(blob_client.container_name, "containername")
self.assertEqual(blob_client.blob_name, "dir1/sub000/2010_Unit150_Ivan097_img0003.jpg")

@GlobalStorageAccountPreparer()
@AsyncStorageTestCase.await_prepared_test
async def test_request_callback_signed_header_async(self, resource_group, location, storage_account, storage_account_key):
Expand Down
12 changes: 6 additions & 6 deletions sdk/storage/azure-storage-blob/tests/test_largest_block_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
BlobServiceClient,
BlobBlock
)
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential

from _shared.testcase import StorageTestCase, GlobalStorageAccountPreparer

Expand Down Expand Up @@ -97,7 +97,7 @@ def test_put_block_bytes_largest(self, resource_group, location, storage_account
@GlobalStorageAccountPreparer()
def test_put_block_bytes_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = self._create_blob()

Expand Down Expand Up @@ -157,7 +157,7 @@ def test_put_block_stream_largest(self, resource_group, location, storage_accoun
@GlobalStorageAccountPreparer()
def test_put_block_stream_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = self._create_blob()

Expand Down Expand Up @@ -211,7 +211,7 @@ def test_create_largest_blob_from_path(self, resource_group, location, storage_a
@GlobalStorageAccountPreparer()
def test_create_largest_blob_from_path_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -237,7 +237,7 @@ def test_create_largest_blob_from_path_without_network(self, resource_group, loc
@GlobalStorageAccountPreparer()
def test_create_largest_blob_from_stream_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -257,7 +257,7 @@ def test_create_largest_blob_from_stream_without_network(self, resource_group, l
@GlobalStorageAccountPreparer()
def test_create_largest_blob_from_stream_single_upload_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy],
max_single_put_size=LARGEST_SINGLE_UPLOAD_SIZE)
blob_name = self._get_blob_reference()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from azure.storage.blob import (
BlobBlock
)
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential
from azure.storage.blob._shared.constants import CONNECTION_TIMEOUT, READ_TIMEOUT

from _shared.asynctestcase import AsyncStorageTestCase
Expand Down Expand Up @@ -123,7 +123,7 @@ async def test_put_block_bytes_largest(self, resource_group, location, storage_a
@AsyncStorageTestCase.await_prepared_test
async def test_put_block_bytes_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = await self._create_blob()

Expand Down Expand Up @@ -185,7 +185,7 @@ async def test_put_block_stream_largest(self, resource_group, location, storage_
@AsyncStorageTestCase.await_prepared_test
async def test_put_block_stream_largest_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob = await self._create_blob()

Expand Down Expand Up @@ -241,7 +241,7 @@ async def test_create_largest_blob_from_path(self, resource_group, location, sto
@AsyncStorageTestCase.await_prepared_test
async def test_create_largest_blob_from_path_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -268,7 +268,7 @@ async def test_create_largest_blob_from_path_without_network(self, resource_grou
@AsyncStorageTestCase.await_prepared_test
async def test_create_largest_blob_from_stream_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy])
blob_name = self._get_blob_reference()
blob = self.bsc.get_blob_client(self.container_name, blob_name)
Expand All @@ -289,7 +289,7 @@ async def test_create_largest_blob_from_stream_without_network(self, resource_gr
@AsyncStorageTestCase.await_prepared_test
async def test_create_largest_blob_from_stream_single_upload_without_network(self, resource_group, location, storage_account, storage_account_key):
payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([storage_account.name, "dummy"], storage_account_key)
credential_policy = _format_shared_key_credential(storage_account.name, storage_account_key)
await self._setup(storage_account, storage_account_key, [payload_dropping_policy, credential_policy],
max_single_put_size=LARGEST_SINGLE_UPLOAD_SIZE)
blob_name = self._get_blob_reference()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,17 @@ def __init__(
raise ValueError("Invalid service: {}".format(service))
service_name = service.split('-')[0]
account = parsed_url.netloc.split(".{}.core.".format(service_name))

self.account_name = account[0] if len(account) > 1 else None
secondary_hostname = None
if not self.account_name and parsed_url.netloc.startswith("localhost") \
or parsed_url.netloc.startswith("127.0.0.1"):
self.account_name = parsed_url.path.strip("/")

self.credential = format_shared_key_credential(account, credential)
self.credential = _format_shared_key_credential(self.account_name, credential)
if self.scheme.lower() != "https" and hasattr(self.credential, "get_token"):
raise ValueError("Token credential is only supported with HTTPS.")

secondary_hostname = None
if hasattr(self.credential, "account_name"):
self.account_name = self.credential.account_name
secondary_hostname = "{}-secondary.{}.{}".format(
Expand Down Expand Up @@ -320,11 +325,11 @@ def __exit__(self, *args): # pylint: disable=arguments-differ
pass


def format_shared_key_credential(account, credential):
def _format_shared_key_credential(account_name, credential):
if isinstance(credential, six.string_types):
if len(account) < 2:
if not account_name:
raise ValueError("Unable to determine account name for shared key credential.")
credential = {"account_name": account[0], "account_key": credential}
credential = {"account_name": account_name, "account_key": credential}
if isinstance(credential, dict):
if "account_name" not in credential:
raise ValueError("Shared key credential missing 'account_name")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from azure.core.pipeline.policies import HTTPPolicy

from azure.core.exceptions import ResourceExistsError
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential
from azure.storage.filedatalake import DataLakeServiceClient
from testcase import (
StorageTestCase,
Expand All @@ -34,7 +34,7 @@ def setUp(self):
super(LargeFileTest, self).setUp()
url = self._get_account_url()
self.payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME, "dummy"],
credential_policy = _format_shared_key_credential(self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME,
self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
self.dsc = DataLakeServiceClient(url,
credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from azure.core.exceptions import ResourceExistsError
from azure.core.pipeline.policies import SansIOHTTPPolicy
from azure.storage.blob._shared.base_client import format_shared_key_credential
from azure.storage.blob._shared.base_client import _format_shared_key_credential
from azure.storage.filedatalake.aio import DataLakeServiceClient
from testcase import (
StorageTestCase,
Expand All @@ -36,7 +36,7 @@ def setUp(self):
super(LargeFileTest, self).setUp()
url = self._get_account_url()
self.payload_dropping_policy = PayloadDroppingPolicy()
credential_policy = format_shared_key_credential([self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME, "dummy"],
credential_policy = _format_shared_key_credential(self.settings.STORAGE_DATA_LAKE_ACCOUNT_NAME,
self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY)
self.dsc = DataLakeServiceClient(url,
credential=self.settings.STORAGE_DATA_LAKE_ACCOUNT_KEY,
Expand Down
Loading