Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enforcing explicit UTF-8 blob name. #3354

Merged
merged 1 commit into from
May 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions storage/google/cloud/storage/blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,11 @@ class Blob(_PropertyMixin):
"""A wrapper around Cloud Storage's concept of an ``Object``.

:type name: str
:param name: The name of the blob. This corresponds to the
unique path of the object in the bucket.
:param name: The name of the blob. This corresponds to the unique path of
the object in the bucket. If bytes, will be converted to a
unicode object. Blob / object names can contain any sequence
of valid unicode characters, of length 1-1024 bytes when
UTF-8 encoded.

:type bucket: :class:`google.cloud.storage.bucket.Bucket`
:param bucket: The bucket to which this blob belongs.
Expand Down Expand Up @@ -104,6 +107,7 @@ class Blob(_PropertyMixin):
"""

def __init__(self, name, bucket, chunk_size=None, encryption_key=None):
name = _bytes_to_unicode(name)
super(Blob, self).__init__(name=name)

self.chunk_size = chunk_size # Check that setter accepts value.
Expand Down Expand Up @@ -148,7 +152,7 @@ def path_helper(bucket_path, blob_name):
:rtype: str
:returns: The relative URL path for ``blob_name``.
"""
return bucket_path + '/o/' + quote(blob_name, safe='')
return bucket_path + '/o/' + _quote(blob_name)

@property
def acl(self):
Expand Down Expand Up @@ -190,7 +194,7 @@ def public_url(self):
return '{storage_base_url}/{bucket_name}/{quoted_name}'.format(
storage_base_url='https://storage.googleapis.com',
bucket_name=self.bucket.name,
quoted_name=quote(self.name, safe=''))
quoted_name=_quote(self.name))

def generate_signed_url(self, expiration, method='GET',
content_type=None,
Expand Down Expand Up @@ -261,7 +265,7 @@ def generate_signed_url(self, expiration, method='GET',
"""
resource = '/{bucket_name}/{quoted_name}'.format(
bucket_name=self.bucket.name,
quoted_name=quote(self.name, safe=''))
quoted_name=_quote(self.name))

if credentials is None:
client = self._require_client(client)
Expand Down Expand Up @@ -1362,3 +1366,21 @@ def _get_encryption_headers(key, source=False):
prefix + 'Key': _bytes_to_unicode(key),
prefix + 'Key-Sha256': _bytes_to_unicode(key_hash),
}


def _quote(value):
    """Percent-encode a value for use in a URL.

    The value is coerced to UTF-8 encoded bytes before quoting, so unicode
    input is handled the same way on Python 2 and Python 3. (Python 3's
    ``urllib.parse.quote`` would encode unicode by itself, but Python 2's
    ``quote`` cannot handle non-ASCII unicode characters.)

    No characters are treated as safe, so a ``/`` in the value is escaped
    as well.

    :type value: str or bytes
    :param value: The value to be URL-quoted.

    :rtype: str
    :returns: The quoted value (bytes in Python 2, unicode in Python 3).
    """
    # Convert first, then quote the resulting bytes in a single expression.
    return quote(_to_bytes(value, encoding='utf-8'), safe='')
5 changes: 3 additions & 2 deletions storage/tests/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def test_copy_existing_file(self):


class TestUnicode(unittest.TestCase):

def test_fetch_object_and_check_content(self):
client = storage.Client()
bucket = client.bucket('storage-library-test-bucket')
Expand All @@ -256,8 +257,8 @@ def test_fetch_object_and_check_content(self):
# Normalization Form D: an ASCII e followed by U+0301 combining
# character; URL should end with Cafe%CC%81
test_data = {
u'Caf\u00e9'.encode('utf-8'): b'Normalization Form C',
u'Cafe\u0301'.encode('utf-8'): b'Normalization Form D',
u'Caf\u00e9': b'Normalization Form C',
u'Cafe\u0301': b'Normalization Form D',
}
for blob_name, file_contents in test_data.items():
blob = bucket.blob(blob_name)
Expand Down
59 changes: 51 additions & 8 deletions storage/tests/unit/test_blob.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def _make_credentials():

class Test_Blob(unittest.TestCase):

def _make_one(self, *args, **kw):
@staticmethod
def _make_one(*args, **kw):
from google.cloud.storage.blob import Blob

properties = kw.pop('properties', None)
Expand All @@ -45,6 +46,13 @@ def test_ctor_wo_encryption_key(self):
self.assertIs(blob._acl.blob, blob)
self.assertEqual(blob._encryption_key, None)

def test_ctor_with_encoded_unicode(self):
    # A name passed as UTF-8 encoded bytes should be exposed as unicode.
    encoded_name = b'wet \xe2\x9b\xb5'
    expected_name = u'wet \N{sailboat}'

    blob = self._make_one(encoded_name, bucket=None)

    # The stored name must differ from the raw bytes and match the
    # decoded text.
    self.assertNotEqual(blob.name, encoded_name)
    self.assertEqual(blob.name, expected_name)

def test_ctor_w_encryption_key(self):
KEY = b'01234567890123456789012345678901' # 32 bytes
BLOB_NAME = 'blob-name'
Expand Down Expand Up @@ -91,21 +99,21 @@ def test_chunk_size_setter_bad_value(self):
def test_acl_property(self):
from google.cloud.storage.acl import ObjectACL

FAKE_BUCKET = _Bucket()
blob = self._make_one(None, bucket=FAKE_BUCKET)
fake_bucket = _Bucket()
blob = self._make_one(u'name', bucket=fake_bucket)
acl = blob.acl
self.assertIsInstance(acl, ObjectACL)
self.assertIs(acl, blob._acl)

def test_path_no_bucket(self):
FAKE_BUCKET = object()
NAME = 'blob-name'
blob = self._make_one(NAME, bucket=FAKE_BUCKET)
def test_path_bad_bucket(self):
fake_bucket = object()
name = u'blob-name'
blob = self._make_one(name, bucket=fake_bucket)
self.assertRaises(AttributeError, getattr, blob, 'path')

def test_path_no_name(self):
bucket = _Bucket()
blob = self._make_one(None, bucket=bucket)
blob = self._make_one(u'', bucket=bucket)
self.assertRaises(ValueError, getattr, blob, 'path')

def test_path_normal(self):
Expand All @@ -120,6 +128,12 @@ def test_path_w_slash_in_name(self):
blob = self._make_one(BLOB_NAME, bucket=bucket)
self.assertEqual(blob.path, '/b/name/o/parent%2Fchild')

def test_path_with_non_ascii(self):
    # U+00E9 must appear in the path as its percent-encoded UTF-8 bytes.
    blob = self._make_one(u'Caf\xe9', bucket=_Bucket())
    expected_path = '/b/name/o/Caf%C3%A9'
    self.assertEqual(blob.path, expected_path)

def test_public_url(self):
BLOB_NAME = 'blob-name'
bucket = _Bucket()
Expand All @@ -136,6 +150,13 @@ def test_public_url_w_slash_in_name(self):
blob.public_url,
'https://storage.googleapis.com/name/parent%2Fchild')

def test_public_url_with_non_ascii(self):
    # Both the space and the snowman (U+2603) must be percent-encoded
    # in the public URL.
    blob = self._make_one(u'winter \N{snowman}', bucket=_Bucket())
    self.assertEqual(
        blob.public_url,
        'https://storage.googleapis.com/name/winter%20%E2%98%83')

def _basic_generate_signed_url_helper(self, credentials=None):
BLOB_NAME = 'blob-name'
EXPIRATION = '2014-10-16T20:34:37.000Z'
Expand Down Expand Up @@ -2227,6 +2248,28 @@ def test_updated_unset(self):
self.assertIsNone(blob.updated)


class Test__quote(unittest.TestCase):
    """Unit tests for ``google.cloud.storage.blob._quote``."""

    @staticmethod
    def _call_fut(value):
        # Import lazily so the function under test is only loaded when a
        # test actually runs.
        from google.cloud.storage.blob import _quote

        return _quote(value)

    def test_bytes(self):
        # Raw bytes are quoted byte-for-byte.
        raw = b'\xDE\xAD\xBE\xEF'
        self.assertEqual(self._call_fut(raw), '%DE%AD%BE%EF')

    def test_unicode(self):
        # A non-BMP character is quoted as its four UTF-8 bytes.
        text = u'\U0001f681'
        self.assertEqual(self._call_fut(text), '%F0%9F%9A%81')

    def test_bad_type(self):
        # Values that are neither text nor bytes are rejected.
        self.assertRaises(TypeError, self._call_fut, None)


class _Responder(object):

def __init__(self, *responses):
Expand Down