Skip to content

Commit 3e3a7ad

Browse files
committed
1 parent 9313879 commit 3e3a7ad

File tree

3 files changed

+122
-17
lines changed

3 files changed

+122
-17
lines changed

google/cloud/storage/_media/requests/download.py

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@
4343
Please restart the download.
4444
"""
4545

46+
_RESPONSE_HEADERS_INFO = """\
47+
The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}.
48+
The download request read {} bytes of data.
49+
If the download was incomplete, please check the network connection and restart the download.
50+
"""
51+
4652

4753
class Download(_request_helpers.RequestsMixin, _download.Download):
4854
"""Helper to manage downloading a resource from a Google API.
@@ -141,13 +147,30 @@ def _write_to_stream(self, response):
141147
):
142148
actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
143149
if actual_checksum != expected_checksum:
144-
msg = _CHECKSUM_MISMATCH.format(
145-
self.media_url,
146-
expected_checksum,
147-
actual_checksum,
148-
checksum_type=self.checksum.upper(),
150+
headers = self._get_headers(response)
151+
x_goog_encoding = headers.get("x-goog-stored-content-encoding")
152+
x_goog_length = headers.get("x-goog-stored-content-length")
153+
content_length_msg = _RESPONSE_HEADERS_INFO.format(
154+
x_goog_length, x_goog_encoding, self._bytes_downloaded
149155
)
150-
raise DataCorruption(response, msg)
156+
if (
157+
x_goog_length
158+
and self._bytes_downloaded < int(x_goog_length)
159+
and x_goog_encoding != "gzip"
160+
):
161+
# The library will attempt to trigger a retry by raising a ConnectionError, if
162+
# (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
163+
# (b) the object is not gzip-compressed when stored in Cloud Storage.
164+
raise ConnectionError(content_length_msg)
165+
else:
166+
msg = _CHECKSUM_MISMATCH.format(
167+
self.media_url,
168+
expected_checksum,
169+
actual_checksum,
170+
checksum_type=self.checksum.upper(),
171+
)
172+
msg += content_length_msg
173+
raise common.DataCorruption(response, msg)
151174

152175
def consume(
153176
self,
@@ -339,13 +362,31 @@ def _write_to_stream(self, response):
339362
actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
340363

341364
if actual_checksum != expected_checksum:
342-
msg = _CHECKSUM_MISMATCH.format(
343-
self.media_url,
344-
expected_checksum,
345-
actual_checksum,
346-
checksum_type=self.checksum.upper(),
365+
headers = self._get_headers(response)
366+
x_goog_encoding = headers.get("x-goog-stored-content-encoding")
367+
x_goog_length = headers.get("x-goog-stored-content-length")
368+
content_length_msg = _RESPONSE_HEADERS_INFO.format(
369+
x_goog_length, x_goog_encoding, self._bytes_downloaded
347370
)
348-
raise DataCorruption(response, msg)
371+
if (
372+
x_goog_length
373+
and self._bytes_downloaded < int(x_goog_length)
374+
and x_goog_encoding != "gzip"
375+
):
376+
# The library will attempt to trigger a retry by raising a ConnectionError, if
377+
# (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
378+
# (b) the object is not gzip-compressed when stored in Cloud Storage.
379+
raise ConnectionError(content_length_msg)
380+
else:
381+
msg = _CHECKSUM_MISMATCH.format(
382+
self.media_url,
383+
expected_checksum,
384+
actual_checksum,
385+
checksum_type=self.checksum.upper(),
386+
)
387+
msg += content_length_msg
388+
raise common.DataCorruption(response, msg)
389+
349390

350391
def consume(
351392
self,

tests/resumable_media/system/requests/test_download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ def test_corrupt_download(self, add_files, corrupting_transport, checksum):
463463
info[checksum],
464464
checksum_type=checksum.upper(),
465465
)
466-
assert exc_info.value.args == (msg,)
466+
assert msg in exc_info.value.args[0]
467467

468468
def test_corrupt_download_no_check(self, add_files, corrupting_transport):
469469
for info in ALL_FILES:

tests/resumable_media/unit/requests/test_download.py

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,11 @@ def test__write_to_stream_with_hash_check_fail(self, checksum):
124124
msg = download_mod._CHECKSUM_MISMATCH.format(
125125
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
126126
)
127-
assert error.args[0] == msg
127+
assert msg in error.args[0]
128+
assert (
129+
f"The download request read {download._bytes_downloaded} bytes of data."
130+
in error.args[0]
131+
)
128132

129133
# Check mocks.
130134
response.__enter__.assert_called_once_with()
@@ -186,6 +190,29 @@ def test__write_to_stream_with_invalid_checksum_type(self):
186190
error = exc_info.value
187191
assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``"
188192

193+
@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
194+
def test__write_to_stream_incomplete_read(self, checksum):
195+
stream = io.BytesIO()
196+
download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum)
197+
198+
chunk1 = b"first chunk"
199+
mock_full_content_length = len(chunk1) + 123
200+
headers = {"x-goog-stored-content-length": mock_full_content_length}
201+
bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ=="
202+
header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum)
203+
headers[_helpers._HASH_HEADER] = header_value
204+
response = _mock_response(chunks=[chunk1], headers=headers)
205+
206+
with pytest.raises(ConnectionError) as exc_info:
207+
download._write_to_stream(response)
208+
209+
assert not download.finished
210+
error = exc_info.value
211+
assert (
212+
f"The download request read {download._bytes_downloaded} bytes of data."
213+
in error.args[0]
214+
)
215+
189216
def _consume_helper(
190217
self,
191218
stream=None,
@@ -304,7 +331,11 @@ def test_consume_with_stream_hash_check_fail(self, checksum):
304331
msg = download_mod._CHECKSUM_MISMATCH.format(
305332
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
306333
)
307-
assert error.args[0] == msg
334+
assert msg in error.args[0]
335+
assert (
336+
f"The download request read {download._bytes_downloaded} bytes of data."
337+
in error.args[0]
338+
)
308339

309340
# Check mocks.
310341
transport.request.assert_called_once_with(
@@ -599,7 +630,11 @@ def test__write_to_stream_with_hash_check_fail(self, checksum):
599630
msg = download_mod._CHECKSUM_MISMATCH.format(
600631
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
601632
)
602-
assert error.args[0] == msg
633+
assert msg in error.args[0]
634+
assert (
635+
f"The download request read {download._bytes_downloaded} bytes of data."
636+
in error.args[0]
637+
)
603638

604639
# Check mocks.
605640
response.__enter__.assert_called_once_with()
@@ -632,6 +667,31 @@ def test__write_to_stream_with_invalid_checksum_type(self):
632667
error = exc_info.value
633668
assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``"
634669

670+
@pytest.mark.parametrize("checksum", ["md5", "crc32c"])
671+
def test__write_to_stream_incomplete_read(self, checksum):
672+
stream = io.BytesIO()
673+
download = download_mod.RawDownload(
674+
EXAMPLE_URL, stream=stream, checksum=checksum
675+
)
676+
677+
chunk1 = b"first chunk"
678+
mock_full_content_length = len(chunk1) + 123
679+
headers = {"x-goog-stored-content-length": mock_full_content_length}
680+
bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ=="
681+
header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum)
682+
headers[_helpers._HASH_HEADER] = header_value
683+
response = _mock_raw_response(chunks=[chunk1], headers=headers)
684+
685+
with pytest.raises(ConnectionError) as exc_info:
686+
download._write_to_stream(response)
687+
688+
assert not download.finished
689+
error = exc_info.value
690+
assert (
691+
f"The download request read {download._bytes_downloaded} bytes of data."
692+
in error.args[0]
693+
)
694+
635695
def _consume_helper(
636696
self,
637697
stream=None,
@@ -754,7 +814,11 @@ def test_consume_with_stream_hash_check_fail(self, checksum):
754814
msg = download_mod._CHECKSUM_MISMATCH.format(
755815
EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper()
756816
)
757-
assert error.args[0] == msg
817+
assert msg in error.args[0]
818+
assert (
819+
f"The download request read {download._bytes_downloaded} bytes of data."
820+
in error.args[0]
821+
)
758822

759823
# Check mocks.
760824
transport.request.assert_called_once_with(

0 commit comments

Comments
 (0)