Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download Failure Resilience #956

Merged
merged 7 commits into from
Nov 8, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/753.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Briefcase is more resilient to file download failures by discarding partially downloaded files.
53 changes: 39 additions & 14 deletions src/briefcase/integrations/download.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import os
import tempfile
from contextlib import suppress
from email.message import Message
from urllib.parse import urlparse

Expand Down Expand Up @@ -74,21 +77,8 @@ def file(self, url, download_path, role=None):
if filename.exists():
self.tools.logger.info(f"{cache_name} already downloaded")
else:
# We have meaningful content, and it hasn't been cached previously,
# so save it in the requested location
self.tools.logger.info(f"Downloading {cache_name}...")
with filename.open("wb") as f:
total = response.headers.get("content-length")
if total is None:
f.write(response.content)
else:
progress_bar = self.tools.input.progress_bar()
task_id = progress_bar.add_task("Downloader", total=int(total))
with progress_bar:
for data in response.iter_content(chunk_size=1024 * 1024):
f.write(data)
progress_bar.update(task_id, advance=len(data))

self._fetch_and_write_content(response, filename)
except requests_exceptions.ConnectionError as e:
if role:
description = role
Expand All @@ -97,3 +87,38 @@ def file(self, url, download_path, role=None):
raise NetworkFailure(f"download {description}") from e

return filename

def _fetch_and_write_content(self, response, filename):
"""Write the content from the Requests response to file.

The data is written in to a temporary file in a filesystem location
designated by the platform. This avoids partially downloaded files
masquerading as complete downloads in later Briefcase runs.
The temporary file is only moved to ``filename`` if the download
is successful; otherwise, it is deleted.

:param response: ``requests.Response``
:param filename: full filesystem path to save data
"""
try:
with tempfile.NamedTemporaryFile(delete=False) as temp_file:
total = response.headers.get("content-length")
if total is None:
temp_file.write(response.content)
else:
progress_bar = self.tools.input.progress_bar()
task_id = progress_bar.add_task("Downloader", total=int(total))
with progress_bar:
for data in response.iter_content(chunk_size=1024 * 1024):
temp_file.write(data)
progress_bar.update(task_id, advance=len(data))

self.tools.shutil.move(temp_file.name, filename)
rmartin16 marked this conversation as resolved.
Show resolved Hide resolved
# retrieving the current umask requires changing it...
os.umask(current_umask := os.umask(0o022))
self.tools.os.chmod(filename, 0o664 & ~current_umask)
Copy link
Member Author

@rmartin16 rmartin16 Nov 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The NamedTemporaryFile file is only created with 600 permissions. So, all this umask stuff restores the previous behavior from filename.open("wb").

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A longer explanatory comment would be helpful here. The interplay between the 0o022 mask and the and not with 0o644 wan't clear to me.

There's also no test that the resulting file mask is actually followed; there's code coverage, but no real check of the logic.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added (some probably overly verbose) comments about umask. I also tried to implement file permission verification in the tests.....things are really wonky on windows, though...

finally:
# Ensure the temporary file is deleted; this file may still
# exist if the download fails or the user sends CTRL+C.
with suppress(OSError):
self.tools.os.remove(temp_file.name)
43 changes: 43 additions & 0 deletions tests/integrations/download/test_Download__file.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import shutil
from unittest import mock

import pytest
Expand All @@ -16,6 +17,8 @@
@pytest.fixture
def mock_tools(mock_tools) -> ToolCache:
mock_tools.requests = mock.MagicMock(spec_set=requests)
# Restore move so the temporary file can be moved after downloaded
mock_tools.shutil.move = mock.MagicMock(wraps=shutil.move)
return mock_tools


Expand Down Expand Up @@ -86,6 +89,11 @@ def test_new_download_oneshot(mock_tools, url, content_disposition):
# The filename is derived from the URL or header
assert filename == mock_tools.base_path / "downloads" / "something.zip"

# Temporary file was upgraded to intended destination
mock_tools.shutil.move.assert_called_with(mock.ANY, filename)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this test passes, it's not especially resilient - mock.ANY really does allow any content. This initial "mock.ANY" call is definitely required to verify that a filename was used, but it would be desirable to add checks that the same temporary filename was used for all the subsequent calls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fair; I was initially struggling with how to introspect information about the temporary file and I went too far towards "assume it's working if everything else is working." I think my changes now verify the logic is working as designed.

mock_tools.os.chmod.assert_called_with(filename, mock.ANY)
mock_tools.os.remove.assert_called_with(mock.ANY)

# File content is as expected
with (mock_tools.base_path / "downloads" / "something.zip").open() as f:
assert f.read() == "all content"
Expand Down Expand Up @@ -122,6 +130,11 @@ def test_new_download_chunked(mock_tools):
# The filename is derived from the URL
assert filename == mock_tools.base_path / "something.zip"

# Temporary file was upgraded to intended destination
mock_tools.shutil.move.assert_called_with(mock.ANY, filename)
mock_tools.os.chmod.assert_called_with(filename, mock.ANY)
mock_tools.os.remove.assert_called_with(mock.ANY)

# The downloaded file exists, and content is as expected
assert filename.exists()
with (mock_tools.base_path / "something.zip").open() as f:
Expand Down Expand Up @@ -161,6 +174,11 @@ def test_already_downloaded(mock_tools):
assert filename == existing_file
assert filename.exists()

# Temporary file was not moved
mock_tools.shutil.move.assert_not_called()
mock_tools.os.chmod.assert_not_called()
mock_tools.os.remove.assert_not_called()


def test_missing_resource(mock_tools):
response = mock.MagicMock()
Expand All @@ -185,6 +203,11 @@ def test_missing_resource(mock_tools):
# The file doesn't exist as a result of the download failure
assert not (mock_tools.base_path / "something.zip").exists()

# Temporary file was not moved
mock_tools.shutil.move.assert_not_called()
mock_tools.os.chmod.assert_not_called()
mock_tools.os.remove.assert_not_called()


def test_bad_resource(mock_tools):
response = mock.MagicMock()
Expand All @@ -209,6 +232,11 @@ def test_bad_resource(mock_tools):
# The file doesn't exist as a result of the download failure
assert not (mock_tools.base_path / "something.zip").exists()

# Temporary file was not moved
mock_tools.shutil.move.assert_not_called()
mock_tools.os.chmod.assert_not_called()
mock_tools.os.remove.assert_not_called()


def test_get_connection_error(mock_tools):
"""NetworkFailure raises if requests.get() errors."""
Expand All @@ -233,6 +261,11 @@ def test_get_connection_error(mock_tools):
# The file doesn't exist as a result of the download failure
assert not (mock_tools.base_path / "something.zip").exists()

# Temporary file was not moved
mock_tools.shutil.move.assert_not_called()
mock_tools.os.chmod.assert_not_called()
mock_tools.os.remove.assert_not_called()


def test_iter_content_connection_error(mock_tools):
"""NetworkFailure raised if response.iter_content() errors."""
Expand Down Expand Up @@ -260,6 +293,11 @@ def test_iter_content_connection_error(mock_tools):
# The file doesn't exist as a result of the download failure
assert not (mock_tools.base_path / "something.zip").exists()

# Temporary file was not moved but it was deleted
mock_tools.shutil.move.assert_not_called()
mock_tools.os.chmod.assert_not_called()
mock_tools.os.remove.assert_called_with(mock.ANY)


def test_content_connection_error(mock_tools):
"""NetworkFailure raised if response.content errors."""
Expand Down Expand Up @@ -289,3 +327,8 @@ def test_content_connection_error(mock_tools):

# The file doesn't exist as a result of the download failure
assert not (mock_tools.base_path / "something.zip").exists()

# Temporary file was not moved but it was deleted
mock_tools.shutil.move.assert_not_called()
mock_tools.os.chmod.assert_not_called()
mock_tools.os.remove.assert_called_with(mock.ANY)