Skip to content

Commit 0e63c00

Browse files
authored
feat: Add stream_file_content parameter to upload methods (#890)
Closes: SDK-4567
1 parent 320ffe9 commit 0e63c00

File tree

9 files changed

+142
-38
lines changed

9 files changed

+142
-38
lines changed

.github/workflows/build.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ jobs:
1616
- '3.6'
1717
- '3.7'
1818
- '3.8'
19-
- 'pypy-3.8'
2019
- '3.9'
2120
- '3.10'
2221
- '3.11'

boxsdk/object/folder.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,7 @@ def upload_stream(
263263
additional_attributes: Optional[dict] = None,
264264
sha1: Optional[str] = None,
265265
etag: Optional[str] = None,
266+
stream_file_content: bool = True,
266267
) -> 'File':
267268
"""
268269
Upload a file to the folder.
@@ -298,6 +299,9 @@ def upload_stream(
298299
A sha1 checksum for the file.
299300
:param etag:
300301
If specified, instruct the Box API to update the item only if the current version's etag matches.
302+
:param stream_file_content:
303+
If True, the upload will be performed as a stream request. If False, the file will be read into memory
304+
before being uploaded, but this may be required if using some proxy servers to handle redirects correctly.
301305
:returns:
302306
The newly uploaded file.
303307
"""
@@ -335,7 +339,7 @@ def upload_stream(
335339
if not headers:
336340
headers = None
337341
file_response = self._session.post(
338-
url, data=data, files=files, expect_json_response=False, headers=headers
342+
url, data=data, files=files, expect_json_response=False, headers=headers, stream_file_content=stream_file_content,
339343
).json()
340344
if 'entries' in file_response:
341345
file_response = file_response['entries'][0]
@@ -358,6 +362,7 @@ def upload(
358362
additional_attributes: Optional[dict] = None,
359363
sha1: Optional[str] = None,
360364
etag: Optional[str] = None,
365+
stream_file_content: bool = True,
361366
) -> 'File':
362367
"""
363368
Upload a file to the folder.
@@ -394,6 +399,9 @@ def upload(
394399
A sha1 checksum for the new content.
395400
:param etag:
396401
If specified, instruct the Box API to update the item only if the current version's etag matches.
402+
:param stream_file_content:
403+
If True, the upload will be performed as a stream request. If False, the file will be read into memory
404+
before being uploaded, but this may be required if using some proxy servers to handle redirects correctly.
397405
:returns:
398406
The newly uploaded file.
399407
"""
@@ -412,6 +420,7 @@ def upload(
412420
additional_attributes=additional_attributes,
413421
sha1=sha1,
414422
etag=etag,
423+
stream_file_content=stream_file_content,
415424
)
416425

417426
@api_call

boxsdk/session/session.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ def _send_request(self, request: '_BoxRequest', **kwargs: Any) -> 'NetworkRespon
468468
"""
469469
# Reset stream positions to what they were when the request was made so the same data is sent even if this
470470
# is a retried attempt.
471-
files, file_stream_positions = kwargs.get('files'), kwargs.pop('file_stream_positions')
471+
files, file_stream_positions, stream_file_content = (
472+
kwargs.get('files'), kwargs.pop('file_stream_positions'), kwargs.pop('stream_file_content', True))
472473
request_kwargs = self._default_network_request_kwargs.copy()
473474
request_kwargs.update(kwargs)
474475
proxy_dict = self._prepare_proxy()
@@ -477,11 +478,12 @@ def _send_request(self, request: '_BoxRequest', **kwargs: Any) -> 'NetworkRespon
477478
if files and file_stream_positions:
478479
for name, position in file_stream_positions.items():
479480
files[name][1].seek(position)
480-
data = request_kwargs.pop('data', {})
481-
multipart_stream = MultipartStream(data, files)
482-
request_kwargs['data'] = multipart_stream
483-
del request_kwargs['files']
484-
request.headers['Content-Type'] = multipart_stream.content_type
481+
if stream_file_content:
482+
data = request_kwargs.pop('data', {})
483+
multipart_stream = MultipartStream(data, files)
484+
request_kwargs['data'] = multipart_stream
485+
del request_kwargs['files']
486+
request.headers['Content-Type'] = multipart_stream.content_type
485487
request.access_token = request_kwargs.pop('access_token', None)
486488

487489
# send the request

docs/usage/files.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,9 +187,32 @@ new_file = client.folder(folder_id).upload_stream(stream, file_name)
187187
print(f'File "{new_file.name}" uploaded to Box with file ID {new_file.id}')
188188
```
189189

190+
----
191+
**NOTE:**
192+
193+
Both methods `folder.upload()` and `folder.upload_stream()` include the `stream_file_content` parameter,
194+
which controls how the file content is uploaded.
195+
196+
If you are uploading a large file, you may want to stream the request to avoid excessive memory usage.
197+
According to the `requests` library [docs][request_docs], by default, the `requests` library does not support streaming uploads,
198+
and all the data must be read into memory before being sent to the server.
199+
However, the `requests-toolbelt` package includes a `MultipartEncoder` class, which enables file uploads without
200+
loading the entire file into memory. This approach is the default in the Box Python SDK.
201+
202+
That said, handling 307 Temporary Redirects presents a challenge with streamed file uploads.
203+
A 307 redirect requires that both the request method and the body remain unchanged.
204+
This can be problematic when uploading a file stream because the stream will already be exhausted when the redirect occurs.
205+
206+
To address this issue, the `stream_file_content` parameter has been introduced in upload methods. This allows you to choose between:
207+
- Streaming the file (`stream_file_content=True`): Optimizes memory usage but may cause issues with redirects.
208+
209+
- Using the default `requests` library behavior (`stream_file_content=False`): Ensures the file can be re-read if a
210+
redirect occurs but may consume more memory. This is especially important when working with proxy servers.
211+
190212
[folder_class]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder
191213
[upload]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder.upload
192214
[upload_stream]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder.upload_stream
215+
[request_docs]: https://docs.python-requests.org/en/latest/user/quickstart/#post-a-multipart-encoded-file
193216

194217
Chunked Upload
195218
--------------

test/integration_new/object/ai_itest.py

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,10 @@ def test_send_ai_question(parent_folder, small_file_path):
2222
'type': 'file',
2323
'content': 'The sun raises in the east.'
2424
}]
25-
ai_agent = {
26-
'type': 'ai_agent_ask',
27-
'basic_text_multi': {
28-
'model': 'openai__gpt_3_5_turbo'
29-
}
30-
}
3125
answer = CLIENT.send_ai_question(
3226
items=items,
3327
prompt='Which direction does the sun raise?',
3428
mode='single_item_qa',
35-
ai_agent=ai_agent
3629
)
3730
assert 'east' in answer['answer'].lower()
3831
assert answer['completion_reason'] == 'done'
@@ -54,17 +47,10 @@ def test_send_ai_text_gen(parent_folder, small_file_path):
5447
'answer': 'It takes 24 hours for the sun to rise.',
5548
'created_at': '2013-12-12T11:20:43-08:00'
5649
}]
57-
ai_agent = {
58-
'type': 'ai_agent_text_gen',
59-
'basic_gen': {
60-
'model': 'openai__gpt_3_5_turbo_16k'
61-
}
62-
}
6350
answer = CLIENT.send_ai_text_gen(
6451
dialogue_history=dialogue_history,
6552
items=items,
6653
prompt='Which direction does the sun raise?',
67-
ai_agent=ai_agent
6854
)
6955
assert 'east' in answer['answer'].lower()
7056
assert answer['completion_reason'] == 'done'
@@ -73,8 +59,7 @@ def test_send_ai_text_gen(parent_folder, small_file_path):
7359
def test_get_ai_agent_default_config():
7460
config = CLIENT.get_ai_agent_default_config(
7561
mode='text_gen',
76-
language='en',
77-
model='openai__gpt_3_5_turbo'
62+
language='en'
7863
)
7964
assert config['type'] == 'ai_agent_text_gen'
80-
assert config['basic_gen']['model'] == 'openai__gpt_3_5_turbo'
65+
assert config['basic_gen']['model'] != ''

test/integration_new/object/folder_itest.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ def test_auto_chunked_upload_NOT_using_upload_session_urls(parent_folder, large_
103103

104104

105105
def test_get_items(parent_folder, small_file_path):
106-
with BoxTestFolder(parent_folder=parent_folder) as subfolder,\
107-
BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as file,\
106+
with BoxTestFolder(parent_folder=parent_folder) as subfolder, \
107+
BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as file, \
108108
BoxTestWebLink(parent_folder=parent_folder, url='https://box.com') as web_link:
109109

110110
assert set(parent_folder.get_items()) == {subfolder, file, web_link}
@@ -130,6 +130,17 @@ def test_upload_small_file_to_folder(parent_folder, small_file_name, small_file_
130130
util.permanently_delete(uploaded_file)
131131

132132

133+
def test_upload_small_file_to_folder_with_disabled_streaming_file_content(
134+
parent_folder, small_file_name, small_file_path
135+
):
136+
uploaded_file = parent_folder.upload(file_path=small_file_path, file_name=small_file_name, stream_file_content=False)
137+
try:
138+
assert uploaded_file.id
139+
assert uploaded_file.parent == parent_folder
140+
finally:
141+
util.permanently_delete(uploaded_file)
142+
143+
133144
def test_create_subfolder(parent_folder):
134145
created_subfolder = parent_folder.create_subfolder(name=util.random_name())
135146
try:
@@ -199,7 +210,7 @@ def test_delete_folder(parent_folder):
199210

200211

201212
def test_cascade_and_get_metadata_cascade_policies(parent_folder):
202-
with BoxTestMetadataTemplate(display_name="test_template") as metadata_template,\
213+
with BoxTestMetadataTemplate(display_name="test_template") as metadata_template, \
203214
BoxTestFolder(parent_folder=parent_folder) as folder:
204215
folder.cascade_metadata(metadata_template)
205216

test/integration_new/object/trash_itest.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,17 @@ def test_trash_get_items(parent_folder, small_file_path):
2323
test_file = parent_folder.upload(file_path=small_file_path, file_name=name)
2424
test_file.delete()
2525
try:
26-
trash_items = CLIENT.trash().get_items()
27-
assert test_file.id in [item.id for item in trash_items]
26+
trashed_file = test_file.get()
27+
assert trashed_file.item_status == 'trashed'
2828
finally:
2929
CLIENT.trash().permanently_delete_item(test_file)
3030

3131

3232
def test_trash_restore_item(parent_folder, small_file_path):
3333
with BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as test_file:
3434
test_file.delete()
35-
trash_items = CLIENT.trash().get_items()
36-
assert test_file.id in [item.id for item in trash_items]
35+
folder_items = parent_folder.get_items()
36+
assert test_file.id not in [item.id for item in folder_items]
3737
CLIENT.trash().restore_item(test_file)
3838
folder_items = parent_folder.get_items()
3939
assert test_file.id in [item.id for item in folder_items]
@@ -46,7 +46,7 @@ def test_trash_get_items_with_offset(parent_folder, small_file_path):
4646
try:
4747
trash_items = CLIENT.trash().get_items()
4848
assert isinstance(trash_items, LimitOffsetBasedObjectCollection)
49-
assert test_file.id in [item.id for item in trash_items]
49+
assert trash_items.next() is not None
5050
finally:
5151
CLIENT.trash().permanently_delete_item(test_file)
5252

@@ -56,8 +56,8 @@ def test_trash_get_items_with_marker(parent_folder, small_file_path):
5656
test_file = parent_folder.upload(file_path=small_file_path, file_name=name)
5757
test_file.delete()
5858
try:
59-
trash_items = CLIENT.trash().get_items(limit=100, use_marker=True)
59+
trash_items = CLIENT.trash().get_items(limit=5, use_marker=True)
6060
assert isinstance(trash_items, MarkerBasedObjectCollection)
61-
assert test_file.id in [item.id for item in trash_items]
61+
assert trash_items.next() is not None
6262
finally:
6363
CLIENT.trash().permanently_delete_item(test_file)

test/unit/object/test_folder.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from datetime import datetime
33
from io import BytesIO
44
from os.path import basename
5-
from unittest.mock import mock_open, patch, Mock, MagicMock
5+
from unittest.mock import mock_open, patch, Mock, MagicMock, ANY
66
import pytest
77
import pytz
88

@@ -334,7 +334,14 @@ def test_upload(
334334
# in Python 2 tests
335335
attributes.update(additional_attributes)
336336
data = {'attributes': json.dumps(attributes)}
337-
mock_box_session.post.assert_called_once_with(expected_url, expect_json_response=False, files=mock_files, data=data, headers=if_match_sha1_header)
337+
mock_box_session.post.assert_called_once_with(
338+
expected_url,
339+
expect_json_response=False,
340+
files=mock_files,
341+
data=data,
342+
headers=if_match_sha1_header,
343+
stream_file_content=True
344+
)
338345
assert isinstance(new_file, File)
339346
assert new_file.object_id == mock_object_id
340347
assert 'id' in new_file
@@ -438,6 +445,27 @@ def test_upload_does_preflight_check_if_specified(
438445
assert not test_folder.preflight_check.called
439446

440447

448+
@patch('boxsdk.object.folder.open', mock_open(read_data=b'some bytes'), create=True)
449+
@pytest.mark.parametrize('stream_file_content', (True, False))
450+
def test_upload_if_flag_stream_file_content_is_passed_to_session(
451+
mock_box_session,
452+
test_folder,
453+
stream_file_content,
454+
):
455+
expected_url = f'{API.UPLOAD_URL}/files/content'
456+
457+
test_folder.upload('foo.txt', file_name='foo.txt', stream_file_content=stream_file_content)
458+
459+
mock_files = {'file': ('unused', ANY)}
460+
mock_box_session.post.assert_called_once_with(
461+
expected_url,
462+
data=ANY,
463+
files=mock_files,
464+
expect_json_response=False,
465+
headers=None,
466+
stream_file_content=stream_file_content)
467+
468+
441469
def test_create_subfolder(test_folder, mock_box_session, mock_object_id, mock_folder_response):
442470
expected_url = test_folder.get_type_url()
443471
mock_box_session.post.return_value = mock_folder_response

test/unit/session/test_session.py

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
from functools import partial
2-
from io import IOBase
2+
from io import IOBase, BytesIO
33
from numbers import Number
4+
import os
45
from unittest.mock import MagicMock, Mock, PropertyMock, call, patch, ANY
56
from requests.exceptions import RequestException, SSLError, ConnectionError as RequestsConnectionError
7+
from requests_toolbelt import MultipartEncoder
68

79
import pytest
810

@@ -449,3 +451,48 @@ def test_proxy_malformed_dict_does_not_attach(box_session, monkeypatch, mock_net
449451

450452
def test_proxy_network_config_property(box_session):
451453
assert isinstance(box_session.proxy_config, Proxy)
454+
455+
456+
def test_multipart_request_with_disabled_streaming_file_content(
457+
box_session, mock_network_layer, generic_successful_response):
458+
test_url = 'https://example.com'
459+
file_bytes = os.urandom(1024)
460+
mock_network_layer.request.side_effect = [generic_successful_response]
461+
box_session.post(
462+
url=test_url,
463+
files={'file': ('unused', BytesIO(file_bytes))},
464+
data={'attributes': '{"name": "test_file"}'},
465+
stream_file_content=False
466+
)
467+
mock_network_layer.request.assert_called_once_with(
468+
'POST',
469+
test_url,
470+
access_token='fake_access_token',
471+
headers=ANY,
472+
log_response_content=True,
473+
files={'file': ('unused', ANY)},
474+
data={'attributes': '{"name": "test_file"}'},
475+
)
476+
477+
478+
def test_multipart_request_with_enabled_streaming_file_content(
479+
box_session, mock_network_layer, generic_successful_response):
480+
test_url = 'https://example.com'
481+
file_bytes = os.urandom(1024)
482+
mock_network_layer.request.side_effect = [generic_successful_response]
483+
box_session.post(
484+
url=test_url,
485+
files={'file': ('unused', BytesIO(file_bytes))},
486+
data={'attributes': '{"name": "test_file"}'},
487+
stream_file_content=True
488+
)
489+
call_args = mock_network_layer.request.call_args[0]
490+
call_kwargs = mock_network_layer.request.call_args[1]
491+
assert call_args[0] == 'POST'
492+
assert call_args[1] == test_url
493+
assert call_kwargs['access_token'] == 'fake_access_token'
494+
assert call_kwargs['log_response_content'] is True
495+
assert isinstance(call_kwargs['data'], MultipartEncoder)
496+
assert call_kwargs['data'].fields['attributes'] == '{"name": "test_file"}'
497+
assert call_kwargs['data'].fields['file'][0] == 'unused'
498+
assert isinstance(call_kwargs['data'].fields['file'][1], BytesIO)

0 commit comments

Comments
 (0)