Skip to content

Commit c67b03c

Browse files
authored
feat: Use upload session urls for chunk upload (#875)
Closes: SDK-3836
1 parent 5a7c767 commit c67b03c

File tree

10 files changed

+303
-98
lines changed

10 files changed

+303
-98
lines changed

boxsdk/object/file.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,21 @@ def preflight_check(self, size: int, name: Optional[str] = None) -> Optional[str
4949
)
5050

5151
@api_call
52-
def create_upload_session(self, file_size: int, file_name: Optional[str] = None) -> 'UploadSession':
52+
def create_upload_session(
53+
self, file_size: int, file_name: Optional[str] = None, use_upload_session_urls: bool = True
54+
) -> 'UploadSession':
5355
"""
5456
Create a new chunked upload session for uploading a new version of the file.
5557
5658
:param file_size:
5759
The size of the file in bytes that will be uploaded.
5860
:param file_name:
5961
The new name of the file version that will be uploaded.
62+
:param use_upload_session_urls:
63+
The parameter detrermining what urls to use to perform chunked upload.
64+
If True, the urls returned by create_upload_session() endpoint response will be used,
65+
unless a custom API.UPLOAD_URL was set in the config.
66+
If False, the base upload url will be used.
6067
:returns:
6168
A :class:`UploadSession` object.
6269
"""
@@ -68,13 +75,18 @@ def create_upload_session(self, file_size: int, file_name: Optional[str] = None)
6875
body_params['file_name'] = file_name
6976
url = self.get_url('upload_sessions').replace(self.session.api_config.BASE_API_URL, self.session.api_config.UPLOAD_URL)
7077
response = self._session.post(url, data=json.dumps(body_params)).json()
71-
return self.translator.translate(
78+
upload_session = self.translator.translate(
7279
session=self._session,
7380
response_object=response,
7481
)
82+
# pylint:disable=protected-access
83+
upload_session._use_upload_session_urls = use_upload_session_urls
84+
return upload_session
7585

7686
@api_call
77-
def get_chunked_uploader(self, file_path: str, rename_file: bool = False) -> 'ChunkedUploader':
87+
def get_chunked_uploader(
88+
self, file_path: str, rename_file: bool = False, use_upload_session_urls: bool = True
89+
) -> 'ChunkedUploader':
7890
# pylint: disable=consider-using-with
7991
"""
8092
Instantiate the chunked upload instance and create upload session with path to file.
@@ -83,13 +95,18 @@ def get_chunked_uploader(self, file_path: str, rename_file: bool = False) -> 'Ch
8395
The local path to the file you wish to upload.
8496
:param rename_file:
8597
Indicates whether the file should be renamed or not.
98+
:param use_upload_session_urls:
99+
The parameter detrermining what urls to use to perform chunked upload.
100+
If True, the urls returned by create_upload_session() endpoint response will be used,
101+
unless a custom API.UPLOAD_URL was set in the config.
102+
If False, the base upload url will be used.
86103
:returns:
87104
A :class:`ChunkedUploader` object.
88105
"""
89106
total_size = os.stat(file_path).st_size
90107
content_stream = open(file_path, 'rb')
91108
file_name = os.path.basename(file_path) if rename_file else None
92-
upload_session = self.create_upload_session(total_size, file_name)
109+
upload_session = self.create_upload_session(total_size, file_name, use_upload_session_urls)
93110
return upload_session.get_chunked_uploader_for_stream(content_stream, total_size)
94111

95112
def _get_accelerator_upload_url_for_update(self) -> Optional[str]:

boxsdk/object/folder.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,19 @@ def preflight_check(self, size: int, name: str) -> Optional[str]:
115115
)
116116

117117
@api_call
118-
def create_upload_session(self, file_size: int, file_name: str) -> 'UploadSession':
118+
def create_upload_session(self, file_size: int, file_name: str, use_upload_session_urls: bool = True) -> 'UploadSession':
119119
"""
120120
Creates a new chunked upload session for upload a new file.
121121
122122
:param file_size:
123123
The size of the file in bytes that will be uploaded.
124124
:param file_name:
125125
The name of the file that will be uploaded.
126+
:param use_upload_session_urls:
127+
The parameter detrermining what urls to use to perform chunked upload.
128+
If True, the urls returned by create_upload_session() endpoint response will be used,
129+
unless a custom API.UPLOAD_URL was set in the config.
130+
If False, the base upload url will be used.
126131
:returns:
127132
A :class:`UploadSession` object.
128133
"""
@@ -133,13 +138,18 @@ def create_upload_session(self, file_size: int, file_name: str) -> 'UploadSessio
133138
'file_name': file_name,
134139
}
135140
response = self._session.post(url, data=json.dumps(body_params)).json()
136-
return self.translator.translate(
141+
upload_session = self.translator.translate(
137142
session=self._session,
138143
response_object=response,
139144
)
145+
# pylint:disable=protected-access
146+
upload_session._use_upload_session_urls = use_upload_session_urls
147+
return upload_session
140148

141149
@api_call
142-
def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None) -> 'ChunkedUploader':
150+
def get_chunked_uploader(
151+
self, file_path: str, file_name: Optional[str] = None, use_upload_session_urls: bool = True
152+
) -> 'ChunkedUploader':
143153
# pylint: disable=consider-using-with
144154
"""
145155
Instantiate the chunked upload instance and create upload session with path to file.
@@ -149,6 +159,11 @@ def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None)
149159
:param file_name:
150160
The name with extention of the file that will be uploaded, e.g. new_file_name.zip.
151161
If not specified, the name from the local system is used.
162+
:param use_upload_session_urls:
163+
The parameter detrermining what urls to use to perform chunked upload.
164+
If True, the urls returned by create_upload_session() endpoint response will be used,
165+
unless a custom API.UPLOAD_URL was set in the config.
166+
If False, the base upload url will be used.
152167
:returns:
153168
A :class:`ChunkedUploader` object.
154169
"""
@@ -157,7 +172,7 @@ def get_chunked_uploader(self, file_path: str, file_name: Optional[str] = None)
157172
content_stream = open(file_path, 'rb')
158173

159174
try:
160-
upload_session = self.create_upload_session(total_size, upload_file_name)
175+
upload_session = self.create_upload_session(total_size, upload_file_name, use_upload_session_urls)
161176
return upload_session.get_chunked_uploader_for_stream(content_stream, total_size)
162177
except Exception:
163178
content_stream.close()

boxsdk/object/upload_session.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
from boxsdk import BoxAPIException
99
from boxsdk.util.api_call_decorator import api_call
1010
from boxsdk.util.chunked_uploader import ChunkedUploader
11+
from boxsdk.session.session import Session
12+
from boxsdk.config import API
1113
from .base_object import BaseObject
1214
from ..pagination.limit_offset_based_dict_collection import LimitOffsetBasedDictCollection
1315

@@ -19,11 +21,22 @@
1921
class UploadSession(BaseObject):
2022
_item_type = 'upload_session'
2123
_parent_item_type = 'file'
24+
_default_upload_url = API.UPLOAD_URL
2225

23-
def get_url(self, *args: Any) -> str:
26+
def __init__(
27+
self, session: Session, object_id: str, response_object: dict = None, use_upload_session_urls: bool = True
28+
):
29+
super().__init__(session, object_id, response_object)
30+
self._use_upload_session_urls = use_upload_session_urls
31+
32+
def get_url(self, *args: Any, url_key: str = None) -> str:
2433
"""
2534
Base class override. Endpoint is a little different - it's /files/upload_sessions.
2635
"""
36+
session_endpoints = getattr(self, 'session_endpoints', {})
37+
if self._use_upload_session_urls and url_key in session_endpoints and self.session.api_config.UPLOAD_URL == self._default_upload_url:
38+
return session_endpoints[url_key]
39+
2740
return self._session.get_url(
2841
f'{self._parent_item_type}s/{self._item_type}s',
2942
self._object_id,
@@ -44,7 +57,7 @@ def get_parts(self, limit: Optional[int] = None, offset: Optional[int] = None) -
4457
"""
4558
return LimitOffsetBasedDictCollection(
4659
session=self.session,
47-
url=self.get_url('parts'),
60+
url=self.get_url('parts', url_key='list_parts'),
4861
limit=limit,
4962
offset=offset,
5063
fields=None,
@@ -87,7 +100,7 @@ def upload_part_bytes(
87100
'Content-Range': f'bytes {offset}-{range_end}/{total_size}',
88101
}
89102
response = self._session.put(
90-
self.get_url(),
103+
self.get_url(url_key='upload_part'),
91104
headers=headers,
92105
data=part_bytes,
93106
)
@@ -131,7 +144,7 @@ def commit(
131144

132145
try:
133146
response = self._session.post(
134-
self.get_url('commit'),
147+
self.get_url('commit', url_key='commit'),
135148
headers=headers,
136149
data=json.dumps(body),
137150
)
@@ -154,7 +167,12 @@ def abort(self) -> bool:
154167
:returns:
155168
A boolean indication success of the upload abort.
156169
"""
157-
return self.delete()
170+
171+
box_response = self._session.delete(
172+
self.get_url(url_key='abort'),
173+
expect_json_response=False
174+
)
175+
return box_response.ok
158176

159177
def get_chunked_uploader_for_stream(self, content_stream: IO[bytes], file_size: int) -> ChunkedUploader:
160178
"""

docs/usage/configuration.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ API.OAUTH2_AUTHORIZE_URL = 'https://my-company.com/authorize'
6464

6565
### Upload URL
6666
The default URL used when uploading files to Box can be changed by assigning a new value to the `API.UPLOAD_URL` field.
67+
If this variable is ever changed from its default value, the SDK will always use this URL to upload files to Box,
68+
even if `use_upload_session_urls` is set to `True` while creating an upload session for a chunked upload.
6769

6870
```python
6971
from boxsdk.config import API

docs/usage/files.md

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,14 @@ Chunked Upload
194194
--------------
195195

196196
For large files or in cases where the network connection is less reliable,
197-
you may want to upload the file in parts. This allows a single part to fail
197+
you may want to upload the file in parts. This allows a single part to fail
198198
without aborting the entire upload, and failed parts can then be retried.
199199

200+
Since box-python-sdk 3.11.0 release, by default the SDK uses upload urls provided in response
201+
when creating a new upload session. This allows your content to always be uploaded to the closest Box data center and
202+
can significantly improve upload speed. You can always disable this feature and always use base upload url by
203+
setting `use_upload_session_urls` flag to `False` when creating upload session.
204+
200205
### Automatic Uploader
201206

202207
Since box-python-sdk 3.7.0 release, automatic uploader uses multiple threads, which significantly speeds up the upload process.
@@ -211,9 +216,11 @@ API.CHUNK_UPLOAD_THREADS = 6
211216
#### Upload new file
212217

213218
The SDK provides a method of automatically handling a chunked upload. First get a folder you want to upload the file to.
214-
Then call [`folder.get_chunked_uploader(file_path, rename_file=False)`][get_chunked_uploader_for_file] to retrieve
215-
a [`ChunkedUploader`][chunked_uploader_class] object. Calling the method [`chunked_upload.start()`][start] will
216-
kick off the chunked upload process and return the [File][file_class]
219+
Then call [`folder.get_chunked_uploader(file_path, file_name=None, use_upload_session_urls=True)`][get_chunked_uploader_for_file]
220+
to retrieve a [`ChunkedUploader`][chunked_uploader_class] object. Setting `use_upload_session_urls` to `True` initializes
221+
the uploader that utilizes urls returned by the `Create Upload Session` endpoint response unless a custom
222+
`API.UPLOAD_URL` was set in the config. Setting `use_upload_session_urls` to `False` initializes the uploader that always uses base upload urls.
223+
Calling the method [`chunked_upload.start()`][start] will kick off the chunked upload process and return the [File][file_class]
217224
object that was uploaded.
218225

219226
<!-- samples x_chunked_uploads automatic -->
@@ -224,7 +231,10 @@ uploaded_file = chunked_uploader.start()
224231
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
225232
```
226233

227-
You can also upload file stream by creating a [`UploadSession`][upload_session_class] first and then calling the
234+
You can also upload file stream by creating a [`UploadSession`][upload_session_class] first. This can be done by calling
235+
[`folder.create_upload_session(file_size, file_name, use_upload_session_urls=True)`][create_upload_session] method.
236+
`use_upload_session_urls` flag is used to determine if the upload session should use urls returned by
237+
the `Create Upload Session` endpoint or should always use base upload urls. Then you can call
228238
method [`upload_session.get_chunked_uploader_for_stream(content_stream, file_size)`][get_chunked_uploader_for_stream].
229239

230240
```python
@@ -240,14 +250,14 @@ with open(test_file_path, 'rb') as content_stream:
240250
#### Upload new file version
241251

242252
To upload a new file version for a large file, first get a file you want to replace.
243-
Then call [`file.get_chunked_uploader(file_path)`][get_chunked_uploader_for_version]
253+
Then call [`file.get_chunked_uploader(file_path, rename_file=False, use_upload_session_urls=True)`][get_chunked_uploader_for_version]
244254
to retrieve a [`ChunkedUploader`][chunked_uploader_class] object. Calling the method [`chunked_upload.start()`][start]
245255
will kick off the chunked upload process and return the updated [File][file_class].
246256

247257
<!-- samples x_chunked_uploads automatic_new_version -->
248258
```python
249259
# uploads new large file version
250-
chunked_uploader = client.file('existing_big_file_id').get_chunked_uploader('/path/to/file')
260+
chunked_uploader = client.file('existing_big_file_id').get_chunked_uploader(file_path='/path/to/file')
251261
uploaded_file = chunked_uploader.start()
252262
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
253263
# the uploaded_file.id will be the same as 'existing_big_file_id'
@@ -293,17 +303,6 @@ except:
293303
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
294304
```
295305

296-
Alternatively, you can also create a [`UploadSession`][upload_session_class] object by calling
297-
[`client.upload_session(session_id)`][upload_session] if you have the upload session id. This can be helpful in
298-
resuming an existing upload session.
299-
300-
301-
```python
302-
chunked_uploader = client.upload_session('12345').get_chunked_uploader('/path/to/file')
303-
uploaded_file = chunked_uploader.resume()
304-
print(f'File "{uploaded_file.name}" uploaded to Box with file ID {uploaded_file.id}')
305-
```
306-
307306
[resume]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.chunked_uploader.ChunkedUploader.resume
308307

309308
#### Abort Chunked Upload
@@ -317,7 +316,7 @@ from boxsdk.exception import BoxNetworkException
317316
test_file_path = '/path/to/large_file.mp4'
318317
content_stream = open(test_file_path, 'rb')
319318
total_size = os.stat(test_file_path).st_size
320-
chunked_uploader = client.upload_session('56781').get_chunked_uploader_for_stream(content_stream, total_size)
319+
chunked_uploader = client.file('existing_big_file_id').get_chunked_uploader(file_path='/path/to/file')
321320
try:
322321
uploaded_file = chunked_uploader.start()
323322
except BoxNetworkException:
@@ -371,8 +370,10 @@ The individual endpoint methods are detailed below:
371370
#### Create Upload Session for File Version
372371

373372
To create an upload session for uploading a large version, call
374-
[`file.create_upload_session(file_size, file_name=None)`][create_version_upload_session] with the size of the file to be
375-
uploaded. You can optionally specify a new `file_name` to rename the file on upload. This method returns an
373+
[`file.create_upload_session(file_size, file_name=None, use_upload_session_urls=True)`][create_version_upload_session]
374+
with the size of the file to be uploaded. You can optionally specify a new `file_name` to rename the file on upload.
375+
`use_upload_session_urls` flag is used to determine if the upload session should use urls returned by
376+
the `Create Upload Session` endpoint or should always use base upload urls. This method returns an
376377
[`UploadSession`][upload_session_class] object representing the created upload session.
377378

378379
<!-- sample post_files_id_upload_sessions -->
@@ -388,9 +389,10 @@ print(f'Created upload session {upload_session.id} with chunk size of {upload_se
388389
#### Create Upload Session for File
389390

390391
To create an upload session for uploading a new large file, call
391-
[`folder.create_upload_session(file_size, file_name)`][create_upload_session] with the size and filename of the file
392-
to be uploaded. This method returns an [`UploadSession`][upload_session_class] object representing the created upload
393-
session.
392+
[`folder.create_upload_session(file_size, file_name, use_upload_session_urls=True)`][create_upload_session] with
393+
the size and filename of the file to be uploaded. `use_upload_session_urls` flag is used to determine if the upload
394+
session should use urls returned by the `Create Upload Session` endpoint or should always use base upload urls.
395+
This method returns an [`UploadSession`][upload_session_class] object representing the created upload session.
394396

395397
<!-- sample post_files_upload_sessions -->
396398
```python

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
CLASSIFIERS = [
11-
'Development Status :: 5 - Production/Stable',
11+
'Development Status :: 6 - Mature',
1212
'Intended Audience :: Developers',
1313
'License :: OSI Approved :: Apache Software License',
1414
'Programming Language :: Python',
@@ -18,6 +18,7 @@
1818
'Programming Language :: Python :: 3.9',
1919
'Programming Language :: Python :: 3.10',
2020
'Programming Language :: Python :: 3.11',
21+
'Programming Language :: Python :: 3.12',
2122
'Programming Language :: Python :: Implementation :: CPython',
2223
'Programming Language :: Python :: Implementation :: PyPy',
2324
'Operating System :: OS Independent',

test/integration_new/object/folder_itest.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,24 @@ def test_manual_chunked_upload(parent_folder, large_file, large_file_name):
7272
util.permanently_delete(uploaded_file)
7373

7474

75-
def test_auto_chunked_upload(parent_folder, large_file, large_file_name):
75+
def test_auto_chunked_upload_using_upload_session_urls(parent_folder, large_file, large_file_name):
7676
total_size = os.stat(large_file.path).st_size
77-
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path)
77+
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path, use_upload_session_urls=True)
78+
79+
uploaded_file = chunked_uploader.start()
80+
81+
try:
82+
assert uploaded_file.id
83+
assert uploaded_file.name == large_file_name
84+
assert uploaded_file.parent == parent_folder
85+
assert uploaded_file.size == total_size
86+
finally:
87+
util.permanently_delete(uploaded_file)
88+
89+
90+
def test_auto_chunked_upload_NOT_using_upload_session_urls(parent_folder, large_file, large_file_name):
91+
total_size = os.stat(large_file.path).st_size
92+
chunked_uploader = parent_folder.get_chunked_uploader(large_file.path, use_upload_session_urls=False)
7893

7994
uploaded_file = chunked_uploader.start()
8095

0 commit comments

Comments
 (0)