|
16 | 16 | package com.microsoft.azure.storage.blob;
|
17 | 17 |
|
18 | 18 | import com.microsoft.azure.storage.blob.models.BlobDownloadHeaders;
|
| 19 | +import com.microsoft.azure.storage.blob.models.BlockBlobCommitBlockListResponse; |
19 | 20 | import com.microsoft.azure.storage.blob.models.ModifiedAccessConditions;
|
20 | 21 | import com.microsoft.rest.v2.util.FlowableUtil;
|
21 | 22 | import io.reactivex.Flowable;
|
@@ -286,4 +287,135 @@ private static Single<List<Object>> getSetupSingle(BlobURL blobURL, BlobRange r,
|
286 | 287 | return Single.just(Arrays.asList(r.count(), o.accessConditions()));
|
287 | 288 | }
|
288 | 289 | }
|
| 290 | + |
| 291 | + /** |
| 292 | + * Uploads the contents of an arbitrary {@code Flowable} to a block blob. This Flowable need not be replayable and |
| 293 | + * therefore it may have as its source a network stream or any other data for which the replay behavior is unknown |
| 294 | + * (non-replayable meaning the Flowable may not return the exact same data on each subscription). |
| 295 | + * |
| 296 | + * To eliminate the need for replayability on the source, the client must perform some buffering in order to ensure |
| 297 | + * the actual data passed to the network is replayable. This is important in order to support retries, which are |
| 298 | + * crucial for reliable data transfer. Typically, the greater the number of buffers used, the greater the possible |
| 299 | + * parallelism. Larger buffers means we will have to stage fewer blocks. The tradeoffs between these values are |
| 300 | + * context-dependent, so some experimentation may be required to optimize inputs for a given scenario. |
| 301 | + * |
| 302 | + * Note that buffering must be strictly sequential. Only the upload portion of this operation may be parallelized; |
| 303 | + * the reads cannot be. Therefore, this method is not as optimal as |
| 304 | + * {@link #uploadFileToBlockBlob(AsynchronousFileChannel, BlockBlobURL, int, TransferManagerUploadToBlockBlobOptions)} |
| 305 | + * and if the source is known to be a file, that method should be preferred. |
| 306 | + * |
| 307 | + * @param source |
| 308 | + * Contains the data to upload. Unlike other upload methods in this library, this method does not require |
| 309 | + * that the Flowable be replayable. |
| 310 | + * @param blockBlobURL |
| 311 | + * Points to the blob to which the data should be uploaded. |
| 312 | + * @param blockSize |
| 313 | + * The size of each block that will be staged. This value also determines the size that each buffer used by |
| 314 | + * this method will be and determines the number of requests that need to be made. The amount of memory |
| 315 | + * consumed by this method may be up to blockSize * numBuffers. If block size is large, this method will |
| 316 | + * make fewer network calls, but each individual call will send more data and will therefore take longer. |
| 317 | + * @param numBuffers |
| 318 | + * The maximum number of buffers this method should allocate. Must be at least two. Generally this value |
| 319 | + * should have some relationship to the value for parallelism passed via the options. If the number of |
| 320 | + * available buffers is smaller than the level of parallelism, then this method will not be able to make |
| 321 | + * full use of the available parallelism. It is unlikely that the value need be more than two times the |
| 322 | + * level of parallelism as such a value means that (assuming buffering is fast enough) there are enough |
| 323 | + * available buffers to have both one occupied for each worker and one ready for all workers should they |
| 324 | + * all complete the current request at approximately the same time. The amount of memory consumed by this |
| 325 | + * method may be up to blockSize * numBuffers. |
| 326 | + * @param options |
| 327 | + * {@link TransferManagerUploadToBlockBlobOptions} |
| 328 | + * @return Emits the successful response. |
| 329 | + * |
| 330 | + * @apiNote ## Sample Code \n |
| 331 | + * [!code-java[Sample_Code](../azure-storage-java/src/test/java/com/microsoft/azure/storage/Samples.java?name=tm_nrf "Sample code for TransferManager.uploadFromNonReplayableFlowable")] \n |
| 332 | + * For more samples, please see the [Samples file](%https://github.com/Azure/azure-storage-java/blob/master/src/test/java/com/microsoft/azure/storage/Samples.java) |
| 333 | + */ |
| 334 | + public static Single<BlockBlobCommitBlockListResponse> uploadFromNonReplayableFlowable( |
| 335 | + final Flowable<ByteBuffer> source, final BlockBlobURL blockBlobURL, final int blockSize, |
| 336 | + final int numBuffers, final TransferManagerUploadToBlockBlobOptions options) { |
| 337 | + Utility.assertNotNull("source", source); |
| 338 | + Utility.assertNotNull("blockBlobURL", blockBlobURL); |
| 339 | + |
| 340 | + TransferManagerUploadToBlockBlobOptions optionsReal = options == null ? |
| 341 | + TransferManagerUploadToBlockBlobOptions.DEFAULT : options; |
| 342 | + |
| 343 | + // See ProgressReporter for an explanation on why this lock is necessary and why we use AtomicLong. |
| 344 | + AtomicLong totalProgress = new AtomicLong(0); |
| 345 | + Lock progressLock = new ReentrantLock(); |
| 346 | + |
| 347 | + // Validation done in the constructor. |
| 348 | + UploadFromNRFBufferPool pool = new UploadFromNRFBufferPool(numBuffers, blockSize); |
| 349 | + |
| 350 | + /* |
| 351 | + Break the source flowable into chunks that are <= chunk size. This makes filling the pooled buffers much easier |
| 352 | + as we can guarantee we only need at most two buffers for any call to write (two in the case of one pool buffer |
| 353 | + filling up with more data to write) |
| 354 | + */ |
| 355 | + Flowable<ByteBuffer> chunkedSource = source.flatMap(buffer -> { |
| 356 | + if (buffer.remaining() <= blockSize) { |
| 357 | + return Flowable.just(buffer); |
| 358 | + } |
| 359 | + List<ByteBuffer> smallerChunks = new ArrayList<>(); |
| 360 | + for (int i=0; i < Math.ceil(buffer.remaining() / (double)blockSize); i++) { |
| 361 | + // Note that duplicate does not duplicate data. It simply creates a duplicate view of the data. |
| 362 | + ByteBuffer duplicate = buffer.duplicate(); |
| 363 | + duplicate.position(i * blockSize); |
| 364 | + duplicate.limit(Math.min(duplicate.limit(), (i+1) * blockSize)); |
| 365 | + smallerChunks.add(duplicate); |
| 366 | + } |
| 367 | + return Flowable.fromIterable(smallerChunks); |
| 368 | + }, false, 1); |
| 369 | + |
| 370 | + /* |
| 371 | + Write each buffer from the chunkedSource to the pool and call flush at the end to get the last bits. |
| 372 | + */ |
| 373 | + return chunkedSource.flatMap(pool::write, false, 1) |
| 374 | + .concatWith(Flowable.defer(pool::flush)) |
| 375 | + .concatMapEager(buffer -> { |
| 376 | + // Report progress as necessary. |
| 377 | + Flowable<ByteBuffer> data = ProgressReporter.addParallelProgressReporting(Flowable.just(buffer), |
| 378 | + optionsReal.progressReceiver(), progressLock, totalProgress); |
| 379 | + |
| 380 | + final String blockId = Base64.getEncoder().encodeToString( |
| 381 | + UUID.randomUUID().toString().getBytes()); |
| 382 | + |
| 383 | + /* |
| 384 | + Make a call to stageBlock. Instead of emitting the response, which we don't care about other |
| 385 | + than that it was successful, emit the blockId for this request. These will be collected below. |
| 386 | + Turn that into an Observable which emits one item to comply with the signature of |
| 387 | + concatMapEager. |
| 388 | + */ |
| 389 | + return blockBlobURL.stageBlock(blockId, data, |
| 390 | + buffer.remaining(), optionsReal.accessConditions().leaseAccessConditions(), null) |
| 391 | + .map(x -> { |
| 392 | + pool.returnBuffer(buffer); |
| 393 | + return blockId; |
| 394 | + }).toFlowable(); |
| 395 | + |
| 396 | + /* |
| 397 | + Specify the number of concurrent subscribers to this map. This determines how many concurrent |
| 398 | + rest calls are made. This is so because maxConcurrency is the number of internal subscribers |
| 399 | + available to subscribe to the Observables emitted by the source. A subscriber is not released |
| 400 | + for a new subscription until its Observable calls onComplete, which here means that the call to |
| 401 | + stageBlock is finished. Prefetch is a hint that each of the Observables emitted by the source |
| 402 | + will emit only one value, which is true here because we have converted from a Single. |
| 403 | + */ |
| 404 | + }, optionsReal.parallelism(), 1) |
| 405 | + /* |
| 406 | + collectInto will gather each of the emitted blockIds into a list. Because we used concatMap, the Ids |
| 407 | + will be emitted according to their block number, which means the list generated here will be |
| 408 | + properly ordered. This also converts into a Single. |
| 409 | + */ |
| 410 | + .collectInto(new ArrayList<String>(), ArrayList::add) |
| 411 | + /* |
| 412 | + collectInto will not emit the list until its source calls onComplete. This means that by the time we |
| 413 | + call stageBlock list, all of the stageBlock calls will have finished. By flatMapping the list, we |
| 414 | + can "map" it into a call to commitBlockList. |
| 415 | + */ |
| 416 | + .flatMap(ids -> |
| 417 | + blockBlobURL.commitBlockList(ids, optionsReal.httpHeaders(), optionsReal.metadata(), |
| 418 | + optionsReal.accessConditions(), null)); |
| 419 | + |
| 420 | + } |
289 | 421 | }
|
0 commit comments