From 75bd9f2d341efbe0a795c267af923eb48bffbd91 Mon Sep 17 00:00:00 2001 From: Vikas Bansal <43470111+vikasvb90@users.noreply.github.com> Date: Wed, 18 Oct 2023 18:14:55 +0530 Subject: [PATCH] Added BufferedInputStream to allow mark and reset ops during IO errors (#10690) Signed-off-by: vikasvb90 --- .../repositories/s3/S3RepositoryPlugin.java | 14 ++++++++++---- .../repositories/s3/async/AsyncPartsHandler.java | 6 +++++- .../s3/async/AsyncTransferManager.java | 5 ++++- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index a80ee0ca35fae..c6450e49d08e2 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -38,6 +38,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -55,6 +56,7 @@ import org.opensearch.script.ScriptService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; +import org.opensearch.threadpool.ScalingExecutorBuilder; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -93,17 +95,21 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) { @Override public List> getExecutorBuilders(Settings settings) { List> executorBuilders = new ArrayList<>(); + int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings)); executorBuilders.add( new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION) ); - executorBuilders.add( - new FixedExecutorBuilder(settings, PRIORITY_STREAM_READER, priorityPoolCount(settings), 10_000, PRIORITY_STREAM_READER) - ); + executorBuilders.add(new ScalingExecutorBuilder(PRIORITY_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); + executorBuilders.add(new FixedExecutorBuilder(settings, FUTURE_COMPLETION, normalPoolCount(settings), 10_000, FUTURE_COMPLETION)); - executorBuilders.add(new FixedExecutorBuilder(settings, STREAM_READER, normalPoolCount(settings), 10_000, STREAM_READER)); + executorBuilders.add(new ScalingExecutorBuilder(STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); return executorBuilders; } + static int halfAllocatedProcessorsMaxFive(final int allocatedProcessors) { + return boundedBy((allocatedProcessors + 1) / 2, 1, 5); + } + S3RepositoryPlugin(final Settings settings, final Path configPath, final S3Service service, final S3AsyncService s3AsyncService) { this.service = Objects.requireNonNull(service, "S3 service must not be null"); this.configPath = configPath; diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index ad6939ce299d6..86bb70e5a40a2 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -23,9 +23,11 @@ import org.opensearch.common.StreamContext; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.io.InputStreamContainer; +import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.s3.SocketAccess; import org.opensearch.repositories.s3.io.CheckedContainer; +import java.io.BufferedInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -142,7 +144,9 @@ private static void uploadPart( () -> s3AsyncClient.uploadPart( uploadPartRequest, AsyncRequestBody.fromInputStream( - inputStreamContainer.getInputStream(), + // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered + // data can be retried instead of retrying whole file by the application. + new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)), inputStreamContainer.getContentLength(), streamReadExecutor ) diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index 8d45c2167a3d1..fb1e409388a69 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -37,6 +37,7 @@ import org.opensearch.repositories.s3.SocketAccess; import org.opensearch.repositories.s3.io.CheckedContainer; +import java.io.BufferedInputStream; import java.io.IOException; import java.util.Arrays; import java.util.Base64; @@ -303,7 +304,9 @@ private void uploadInOneChunk( () -> s3AsyncClient.putObject( putObjectRequestBuilder.build(), AsyncRequestBody.fromInputStream( - inputStreamContainer.getInputStream(), + // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered + // data can be retried instead of retrying whole file by the application. + new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)), inputStreamContainer.getContentLength(), streamReadExecutor )