Skip to content

Commit

Permalink
HADOOP-16202. Enhanced openFile(): hadoop-aws changes. (#2584/3)
Browse files Browse the repository at this point in the history
S3A input stream support for the few fs.option.openfile settings.
As well as supporting the read policy option and values,
if the file length is declared in fs.option.openfile.length
then no HEAD request will be issued when opening a file.
This can cut a few tens of milliseconds off the operation.

The patch adds a new openfile parameter/FS configuration option
fs.s3a.input.async.drain.threshold (default: 16000).
It declares the number of bytes remaining in the http input stream
above which any operation to read and discard the rest of the stream,
"draining", is executed asynchronously.
This asynchronous draining offers some performance benefit on seek-heavy
file IO.

Contributed by Steve Loughran.

Change-Id: I9b0626bbe635e9fd97ac0f463f5e7167e0111e39
  • Loading branch information
steveloughran committed Apr 24, 2022
1 parent 6999acf commit e0cd0a8
Show file tree
Hide file tree
Showing 38 changed files with 2,062 additions and 372 deletions.
5 changes: 5 additions & 0 deletions hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@
<Method name="s3Exists" />
<Bug pattern="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE" />
</Match>
<!-- we are using completable futures, so ignore the Future which submit() returns -->
<Match>
<Class name="org.apache.hadoop.fs.s3a.S3AFileSystem$InputStreamCallbacksImpl" />
<Bug pattern="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE" />
</Match>

<!--
findbugs gets confused by lambda expressions in synchronized methods
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;

import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -602,37 +603,69 @@ private Constants() {
public static final String READAHEAD_RANGE = "fs.s3a.readahead.range";
public static final long DEFAULT_READAHEAD_RANGE = 64 * 1024;

/**
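* The number of bytes remaining in the HTTP input stream above which
* a drain operation (reading and discarding the rest of the stream)
* is executed asynchronously rather than synchronously.
* Asynchronous draining incurs a schedule/wait overhead which
* synchronous draining does not.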
* Value: {@value}
*/
public static final String ASYNC_DRAIN_THRESHOLD = "fs.s3a.input.async.drain.threshold";

/**
* Default value of {@link #ASYNC_DRAIN_THRESHOLD}.
* This is a number based purely on experimentation in
* {@code ITestS3AInputStreamPerformance}.
* Value: {@value}
*/
public static final int DEFAULT_ASYNC_DRAIN_THRESHOLD = 16_000;

/**
* Which input strategy to use for buffering, seeking and similar when
* reading data.
* Value: {@value}
*/
@InterfaceStability.Unstable
public static final String INPUT_FADVISE =
"fs.s3a.experimental.input.fadvise";

/**
* The default value for this FS,
* which for S3A is adaptive.
* Value: {@value}
* @deprecated use the {@link Options.OpenFileOptions} value
* in code which only needs to be compiled against newer hadoop
* releases.
*/
public static final String INPUT_FADV_DEFAULT =
Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_DEFAULT;

/**
* General input. Some seeks, some reads.
* The policy name "default" is standard across different stores,
* and should be preferred.
* Value: {@value}
*/
@InterfaceStability.Unstable
public static final String INPUT_FADV_NORMAL = "normal";

/**
* Optimized for sequential access.
* Value: {@value}
* @deprecated use the {@link Options.OpenFileOptions} value
* in code which only needs to be compiled against newer hadoop
* releases.
*/
@InterfaceStability.Unstable
public static final String INPUT_FADV_SEQUENTIAL = "sequential";
public static final String INPUT_FADV_SEQUENTIAL =
Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;

/**
* Optimized purely for random seek+read/positionedRead operations;
* The performance of sequential IO may be reduced in exchange for
* more efficient {@code seek()} operations.
* Value: {@value}
* @deprecated use the {@link Options.OpenFileOptions} value
* in code which only needs to be compiled against newer hadoop
* releases.
*/
@InterfaceStability.Unstable
public static final String INPUT_FADV_RANDOM = "random";
public static final String INPUT_FADV_RANDOM =
Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM;

/**
* Gauge name for the input policy : {@value}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,16 @@

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.statistics.DurationTracker;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.util.DurationInfo;
import org.apache.hadoop.util.functional.CallableRaisingIOE;
import org.apache.hadoop.util.functional.FutureIO;
import org.apache.hadoop.util.functional.InvocationRaisingIOE;
import org.apache.hadoop.util.Preconditions;

import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.invokeTrackingDuration;

/**
* Class to provide lambda expression invocation of AWS operations.
*
Expand Down Expand Up @@ -122,6 +125,31 @@ public static <T> T once(String action, String path,
}
}

/**
 * Invoke an operation exactly once, with the supplied duration tracker
 * being updated with the success/failure of the call.
 * Any {@code AmazonClientException} raised is converted through
 * {@code S3AUtils.translateException()} before being rethrown;
 * no other exception type is caught here.
 * @param action action to execute (used in error messages)
 * @param path path of work (used in error messages)
 * @param tracker tracker to update
 * @param operation operation to execute
 * @param <T> type of return value
 * @return the result of the function call
 * @throws IOException any IOE raised, or translated exception
 */
@Retries.OnceTranslated
public static <T> T onceTrackingDuration(
    final String action,
    final String path,
    final DurationTracker tracker,
    final CallableRaisingIOE<T> operation)
    throws IOException {
  final T outcome;
  try {
    // the tracker is handed to the statistics binding along with the operation
    outcome = invokeTrackingDuration(tracker, operation);
  } catch (AmazonClientException sdkFailure) {
    // action and path are only used to build the translated exception
    throw S3AUtils.translateException(action, path, sdkFailure);
  }
  return outcome;
}

/**
* Execute an operation with no result.
* @param action action to execute (used in error messages)
Expand Down
Loading

0 comments on commit e0cd0a8

Please sign in to comment.