Skip to content

Commit 617af28

Browse files
HADOOP-17271. S3A connector to support IOStatistics. (#2580)
S3A connector to support the IOStatistics API of HADOOP-16830. This is a major rework of the S3A Statistics collection to * Embrace the IOStatistics APIs * Move from direct references of S3AInstrumentation statistics collectors to interface/implementation classes in new packages. * Ubiquitous support of IOStatistics, including: S3AFileSystem, input and output streams, RemoteIterator instances provided in list calls. * Adoption of new statistic names from hadoop-common. Regarding statistic collection: in addition to all existing statistics, the connector now records min/max/mean durations of HTTP GET and HEAD requests, and those of LIST operations. Contributed by Steve Loughran.
1 parent 9b2956e commit 617af28

File tree

111 files changed

+5830
-1693
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

111 files changed

+5830
-1693
lines changed

hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,14 @@
7474
<Bug pattern="SF_SWITCH_FALLTHROUGH"/>
7575
</Match>
7676

77+
<!--
78+
Some of the S3A Instrumentation classes increment volatile references from
79+
within synchronized contexts; they use volatile to keep the cost
80+
of these updates and reading them down.
81+
-->
82+
<Match>
83+
<Class name="org.apache.hadoop.fs.s3a.S3AInstrumentation$InputStreamStatisticsImpl"/>
84+
<Bug pattern="VO_VOLATILE_INCREMENT"/>
85+
</Match>
86+
7787
</FindBugsFilter>

hadoop-tools/hadoop-aws/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,8 @@
201201
<exclude>**/ITestMarkerToolRootOperations.java</exclude>
202202
<!-- operations across the metastore -->
203203
<exclude>**/ITestS3GuardDDBRootOperations.java</exclude>
204+
<!-- leave this until the end for better statistics -->
205+
<exclude>**/ITestAggregateIOStatistics.java</exclude>
204206
</excludes>
205207
</configuration>
206208
</execution>
@@ -250,6 +252,8 @@
250252
<!-- operations across the metastore -->
251253
<include>**/ITestS3AContractRootDir.java</include>
252254
<include>**/ITestS3GuardDDBRootOperations.java</include>
255+
<!-- leave this until the end for better statistics -->
256+
<include>**/ITestAggregateIOStatistics.java</include>
253257
</includes>
254258
</configuration>
255259
</execution>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,13 @@ private Constants() {
549549
@InterfaceStability.Unstable
550550
public static final String INPUT_FADV_RANDOM = "random";
551551

552+
/**
553+
* Gauge name for the input policy : {@value}.
554+
* This references an enum currently exclusive to the S3A stream.
555+
*/
556+
public static final String STREAM_READ_GAUGE_INPUT_POLICY =
557+
"stream_read_gauge_input_policy";
558+
552559
@InterfaceAudience.Private
553560
@InterfaceStability.Unstable
554561
public static final String S3_CLIENT_FACTORY_IMPL =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 186 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,26 @@
2323

2424
import com.amazonaws.ClientConfiguration;
2525
import com.amazonaws.auth.AWSCredentialsProvider;
26+
import com.amazonaws.client.builder.AwsClientBuilder;
27+
import com.amazonaws.metrics.RequestMetricCollector;
2628
import com.amazonaws.services.s3.AmazonS3;
2729
import com.amazonaws.services.s3.AmazonS3Client;
30+
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
2831
import com.amazonaws.services.s3.S3ClientOptions;
32+
import com.amazonaws.services.s3.internal.ServiceUtils;
33+
import com.amazonaws.util.AwsHostNameUtils;
34+
import com.amazonaws.util.RuntimeHttpUtils;
35+
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
2936
import org.slf4j.Logger;
37+
import org.slf4j.LoggerFactory;
3038

3139
import org.apache.commons.lang3.StringUtils;
3240
import org.apache.hadoop.classification.InterfaceAudience;
3341
import org.apache.hadoop.classification.InterfaceStability;
3442
import org.apache.hadoop.conf.Configuration;
3543
import org.apache.hadoop.conf.Configured;
44+
import org.apache.hadoop.fs.s3a.statistics.StatisticsFromAwsSdk;
45+
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
3646

3747
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
3848
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
@@ -41,21 +51,38 @@
4151

4252
/**
4353
* The default {@link S3ClientFactory} implementation.
44-
* This which calls the AWS SDK to configure and create an
54+
* This calls the AWS SDK to configure and create an
4555
* {@link AmazonS3Client} that communicates with the S3 service.
4656
*/
4757
@InterfaceAudience.Private
4858
@InterfaceStability.Unstable
4959
public class DefaultS3ClientFactory extends Configured
5060
implements S3ClientFactory {
5161

52-
protected static final Logger LOG = S3AFileSystem.LOG;
62+
private static final String S3_SERVICE_NAME = "s3";
63+
private static final String S3_SIGNER = "S3SignerType";
64+
private static final String S3_V4_SIGNER = "AWSS3V4SignerType";
5365

66+
/**
67+
* Subclasses refer to this.
68+
*/
69+
protected static final Logger LOG =
70+
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
71+
72+
/**
73+
* Create the client.
74+
* <p>
75+
* If the AWS stats are not null then a {@link AwsStatisticsCollector}
76+
* is created to bind the two together.
77+
* <i>Important: until this binding works properly across regions,
78+
* this should be null.</i>
79+
*/
5480
@Override
5581
public AmazonS3 createS3Client(URI name,
5682
final String bucket,
5783
final AWSCredentialsProvider credentials,
58-
final String userAgentSuffix) throws IOException {
84+
final String userAgentSuffix,
85+
final StatisticsFromAwsSdk statisticsFromAwsSdk) throws IOException {
5986
Configuration conf = getConf();
6087
final ClientConfiguration awsConf = S3AUtils
6188
.createAwsConf(conf, bucket, Constants.AWS_SERVICE_IDENTIFIER_S3);
@@ -72,36 +99,124 @@ public AmazonS3 createS3Client(URI name,
7299
if (!StringUtils.isEmpty(userAgentSuffix)) {
73100
awsConf.setUserAgentSuffix(userAgentSuffix);
74101
}
75-
return configureAmazonS3Client(
76-
newAmazonS3Client(credentials, awsConf), conf);
102+
// optional metrics
103+
RequestMetricCollector metrics = statisticsFromAwsSdk != null
104+
? new AwsStatisticsCollector(statisticsFromAwsSdk)
105+
: null;
106+
107+
return newAmazonS3Client(
108+
credentials,
109+
awsConf,
110+
metrics,
111+
conf.getTrimmed(ENDPOINT, ""),
112+
conf.getBoolean(PATH_STYLE_ACCESS, false));
77113
}
78114

79115
/**
80-
* Wrapper around constructor for {@link AmazonS3} client.
116+
* Create an {@link AmazonS3} client.
81117
* Override this to provide an extended version of the client
82118
* @param credentials credentials to use
83119
* @param awsConf AWS configuration
84-
* @return new AmazonS3 client
120+
* @param metrics metrics collector or null
121+
* @param endpoint endpoint string; may be ""
122+
* @param pathStyleAccess enable path style access?
123+
* @return new AmazonS3 client
85124
*/
86125
protected AmazonS3 newAmazonS3Client(
87-
AWSCredentialsProvider credentials, ClientConfiguration awsConf) {
88-
return new AmazonS3Client(credentials, awsConf);
126+
final AWSCredentialsProvider credentials,
127+
final ClientConfiguration awsConf,
128+
final RequestMetricCollector metrics,
129+
final String endpoint,
130+
final boolean pathStyleAccess) {
131+
if (metrics != null) {
132+
LOG.debug("Building S3 client using the SDK builder API");
133+
return buildAmazonS3Client(credentials, awsConf, metrics, endpoint,
134+
pathStyleAccess);
135+
} else {
136+
LOG.debug("Building S3 client using the SDK builder API");
137+
return classicAmazonS3Client(credentials, awsConf, endpoint,
138+
pathStyleAccess);
139+
}
89140
}
90141

91142
/**
92-
* Configure S3 client from the Hadoop configuration.
93-
*
143+
* Use the (newer) Builder SDK to create an AWS S3 client.
144+
* <p>
145+
* The builder has a more complex endpoint configuration
146+
* which cannot yet be used in this code without
147+
* triggering regressions. So it is only used
148+
* when SDK metrics are supplied.
149+
* @param credentials credentials to use
150+
* @param awsConf AWS configuration
151+
* @param metrics metrics collector or null
152+
* @param endpoint endpoint string; may be ""
153+
* @param pathStyleAccess enable path style access?
154+
* @return new AmazonS3 client
155+
*/
156+
private AmazonS3 buildAmazonS3Client(
157+
final AWSCredentialsProvider credentials,
158+
final ClientConfiguration awsConf,
159+
final RequestMetricCollector metrics,
160+
final String endpoint,
161+
final boolean pathStyleAccess) {
162+
AmazonS3ClientBuilder b = AmazonS3Client.builder();
163+
b.withCredentials(credentials);
164+
b.withClientConfiguration(awsConf);
165+
b.withPathStyleAccessEnabled(pathStyleAccess);
166+
if (metrics != null) {
167+
b.withMetricsCollector(metrics);
168+
}
169+
170+
// endpoint set up is a PITA
171+
// client.setEndpoint("") is no longer available
172+
AwsClientBuilder.EndpointConfiguration epr
173+
= createEndpointConfiguration(endpoint, awsConf);
174+
if (epr != null) {
175+
// an endpoint binding was constructed: use it.
176+
b.withEndpointConfiguration(epr);
177+
}
178+
final AmazonS3 client = b.build();
179+
return client;
180+
}
181+
182+
/**
183+
* Wrapper around constructor for {@link AmazonS3} client.
184+
* Override this to provide an extended version of the client.
185+
* <p>
186+
* This uses a deprecated constructor -it is currently
187+
* the only one which works for us.
188+
* @param credentials credentials to use
189+
* @param awsConf AWS configuration
190+
* @param endpoint endpoint string; may be ""
191+
* @param pathStyleAccess enable path style access?
192+
* @return new AmazonS3 client
193+
*/
194+
@SuppressWarnings("deprecation")
195+
private AmazonS3 classicAmazonS3Client(
196+
AWSCredentialsProvider credentials,
197+
ClientConfiguration awsConf,
198+
final String endpoint,
199+
final boolean pathStyleAccess) {
200+
final AmazonS3 client = new AmazonS3Client(credentials, awsConf);
201+
return configureAmazonS3Client(client, endpoint, pathStyleAccess);
202+
}
203+
204+
/**
205+
* Configure classic S3 client.
206+
* <p>
94207
* This includes: endpoint, Path Access and possibly other
95208
* options.
96209
*
97-
* @param conf Hadoop configuration
210+
* @param s3 S3 Client.
211+
* @param endPoint s3 endpoint, may be empty
212+
* @param pathStyleAccess enable path style access?
98213
* @return S3 client
99214
* @throws IllegalArgumentException if misconfigured
100215
*/
101-
private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
102-
Configuration conf)
216+
protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
217+
final String endPoint,
218+
final boolean pathStyleAccess)
103219
throws IllegalArgumentException {
104-
String endPoint = conf.getTrimmed(ENDPOINT, "");
105220
if (!endPoint.isEmpty()) {
106221
try {
107222
s3.setEndpoint(endPoint);
@@ -111,31 +226,31 @@ private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
111226
throw new IllegalArgumentException(msg, e);
112227
}
113228
}
114-
return applyS3ClientOptions(s3, conf);
229+
return applyS3ClientOptions(s3, pathStyleAccess);
115230
}
116231

117232
/**
118233
* Perform any tuning of the {@code S3ClientOptions} settings based on
119234
* the Hadoop configuration.
120235
* This is different from the general AWS configuration creation as
121236
* it is unique to S3 connections.
122-
*
237+
* <p>
123238
* The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
124239
* to S3 buckets if configured. By default, the
125240
* behavior is to use virtual hosted-style access with URIs of the form
126241
* {@code http://bucketname.s3.amazonaws.com}
242+
* <p>
127243
* Enabling path-style access and a
128244
* region-specific endpoint switches the behavior to use URIs of the form
129245
* {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
130246
* It is common to use this when connecting to private S3 servers, as it
131247
* avoids the need to play with DNS entries.
132248
* @param s3 S3 client
133-
* @param conf Hadoop configuration
249+
* @param pathStyleAccess enable path style access?
134250
* @return the S3 client
135251
*/
136-
private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
137-
Configuration conf) {
138-
final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
252+
protected static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
253+
final boolean pathStyleAccess) {
139254
if (pathStyleAccess) {
140255
LOG.debug("Enabling path style access!");
141256
s3.setS3ClientOptions(S3ClientOptions.builder()
@@ -144,4 +259,54 @@ private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
144259
}
145260
return s3;
146261
}
262+
263+
/**
264+
* Given an endpoint string, return an endpoint config, or null, if none
265+
* is needed.
266+
* <p>
267+
* This is a pretty painful piece of code. It is trying to replicate
268+
* what AwsClient.setEndpoint() does, because you can't
269+
* call that setter on an AwsClient constructed via
270+
* the builder, and you can't pass a metrics collector
271+
* down except through the builder.
272+
* <p>
273+
* Note also that AWS signing is a mystery which nobody fully
274+
* understands, especially given all problems surface in a
275+
* "400 bad request" response, which, like all security systems,
276+
* provides minimal diagnostics out of fear of leaking
277+
* secrets.
278+
*
279+
* @param endpoint possibly null endpoint.
280+
* @param awsConf config to build the URI from.
281+
* @return a configuration for the S3 client builder.
282+
*/
283+
@VisibleForTesting
284+
public static AwsClientBuilder.EndpointConfiguration
285+
createEndpointConfiguration(
286+
final String endpoint, final ClientConfiguration awsConf) {
287+
LOG.debug("Creating endpoint configuration for {}", endpoint);
288+
if (endpoint == null || endpoint.isEmpty()) {
289+
// the default endpoint...we should be using null at this point.
290+
LOG.debug("Using default endpoint -no need to generate a configuration");
291+
return null;
292+
}
293+
294+
final URI epr = RuntimeHttpUtils.toUri(endpoint, awsConf);
295+
LOG.debug("Endpoint URI = {}", epr);
296+
297+
String region;
298+
if (!ServiceUtils.isS3USStandardEndpoint(endpoint)) {
299+
LOG.debug("Endpoint {} is not the default; parsing", epr);
300+
region = AwsHostNameUtils.parseRegion(
301+
epr.getHost(),
302+
S3_SERVICE_NAME);
303+
} else {
304+
// US-east, set region == null.
305+
LOG.debug("Endpoint {} is the standard one; declare region as null", epr);
306+
region = null;
307+
}
308+
LOG.debug("Region for endpoint {}, URI {} is determined as {}",
309+
endpoint, epr, region);
310+
return new AwsClientBuilder.EndpointConfiguration(endpoint, region);
311+
}
147312
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import com.amazonaws.ClientConfiguration;
2222
import com.amazonaws.auth.AWSCredentialsProvider;
23+
import com.amazonaws.metrics.RequestMetricCollector;
2324
import com.amazonaws.services.s3.AmazonS3;
2425

2526
import org.apache.hadoop.classification.InterfaceAudience;
@@ -40,12 +41,21 @@ public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {
4041
* Logs a warning that this is being done.
4142
* @param credentials credentials to use
4243
* @param awsConf AWS configuration
44+
* @param metrics metric collector
45+
* @param endpoint AWS endpoint
46+
* @param pathStyleAccess should path style access be supported?
4347
* @return an inconsistent client.
4448
*/
4549
@Override
4650
protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
47-
ClientConfiguration awsConf) {
51+
ClientConfiguration awsConf,
52+
final RequestMetricCollector metrics,
53+
final String endpoint,
54+
final boolean pathStyleAccess) {
4855
LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **");
49-
return new InconsistentAmazonS3Client(credentials, awsConf, getConf());
56+
InconsistentAmazonS3Client s3
57+
= new InconsistentAmazonS3Client(credentials, awsConf, getConf());
58+
configureAmazonS3Client(s3, endpoint, pathStyleAccess);
59+
return s3;
5060
}
5161
}

0 commit comments

Comments
 (0)