23
23
24
24
import com .amazonaws .ClientConfiguration ;
25
25
import com .amazonaws .auth .AWSCredentialsProvider ;
26
+ import com .amazonaws .client .builder .AwsClientBuilder ;
27
+ import com .amazonaws .metrics .RequestMetricCollector ;
26
28
import com .amazonaws .services .s3 .AmazonS3 ;
27
29
import com .amazonaws .services .s3 .AmazonS3Client ;
30
+ import com .amazonaws .services .s3 .AmazonS3ClientBuilder ;
28
31
import com .amazonaws .services .s3 .S3ClientOptions ;
32
+ import com .amazonaws .services .s3 .internal .ServiceUtils ;
33
+ import com .amazonaws .util .AwsHostNameUtils ;
34
+ import com .amazonaws .util .RuntimeHttpUtils ;
35
+ import org .apache .hadoop .thirdparty .com .google .common .annotations .VisibleForTesting ;
29
36
import org .slf4j .Logger ;
37
+ import org .slf4j .LoggerFactory ;
30
38
31
39
import org .apache .commons .lang3 .StringUtils ;
32
40
import org .apache .hadoop .classification .InterfaceAudience ;
33
41
import org .apache .hadoop .classification .InterfaceStability ;
34
42
import org .apache .hadoop .conf .Configuration ;
35
43
import org .apache .hadoop .conf .Configured ;
44
+ import org .apache .hadoop .fs .s3a .statistics .StatisticsFromAwsSdk ;
45
+ import org .apache .hadoop .fs .s3a .statistics .impl .AwsStatisticsCollector ;
36
46
37
47
import static org .apache .hadoop .fs .s3a .Constants .EXPERIMENTAL_AWS_INTERNAL_THROTTLING ;
38
48
import static org .apache .hadoop .fs .s3a .Constants .ENDPOINT ;
41
51
42
52
/**
43
53
* The default {@link S3ClientFactory} implementation.
44
- * This which calls the AWS SDK to configure and create an
54
+ * This calls the AWS SDK to configure and create an
45
55
* {@link AmazonS3Client} that communicates with the S3 service.
46
56
*/
47
57
@ InterfaceAudience .Private
48
58
@ InterfaceStability .Unstable
49
59
public class DefaultS3ClientFactory extends Configured
50
60
implements S3ClientFactory {
51
61
52
- protected static final Logger LOG = S3AFileSystem .LOG ;
62
+ private static final String S3_SERVICE_NAME = "s3" ;
63
+ private static final String S3_SIGNER = "S3SignerType" ;
64
+ private static final String S3_V4_SIGNER = "AWSS3V4SignerType" ;
53
65
66
+ /**
67
+ * Subclasses refer to this.
68
+ */
69
+ protected static final Logger LOG =
70
+ LoggerFactory .getLogger (DefaultS3ClientFactory .class );
71
+
72
+ /**
73
+ * Create the client.
74
+ * <p>
75
+ * If the AWS stats are not null then a {@link AwsStatisticsCollector}
76
+ * is created to bind to the two.
77
+ * <i>Important: until this binding works properly across regions,
78
+ * this should be null.</i>
79
+ */
54
80
@ Override
55
81
public AmazonS3 createS3Client (URI name ,
56
82
final String bucket ,
57
83
final AWSCredentialsProvider credentials ,
58
- final String userAgentSuffix ) throws IOException {
84
+ final String userAgentSuffix ,
85
+ final StatisticsFromAwsSdk statisticsFromAwsSdk ) throws IOException {
59
86
Configuration conf = getConf ();
60
87
final ClientConfiguration awsConf = S3AUtils
61
88
.createAwsConf (conf , bucket , Constants .AWS_SERVICE_IDENTIFIER_S3 );
@@ -72,36 +99,124 @@ public AmazonS3 createS3Client(URI name,
72
99
if (!StringUtils .isEmpty (userAgentSuffix )) {
73
100
awsConf .setUserAgentSuffix (userAgentSuffix );
74
101
}
75
- return configureAmazonS3Client (
76
- newAmazonS3Client (credentials , awsConf ), conf );
102
+ // optional metrics
103
+ RequestMetricCollector metrics = statisticsFromAwsSdk != null
104
+ ? new AwsStatisticsCollector (statisticsFromAwsSdk )
105
+ : null ;
106
+
107
+ return newAmazonS3Client (
108
+ credentials ,
109
+ awsConf ,
110
+ metrics ,
111
+ conf .getTrimmed (ENDPOINT , "" ),
112
+ conf .getBoolean (PATH_STYLE_ACCESS , false ));
77
113
}
78
114
79
115
/**
80
- * Wrapper around constructor for {@link AmazonS3} client.
116
+ * Create an {@link AmazonS3} client.
81
117
* Override this to provide an extended version of the client
82
118
* @param credentials credentials to use
83
119
* @param awsConf AWS configuration
84
- * @return new AmazonS3 client
120
+ * @param metrics metrics collector or null
121
+ * @param endpoint endpoint string; may be ""
122
+ * @param pathStyleAccess enable path style access?
123
+ * @return new AmazonS3 client
85
124
*/
86
125
protected AmazonS3 newAmazonS3Client (
87
- AWSCredentialsProvider credentials , ClientConfiguration awsConf ) {
88
- return new AmazonS3Client (credentials , awsConf );
126
+ final AWSCredentialsProvider credentials ,
127
+ final ClientConfiguration awsConf ,
128
+ final RequestMetricCollector metrics ,
129
+ final String endpoint ,
130
+ final boolean pathStyleAccess ) {
131
+ if (metrics != null ) {
132
+ LOG .debug ("Building S3 client using the SDK builder API" );
133
+ return buildAmazonS3Client (credentials , awsConf , metrics , endpoint ,
134
+ pathStyleAccess );
135
+ } else {
136
+ LOG .debug ("Building S3 client using the SDK builder API" );
137
+ return classicAmazonS3Client (credentials , awsConf , endpoint ,
138
+ pathStyleAccess );
139
+ }
89
140
}
90
141
91
142
/**
92
- * Configure S3 client from the Hadoop configuration.
93
- *
143
+ * Use the (newer) Builder SDK to create a an AWS S3 client.
144
+ * <p>
145
+ * This has a more complex endpoint configuration in a
146
+ * way which does not yet work in this code in a way
147
+ * which doesn't trigger regressions. So it is only used
148
+ * when SDK metrics are supplied.
149
+ * @param credentials credentials to use
150
+ * @param awsConf AWS configuration
151
+ * @param metrics metrics collector or null
152
+ * @param endpoint endpoint string; may be ""
153
+ * @param pathStyleAccess enable path style access?
154
+ * @return new AmazonS3 client
155
+ */
156
+ private AmazonS3 buildAmazonS3Client (
157
+ final AWSCredentialsProvider credentials ,
158
+ final ClientConfiguration awsConf ,
159
+ final RequestMetricCollector metrics ,
160
+ final String endpoint ,
161
+ final boolean pathStyleAccess ) {
162
+ AmazonS3ClientBuilder b = AmazonS3Client .builder ();
163
+ b .withCredentials (credentials );
164
+ b .withClientConfiguration (awsConf );
165
+ b .withPathStyleAccessEnabled (pathStyleAccess );
166
+ if (metrics != null ) {
167
+ b .withMetricsCollector (metrics );
168
+ }
169
+
170
+ // endpoint set up is a PITA
171
+ // client.setEndpoint("") is no longer available
172
+ AwsClientBuilder .EndpointConfiguration epr
173
+ = createEndpointConfiguration (endpoint , awsConf );
174
+ if (epr != null ) {
175
+ // an endpoint binding was constructed: use it.
176
+ b .withEndpointConfiguration (epr );
177
+ }
178
+ final AmazonS3 client = b .build ();
179
+ return client ;
180
+ }
181
+
182
+ /**
183
+ * Wrapper around constructor for {@link AmazonS3} client.
184
+ * Override this to provide an extended version of the client.
185
+ * <p>
186
+ * This uses a deprecated constructor -it is currently
187
+ * the only one which works for us.
188
+ * @param credentials credentials to use
189
+ * @param awsConf AWS configuration
190
+ * @param endpoint endpoint string; may be ""
191
+ * @param pathStyleAccess enable path style access?
192
+ * @return new AmazonS3 client
193
+ */
194
+ @ SuppressWarnings ("deprecation" )
195
+ private AmazonS3 classicAmazonS3Client (
196
+ AWSCredentialsProvider credentials ,
197
+ ClientConfiguration awsConf ,
198
+ final String endpoint ,
199
+ final boolean pathStyleAccess ) {
200
+ final AmazonS3 client = new AmazonS3Client (credentials , awsConf );
201
+ return configureAmazonS3Client (client , endpoint , pathStyleAccess );
202
+ }
203
+
204
+ /**
205
+ * Configure classic S3 client.
206
+ * <p>
94
207
* This includes: endpoint, Path Access and possibly other
95
208
* options.
96
209
*
97
- * @param conf Hadoop configuration
210
+ * @param s3 S3 Client.
211
+ * @param endPoint s3 endpoint, may be empty
212
+ * @param pathStyleAccess enable path style access?
98
213
* @return S3 client
99
214
* @throws IllegalArgumentException if misconfigured
100
215
*/
101
- private static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
102
- Configuration conf )
216
+ protected static AmazonS3 configureAmazonS3Client (AmazonS3 s3 ,
217
+ final String endPoint ,
218
+ final boolean pathStyleAccess )
103
219
throws IllegalArgumentException {
104
- String endPoint = conf .getTrimmed (ENDPOINT , "" );
105
220
if (!endPoint .isEmpty ()) {
106
221
try {
107
222
s3 .setEndpoint (endPoint );
@@ -111,31 +226,31 @@ private static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
111
226
throw new IllegalArgumentException (msg , e );
112
227
}
113
228
}
114
- return applyS3ClientOptions (s3 , conf );
229
+ return applyS3ClientOptions (s3 , pathStyleAccess );
115
230
}
116
231
117
232
/**
118
233
* Perform any tuning of the {@code S3ClientOptions} settings based on
119
234
* the Hadoop configuration.
120
235
* This is different from the general AWS configuration creation as
121
236
* it is unique to S3 connections.
122
- *
237
+ * <p>
123
238
* The {@link Constants#PATH_STYLE_ACCESS} option enables path-style access
124
239
* to S3 buckets if configured. By default, the
125
240
* behavior is to use virtual hosted-style access with URIs of the form
126
241
* {@code http://bucketname.s3.amazonaws.com}
242
+ * <p>
127
243
* Enabling path-style access and a
128
244
* region-specific endpoint switches the behavior to use URIs of the form
129
245
* {@code http://s3-eu-west-1.amazonaws.com/bucketname}.
130
246
* It is common to use this when connecting to private S3 servers, as it
131
247
* avoids the need to play with DNS entries.
132
248
* @param s3 S3 client
133
- * @param conf Hadoop configuration
249
+ * @param pathStyleAccess enable path style access?
134
250
* @return the S3 client
135
251
*/
136
- private static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
137
- Configuration conf ) {
138
- final boolean pathStyleAccess = conf .getBoolean (PATH_STYLE_ACCESS , false );
252
+ protected static AmazonS3 applyS3ClientOptions (AmazonS3 s3 ,
253
+ final boolean pathStyleAccess ) {
139
254
if (pathStyleAccess ) {
140
255
LOG .debug ("Enabling path style access!" );
141
256
s3 .setS3ClientOptions (S3ClientOptions .builder ()
@@ -144,4 +259,54 @@ private static AmazonS3 applyS3ClientOptions(AmazonS3 s3,
144
259
}
145
260
return s3 ;
146
261
}
262
+
263
+ /**
264
+ * Given an endpoint string, return an endpoint config, or null, if none
265
+ * is needed.
266
+ * <p>
267
+ * This is a pretty painful piece of code. It is trying to replicate
268
+ * what AwsClient.setEndpoint() does, because you can't
269
+ * call that setter on an AwsClient constructed via
270
+ * the builder, and you can't pass a metrics collector
271
+ * down except through the builder.
272
+ * <p>
273
+ * Note also that AWS signing is a mystery which nobody fully
274
+ * understands, especially given all problems surface in a
275
+ * "400 bad request" response, which, like all security systems,
276
+ * provides minimal diagnostics out of fear of leaking
277
+ * secrets.
278
+ *
279
+ * @param endpoint possibly null endpoint.
280
+ * @param awsConf config to build the URI from.
281
+ * @return a configuration for the S3 client builder.
282
+ */
283
+ @ VisibleForTesting
284
+ public static AwsClientBuilder .EndpointConfiguration
285
+ createEndpointConfiguration (
286
+ final String endpoint , final ClientConfiguration awsConf ) {
287
+ LOG .debug ("Creating endpoint configuration for {}" , endpoint );
288
+ if (endpoint == null || endpoint .isEmpty ()) {
289
+ // the default endpoint...we should be using null at this point.
290
+ LOG .debug ("Using default endpoint -no need to generate a configuration" );
291
+ return null ;
292
+ }
293
+
294
+ final URI epr = RuntimeHttpUtils .toUri (endpoint , awsConf );
295
+ LOG .debug ("Endpoint URI = {}" , epr );
296
+
297
+ String region ;
298
+ if (!ServiceUtils .isS3USStandardEndpoint (endpoint )) {
299
+ LOG .debug ("Endpoint {} is not the default; parsing" , epr );
300
+ region = AwsHostNameUtils .parseRegion (
301
+ epr .getHost (),
302
+ S3_SERVICE_NAME );
303
+ } else {
304
+ // US-east, set region == null.
305
+ LOG .debug ("Endpoint {} is the standard one; declare region as null" , epr );
306
+ region = null ;
307
+ }
308
+ LOG .debug ("Region for endpoint {}, URI {} is determined as {}" ,
309
+ endpoint , epr , region );
310
+ return new AwsClientBuilder .EndpointConfiguration (endpoint , region );
311
+ }
147
312
}
0 commit comments