Skip to content

Commit 2124c83

Browse files
committed
improve region handling
1 parent 81edbeb commit 2124c83

File tree

10 files changed

+257
-172
lines changed

10 files changed

+257
-172
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -407,10 +407,6 @@ public final class StoreStatisticNames {
407407
public static final String MULTIPART_UPLOAD_LIST
408408
= "multipart_upload_list";
409409

410-
/** Probe for store region: {@value}. */
411-
public static final String STORE_REGION_PROBE
412-
= "store_region_probe";
413-
414410
private StoreStatisticNames() {
415411
}
416412

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,12 @@ private Constants() {
11791179
*/
11801180
public static final String AWS_S3_CENTRAL_REGION = "us-east-1";
11811181

1182+
/**
1183+
* The default S3 region when using cross region client.
1184+
* Value {@value}.
1185+
*/
1186+
public static final String AWS_S3_DEFAULT_REGION = "us-east-2";
1187+
11821188
/**
11831189
* Require that all S3 access is made through Access Points.
11841190
*/

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 99 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.slf4j.Logger;
2727
import org.slf4j.LoggerFactory;
2828

29+
import software.amazon.awssdk.awscore.util.AwsHostNameUtils;
2930
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
3031
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption;
3132
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;
@@ -48,6 +49,10 @@
4849
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
4950
import org.apache.hadoop.fs.store.LogExactlyOnce;
5051

52+
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
53+
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
54+
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION;
55+
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT;
5156
import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER;
5257
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS;
5358
import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS;
@@ -66,12 +71,27 @@ public class DefaultS3ClientFactory extends Configured
6671

6772
private static final String REQUESTER_PAYS_HEADER_VALUE = "requester";
6873

74+
private static final String S3_SERVICE_NAME = "s3";
75+
6976
/**
7077
* Subclasses refer to this.
7178
*/
7279
protected static final Logger LOG =
7380
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
7481

82+
/**
83+
* A one-off warning of default region chains in use.
84+
*/
85+
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
86+
new LogExactlyOnce(LOG);
87+
88+
/**
89+
* Warning message printed when the SDK Region chain is in use.
90+
*/
91+
private static final String SDK_REGION_CHAIN_IN_USE =
92+
"S3A filesystem client is using"
93+
+ " the SDK region resolution chain.";
94+
7595

7696
/** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */
7797
private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG);
@@ -138,15 +158,7 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build
138158
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket)
139159
throws IOException {
140160

141-
Region region = parameters.getRegion();
142-
LOG.debug("Using region {}", region);
143-
144-
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
145-
146-
if (endpoint != null) {
147-
builder.endpointOverride(endpoint);
148-
LOG.debug("Using endpoint {}", endpoint);
149-
}
161+
configureEndpointAndRegion(builder, parameters, conf);
150162

151163
S3Configuration serviceConfiguration = S3Configuration.builder()
152164
.pathStyleAccessEnabled(parameters.isPathStyleAccess())
@@ -155,7 +167,6 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build
155167
return builder
156168
.overrideConfiguration(createClientOverrideConfiguration(parameters, conf))
157169
.credentialsProvider(parameters.getCredentialSet())
158-
.region(region)
159170
.serviceConfiguration(serviceConfiguration);
160171
}
161172

@@ -201,6 +212,65 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration(
201212
return clientOverrideConfigBuilder.build();
202213
}
203214

215+
/**
216+
* This method configures the endpoint and region for a S3 client.
217+
* The order of configuration is:
218+
*
219+
* <ol>
220+
* <li>If region is configured via fs.s3a.endpoint.region, use it.</li>
221+
* <li>If endpoint is configured via via fs.s3a.endpoint, set it.
222+
* If no region is configured, try to parse region from endpoint. </li>
223+
* <li> If no region is configured, and it could not be parsed from the endpoint,
224+
* set the default region as US_EAST_2 and enable cross region access. </li>
225+
* <li> If configured region is empty, fallback to SDK resolution chain. </li>
226+
* </ol>
227+
*
228+
* @param builder S3 client builder.
229+
* @param parameters parameter object
230+
* @param conf conf configuration object
231+
* @param <BuilderT> S3 client builder type
232+
* @param <ClientT> S3 client type
233+
*/
234+
private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void configureEndpointAndRegion(
235+
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) {
236+
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf);
237+
238+
String configuredRegion = parameters.getRegion();
239+
Region region = null;
240+
241+
// If the region was configured, set it.
242+
if (configuredRegion != null && !configuredRegion.isEmpty()) {
243+
region = Region.of(configuredRegion);
244+
}
245+
246+
if (endpoint != null) {
247+
builder.endpointOverride(endpoint);
248+
// No region was configured, try to determine it from the endpoint.
249+
if (region == null) {
250+
region = getS3RegionFromEndpoint(parameters.getEndpoint());
251+
}
252+
LOG.debug("Setting endpoint to {}", endpoint);
253+
}
254+
255+
if (region != null) {
256+
builder.region(region);
257+
} else if (configuredRegion == null) {
258+
// no region is configured, and none could be determined from the endpoint.
259+
// Use US_EAST_2 as default.
260+
region = Region.of(AWS_S3_DEFAULT_REGION);
261+
builder.crossRegionAccessEnabled(true);
262+
builder.region(region);
263+
} else if (configuredRegion.isEmpty()) {
264+
// region configuration was set to empty string.
265+
// allow this if people really want it; it is OK to rely on this
266+
// when deployed in EC2.
267+
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
268+
LOG.debug(SDK_REGION_CHAIN_IN_USE);
269+
}
270+
271+
LOG.debug("Setting region to {}", region);
272+
}
273+
204274
/**
205275
* Given a endpoint string, create the endpoint URI.
206276
*
@@ -229,4 +299,23 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) {
229299
throw new IllegalArgumentException(e);
230300
}
231301
}
302+
303+
/**
304+
* Parses the endpoint to get the region.
305+
* If endpoint is the central one, use US_EAST_1.
306+
*
307+
* @param endpoint the configure endpoint.
308+
* @return the S3 region, null if unable to resolve from endpoint.
309+
*/
310+
private static Region getS3RegionFromEndpoint(String endpoint) {
311+
312+
if(!endpoint.endsWith(CENTRAL_ENDPOINT)) {
313+
LOG.debug("Endpoint {} is not the default; parsing", endpoint);
314+
return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null);
315+
}
316+
317+
// endpoint is for US_EAST_1;
318+
return Region.US_EAST_1;
319+
}
320+
232321
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
332332
private int executorCapacity;
333333
private long multiPartThreshold;
334334
public static final Logger LOG = LoggerFactory.getLogger(S3AFileSystem.class);
335-
/** Exactly once log to warn about setting the region in config to avoid probe. */
336-
private static final LogExactlyOnce SET_REGION_WARNING = new LogExactlyOnce(LOG);
337335

338336
/** Log to warn of storage class configuration problems. */
339337
private static final LogExactlyOnce STORAGE_CLASS_WARNING = new LogExactlyOnce(LOG);
@@ -461,8 +459,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
461459
*/
462460
private String scheme = FS_S3A;
463461

464-
private final static Map<String, Region> BUCKET_REGIONS = new HashMap<>();
465-
466462
/** Add any deprecated keys. */
467463
@SuppressWarnings("deprecation")
468464
private static void addDeprecatedKeys() {
@@ -870,9 +866,6 @@ protected void verifyBucketExists() throws UnknownStoreException, IOException {
870866
STORE_EXISTS_PROBE, bucket, null, () ->
871867
invoker.retry("doesBucketExist", bucket, true, () -> {
872868
try {
873-
if (BUCKET_REGIONS.containsKey(bucket)) {
874-
return true;
875-
}
876869
s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
877870
return true;
878871
} catch (AwsServiceException ex) {
@@ -982,8 +975,6 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException {
982975
? conf.getTrimmed(AWS_REGION)
983976
: accessPoint.getRegion();
984977

985-
Region region = getS3Region(configuredRegion);
986-
987978
S3ClientFactory.S3ClientCreationParameters parameters =
988979
new S3ClientFactory.S3ClientCreationParameters()
989980
.withCredentialSet(credentials)
@@ -998,7 +989,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException {
998989
.withMultipartCopyEnabled(isMultipartCopyEnabled)
999990
.withMultipartThreshold(multiPartThreshold)
1000991
.withTransferManagerExecutor(unboundedThreadPool)
1001-
.withRegion(region);
992+
.withRegion(configuredRegion);
1002993

1003994
S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf);
1004995
s3Client = clientFactory.createS3Client(getUri(), parameters);
@@ -1019,75 +1010,6 @@ private void createS3AsyncClient(S3ClientFactory clientFactory,
10191010
s3AsyncClient = clientFactory.createS3AsyncClient(getUri(), parameters);
10201011
}
10211012

1022-
/**
1023-
* Get the bucket region.
1024-
*
1025-
* @param region AWS S3 Region set in the config. This property may not be set, in which case
1026-
* ask S3 for the region.
1027-
* @return region of the bucket.
1028-
*/
1029-
private Region getS3Region(String region) throws IOException {
1030-
1031-
if (!StringUtils.isBlank(region)) {
1032-
return Region.of(region);
1033-
}
1034-
1035-
Region cachedRegion = BUCKET_REGIONS.get(bucket);
1036-
1037-
if (cachedRegion != null) {
1038-
LOG.debug("Got region {} for bucket {} from cache", cachedRegion, bucket);
1039-
return cachedRegion;
1040-
}
1041-
1042-
Region s3Region = trackDurationAndSpan(STORE_REGION_PROBE, bucket, null,
1043-
() -> invoker.retry("getS3Region", bucket, true, () -> {
1044-
try {
1045-
1046-
SET_REGION_WARNING.warn(
1047-
"Getting region for bucket {} from S3, this will slow down FS initialisation. "
1048-
+ "To avoid this, set the region using property {}", bucket,
1049-
FS_S3A_BUCKET_PREFIX + bucket + ".endpoint.region");
1050-
1051-
// build a s3 client with region eu-west-1 that can be used to get the region of the
1052-
// bucket. Using eu-west-1, as headBucket() doesn't work with us-east-1. This is because
1053-
// us-east-1 uses the endpoint s3.amazonaws.com, which resolves bucket.s3.amazonaws.com
1054-
// to the actual region the bucket is in. As the request is signed with us-east-1 and
1055-
// not the bucket's region, it fails.
1056-
S3Client getRegionS3Client =
1057-
S3Client.builder().region(Region.EU_WEST_1).credentialsProvider(credentials)
1058-
.build();
1059-
1060-
HeadBucketResponse headBucketResponse =
1061-
getRegionS3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build());
1062-
1063-
Region bucketRegion = Region.of(
1064-
headBucketResponse.sdkHttpResponse().headers().get(BUCKET_REGION_HEADER).get(0));
1065-
BUCKET_REGIONS.put(bucket, bucketRegion);
1066-
1067-
return bucketRegion;
1068-
} catch (S3Exception exception) {
1069-
if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) {
1070-
Region bucketRegion = Region.of(
1071-
exception.awsErrorDetails().sdkHttpResponse().headers().get(BUCKET_REGION_HEADER)
1072-
.get(0));
1073-
BUCKET_REGIONS.put(bucket, bucketRegion);
1074-
1075-
return bucketRegion;
1076-
}
1077-
1078-
if (exception.statusCode() == SC_404_NOT_FOUND) {
1079-
throw new UnknownStoreException("s3a://" + bucket + "/",
1080-
" Bucket does not exist: " + exception,
1081-
exception);
1082-
}
1083-
1084-
throw exception;
1085-
}
1086-
}));
1087-
1088-
return s3Region;
1089-
}
1090-
10911013
/**
10921014
* Initialize and launch the audit manager and service.
10931015
* As this takes the FS IOStatistics store, it must be invoked

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ final class S3ClientCreationParameters {
169169
/**
170170
* Region of the S3 bucket.
171171
*/
172-
private Region region;
172+
private String region;
173173

174174

175175
/**
@@ -386,42 +386,44 @@ public S3ClientCreationParameters withTransferManagerExecutor(
386386
}
387387

388388
/**
389-
* Set region.
389+
* Set the multipart flag..
390390
*
391391
* @param value new value
392392
* @return the builder
393393
*/
394-
public S3ClientCreationParameters withRegion(
395-
final Region value) {
396-
region = value;
394+
public S3ClientCreationParameters withMultipartCopyEnabled(final boolean value) {
395+
this.multipartCopy = value;
397396
return this;
398397
}
399398

400399
/**
401-
* Get the region.
402-
* @return invoker
400+
* Get the multipart flag.
401+
* @return multipart flag
403402
*/
404-
public Region getRegion() {
405-
return region;
403+
public boolean isMultipartCopy() {
404+
return multipartCopy;
406405
}
407406

408407
/**
409-
* Set the multipart flag..
408+
* Set region.
410409
*
411410
* @param value new value
412411
* @return the builder
413412
*/
414-
public S3ClientCreationParameters withMultipartCopyEnabled(final boolean value) {
415-
this.multipartCopy = value;
413+
public S3ClientCreationParameters withRegion(
414+
final String value) {
415+
region = value;
416416
return this;
417417
}
418418

419419
/**
420-
* Get the multipart flag.
421-
* @return multipart flag
420+
* Get the region.
421+
* @return invoker
422422
*/
423-
public boolean isMultipartCopy() {
424-
return multipartCopy;
423+
public String getRegion() {
424+
return region;
425425
}
426426
}
427+
428+
427429
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -548,11 +548,6 @@ public enum Statistic {
548548
StoreStatisticNames.STORE_IO_THROTTLE_RATE,
549549
"Rate of S3 request throttling",
550550
TYPE_QUANTILE),
551-
STORE_REGION_PROBE(
552-
StoreStatisticNames.STORE_REGION_PROBE,
553-
"Store Region Probe",
554-
TYPE_DURATION
555-
),
556551

557552
/*
558553
* Delegation Token Operations.

0 commit comments

Comments
 (0)