Skip to content

Commit 4ba7bee

Browse files
steveloughran and Mehakmeet Singh
authored and committed
CDPD-27264. HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (apache#3133)
This addresses the regression in Hadoop 3.3.1 where if no S3 endpoint is set in fs.s3a.endpoint, S3A filesystem creation may fail on non-EC2 deployments, depending on the local host environment setup.

* If fs.s3a.endpoint is empty/null, and fs.s3a.endpoint.region is null, the region is set to "us-east-1".
* If fs.s3a.endpoint.region is explicitly set to "" then the client falls back to the SDK region resolution chain; this works on EC2.
* Details in troubleshooting.md, including a workaround for Hadoop-3.3.1+.
* Also contains some minor restructuring of troubleshooting.md.
* Uses pre-Auditing LogExactlyOnce import, so doesn't depend on that patch.

Contributed by Steve Loughran.

This is a critical follow-on patch to CDPD-26441. HADOOP-17705. S3A to add Config to set AWS region (apache#3020). Both patches must be included.

Change-Id: Icca928e1752423d68591508c360ff6434997fb64
1 parent 47d9978 commit 4ba7bee

File tree

6 files changed

+508
-261
lines changed

6 files changed

+508
-261
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1073,4 +1073,10 @@ private Constants() {
10731073
*/
10741074
public static final String AWS_REGION = "fs.s3a.endpoint.region";
10751075

1076+
/**
1077+
* The special S3 region which can be used to talk to any bucket.
1078+
* Value {@value}.
1079+
*/
1080+
public static final String AWS_S3_CENTRAL_REGION = "us-east-1";
1081+
10761082
}

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.net.URI;
2323

2424
import com.amazonaws.ClientConfiguration;
25+
import com.amazonaws.SdkClientException;
2526
import com.amazonaws.client.builder.AwsClientBuilder;
2627
import com.amazonaws.handlers.RequestHandler2;
2728
import com.amazonaws.services.s3.AmazonS3;
@@ -40,11 +41,14 @@
4041
import org.apache.hadoop.classification.InterfaceStability;
4142
import org.apache.hadoop.conf.Configuration;
4243
import org.apache.hadoop.conf.Configured;
44+
import org.apache.hadoop.fs.s3a.impl.LogExactlyOnce;
4345
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
4446

4547
import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
48+
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
4649
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
4750
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
51+
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
4852

4953
/**
5054
* The default {@link S3ClientFactory} implementation.
@@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured
6468
protected static final Logger LOG =
6569
LoggerFactory.getLogger(DefaultS3ClientFactory.class);
6670

71+
/**
72+
* A one-off warning of default region chains in use.
73+
*/
74+
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
75+
new LogExactlyOnce(LOG);
76+
77+
/**
78+
* Warning message printed when the SDK Region chain is in use.
79+
*/
80+
private static final String SDK_REGION_CHAIN_IN_USE =
81+
"S3A filesystem client is using"
82+
+ " the SDK region resolution chain.";
83+
6784
/**
6885
* Create the client by preparing the AwsConf configuration
6986
* and then invoking {@code buildAmazonS3Client()}.
@@ -94,9 +111,14 @@ public AmazonS3 createS3Client(
94111
awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
95112
}
96113

97-
return buildAmazonS3Client(
98-
awsConf,
99-
parameters);
114+
try {
115+
return buildAmazonS3Client(
116+
awsConf,
117+
parameters);
118+
} catch (SdkClientException e) {
119+
// SDK refused to build.
120+
throw translateException("creating AWS S3 client", uri.toString(), e);
121+
}
100122
}
101123

102124
/**
@@ -109,6 +131,7 @@ public AmazonS3 createS3Client(
109131
* @param awsConf AWS configuration
110132
* @param parameters parameters
111133
* @return new AmazonS3 client
134+
* @throws SdkClientException if the configuration is invalid.
112135
*/
113136
protected AmazonS3 buildAmazonS3Client(
114137
final ClientConfiguration awsConf,
@@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client(
141164
// no idea what the endpoint is, so tell the SDK
142165
// to work it out at the cost of an extra HEAD request
143166
b.withForceGlobalBucketAccessEnabled(true);
167+
// HADOOP-17771 force set the region so the build process doesn't halt.
168+
String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
169+
LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
170+
if (!region.isEmpty()) {
171+
// there's either an explicit region or we have fallen back
172+
// to the central one.
173+
LOG.debug("Using default endpoint; setting region to {}", region);
174+
b.setRegion(region);
175+
} else {
176+
// no region.
177+
// allow this if people really want it; it is OK to rely on this
178+
// when deployed in EC2.
179+
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
180+
LOG.debug(SDK_REGION_CHAIN_IN_USE);
181+
}
144182
}
145183
final AmazonS3 client = b.build();
146184
return client;
@@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
206244
createEndpointConfiguration(
207245
final String endpoint, final ClientConfiguration awsConf,
208246
String awsRegion) {
209-
LOG.debug("Creating endpoint configuration for {}", endpoint);
247+
LOG.debug("Creating endpoint configuration for \"{}\"", endpoint);
210248
if (endpoint == null || endpoint.isEmpty()) {
211249
// the default endpoint...we should be using null at this point.
212250
LOG.debug("Using default endpoint -no need to generate a configuration");

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,9 @@ private InternalConstants() {
111111
*/
112112
public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000;
113113

114+
/**
115+
* The system property used by the AWS SDK to identify the region.
116+
*/
117+
public static final String AWS_REGION_SYSPROP = "aws.region";
118+
114119
}

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section he
435435
aws_session_token = ...
436436
aws_security_token = ...
437437
```
438+
Note:
439+
440+
1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string.
441+
1. For the credentials to be available to applications running in a Hadoop cluster, the
442+
configuration files MUST be in the `~/.aws/` directory on the local filesystem in
443+
all hosts in the cluster.
438444

439445
### <a name="auth_session"></a> Using Session Credentials with `TemporaryAWSCredentialsProvider`
440446

@@ -799,8 +805,10 @@ options are covered in [Testing](./testing.md).
799805
<property>
800806
<name>fs.s3a.endpoint.region</name>
801807
<description>AWS S3 region for a bucket, which bypasses the parsing of
802-
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
803-
while using privateLink URL and explicitly set the bucket region.
808+
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
809+
while using privateLink URL and explicitly set the bucket region.
810+
If set to a blank string (or 1+ space), falls back to the
811+
(potentially brittle) SDK region resolution process.
804812
</description>
805813
</property>
806814

0 commit comments

Comments (0)