
Commit 5b7f68a

HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (#3133)
This addresses the regression in Hadoop 3.3.1 where, if no S3 endpoint
is set in fs.s3a.endpoint, S3A filesystem creation may fail on non-EC2
deployments, depending on the local host environment setup.

* If fs.s3a.endpoint is empty/null and fs.s3a.endpoint.region is null,
  the region is set to "us-east-1".
* If fs.s3a.endpoint.region is explicitly set to "", the client falls
  back to the SDK region resolution chain; this works on EC2.
* Details are in troubleshooting.md, including a workaround for
  Hadoop 3.3.1+.
* Also contains some minor restructuring of troubleshooting.md.

Contributed by Steve Loughran.
1 parent 581f43d commit 5b7f68a
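
The troubleshooting.md changes referenced above document a workaround for deployments stuck on Hadoop 3.3.1: set fs.s3a.endpoint.region explicitly so client creation never consults the SDK region resolution chain. A minimal sketch of that approach; the bucket name example-bucket and the choice of us-east-1 are placeholders, not values from this patch:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class S3ARegionWorkaround {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Pin the region up front so the client builder never needs the
    // SDK region resolution chain. Placeholder values throughout.
    conf.set("fs.s3a.endpoint.region", "us-east-1");
    FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
    System.out.println("created filesystem for " + fs.getUri());
  }
}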

6 files changed, 526 insertions(+), 264 deletions(-)


hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 6 additions & 0 deletions
@@ -1087,4 +1087,10 @@ private Constants() {
    */
   public static final String AWS_REGION = "fs.s3a.endpoint.region";
 
+  /**
+   * The special S3 region which can be used to talk to any bucket.
+   * Value {@value}.
+   */
+  public static final String AWS_S3_CENTRAL_REGION = "us-east-1";
+
 }

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java

Lines changed: 42 additions & 4 deletions
@@ -22,6 +22,7 @@
 import java.net.URI;
 
 import com.amazonaws.ClientConfiguration;
+import com.amazonaws.SdkClientException;
 import com.amazonaws.client.builder.AwsClientBuilder;
 import com.amazonaws.handlers.RequestHandler2;
 import com.amazonaws.services.s3.AmazonS3;
@@ -41,10 +42,13 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
+import org.apache.hadoop.fs.store.LogExactlyOnce;
 
 import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
+import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
 import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
 import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
+import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
 
 /**
  * The default {@link S3ClientFactory} implementation.
@@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured
   protected static final Logger LOG =
       LoggerFactory.getLogger(DefaultS3ClientFactory.class);
 
+  /**
+   * A one-off warning of default region chains in use.
+   */
+  private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
+      new LogExactlyOnce(LOG);
+
+  /**
+   * Warning message printed when the SDK Region chain is in use.
+   */
+  private static final String SDK_REGION_CHAIN_IN_USE =
+      "S3A filesystem client is using"
+          + " the SDK region resolution chain.";
+
   /**
    * Create the client by preparing the AwsConf configuration
    * and then invoking {@code buildAmazonS3Client()}.
@@ -94,9 +111,14 @@ public AmazonS3 createS3Client(
       awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
     }
 
-    return buildAmazonS3Client(
-        awsConf,
-        parameters);
+    try {
+      return buildAmazonS3Client(
+          awsConf,
+          parameters);
+    } catch (SdkClientException e) {
+      // SDK refused to build.
+      throw translateException("creating AWS S3 client", uri.toString(), e);
+    }
   }
 
   /**
@@ -109,6 +131,7 @@ public AmazonS3 createS3Client(
    * @param awsConf AWS configuration
    * @param parameters parameters
    * @return new AmazonS3 client
+   * @throws SdkClientException if the configuration is invalid.
    */
   protected AmazonS3 buildAmazonS3Client(
       final ClientConfiguration awsConf,
@@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client(
       // no idea what the endpoint is, so tell the SDK
       // to work it out at the cost of an extra HEAD request
       b.withForceGlobalBucketAccessEnabled(true);
+      // HADOOP-17771 force set the region so the build process doesn't halt.
+      String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
+      LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
+      if (!region.isEmpty()) {
+        // there's either an explicit region or we have fallen back
+        // to the central one.
+        LOG.debug("Using default endpoint; setting region to {}", region);
+        b.setRegion(region);
+      } else {
+        // no region.
+        // allow this if people really want it; it is OK to rely on this
+        // when deployed in EC2.
+        WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
+        LOG.debug(SDK_REGION_CHAIN_IN_USE);
+      }
     }
     final AmazonS3 client = b.build();
     return client;
@@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
       createEndpointConfiguration(
           final String endpoint, final ClientConfiguration awsConf,
           String awsRegion) {
-    LOG.debug("Creating endpoint configuration for {}", endpoint);
+    LOG.debug("Creating endpoint configuration for \"{}\"", endpoint);
     if (endpoint == null || endpoint.isEmpty()) {
       // the default endpoint...we should be using null at this point.
       LOG.debug("Using default endpoint -no need to generate a configuration");
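
Taken together, the buildAmazonS3Client() change gives three behaviours when fs.s3a.endpoint is unset. The sketch below illustrates the three configuration states from the caller's side; it is an illustration of the observable behaviour, not code from this patch:

import org.apache.hadoop.conf.Configuration;

public class RegionStates {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // 1. Region unset: getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION)
    //    falls back to "us-east-1", so the builder no longer halts
    //    on non-EC2 hosts.
    conf.unset("fs.s3a.endpoint.region");

    // 2. Region explicitly set: the value is passed straight to the
    //    client builder via setRegion().
    conf.set("fs.s3a.endpoint.region", "eu-west-1");

    // 3. Region blank (spaces trim to ""): the region is left unset
    //    and the SDK region resolution chain takes over; reliable on
    //    EC2, brittle elsewhere. A one-off warning is logged.
    conf.set("fs.s3a.endpoint.region", " ");
  }
}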

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java

Lines changed: 5 additions & 0 deletions
@@ -121,4 +121,9 @@ private InternalConstants() {
    */
   public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000;
 
+  /**
+   * The system property used by the AWS SDK to identify the region.
+   */
+  public static final String AWS_REGION_SYSPROP = "aws.region";
+
 }
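
The aws.region system property is one of the sources the AWS SDK's region resolution chain consults, which is why it matters to code (typically tests) that exercises the empty-string fallback. A brief sketch; the region value is only an example:

// Seed the SDK region provider chain before the client is created;
// equivalent to launching the JVM with -Daws.region=us-east-1.
System.setProperty("aws.region", "us-east-1");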

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 10 additions & 2 deletions
@@ -438,6 +438,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section he
 aws_session_token = ...
 aws_security_token = ...
 ```
+Note:
+
+1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string.
+1. For the credentials to be available to applications running in a Hadoop cluster, the
+   configuration files MUST be in the `~/.aws/` directory on the local filesystem in
+   all hosts in the cluster.
 
 ### <a name="auth_session"></a> Using Session Credentials with `TemporaryAWSCredentialsProvider`
 
@@ -802,8 +808,10 @@ options are covered in [Testing](./testing.md).
 <property>
   <name>fs.s3a.endpoint.region</name>
   <description>AWS S3 region for a bucket, which bypasses the parsing of
-  fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
-  while using privateLink URL and explicitly set the bucket region.
+    fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
+    while using privateLink URL and explicitly set the bucket region.
+    If set to a blank string (or 1+ space), falls back to the
+    (potentially brittle) SDK region resolution process.
   </description>
 </property>
 
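
Since fs.s3a.endpoint.region is an ordinary S3A option, it can also be set for a single bucket through S3A's per-bucket configuration mechanism (fs.s3a.bucket.NAME.option). A short sketch with hypothetical bucket and region values:

Configuration conf = new Configuration();
// Applies only when opening s3a://mybucket/; "mybucket" and
// "eu-west-1" are placeholders.
conf.set("fs.s3a.bucket.mybucket.endpoint.region", "eu-west-1");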
