HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (apache#3133)


This addresses the regression in Hadoop 3.3.1 where if no S3 endpoint
is set in fs.s3a.endpoint, S3A filesystem creation may fail on
non-EC2 deployments, depending on the local host environment setup.

* If fs.s3a.endpoint is empty/null, and fs.s3a.endpoint.region
  is null, the region is set to "us-east-1".
* If fs.s3a.endpoint.region is explicitly set to "" then the client
  falls back to the SDK region resolution chain; this works on EC2.
* Details in troubleshooting.md, including a workaround for Hadoop 3.3.1+
  (a configuration sketch follows below).
* Also contains some minor restructuring of troubleshooting.md

Contributed by Steve Loughran.
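
For illustration only (not part of the commit itself), a minimal sketch of the two configurations described in the bullets above, assuming the options are set in `core-site.xml`; the region value is just an example, and the blank-string alternative is shown commented out:

```xml
<!-- Explicitly pin the region used for the bucket (example value). -->
<property>
  <name>fs.s3a.endpoint.region</name>
  <value>eu-west-1</value>
</property>

<!-- Alternative: a blank value makes the client fall back to the
     SDK region resolution chain, which works when deployed in EC2. -->
<!--
<property>
  <name>fs.s3a.endpoint.region</name>
  <value> </value>
</property>
-->
```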
steveloughran authored and Kiran Kumar Maturi committed Nov 24, 2021
1 parent fb0a20b commit 38d276f
Showing 6 changed files with 526 additions and 264 deletions.
@@ -1087,4 +1087,10 @@ private Constants() {
*/
public static final String AWS_REGION = "fs.s3a.endpoint.region";

/**
* The special S3 region which can be used to talk to any bucket.
* Value {@value}.
*/
public static final String AWS_S3_CENTRAL_REGION = "us-east-1";

}
@@ -22,6 +22,7 @@
import java.net.URI;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.SdkClientException;
import com.amazonaws.client.builder.AwsClientBuilder;
import com.amazonaws.handlers.RequestHandler2;
import com.amazonaws.services.s3.AmazonS3;
@@ -41,10 +42,13 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector;
import org.apache.hadoop.fs.store.LogExactlyOnce;

import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION;
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION;
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING;
import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT;
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;

/**
* The default {@link S3ClientFactory} implementation.
@@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured
protected static final Logger LOG =
LoggerFactory.getLogger(DefaultS3ClientFactory.class);

/**
* A one-off warning of default region chains in use.
*/
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN =
new LogExactlyOnce(LOG);

/**
* Warning message printed when the SDK Region chain is in use.
*/
private static final String SDK_REGION_CHAIN_IN_USE =
"S3A filesystem client is using"
+ " the SDK region resolution chain.";

/**
* Create the client by preparing the AwsConf configuration
* and then invoking {@code buildAmazonS3Client()}.
@@ -94,9 +111,14 @@ public AmazonS3 createS3Client(
awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix());
}

return buildAmazonS3Client(
awsConf,
parameters);
try {
return buildAmazonS3Client(
awsConf,
parameters);
} catch (SdkClientException e) {
// SDK refused to build.
throw translateException("creating AWS S3 client", uri.toString(), e);
}
}

/**
@@ -109,6 +131,7 @@ public AmazonS3 createS3Client(
* @param awsConf AWS configuration
* @param parameters parameters
* @return new AmazonS3 client
* @throws SdkClientException if the configuration is invalid.
*/
protected AmazonS3 buildAmazonS3Client(
final ClientConfiguration awsConf,
@@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client(
// no idea what the endpoint is, so tell the SDK
// to work it out at the cost of an extra HEAD request
b.withForceGlobalBucketAccessEnabled(true);
// HADOOP-17771 force set the region so the build process doesn't halt.
String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION);
LOG.debug("fs.s3a.endpoint.region=\"{}\"", region);
if (!region.isEmpty()) {
// there's either an explicit region or we have fallen back
// to the central one.
LOG.debug("Using default endpoint; setting region to {}", region);
b.setRegion(region);
} else {
// no region.
// allow this if people really want it; it is OK to rely on this
// when deployed in EC2.
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE);
LOG.debug(SDK_REGION_CHAIN_IN_USE);
}
}
final AmazonS3 client = b.build();
return client;
@@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3,
createEndpointConfiguration(
final String endpoint, final ClientConfiguration awsConf,
String awsRegion) {
LOG.debug("Creating endpoint configuration for {}", endpoint);
LOG.debug("Creating endpoint configuration for \"{}\"", endpoint);
if (endpoint == null || endpoint.isEmpty()) {
// the default endpoint...we should be using null at this point.
LOG.debug("Using default endpoint -no need to generate a configuration");
@@ -121,4 +121,9 @@ private InternalConstants() {
*/
public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000;

/**
* The system property used by the AWS SDK to identify the region.
*/
public static final String AWS_REGION_SYSPROP = "aws.region";

}
@@ -438,6 +438,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section here
aws_session_token = ...
aws_security_token = ...
```
Note:

1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string.
1. For the credentials to be available to applications running in a Hadoop cluster, the
configuration files MUST be in the `~/.aws/` directory on the local filesystem in
all hosts in the cluster.
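
For example (an illustrative sketch, not text from the changed file), leaving `fs.s3a.endpoint.region` blank in the Hadoop configuration hands region lookup back to the SDK resolution chain, which can then pick up the `region` entry from the AWS profile shown above:

```xml
<property>
  <name>fs.s3a.endpoint.region</name>
  <!-- a blank/whitespace value triggers the SDK region resolution chain -->
  <value> </value>
</property>
```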

### <a name="auth_session"></a> Using Session Credentials with `TemporaryAWSCredentialsProvider`

@@ -802,8 +808,10 @@ options are covered in [Testing](./testing.md).
<property>
<name>fs.s3a.endpoint.region</name>
<description>AWS S3 region for a bucket, which bypasses the parsing of
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
while using privateLink URL and explicitly set the bucket region.
fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
while using privateLink URL and explicitly set the bucket region.
If set to a blank string (or 1+ space), falls back to the
(potentially brittle) SDK region resolution process.
</description>
</property>
