-
Notifications
You must be signed in to change notification settings - Fork 9.1k
HADOOP-18908. Improve S3A region handling. #6106
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ | |
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import software.amazon.awssdk.awscore.util.AwsHostNameUtils; | ||
import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; | ||
import software.amazon.awssdk.core.client.config.SdkAdvancedClientOption; | ||
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; | ||
|
@@ -48,6 +49,8 @@ | |
import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector; | ||
import org.apache.hadoop.fs.store.LogExactlyOnce; | ||
|
||
import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; | ||
import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; | ||
import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; | ||
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; | ||
import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; | ||
|
@@ -66,12 +69,27 @@ public class DefaultS3ClientFactory extends Configured | |
|
||
private static final String REQUESTER_PAYS_HEADER_VALUE = "requester"; | ||
|
||
private static final String S3_SERVICE_NAME = "s3"; | ||
|
||
/** | ||
* Subclasses refer to this. | ||
*/ | ||
protected static final Logger LOG = | ||
LoggerFactory.getLogger(DefaultS3ClientFactory.class); | ||
|
||
/** | ||
* A one-off warning of default region chains in use. | ||
*/ | ||
private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = | ||
new LogExactlyOnce(LOG); | ||
|
||
/** | ||
* Warning message printed when the SDK Region chain is in use. | ||
*/ | ||
private static final String SDK_REGION_CHAIN_IN_USE = | ||
"S3A filesystem client is using" | ||
+ " the SDK region resolution chain."; | ||
|
||
|
||
/** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ | ||
private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); | ||
|
@@ -138,15 +156,7 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build | |
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf, String bucket) | ||
throws IOException { | ||
|
||
Region region = parameters.getRegion(); | ||
LOG.debug("Using region {}", region); | ||
|
||
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); | ||
|
||
if (endpoint != null) { | ||
builder.endpointOverride(endpoint); | ||
LOG.debug("Using endpoint {}", endpoint); | ||
} | ||
configureEndpointAndRegion(builder, parameters, conf); | ||
|
||
S3Configuration serviceConfiguration = S3Configuration.builder() | ||
.pathStyleAccessEnabled(parameters.isPathStyleAccess()) | ||
|
@@ -155,7 +165,6 @@ private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> Build | |
return builder | ||
.overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) | ||
.credentialsProvider(parameters.getCredentialSet()) | ||
.region(region) | ||
.serviceConfiguration(serviceConfiguration); | ||
} | ||
|
||
|
@@ -201,6 +210,65 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration( | |
return clientOverrideConfigBuilder.build(); | ||
} | ||
|
||
/** | ||
* This method configures the endpoint and region for a S3 client. | ||
* The order of configuration is: | ||
* | ||
* <ol> | ||
* <li>If region is configured via fs.s3a.endpoint.region, use it.</li> | ||
* <li>If endpoint is configured via via fs.s3a.endpoint, set it. | ||
* If no region is configured, try to parse region from endpoint. </li> | ||
* <li> If no region is configured, and it could not be parsed from the endpoint, | ||
* set the default region as US_EAST_2 and enable cross region access. </li> | ||
* <li> If configured region is empty, fallback to SDK resolution chain. </li> | ||
* </ol> | ||
* | ||
* @param builder S3 client builder. | ||
* @param parameters parameter object | ||
* @param conf conf configuration object | ||
* @param <BuilderT> S3 client builder type | ||
* @param <ClientT> S3 client type | ||
*/ | ||
private <BuilderT extends S3BaseClientBuilder<BuilderT, ClientT>, ClientT> void configureEndpointAndRegion( | ||
BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) { | ||
URI endpoint = getS3Endpoint(parameters.getEndpoint(), conf); | ||
|
||
String configuredRegion = parameters.getRegion(); | ||
Region region = null; | ||
|
||
// If the region was configured, set it. | ||
if (configuredRegion != null && !configuredRegion.isEmpty()) { | ||
region = Region.of(configuredRegion); | ||
} | ||
|
||
if (endpoint != null) { | ||
builder.endpointOverride(endpoint); | ||
// No region was configured, try to determine it from the endpoint. | ||
if (region == null) { | ||
region = getS3RegionFromEndpoint(parameters.getEndpoint()); | ||
} | ||
LOG.debug("Setting endpoint to {}", endpoint); | ||
} | ||
|
||
if (region != null) { | ||
builder.region(region); | ||
} else if (configuredRegion == null) { | ||
// no region is configured, and none could be determined from the endpoint. | ||
// Use US_EAST_2 as default. | ||
region = Region.of(AWS_S3_DEFAULT_REGION); | ||
builder.crossRegionAccessEnabled(true); | ||
builder.region(region); | ||
} else if (configuredRegion.isEmpty()) { | ||
// region configuration was set to empty string. | ||
// allow this if people really want it; it is OK to rely on this | ||
// when deployed in EC2. | ||
WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE); | ||
LOG.debug(SDK_REGION_CHAIN_IN_USE); | ||
} | ||
|
||
LOG.debug("Setting region to {}", region); | ||
} | ||
|
||
/** | ||
* Given a endpoint string, create the endpoint URI. | ||
* | ||
|
@@ -229,4 +297,23 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { | |
throw new IllegalArgumentException(e); | ||
} | ||
} | ||
|
||
/** | ||
* Parses the endpoint to get the region. | ||
* If endpoint is the central one, use US_EAST_1. | ||
* | ||
* @param endpoint the configure endpoint. | ||
* @return the S3 region, null if unable to resolve from endpoint. | ||
*/ | ||
private static Region getS3RegionFromEndpoint(String endpoint) { | ||
|
||
if(!endpoint.endsWith(CENTRAL_ENDPOINT)) { | ||
LOG.debug("Endpoint {} is not the default; parsing", endpoint); | ||
return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. weve had so much trouble in the past here with vpce, govcloud, cn that we need a set of unit tests to verify this code does what we actually want -and therea are no regressions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. turns out this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. getting pressure to merge this as without it then its a major regression. also, we need a "what is a region" section, or maybe "all about signing, endpoints and regions" |
||
} | ||
|
||
// endpoint is for US_EAST_1; | ||
return Region.US_EAST_1; | ||
} | ||
|
||
} |
Uh oh!
There was an error while loading. Please reload this page.