diff --git a/cpp/velox/compute/VeloxInitializer.cc b/cpp/velox/compute/VeloxInitializer.cc index d941ea6899bd..5485f8f3c04b 100644 --- a/cpp/velox/compute/VeloxInitializer.cc +++ b/cpp/velox/compute/VeloxInitializer.cc @@ -171,9 +171,13 @@ void VeloxInitializer::init(const std::unordered_map& {"hive.s3.aws-secret-key", awsSecretKey}, }); } - + // Only need to set s3 endpoint when not using instance credentials. + if (useInstanceCredentials != "true") { + s3Config.insert({ + {"hive.s3.endpoint", awsEndpoint}, + }); + } s3Config.insert({ - {"hive.s3.endpoint", awsEndpoint}, {"hive.s3.ssl.enabled", sslEnabled}, {"hive.s3.path-style-access", pathStyleAccess}, }); diff --git a/docs/get-started/VeloxS3.md b/docs/get-started/VeloxS3.md index 86d5382306cb..1a300c4cf2b0 100644 --- a/docs/get-started/VeloxS3.md +++ b/docs/get-started/VeloxS3.md @@ -31,6 +31,7 @@ S3 also provides other methods for accessing, you can also use instance credenti ``` spark.hadoop.fs.s3a.use.instance.credentials true ``` +Note that in this case, "spark.hadoop.fs.s3a.endpoint" won't take effect as Gluten will use the endpoint set during instance creation. ## Configuring S3 IAM roles You can also use iam role credentials by setting the following configurations. Instance credentials have higher priority than iam credentials. @@ -60,4 +61,4 @@ spark.gluten.sql.columnar.backend.velox.ssdCacheIOThreads // the IO threads for spark.gluten.sql.columnar.backend.velox.ssdODirect // enbale or disable O_DIRECT on cache write, default false. ``` -It's recommended to mount SSDs to the cache path to get the best performance of local caching. On the start up of Spark context, the cache files will be allocated under "spark.gluten.sql.columnar.backend.velox.cachePath", with UUID based suffix, e.g. "/tmp/cache.13e8ab65-3af4-46ac-8d28-ff99b2a9ec9b0". Gluten is not able to reuse older caches for now, and the old cache files are left there after Spark context shutdown. 
\ No newline at end of file +It's recommended to mount SSDs to the cache path to get the best performance of local caching. On the start up of Spark context, the cache files will be allocated under "spark.gluten.sql.columnar.backend.velox.cachePath", with UUID based suffix, e.g. "/tmp/cache.13e8ab65-3af4-46ac-8d28-ff99b2a9ec9b0". Gluten is not able to reuse older caches for now, and the old cache files are left there after Spark context shutdown.