Skip to content

Commit

Permalink
Remove config property hive.gcs.scheme (#11235)
Browse files Browse the repository at this point in the history
Summary:
The GCS adapter expects two configs `hive.gcs.scheme` value for the scheme and `hive.gcs.endpoint` value without the scheme.
However, in practice, the endpoint value contains the scheme. We can remove `hive.gcs.scheme` to be consistent.
See Trino for example https://trino.io/docs/current/admin/fault-tolerant-execution.html#google-cloud-storage

Pull Request resolved: #11235

Reviewed By: bikramSingh91

Differential Revision: D64335881

Pulled By: kgpai

fbshipit-source-id: 47db7ed37e562ca14f5ff2dafa45f9b0813d63da
  • Loading branch information
majetideepak authored and facebook-github-bot committed Oct 17, 2024
1 parent ad510db commit 9d4cfb2
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 26 deletions.
4 changes: 0 additions & 4 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,6 @@ std::string HiveConfig::gcsEndpoint() const {
return config_->get<std::string>(kGCSEndpoint, std::string(""));
}

std::string HiveConfig::gcsScheme() const {
return config_->get<std::string>(kGCSScheme, std::string("https"));
}

std::string HiveConfig::gcsCredentialsPath() const {
return config_->get<std::string>(kGCSCredentialsPath, std::string(""));
}
Expand Down
5 changes: 0 additions & 5 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,9 +107,6 @@ class HiveConfig {
/// The GCS storage endpoint server.
static constexpr const char* kGCSEndpoint = "hive.gcs.endpoint";

/// The GCS storage scheme, https for default credentials.
static constexpr const char* kGCSScheme = "hive.gcs.scheme";

/// The GCS service account configuration JSON key file.
static constexpr const char* kGCSCredentialsPath =
"hive.gcs.json-key-file-path";
Expand Down Expand Up @@ -296,8 +293,6 @@ class HiveConfig {

std::string gcsEndpoint() const;

std::string gcsScheme() const;

std::string gcsCredentialsPath() const;

std::optional<int> gcsMaxRetryCount() const;
Expand Down
23 changes: 14 additions & 9 deletions velox/connectors/hive/storage_adapters/gcs/GCSFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,23 @@ class GCSFileSystem::Impl {

// Use the input Config parameters and initialize the GCSClient.
void initializeClient() {
constexpr std::string_view kHttpsScheme{"https://"};
auto options = gc::Options{};
auto scheme = hiveConfig_->gcsScheme();
if (scheme == "https") {
auto endpointOverride = hiveConfig_->gcsEndpoint();
// Use secure credentials by default.
if (!endpointOverride.empty()) {
options.set<gcs::RestEndpointOption>(endpointOverride);
// Use Google default credentials if endpoint has https scheme.
if (endpointOverride.find(kHttpsScheme) == 0) {
options.set<gc::UnifiedCredentialsOption>(
gc::MakeGoogleDefaultCredentials());
} else {
options.set<gc::UnifiedCredentialsOption>(
gc::MakeInsecureCredentials());
}
} else {
options.set<gc::UnifiedCredentialsOption>(
gc::MakeGoogleDefaultCredentials());
} else {
options.set<gc::UnifiedCredentialsOption>(gc::MakeInsecureCredentials());
}
options.set<gcs::UploadBufferSizeOption>(kUploadBufferSize);

Expand All @@ -290,11 +300,6 @@ class GCSFileSystem::Impl {
gcs::LimitedTimeRetryPolicy(retry_time).clone());
}

auto endpointOverride = hiveConfig_->gcsEndpoint();
if (!endpointOverride.empty()) {
options.set<gcs::RestEndpointOption>(scheme + "://" + endpointOverride);
}

auto credFile = hiveConfig_->gcsCredentialsPath();
if (!credFile.empty() && std::filesystem::exists(credFile)) {
std::ifstream jsonFile(credFile, std::ios::in);
Expand Down
4 changes: 0 additions & 4 deletions velox/connectors/hive/tests/HiveConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ TEST(HiveConfigTest, defaultConfig) {
ASSERT_EQ(hiveConfig.s3IAMRole(), std::nullopt);
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "");
ASSERT_EQ(hiveConfig.gcsScheme(), "https");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), false);
ASSERT_EQ(
Expand Down Expand Up @@ -96,7 +95,6 @@ TEST(HiveConfigTest, overrideConfig) {
{HiveConfig::kS3IamRole, "hello"},
{HiveConfig::kS3IamRoleSessionName, "velox"},
{HiveConfig::kGCSEndpoint, "hey"},
{HiveConfig::kGCSScheme, "http"},
{HiveConfig::kGCSCredentialsPath, "hey"},
{HiveConfig::kOrcUseColumnNames, "true"},
{HiveConfig::kFileColumnNamesReadAsLowerCase, "true"},
Expand Down Expand Up @@ -136,7 +134,6 @@ TEST(HiveConfigTest, overrideConfig) {
ASSERT_EQ(hiveConfig.s3IAMRole(), std::optional("hello"));
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "hey");
ASSERT_EQ(hiveConfig.gcsScheme(), "http");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "hey");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(emptySession.get()), true);
ASSERT_EQ(
Expand Down Expand Up @@ -211,7 +208,6 @@ TEST(HiveConfigTest, overrideSession) {
ASSERT_EQ(hiveConfig.s3IAMRole(), std::nullopt);
ASSERT_EQ(hiveConfig.s3IAMRoleSessionName(), "velox-session");
ASSERT_EQ(hiveConfig.gcsEndpoint(), "");
ASSERT_EQ(hiveConfig.gcsScheme(), "https");
ASSERT_EQ(hiveConfig.gcsCredentialsPath(), "");
ASSERT_EQ(hiveConfig.isOrcUseColumnNames(session.get()), true);
ASSERT_EQ(hiveConfig.isFileColumnNamesReadAsLowerCase(session.get()), true);
Expand Down
4 changes: 0 additions & 4 deletions velox/docs/configs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -638,10 +638,6 @@ Each query can override the config by setting corresponding query session proper
- string
-
- The GCS storage endpoint server.
* - hive.gcs.scheme
- string
-
- The GCS storage scheme, https for default credentials.
* - hive.gcs.json-key-file-path
- string
-
Expand Down

0 comments on commit 9d4cfb2

Please sign in to comment.