Skip to content

Commit acfab9b

Browse files
authored
Fix connection error handling in database resolver (#2990)
1 parent 17b15df commit acfab9b

File tree

4 files changed

+127
-54
lines changed

4 files changed

+127
-54
lines changed

ydb/core/fq/libs/actors/database_resolver.cpp

Lines changed: 32 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -155,12 +155,19 @@ class TResponseProcessor : public TActorBootstrapped<TResponseProcessor>
155155
NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev,
156156
const TRequestMap::const_iterator& requestIter,
157157
TMaybe<TDatabaseDescription>& result)
158-
{
158+
{
159159
TString errorMessage;
160-
if (ev->Get()->Error.empty() && (ev->Get()->Response && ev->Get()->Response->Status == "200")) {
161-
errorMessage = HandleSuccessfulResponse(ev, requestIter, result);
160+
161+
if (requestIter == Requests.end()) {
162+
// Requests are guaranteed to be kept in within TResponseProcessor until the response arrives.
163+
// If there is no appropriate request, it's a fatal error.
164+
errorMessage = "Invariant violation: unknown request";
162165
} else {
163-
errorMessage = HandleFailedResponse(ev, requestIter);
166+
if (ev->Get()->Error.empty() && (ev->Get()->Response && ev->Get()->Response->Status == "200")) {
167+
errorMessage = HandleSuccessfulResponse(ev, *requestIter, result);
168+
} else {
169+
errorMessage = HandleFailedResponse(ev, *requestIter);
170+
}
164171
}
165172

166173
if (errorMessage) {
@@ -185,17 +192,13 @@ class TResponseProcessor : public TActorBootstrapped<TResponseProcessor>
185192

186193
TString HandleSuccessfulResponse(
187194
NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev,
188-
const TRequestMap::const_iterator& requestIter,
195+
const TRequestMap::value_type& requestWithParams,
189196
TMaybe<TDatabaseDescription>& result
190197
) {
191-
if (requestIter == Requests.end()) {
192-
return "unknown request";
193-
}
194-
195198
NJson::TJsonReaderConfig jsonConfig;
196199
NJson::TJsonValue databaseInfo;
197200

198-
const auto& params = requestIter->second;
201+
const auto& params = requestWithParams.second;
199202
const bool parseJsonOk = NJson::ReadJsonTree(ev->Get()->Response->Body, &jsonConfig, &databaseInfo);
200203
TParsers::const_iterator parserIt;
201204
if (parseJsonOk && (parserIt = Parsers.find(params.DatabaseType)) != Parsers.end()) {
@@ -226,37 +229,37 @@ class TResponseProcessor : public TActorBootstrapped<TResponseProcessor>
226229

227230
TString HandleFailedResponse(
228231
NHttp::TEvHttpProxy::TEvHttpIncomingResponse::TPtr& ev,
229-
const TRequestMap::const_iterator& requestIter
232+
const TRequestMap::value_type& requestWithParams
230233
) const {
231-
if (requestIter == Requests.end()) {
232-
return "unknown request";
233-
}
234+
auto sb = TStringBuilder()
235+
<< "Error while trying to resolve managed " << ToString(requestWithParams.second.DatabaseType)
236+
<< " database with id " << requestWithParams.second.Id << " via HTTP request to"
237+
<< ": endpoint '" << requestWithParams.first->Host << "'"
238+
<< ", url '" << requestWithParams.first->URL << "'"
239+
<< ": ";
240+
241+
// Handle network error (when the response is empty)
242+
if (!ev->Get()->Response) {
243+
return sb << ev->Get()->Error;
244+
}
234245

246+
// Handle unauthenticated error
235247
const auto& status = ev->Get()->Response->Status;
236-
237248
if (status == "403") {
238-
return TStringBuilder() << "You have no permission to resolve database id into database endpoint. " + DetailedPermissionsError(requestIter->second);
239-
}
240-
241-
auto errorMessage = ev->Get()->Error;
242-
243-
const TString error = TStringBuilder()
244-
<< "Cannot resolve database id (status = " << status << "). "
245-
<< "Response body from " << ev->Get()->Request->URL << ": " << (ev->Get()->Response ? ev->Get()->Response->Body : "empty");
246-
if (!errorMessage.empty()) {
247-
errorMessage += '\n';
249+
return sb << "you have no permission to resolve database id into database endpoint." + DetailedPermissionsError(requestWithParams.second);
248250
}
249-
errorMessage += error;
250251

251-
return errorMessage;
252+
// Unexpected error. Add response body for debug
253+
return sb << Endl
254+
<< "Status: " << status << Endl
255+
<< "Response body: " << ev->Get()->Response->Body;
252256
}
253257

254258

255259
TString DetailedPermissionsError(const TResolveParams& params) const {
256-
257260
if (params.DatabaseType == EDatabaseType::ClickHouse || params.DatabaseType == EDatabaseType::PostgreSQL) {
258261
auto mdbTypeStr = NYql::DatabaseTypeLowercase(params.DatabaseType);
259-
return TStringBuilder() << "Please check that your service account has role " <<
262+
return TStringBuilder() << " Please check that your service account has role " <<
260263
"`managed-" << mdbTypeStr << ".viewer`.";
261264
}
262265
return {};

ydb/core/fq/libs/actors/ut/database_resolver_ut.cpp

Lines changed: 81 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,22 @@ namespace {
1515
using namespace NKikimr;
1616
using namespace NFq;
1717

18-
TString NoPermissionStr = "You have no permission to resolve database id into database endpoint. ";
18+
TString MakeErrorPrefix(
19+
const TString& host,
20+
const TString& url,
21+
const TString& databaseId,
22+
const NYql::EDatabaseType& databaseType) {
23+
TStringBuilder ss;
24+
25+
return TStringBuilder()
26+
<< "Error while trying to resolve managed " << ToString(databaseType)
27+
<< " database with id " << databaseId << " via HTTP request to"
28+
<< ": endpoint '" << host << "'"
29+
<< ", url '" << url << "'"
30+
<< ": ";
31+
}
32+
33+
TString NoPermissionStr = "you have no permission to resolve database id into database endpoint.";
1934

2035
struct TTestBootstrap : public TTestActorRuntime {
2136
NConfig::TCheckpointCoordinatorConfig Settings;
@@ -114,7 +129,9 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
114129
const TString& status,
115130
const TString& responseBody,
116131
const NYql::TDatabaseResolverResponse::TDatabaseDescription& description,
117-
const NYql::TIssues& issues)
132+
const NYql::TIssues& issues,
133+
const TString& error = ""
134+
)
118135
{
119136
TTestBootstrap bootstrap;
120137

@@ -132,7 +149,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
132149
NYql::IDatabaseAsyncResolver::TDatabaseAuthMap(
133150
{std::make_pair(requestIdAndDatabaseType, databaseAuth)}),
134151
TString("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod"),
135-
TString("mdbGateway"),
152+
TString("https://mdb.api.cloud.yandex.net:443"),
136153
TString("traceId"),
137154
NFq::MakeMdbEndpointGeneratorGeneric(true))));
138155

@@ -145,14 +162,17 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
145162

146163
bootstrap.WaitForBootstrap();
147164

148-
auto response = std::make_unique<NHttp::THttpIncomingResponse>(nullptr);
149-
response->Status = status;
150-
response->Body = responseBody;
165+
std::unique_ptr<NHttp::THttpIncomingResponse> httpIncomingResponse;
166+
if (!error) {
167+
httpIncomingResponse = std::make_unique<NHttp::THttpIncomingResponse>(nullptr);
168+
httpIncomingResponse->Status = status;
169+
httpIncomingResponse->Body = responseBody;
170+
}
151171

152172
bootstrap.Send(new IEventHandle(
153173
processorActorId,
154174
bootstrap.HttpProxy,
155-
new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest->Request, response.release(), "")));
175+
new NHttp::TEvHttpProxy::TEvHttpIncomingResponse(httpOutgoingRequest->Request, httpIncomingResponse.release(), error)));
156176

157177
NYql::TDatabaseResolverResponse::TDatabaseDescriptionMap result;
158178
if (status == "200") {
@@ -184,6 +204,36 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
184204
);
185205
}
186206

207+
Y_UNIT_TEST(Ydb_Serverless_Timeout) {
208+
NYql::TIssues issues{
209+
NYql::TIssue(
210+
TStringBuilder{} << MakeErrorPrefix(
211+
"ydbc.ydb.cloud.yandex.net:8789",
212+
"/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh",
213+
"etn021us5r9rhld1vgbh",
214+
NYql::EDatabaseType::Ydb
215+
) << "Connection timeout"
216+
)
217+
};
218+
219+
Test(
220+
NYql::EDatabaseType::Ydb,
221+
NYql::NConnector::NApi::EProtocol::PROTOCOL_UNSPECIFIED,
222+
"https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh",
223+
"",
224+
"",
225+
NYql::TDatabaseResolverResponse::TDatabaseDescription{
226+
TString{"ydb.serverless.yandexcloud.net:2135"},
227+
TString{"ydb.serverless.yandexcloud.net"},
228+
2135,
229+
TString("/ru-central1/b1g7jdjqd07qg43c4fmp/etn021us5r9rhld1vgbh"),
230+
true
231+
},
232+
issues,
233+
"Connection timeout"
234+
);
235+
}
236+
187237
Y_UNIT_TEST(DataStreams_Serverless) {
188238
Test(
189239
NYql::EDatabaseType::DataStreams,
@@ -298,7 +348,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
298348
Y_UNIT_TEST(ClickHouse_PermissionDenied) {
299349
NYql::TIssues issues{
300350
NYql::TIssue(
301-
TStringBuilder{} << NoPermissionStr << "Please check that your service account has role `managed-clickhouse.viewer`."
351+
TStringBuilder{} << MakeErrorPrefix(
352+
"mdb.api.cloud.yandex.net:443",
353+
"/managed-clickhouse/v1/clusters/etn021us5r9rhld1vgbh/hosts",
354+
"etn021us5r9rhld1vgbh",
355+
NYql::EDatabaseType::ClickHouse
356+
) << NoPermissionStr << " Please check that your service account has role `managed-clickhouse.viewer`."
302357
)
303358
};
304359

@@ -366,7 +421,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
366421
Y_UNIT_TEST(PostgreSQL_PermissionDenied) {
367422
NYql::TIssues issues{
368423
NYql::TIssue(
369-
TStringBuilder{} << NoPermissionStr << "Please check that your service account has role `managed-postgresql.viewer`."
424+
TStringBuilder{} << MakeErrorPrefix(
425+
"mdb.api.cloud.yandex.net:443",
426+
"/managed-postgresql/v1/clusters/etn021us5r9rhld1vgbh/hosts",
427+
"etn021us5r9rhld1vgbh",
428+
NYql::EDatabaseType::PostgreSQL
429+
) << NoPermissionStr << " Please check that your service account has role `managed-postgresql.viewer`."
370430
)
371431
};
372432

@@ -396,7 +456,12 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
396456
Y_UNIT_TEST(DataStreams_PermissionDenied) {
397457
NYql::TIssues issues{
398458
NYql::TIssue(
399-
NoPermissionStr
459+
TStringBuilder{} << MakeErrorPrefix(
460+
"ydbc.ydb.cloud.yandex.net:8789",
461+
"/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgbh",
462+
"etn021us5r9rhld1vgbh",
463+
NYql::EDatabaseType::DataStreams
464+
) << NoPermissionStr
400465
)
401466
};
402467
Test(
@@ -434,7 +499,7 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
434499
std::make_pair(requestIdAndDatabaseType1, databaseAuth),
435500
std::make_pair(requestIdAndDatabaseType2, databaseAuth)}),
436501
TString("https://ydbc.ydb.cloud.yandex.net:8789/ydbc/cloud-prod"),
437-
TString("mdbGateway"),
502+
TString("https://mdb.api.cloud.yandex.net:443"),
438503
TString("traceId"),
439504
NFq::MakeMdbEndpointGeneratorGeneric(true))));
440505

@@ -481,7 +546,11 @@ Y_UNIT_TEST_SUITE(TDatabaseResolverTests) {
481546

482547
NYql::TIssues issues{
483548
NYql::TIssue(
484-
TStringBuilder{} << "Cannot resolve database id (status = 404). Response body from /ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb1: {\"message\":\"Database not found\"}"
549+
TStringBuilder() << MakeErrorPrefix(
550+
"ydbc.ydb.cloud.yandex.net:8789",
551+
"/ydbc/cloud-prod/database?databaseId=etn021us5r9rhld1vgb1",
552+
"etn021us5r9rhld1vgb1",
553+
NYql::EDatabaseType::DataStreams)<< "\nStatus: 404\nResponse body: {\"message\":\"Database not found\"}"
485554
)
486555
};
487556

ydb/library/yql/providers/generic/provider/yql_generic_provider.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ namespace NYql {
2121
Y_UNUSED(randomProvider);
2222
Y_UNUSED(progressWriter);
2323
Y_UNUSED(operationOptions);
24+
Y_ENSURE(credentialsFactory);
2425

2526
auto state = MakeIntrusive<TGenericState>(
2627
typeCtx.Get(),

ydb/library/yql/tools/dqrun/dqrun.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,8 @@ struct TActorIds {
276276
std::tuple<std::unique_ptr<TActorSystemManager>, TActorIds> RunActorSystem(
277277
const TGatewaysConfig& gatewaysConfig,
278278
IMetricsRegistryPtr& metricsRegistry,
279-
NYql::NLog::ELevel loggingLevel
279+
NYql::NLog::ELevel loggingLevel,
280+
ISecuredServiceAccountCredentialsFactory::TPtr& credentialsFactory
280281
) {
281282
auto actorSystemManager = std::make_unique<TActorSystemManager>(metricsRegistry, YqlToActorsLogLevel(loggingLevel));
282283
TActorIds actorIds;
@@ -297,7 +298,7 @@ std::tuple<std::unique_ptr<TActorSystemManager>, TActorIds> RunActorSystem(
297298
auto httpProxy = NHttp::CreateHttpProxy();
298299
actorIds.HttpProxy = actorSystemManager->GetActorSystem()->Register(httpProxy);
299300

300-
auto databaseResolver = NFq::CreateDatabaseResolver(actorIds.HttpProxy, nullptr);
301+
auto databaseResolver = NFq::CreateDatabaseResolver(actorIds.HttpProxy, credentialsFactory);
301302
actorIds.DatabaseResolver = actorSystemManager->GetActorSystem()->Register(databaseResolver);
302303
}
303304

@@ -760,12 +761,21 @@ int RunMain(int argc, const char* argv[])
760761
dataProvidersInit.push_back(GetYtNativeDataProviderInitializer(ytNativeGateway));
761762
}
762763

764+
ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory;
765+
766+
if (tokenAccessorEndpoint) {
767+
TVector<TString> ss = StringSplitter(tokenAccessorEndpoint).SplitByString("://");
768+
YQL_ENSURE(ss.size() == 2, "Invalid tokenAccessorEndpoint: " << tokenAccessorEndpoint);
769+
770+
credentialsFactory = NYql::CreateSecuredServiceAccountCredentialsOverTokenAccessorFactory(ss[1], ss[0] == "grpcs", "");
771+
}
772+
763773
auto dqCompFactory = NMiniKQL::GetCompositeWithBuiltinFactory(factories);
764774

765775
// Actor system starts here and will be automatically destroyed when goes out of the scope.
766776
std::unique_ptr<TActorSystemManager> actorSystemManager;
767777
TActorIds actorIds;
768-
std::tie(actorSystemManager, actorIds) = RunActorSystem(gatewaysConfig, metricsRegistry, loggingLevel);
778+
std::tie(actorSystemManager, actorIds) = RunActorSystem(gatewaysConfig, metricsRegistry, loggingLevel, credentialsFactory);
769779

770780
IHTTPGateway::TPtr httpGateway;
771781
if (gatewaysConfig.HasClickHouse()) {
@@ -789,16 +799,6 @@ int RunMain(int argc, const char* argv[])
789799
);
790800
}
791801

792-
ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory;
793-
794-
if (tokenAccessorEndpoint) {
795-
TVector<TString> ss = StringSplitter(tokenAccessorEndpoint).SplitByString("://");
796-
YQL_ENSURE(ss.size() == 2, "Invalid tokenAccessorEndpoint: " << tokenAccessorEndpoint);
797-
798-
credentialsFactory = NYql::CreateSecuredServiceAccountCredentialsOverTokenAccessorFactory(ss[1], ss[0] == "grpcs", "");
799-
}
800-
801-
802802
NConnector::IClient::TPtr genericClient;
803803
if (gatewaysConfig.HasGeneric()) {
804804
for (auto& cluster : *gatewaysConfig.MutableGeneric()->MutableClusterMapping()) {

0 commit comments

Comments
 (0)