Skip to content

Commit 47715fe

Browse files
authored
Merge ff57f91 into 38137f4
2 parents 38137f4 + ff57f91 commit 47715fe

File tree

2 files changed

+44
-35
lines changed

2 files changed

+44
-35
lines changed

ydb/library/yql/providers/generic/actors/yql_generic_base_actor.h

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,6 @@ namespace NYql::NDq {
9191
};
9292

9393
struct TEvRetry: NActors::TEventLocal<TEvRetry, EvRetry> {
94-
explicit TEvRetry(ui32 nextRetries)
95-
: NextRetries(nextRetries)
96-
{
97-
}
98-
99-
ui32 NextRetries;
10094
};
10195

10296
protected: // TODO move common logic here

ydb/library/yql/providers/generic/actors/yql_generic_lookup_actor.cpp

Lines changed: 44 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
#include <yql/essentials/utils/yql_panic.h>
2424
#include <ydb/core/formats/arrow/serializer/abstract.h>
2525

26+
#include <library/cpp/retry/retry_policy.h>
27+
2628
namespace NYql::NDq {
2729

2830
using namespace NActors;
@@ -61,6 +63,13 @@ namespace NYql::NDq {
6163
public TGenericBaseActor<TGenericLookupActor> {
6264
using TBase = TGenericBaseActor<TGenericLookupActor>;
6365

66+
using ILookupRetryPolicy = IRetryPolicy<const NYdbGrpc::TGrpcStatus&>;
67+
using ILookupRetryState = ILookupRetryPolicy::IRetryState;
68+
69+
struct TEvLookupRetry : NActors::TEventLocal<TEvLookupRetry, EvRetry> {
70+
typedef typename THandle::TPtr TPtr;
71+
};
72+
6473
public:
6574
TGenericLookupActor(
6675
NConnector::IClient::TPtr connectorClient,
@@ -87,6 +96,22 @@ namespace NYql::NDq {
8796
, HolderFactory(holderFactory)
8897
, ColumnDestinations(CreateColumnDestination())
8998
, MaxKeysInRequest(maxKeysInRequest)
99+
, RetryPolicy(
100+
ILookupRetryPolicy::GetExponentialBackoffPolicy(
101+
/* retryClassFunction */
102+
[](const NYdbGrpc::TGrpcStatus& status) {
103+
if (NConnector::GrpcStatusNeedsRetry(status))
104+
return ERetryErrorClass::ShortRetry;
105+
if (status.GRpcStatusCode == grpc::DEADLINE_EXCEEDED)
106+
return ERetryErrorClass::ShortRetry; // TODO LongRetry?
107+
return ERetryErrorClass::NoRetry;
108+
},
109+
/* minDelay */ TDuration::MilliSeconds(1),
110+
/* minLongRetryDelay */ TDuration::MilliSeconds(500),
111+
/* maxDelay */ TDuration::Seconds(1),
112+
/* maxRetries */ RequestRetriesLimit,
113+
/* maxTime */ TDuration::Minutes(5),
114+
/* scaleFactor */ 2))
90115
{
91116
InitMonCounters(taskCounters);
92117
}
@@ -157,7 +182,7 @@ namespace NYql::NDq {
157182
hFunc(TEvReadSplitsPart, Handle);
158183
hFunc(TEvReadSplitsFinished, Handle);
159184
hFunc(TEvError, Handle);
160-
hFunc(TEvRetry, Handle);
185+
hFunc(TEvLookupRetry, Handle);
161186
hFunc(NActors::TEvents::TEvPoison, Handle);)
162187

163188
void Handle(TEvListSplitsIterator::TPtr ev) {
@@ -166,7 +191,7 @@ namespace NYql::NDq {
166191
[
167192
actorSystem = TActivationContext::ActorSystem(),
168193
selfId = SelfId(),
169-
retriesRemaining = RetriesRemaining
194+
retryState = RetryState
170195
](const NConnector::TAsyncResult<NConnector::NApi::TListSplitsResponse>& asyncResult) {
171196
YQL_CLOG(DEBUG, ProviderGeneric) << "ActorId=" << selfId << " Got TListSplitsResponse from Connector";
172197
auto result = ExtractFromConstFuture(asyncResult);
@@ -175,7 +200,7 @@ namespace NYql::NDq {
175200
auto ev = new TEvListSplitsPart(std::move(*result.Response));
176201
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
177202
} else {
178-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
203+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
179204
}
180205
});
181206
}
@@ -199,15 +224,15 @@ namespace NYql::NDq {
199224
Connector->ReadSplits(readRequest, RequestTimeout).Subscribe([
200225
actorSystem = TActivationContext::ActorSystem(),
201226
selfId = SelfId(),
202-
retriesRemaining = RetriesRemaining
227+
retryState = RetryState
203228
](const NConnector::TReadSplitsStreamIteratorAsyncResult& asyncResult) {
204229
YQL_CLOG(DEBUG, ProviderGeneric) << "ActorId=" << selfId << " Got ReadSplitsStreamIterator from Connector";
205230
auto result = ExtractFromConstFuture(asyncResult);
206231
if (result.Status.Ok()) {
207232
auto ev = new TEvReadSplitsIterator(std::move(result.Iterator));
208233
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
209234
} else {
210-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
235+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
211236
}
212237
});
213238
}
@@ -236,9 +261,8 @@ namespace NYql::NDq {
236261
actorSystem->Send(new NActors::IEventHandle(ParentId, SelfId(), errEv.release()));
237262
}
238263

239-
void Handle(TEvRetry::TPtr ev) {
264+
void Handle(TEvLookupRetry::TPtr) {
240265
auto guard = Guard(*Alloc);
241-
RetriesRemaining = ev->Get()->NextRetries;
242266
SendRequest();
243267
}
244268

@@ -270,7 +294,7 @@ namespace NYql::NDq {
270294
}
271295

272296
Request = std::move(request);
273-
RetriesRemaining = RequestRetriesLimit;
297+
RetryState = std::shared_ptr<ILookupRetryState>(RetryPolicy->CreateRetryState().release());
274298
SendRequest();
275299
}
276300

@@ -288,7 +312,7 @@ namespace NYql::NDq {
288312
Connector->ListSplits(splitRequest, RequestTimeout).Subscribe([
289313
actorSystem = TActivationContext::ActorSystem(),
290314
selfId = SelfId(),
291-
retriesRemaining = RetriesRemaining
315+
retryState = RetryState
292316
](const NConnector::TListSplitsStreamIteratorAsyncResult& asyncResult) {
293317
auto result = ExtractFromConstFuture(asyncResult);
294318
if (result.Status.Ok()) {
@@ -297,7 +321,7 @@ namespace NYql::NDq {
297321
auto ev = new TEvListSplitsIterator(std::move(result.Iterator));
298322
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
299323
} else {
300-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
324+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
301325
}
302326
});
303327
if (CpuTime) {
@@ -310,7 +334,7 @@ namespace NYql::NDq {
310334
[
311335
actorSystem = TActivationContext::ActorSystem(),
312336
selfId = SelfId(),
313-
retriesRemaining = RetriesRemaining
337+
retryState = RetryState
314338
](const NConnector::TAsyncResult<NConnector::NApi::TReadSplitsResponse>& asyncResult) {
315339
auto result = ExtractFromConstFuture(asyncResult);
316340
if (result.Status.Ok()) {
@@ -329,7 +353,7 @@ namespace NYql::NDq {
329353
auto ev = new TEvReadSplitsFinished(std::move(result.Status));
330354
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
331355
} else {
332-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
356+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
333357
}
334358
});
335359
}
@@ -395,22 +419,12 @@ namespace NYql::NDq {
395419
new TEvError(std::move(error)));
396420
}
397421

398-
static void SendRetryOrError(NActors::TActorSystem* actorSystem, const NActors::TActorId& selfId, const NYdbGrpc::TGrpcStatus& status, ui32 retriesRemaining) {
399-
if (NConnector::GrpcStatusNeedsRetry(status) || status.GRpcStatusCode == grpc::DEADLINE_EXCEEDED) {
400-
if (retriesRemaining) {
401-
const auto retry = RequestRetriesLimit - retriesRemaining;
402-
const auto delay = TDuration::MilliSeconds(1u << retry); // Exponential delay from 1ms to ~0.5s
403-
// <<< TODO tune/tweak
404-
YQL_CLOG(WARN, ProviderGeneric) << "ActorId=" << selfId << " Got retrievable GRPC Error from Connector: " << status.ToDebugString() << ", retry " << (retry + 1) << " of " << RequestRetriesLimit << ", scheduled in " << delay;
405-
--retriesRemaining;
406-
if (status.GRpcStatusCode == grpc::DEADLINE_EXCEEDED) {
407-
// if error was deadline, retry only once
408-
retriesRemaining = 0; // TODO tune/tweak
409-
}
410-
actorSystem->Schedule(delay, new IEventHandle(selfId, selfId, new TEvRetry(retriesRemaining)));
411-
return;
412-
}
413-
YQL_CLOG(ERROR, ProviderGeneric) << "ActorId=" << selfId << " Got retrievable GRPC Error from Connector: " << status.ToDebugString() << ", retry count exceed limit " << RequestRetriesLimit;
422+
static void SendRetryOrError(NActors::TActorSystem* actorSystem, const NActors::TActorId& selfId, const NYdbGrpc::TGrpcStatus& status, std::shared_ptr<ILookupRetryState> retryState) {
423+
auto nextRetry = retryState->GetNextRetryDelay(status);
424+
if (nextRetry) {
425+
YQL_CLOG(WARN, ProviderGeneric) << "ActorId=" << selfId << " Got retrievable GRPC Error from Connector: " << status.ToDebugString() << ", retry scheduled in " << *nextRetry;
426+
actorSystem->Schedule(*nextRetry, new IEventHandle(selfId, selfId, new TEvLookupRetry()));
427+
return;
414428
}
415429
SendError(actorSystem, selfId, NConnector::ErrorFromGRPCStatus(status));
416430
}
@@ -502,7 +516,8 @@ namespace NYql::NDq {
502516
std::shared_ptr<IDqAsyncLookupSource::TUnboxedValueMap> Request;
503517
NConnector::IReadSplitsStreamIterator::TPtr ReadSplitsIterator; // TODO move me to TEvReadSplitsPart
504518
NKikimr::NMiniKQL::TKeyPayloadPairVector LookupResult;
505-
ui32 RetriesRemaining;
519+
ILookupRetryPolicy::TPtr RetryPolicy;
520+
std::shared_ptr<ILookupRetryState> RetryState;
506521
::NMonitoring::TDynamicCounters::TCounterPtr Count;
507522
::NMonitoring::TDynamicCounters::TCounterPtr Keys;
508523
::NMonitoring::TDynamicCounters::TCounterPtr ResultRows;

0 commit comments

Comments
 (0)