Skip to content

Commit 12147f5

Browse files
authored
Merge b4fa8d5 into 7102021
2 parents 7102021 + b4fa8d5 commit 12147f5

File tree

2 files changed

+46
-35
lines changed

2 files changed

+46
-35
lines changed

ydb/library/yql/providers/generic/actors/yql_generic_base_actor.h

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,6 @@ namespace NYql::NDq {
9191
};
9292

9393
struct TEvRetry: NActors::TEventLocal<TEvRetry, EvRetry> {
94-
explicit TEvRetry(ui32 nextRetries)
95-
: NextRetries(nextRetries)
96-
{
97-
}
98-
99-
ui32 NextRetries;
10094
};
10195

10296
protected: // TODO move common logic here

ydb/library/yql/providers/generic/actors/yql_generic_lookup_actor.cpp

Lines changed: 46 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,8 @@
2323
#include <yql/essentials/utils/yql_panic.h>
2424
#include <ydb/core/formats/arrow/serializer/abstract.h>
2525

26+
#include <library/cpp/retry/retry_policy.h>
27+
2628
namespace NYql::NDq {
2729

2830
using namespace NActors;
@@ -61,6 +63,13 @@ namespace NYql::NDq {
6163
public TGenericBaseActor<TGenericLookupActor> {
6264
using TBase = TGenericBaseActor<TGenericLookupActor>;
6365

66+
using ILookupRetryPolicy = IRetryPolicy<const NYdbGrpc::TGrpcStatus&>;
67+
using ILookupRetryState = ILookupRetryPolicy::IRetryState;
68+
69+
struct TEvLookupRetry : NActors::TEventLocal<TEvLookupRetry, EvRetry> {
70+
typedef typename THandle::TPtr TPtr;
71+
};
72+
6473
public:
6574
TGenericLookupActor(
6675
NConnector::IClient::TPtr connectorClient,
@@ -87,6 +96,24 @@ namespace NYql::NDq {
8796
, HolderFactory(holderFactory)
8897
, ColumnDestinations(CreateColumnDestination())
8998
, MaxKeysInRequest(maxKeysInRequest)
99+
, RetryPolicy(
100+
ILookupRetryPolicy::GetExponentialBackoffPolicy(
101+
/* retryClassFunction */
102+
[](const NYdbGrpc::TGrpcStatus& status) {
103+
if (NConnector::GrpcStatusNeedsRetry(status)) {
104+
return ERetryErrorClass::ShortRetry;
105+
}
106+
if (status.GRpcStatusCode == grpc::DEADLINE_EXCEEDED) {
107+
return ERetryErrorClass::ShortRetry; // TODO LongRetry?
108+
}
109+
return ERetryErrorClass::NoRetry;
110+
},
111+
/* minDelay */ TDuration::MilliSeconds(1),
112+
/* minLongRetryDelay */ TDuration::MilliSeconds(500),
113+
/* maxDelay */ TDuration::Seconds(1),
114+
/* maxRetries */ RequestRetriesLimit,
115+
/* maxTime */ TDuration::Minutes(5),
116+
/* scaleFactor */ 2))
90117
{
91118
InitMonCounters(taskCounters);
92119
}
@@ -157,7 +184,7 @@ namespace NYql::NDq {
157184
hFunc(TEvReadSplitsPart, Handle);
158185
hFunc(TEvReadSplitsFinished, Handle);
159186
hFunc(TEvError, Handle);
160-
hFunc(TEvRetry, Handle);
187+
hFunc(TEvLookupRetry, Handle);
161188
hFunc(NActors::TEvents::TEvPoison, Handle);)
162189

163190
void Handle(TEvListSplitsIterator::TPtr ev) {
@@ -166,7 +193,7 @@ namespace NYql::NDq {
166193
[
167194
actorSystem = TActivationContext::ActorSystem(),
168195
selfId = SelfId(),
169-
retriesRemaining = RetriesRemaining
196+
retryState = RetryState
170197
](const NConnector::TAsyncResult<NConnector::NApi::TListSplitsResponse>& asyncResult) {
171198
YQL_CLOG(DEBUG, ProviderGeneric) << "ActorId=" << selfId << " Got TListSplitsResponse from Connector";
172199
auto result = ExtractFromConstFuture(asyncResult);
@@ -175,7 +202,7 @@ namespace NYql::NDq {
175202
auto ev = new TEvListSplitsPart(std::move(*result.Response));
176203
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
177204
} else {
178-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
205+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
179206
}
180207
});
181208
}
@@ -199,15 +226,15 @@ namespace NYql::NDq {
199226
Connector->ReadSplits(readRequest, RequestTimeout).Subscribe([
200227
actorSystem = TActivationContext::ActorSystem(),
201228
selfId = SelfId(),
202-
retriesRemaining = RetriesRemaining
229+
retryState = RetryState
203230
](const NConnector::TReadSplitsStreamIteratorAsyncResult& asyncResult) {
204231
YQL_CLOG(DEBUG, ProviderGeneric) << "ActorId=" << selfId << " Got ReadSplitsStreamIterator from Connector";
205232
auto result = ExtractFromConstFuture(asyncResult);
206233
if (result.Status.Ok()) {
207234
auto ev = new TEvReadSplitsIterator(std::move(result.Iterator));
208235
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
209236
} else {
210-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
237+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
211238
}
212239
});
213240
}
@@ -236,9 +263,8 @@ namespace NYql::NDq {
236263
actorSystem->Send(new NActors::IEventHandle(ParentId, SelfId(), errEv.release()));
237264
}
238265

239-
void Handle(TEvRetry::TPtr ev) {
266+
void Handle(TEvLookupRetry::TPtr) {
240267
auto guard = Guard(*Alloc);
241-
RetriesRemaining = ev->Get()->NextRetries;
242268
SendRequest();
243269
}
244270

@@ -270,7 +296,7 @@ namespace NYql::NDq {
270296
}
271297

272298
Request = std::move(request);
273-
RetriesRemaining = RequestRetriesLimit;
299+
RetryState = std::shared_ptr<ILookupRetryState>(RetryPolicy->CreateRetryState());
274300
SendRequest();
275301
}
276302

@@ -288,7 +314,7 @@ namespace NYql::NDq {
288314
Connector->ListSplits(splitRequest, RequestTimeout).Subscribe([
289315
actorSystem = TActivationContext::ActorSystem(),
290316
selfId = SelfId(),
291-
retriesRemaining = RetriesRemaining
317+
retryState = RetryState
292318
](const NConnector::TListSplitsStreamIteratorAsyncResult& asyncResult) {
293319
auto result = ExtractFromConstFuture(asyncResult);
294320
if (result.Status.Ok()) {
@@ -297,7 +323,7 @@ namespace NYql::NDq {
297323
auto ev = new TEvListSplitsIterator(std::move(result.Iterator));
298324
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
299325
} else {
300-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
326+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
301327
}
302328
});
303329
if (CpuTime) {
@@ -310,7 +336,7 @@ namespace NYql::NDq {
310336
[
311337
actorSystem = TActivationContext::ActorSystem(),
312338
selfId = SelfId(),
313-
retriesRemaining = RetriesRemaining
339+
retryState = RetryState
314340
](const NConnector::TAsyncResult<NConnector::NApi::TReadSplitsResponse>& asyncResult) {
315341
auto result = ExtractFromConstFuture(asyncResult);
316342
if (result.Status.Ok()) {
@@ -329,7 +355,7 @@ namespace NYql::NDq {
329355
auto ev = new TEvReadSplitsFinished(std::move(result.Status));
330356
actorSystem->Send(new NActors::IEventHandle(selfId, selfId, ev));
331357
} else {
332-
SendRetryOrError(actorSystem, selfId, result.Status, retriesRemaining);
358+
SendRetryOrError(actorSystem, selfId, result.Status, retryState);
333359
}
334360
});
335361
}
@@ -395,22 +421,12 @@ namespace NYql::NDq {
395421
new TEvError(std::move(error)));
396422
}
397423

398-
static void SendRetryOrError(NActors::TActorSystem* actorSystem, const NActors::TActorId& selfId, const NYdbGrpc::TGrpcStatus& status, ui32 retriesRemaining) {
399-
if (NConnector::GrpcStatusNeedsRetry(status) || status.GRpcStatusCode == grpc::DEADLINE_EXCEEDED) {
400-
if (retriesRemaining) {
401-
const auto retry = RequestRetriesLimit - retriesRemaining;
402-
const auto delay = TDuration::MilliSeconds(1u << retry); // Exponential delay from 1ms to ~0.5s
403-
// << TODO tune/tweak
404-
YQL_CLOG(WARN, ProviderGeneric) << "ActorId=" << selfId << " Got retrievable GRPC Error from Connector: " << status.ToDebugString() << ", retry " << (retry + 1) << " of " << RequestRetriesLimit << ", scheduled in " << delay;
405-
--retriesRemaining;
406-
if (status.GRpcStatusCode == grpc::DEADLINE_EXCEEDED) {
407-
// if error was deadline, retry only once
408-
retriesRemaining = 0; // TODO tune/tweak
409-
}
410-
actorSystem->Schedule(delay, new IEventHandle(selfId, selfId, new TEvRetry(retriesRemaining)));
411-
return;
412-
}
413-
YQL_CLOG(ERROR, ProviderGeneric) << "ActorId=" << selfId << " Got retrievable GRPC Error from Connector: " << status.ToDebugString() << ", retry count exceed limit " << RequestRetriesLimit;
424+
static void SendRetryOrError(NActors::TActorSystem* actorSystem, const NActors::TActorId& selfId, const NYdbGrpc::TGrpcStatus& status, std::shared_ptr<ILookupRetryState> retryState) {
425+
auto nextRetry = retryState->GetNextRetryDelay(status);
426+
if (nextRetry) {
427+
YQL_CLOG(WARN, ProviderGeneric) << "ActorId=" << selfId << " Got retrievable GRPC Error from Connector: " << status.ToDebugString() << ", retry scheduled in " << *nextRetry;
428+
actorSystem->Schedule(*nextRetry, new IEventHandle(selfId, selfId, new TEvLookupRetry()));
429+
return;
414430
}
415431
SendError(actorSystem, selfId, NConnector::ErrorFromGRPCStatus(status));
416432
}
@@ -510,7 +526,8 @@ namespace NYql::NDq {
510526
std::shared_ptr<IDqAsyncLookupSource::TUnboxedValueMap> Request;
511527
NConnector::IReadSplitsStreamIterator::TPtr ReadSplitsIterator; // TODO move me to TEvReadSplitsPart
512528
NKikimr::NMiniKQL::TKeyPayloadPairVector LookupResult;
513-
ui32 RetriesRemaining;
529+
ILookupRetryPolicy::TPtr RetryPolicy;
530+
std::shared_ptr<ILookupRetryState> RetryState;
514531
::NMonitoring::TDynamicCounters::TCounterPtr Count;
515532
::NMonitoring::TDynamicCounters::TCounterPtr Keys;
516533
::NMonitoring::TDynamicCounters::TCounterPtr ResultRows;

0 commit comments

Comments
 (0)