Skip to content

Commit 24a7b3b

Browse files
committed
trace(kqp): add tracing to read actors
1 parent 8579425 commit 24a7b3b

File tree

8 files changed

+82
-40
lines changed

8 files changed

+82
-40
lines changed

ydb/core/kqp/executer_actor/kqp_planner.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ void TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, bool shareMailbox, bool op
349349
limits.MemoryQuotaManager = std::make_shared<NYql::NDq::TGuaranteeQuotaManager>(limit * 2, limit);
350350

351351
auto computeActor = NKikimr::NKqp::CreateKqpComputeActor(ExecuterId, TxId, taskDesc, AsyncIoFactory,
352-
AppData()->FunctionRegistry, settings, limits, NWilson::TTraceId(), TasksGraph.GetMeta().GetArenaIntrusivePtr());
352+
AppData()->FunctionRegistry, settings, limits, ExecuterSpan.GetTraceId(), TasksGraph.GetMeta().GetArenaIntrusivePtr());
353353

354354
if (optimizeProtoForLocalExecution) {
355355
TVector<google::protobuf::Message*>& taskSourceSettings = static_cast<TKqpComputeActor*>(computeActor)->MutableTaskSourceSettings();

ydb/core/kqp/runtime/kqp_read_actor.cpp

+22-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <library/cpp/threading/hot_swap/hot_swap.h>
1818
#include <ydb/library/actors/core/interconnect.h>
1919
#include <ydb/library/actors/core/actorsystem.h>
20+
#include <ydb/library/wilson_ids/wilson.h>
2021

2122
#include <util/generic/intrlist.h>
2223

@@ -399,6 +400,7 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
399400
, Counters(counters)
400401
, UseFollowers(false)
401402
, PipeCacheId(MainPipeCacheId)
403+
, ReadActorSpan(TWilsonKqp::ReadActor, NWilson::TTraceId(args.TraceId), "ReadActor")
402404
{
403405
Y_ABORT_UNLESS(Arena);
404406
Y_ABORT_UNLESS(settings->GetArena() == Arena->Get());
@@ -569,6 +571,9 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
569571
ResolveShards[ResolveShardId] = state;
570572
ResolveShardId += 1;
571573

574+
ReadActorStateSpan = NWilson::TSpan(TWilsonKqp::ReadActorShardResolve, ReadActorSpan.GetTraceId(),
575+
"WaitForShardResolve", NWilson::EFlags::AUTO_END);
576+
572577
Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {}));
573578
Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvResolveKeySet(request));
574579
}
@@ -617,9 +622,13 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
617622
}
618623
}
619624

625+
ReadActorStateSpan.EndError(error);
626+
620627
return RuntimeError(error, statusCode);
621628
}
622629

630+
ReadActorStateSpan.EndOk();
631+
623632
auto keyDesc = std::move(request->ResultSet[0].KeyDescription);
624633

625634
if (keyDesc->GetPartitions().size() == 1) {
@@ -896,10 +905,8 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
896905
Counters->CreatedIterators->Inc();
897906
ReadIdByTabletId[state->TabletId].push_back(id);
898907

899-
NWilson::TTraceId traceId; // TODO: get traceId from kqp.
900-
901908
Send(PipeCacheId, new TEvPipeCache::TEvForward(ev.Release(), state->TabletId, true),
902-
IEventHandle::FlagTrackDelivery, 0, std::move(traceId));
909+
IEventHandle::FlagTrackDelivery, 0, ReadActorSpan.GetTraceId());
903910

904911
if (!FirstShardStarted) {
905912
state->IsFirst = true;
@@ -1385,6 +1392,10 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
13851392
}
13861393
}
13871394
TBase::PassAway();
1395+
1396+
if (ReadActorSpan) {
1397+
ReadActorSpan.End();
1398+
}
13881399
}
13891400

13901401
void RuntimeError(const TString& message, NYql::NDqProto::StatusIds::StatusCode statusCode, const NYql::TIssues& subIssues = {}) {
@@ -1395,6 +1406,11 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
13951406

13961407
NYql::TIssues issues;
13971408
issues.AddIssue(std::move(issue));
1409+
1410+
if (ReadActorSpan) {
1411+
ReadActorSpan.EndError(issues.ToOneLineString());
1412+
}
1413+
13981414
Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), statusCode));
13991415
}
14001416

@@ -1491,6 +1507,9 @@ class TKqpReadActor : public TActorBootstrapped<TKqpReadActor>, public NYql::NDq
14911507
size_t TotalRetries = 0;
14921508

14931509
bool FirstShardStarted = false;
1510+
1511+
NWilson::TSpan ReadActorSpan;
1512+
NWilson::TSpan ReadActorStateSpan;
14941513
};
14951514

14961515

ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp

+45-28
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
#include "kqp_stream_lookup_actor.h"
22

3-
#include <ydb/library/actors/core/actor_bootstrapped.h>
4-
53
#include <ydb/core/actorlib_impl/long_timer.h>
64
#include <ydb/core/base/tablet_pipecache.h>
75
#include <ydb/core/engine/minikql/minikql_engine_host.h>
86
#include <ydb/core/kqp/common/kqp_resolve.h>
7+
#include <ydb/core/kqp/common/kqp_event_ids.h>
98
#include <ydb/core/kqp/gateway/kqp_gateway.h>
9+
#include <ydb/core/kqp/runtime/kqp_scan_data.h>
10+
#include <ydb/core/kqp/runtime/kqp_stream_lookup_worker.h>
1011
#include <ydb/core/protos/kqp_stats.pb.h>
1112
#include <ydb/core/tx/scheme_cache/scheme_cache.h>
12-
#include <ydb/core/kqp/common/kqp_event_ids.h>
13+
14+
#include <ydb/library/actors/core/actor_bootstrapped.h>
1315
#include <ydb/library/yql/public/issue/yql_issue_message.h>
14-
#include <ydb/core/kqp/runtime/kqp_scan_data.h>
15-
#include <ydb/core/kqp/runtime/kqp_stream_lookup_worker.h>
1616
#include <ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h>
17+
#include <ydb/library/wilson_ids/wilson.h>
1718

1819
namespace NKikimr {
1920
namespace NKqp {
@@ -25,24 +26,22 @@ static constexpr ui64 MAX_SHARD_RETRIES = 10;
2526

2627
class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLookupActor>, public NYql::NDq::IDqComputeActorAsyncInput {
2728
public:
28-
TKqpStreamLookupActor(ui64 inputIndex, NYql::NDq::TCollectStatsLevel statsLevel, const NUdf::TUnboxedValue& input,
29-
const NActors::TActorId& computeActorId, const NMiniKQL::TTypeEnvironment& typeEnv,
30-
const NMiniKQL::THolderFactory& holderFactory, std::shared_ptr<NMiniKQL::TScopedAlloc>& alloc,
31-
const NYql::NDqProto::TTaskInput& inputDesc, NKikimrKqp::TKqpStreamLookupSettings&& settings,
29+
TKqpStreamLookupActor(NYql::NDq::IDqAsyncIoFactory::TInputTransformArguments&& args, NKikimrKqp::TKqpStreamLookupSettings&& settings,
3230
TIntrusivePtr<TKqpCounters> counters)
33-
: LogPrefix(TStringBuilder() << "StreamLookupActor, inputIndex: " << inputIndex << ", CA Id " << computeActorId)
34-
, InputIndex(inputIndex)
35-
, Input(input)
36-
, ComputeActorId(computeActorId)
37-
, TypeEnv(typeEnv)
38-
, Alloc(alloc)
31+
: LogPrefix(TStringBuilder() << "StreamLookupActor, inputIndex: " << args.InputIndex << ", CA Id " << args.ComputeActorId)
32+
, InputIndex(args.InputIndex)
33+
, Input(args.TransformInput)
34+
, ComputeActorId(args.ComputeActorId)
35+
, TypeEnv(args.TypeEnv)
36+
, Alloc(args.Alloc)
3937
, Snapshot(settings.GetSnapshot().GetStep(), settings.GetSnapshot().GetTxId())
4038
, LockTxId(settings.HasLockTxId() ? settings.GetLockTxId() : TMaybe<ui64>())
4139
, SchemeCacheRequestTimeout(SCHEME_CACHE_REQUEST_TIMEOUT)
42-
, StreamLookupWorker(CreateStreamLookupWorker(std::move(settings), typeEnv, holderFactory, inputDesc))
40+
, StreamLookupWorker(CreateStreamLookupWorker(std::move(settings), args.TypeEnv, args.HolderFactory, args.InputDesc))
4341
, Counters(counters)
42+
, LookupActorSpan(TWilsonKqp::LookupActor, std::move(args.TraceId), "LookupActor")
4443
{
45-
IngressStats.Level = statsLevel;
44+
IngressStats.Level = args.StatsLevel;
4645
}
4746

4847
virtual ~TKqpStreamLookupActor() {
@@ -174,6 +173,10 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
174173

175174
Send(MakePipePeNodeCacheID(false), new TEvPipeCache::TEvUnlink(0));
176175
TActorBootstrapped<TKqpStreamLookupActor>::PassAway();
176+
177+
if (LookupActorSpan) {
178+
LookupActorSpan.End();
179+
}
177180
}
178181

179182
i64 GetAsyncInputData(NKikimr::NMiniKQL::TUnboxedValueBatch& batch, TMaybe<TInstant>&, bool& finished, i64 freeSpace) final {
@@ -234,10 +237,15 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
234237
void Handle(TEvTxProxySchemeCache::TEvResolveKeySetResult::TPtr& ev) {
235238
CA_LOG_D("TEvResolveKeySetResult was received for table: " << StreamLookupWorker->GetTablePath());
236239
if (ev->Get()->Request->ErrorCount > 0) {
237-
return RuntimeError(TStringBuilder() << "Failed to get partitioning for table: "
238-
<< StreamLookupWorker->GetTablePath(), NYql::NDqProto::StatusIds::SCHEME_ERROR);
240+
TString errorMsg = TStringBuilder() << "Failed to get partitioning for table: "
241+
<< StreamLookupWorker->GetTablePath();
242+
LookupActorStateSpan.EndError(errorMsg);
243+
244+
return RuntimeError(errorMsg, NYql::NDqProto::StatusIds::SCHEME_ERROR);
239245
}
240246

247+
LookupActorStateSpan.EndOk();
248+
241249
auto& resultSet = ev->Get()->Request->ResultSet;
242250
YQL_ENSURE(resultSet.size() == 1, "Expected one result for range [NULL, +inf)");
243251
Partitioning = resultSet[0].KeyDescription->Partitioning;
@@ -342,8 +350,11 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
342350
<< " was resolved: " << !!Partitioning);
343351

344352
if (!Partitioning) {
345-
RuntimeError(TStringBuilder() << "Failed to resolve shards for table: " << StreamLookupWorker->GetTablePath()
346-
<< " (request timeout exceeded)", NYql::NDqProto::StatusIds::TIMEOUT);
353+
TString errorMsg = TStringBuilder() << "Failed to resolve shards for table: " << StreamLookupWorker->GetTablePath()
354+
<< " (request timeout exceeded)";
355+
LookupActorStateSpan.EndError(errorMsg);
356+
357+
RuntimeError(errorMsg, NYql::NDqProto::StatusIds::TIMEOUT);
347358
}
348359
}
349360

@@ -392,7 +403,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
392403
record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC);
393404

394405
Send(MakePipePeNodeCacheID(false), new TEvPipeCache::TEvForward(request.Release(), shardId, true),
395-
IEventHandle::FlagTrackDelivery);
406+
IEventHandle::FlagTrackDelivery, 0, LookupActorSpan.GetTraceId());
396407

397408
read.State = EReadState::Running;
398409

@@ -438,6 +449,9 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
438449
keyColumnTypes, TVector<TKeyDesc::TColumnOp>{}));
439450

440451
Counters->IteratorsShardResolve->Inc();
452+
LookupActorStateSpan = NWilson::TSpan(TWilsonKqp::LookupActorShardsResolve, LookupActorSpan.GetTraceId(),
453+
"WaitForShardsResolve", NWilson::EFlags::AUTO_END);
454+
441455
Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(StreamLookupWorker->GetTableId(), {}));
442456
Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvResolveKeySet(request));
443457

@@ -467,6 +481,11 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
467481

468482
NYql::TIssues issues;
469483
issues.AddIssue(std::move(issue));
484+
485+
if (LookupActorSpan) {
486+
LookupActorSpan.EndError(issues.ToOneLineString());
487+
}
488+
470489
Send(ComputeActorId, new TEvAsyncInputError(InputIndex, std::move(issues), statusCode));
471490
}
472491

@@ -495,17 +514,15 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped<TKqpStreamLooku
495514
ui64 ReadBytesCount = 0;
496515

497516
TIntrusivePtr<TKqpCounters> Counters;
517+
NWilson::TSpan LookupActorSpan;
518+
NWilson::TSpan LookupActorStateSpan;
498519
};
499520

500521
} // namespace
501522

502-
std::pair<NYql::NDq::IDqComputeActorAsyncInput*, NActors::IActor*> CreateStreamLookupActor(ui64 inputIndex,
503-
NYql::NDq::TCollectStatsLevel statsLevel, const NUdf::TUnboxedValue& input, const NActors::TActorId& computeActorId,
504-
const NMiniKQL::TTypeEnvironment& typeEnv, const NMiniKQL::THolderFactory& holderFactory,
505-
std::shared_ptr<NMiniKQL::TScopedAlloc>& alloc, const NYql::NDqProto::TTaskInput& inputDesc,
523+
std::pair<NYql::NDq::IDqComputeActorAsyncInput*, NActors::IActor*> CreateStreamLookupActor(NYql::NDq::IDqAsyncIoFactory::TInputTransformArguments&& args,
506524
NKikimrKqp::TKqpStreamLookupSettings&& settings, TIntrusivePtr<TKqpCounters> counters) {
507-
auto actor = new TKqpStreamLookupActor(inputIndex, statsLevel, input, computeActorId, typeEnv, holderFactory,
508-
alloc, inputDesc, std::move(settings), counters);
525+
auto actor = new TKqpStreamLookupActor(std::move(args), std::move(settings), counters);
509526
return {actor, actor};
510527
}
511528

ydb/core/kqp/runtime/kqp_stream_lookup_actor.h

+1-4
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,7 @@
77
namespace NKikimr {
88
namespace NKqp {
99

10-
std::pair<NYql::NDq::IDqComputeActorAsyncInput*, NActors::IActor*> CreateStreamLookupActor(ui64 inputIndex,
11-
NYql::NDq::TCollectStatsLevel statsLevel, const NUdf::TUnboxedValue& input, const NActors::TActorId& computeActorId,
12-
const NMiniKQL::TTypeEnvironment& typeEnv, const NMiniKQL::THolderFactory& holderFactory,
13-
std::shared_ptr<NMiniKQL::TScopedAlloc>& alloc, const NYql::NDqProto::TTaskInput& inputDesc,
10+
std::pair<NYql::NDq::IDqComputeActorAsyncInput*, NActors::IActor*> CreateStreamLookupActor(NYql::NDq::IDqAsyncIoFactory::TInputTransformArguments&& args,
1411
NKikimrKqp::TKqpStreamLookupSettings&& settings, TIntrusivePtr<TKqpCounters>);
1512

1613
} // namespace NKqp

ydb/core/kqp/runtime/kqp_stream_lookup_factory.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ namespace NKqp {
77
void RegisterStreamLookupActorFactory(NYql::NDq::TDqAsyncIoFactory& factory, TIntrusivePtr<TKqpCounters> counters) {
88
factory.RegisterInputTransform<NKikimrKqp::TKqpStreamLookupSettings>("StreamLookupInputTransformer", [counters](NKikimrKqp::TKqpStreamLookupSettings&& settings,
99
NYql::NDq::TDqAsyncIoFactory::TInputTransformArguments&& args) {
10-
return CreateStreamLookupActor(args.InputIndex, args.StatsLevel, args.TransformInput, args.ComputeActorId, args.TypeEnv,
11-
args.HolderFactory, args.Alloc, args.InputDesc, std::move(settings), counters);
10+
return CreateStreamLookupActor(std::move(args), std::move(settings), counters);
1211
});
1312
}
1413

ydb/library/wilson_ids/wilson.h

+6
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,12 @@ namespace NKikimr {
3636
ProposeTransaction = 9,
3737

3838
ComputeActor = 9,
39+
40+
ReadActor = 9,
41+
ReadActorShardResolve = 10,
42+
43+
LookupActor = 9,
44+
LookupActorShardsResolve = 10,
3945
};
4046
};
4147

ydb/library/yql/dq/actors/compute/dq_compute_actor_async_io.h

+2
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ struct IDqAsyncIoFactory : public TThrRefBase {
217217
IMemoryQuotaManager::TPtr MemoryQuotaManager;
218218
const google::protobuf::Message* SourceSettings = nullptr; // used only in case if we execute compute actor locally
219219
TIntrusivePtr<NActors::TProtoArenaHolder> Arena; // Arena for SourceSettings
220+
NWilson::TTraceId TraceId;
220221
};
221222

222223
struct TSinkArguments {
@@ -247,6 +248,7 @@ struct IDqAsyncIoFactory : public TThrRefBase {
247248
const NKikimr::NMiniKQL::THolderFactory& HolderFactory;
248249
NKikimr::NMiniKQL::TProgramBuilder& ProgramBuilder;
249250
std::shared_ptr<NKikimr::NMiniKQL::TScopedAlloc> Alloc;
251+
NWilson::TTraceId TraceId;
250252
};
251253

252254
struct TOutputTransformArguments {

ydb/library/yql/dq/actors/compute/dq_compute_actor_impl.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -1593,7 +1593,8 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped<TDerived>
15931593
.Alloc = TaskRunner ? TaskRunner->GetAllocatorPtr() : nullptr,
15941594
.MemoryQuotaManager = MemoryLimits.MemoryQuotaManager,
15951595
.SourceSettings = (!settings.empty() ? settings.at(inputIndex) : nullptr),
1596-
.Arena = Task.GetArena()
1596+
.Arena = Task.GetArena(),
1597+
.TraceId = ComputeActorSpan.GetTraceId()
15971598
});
15981599
} catch (const std::exception& ex) {
15991600
throw yexception() << "Failed to create source " << inputDesc.GetSource().GetType() << ": " << ex.what();
@@ -1623,7 +1624,8 @@ class TDqComputeActorBase : public NActors::TActorBootstrapped<TDerived>
16231624
.TypeEnv = typeEnv,
16241625
.HolderFactory = holderFactory,
16251626
.ProgramBuilder = *transform.ProgramBuilder,
1626-
.Alloc = TaskRunner->GetAllocatorPtr()
1627+
.Alloc = TaskRunner->GetAllocatorPtr(),
1628+
.TraceId = ComputeActorSpan.GetTraceId()
16271629
});
16281630
} catch (const std::exception& ex) {
16291631
throw yexception() << "Failed to create input transform " << inputDesc.GetTransform().GetType() << ": " << ex.what();

0 commit comments

Comments
 (0)