Skip to content

Commit 331ebd5

Browse files
authored
Prefixed vector index construction (#15166)
1 parent e4a0946 commit 331ebd5

23 files changed

+2398
-152
lines changed

ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ bool CanUseVectorIndex(const TIndexDescription& indexDesc, const TExprBase& lamb
150150
// TODO(mbkkt) We need to account for top.Count(), but it is not clear what to do if its value is only known at runtime.
151151
auto checkMember = [&] (const TExprBase& expr) {
152152
auto member = expr.Maybe<TCoMember>();
153-
return member && member.Cast().Name().Value() == indexDesc.KeyColumns[0];
153+
return member && member.Cast().Name().Value() == indexDesc.KeyColumns.back();
154154
};
155155
auto checkUdf = [&] (const TExprBase& expr, bool checkMembers) {
156156
auto apply = expr.Maybe<TCoApply>();
@@ -452,7 +452,7 @@ TExprBase DoRewriteTopSortOverKMeansTree(
452452
auto apply = newLambda.Body().Cast<TCoApply>();
453453
for (auto arg : apply.Args()) {
454454
auto oldMember = arg.Maybe<TCoMember>();
455-
if (oldMember && oldMember.Cast().Name().Value() == indexDesc.KeyColumns[0]) {
455+
if (oldMember && oldMember.Cast().Name().Value() == indexDesc.KeyColumns.back()) {
456456
auto newMember = Build<TCoMember>(ctx, pos)
457457
.Name().Build(NTableIndex::NTableVectorKmeansTreeIndex::CentroidColumn)
458458
.Struct(oldMember.Cast().Struct())

ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp

Lines changed: 571 additions & 8 deletions
Large diffs are not rendered by default.

ydb/core/protos/tx_datashard.proto

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,6 +1632,54 @@ message TEvReshuffleKMeansResponse {
16321632
// optional last written primary key
16331633
}
16341634

1635+
// Request record for the prefix k-means step introduced by the
// "Prefixed vector index construction" change (#15166). It is carried by the
// datashard event TEvDataShard::TEvPrefixKMeansRequest (EvPrefixKMeansRequest).
// NOTE(review): the layout looks modelled on TEvLocalKMeansRequest (Upload reuses
// its EState enum) — confirm the per-field semantics against that message.
message TEvPrefixKMeansRequest {
1636+
optional uint64 Id = 1;
1637+
1638+
optional uint64 TabletId = 2;
1639+
optional NKikimrProto.TPathID PathId = 3;
1640+
1641+
// Sequence number, split into a generation and a round component.
optional uint64 SeqNoGeneration = 4;
1642+
optional uint64 SeqNoRound = 5;
1643+
1644+
optional Ydb.Table.VectorIndexSettings Settings = 6;
1645+
1646+
// presumably the seed for random sampling — TODO confirm
optional uint64 Seed = 7;
1647+
1648+
// Uses the state enum declared inside TEvLocalKMeansRequest.
optional TEvLocalKMeansRequest.EState Upload = 8;
1649+
1650+
// presumably the number of clusters and the number of k-means rounds — TODO confirm
optional uint32 K = 9;
1651+
optional uint32 NeedsRounds = 10;
1652+
1653+
// [Child ... Child + (1 + TableSize) * ShardIndex]
1654+
optional uint64 Child = 11;
1655+
1656+
// presumably the names of the level, posting and prefix tables — TODO confirm
optional string LevelName = 12;
1657+
optional string PostingName = 13;
1658+
optional string PrefixName = 14;
1659+
1660+
optional string EmbeddingColumn = 15;
1661+
repeated string DataColumns = 16;
1662+
// NOTE(review): a count (uint32), not a list of names — presumably the number of
// leading prefix key columns; confirm against the scan implementation.
optional uint32 PrefixColumns = 17;
1663+
}
1664+
1665+
// Response record paired with TEvPrefixKMeansRequest. It is carried by the
// datashard event TEvDataShard::TEvPrefixKMeansResponse (EvPrefixKMeansResponse).
message TEvPrefixKMeansResponse {
1666+
optional uint64 Id = 1;
1667+
1668+
optional uint64 TabletId = 2;
1669+
optional NKikimrProto.TPathID PathId = 3;
1670+
1671+
// presumably echo the SeqNoGeneration/SeqNoRound of the request being answered — TODO confirm
optional uint64 RequestSeqNoGeneration = 4;
1672+
optional uint64 RequestSeqNoRound = 5;
1673+
1674+
// Build status plus any issue messages attached to it.
optional NKikimrIndexBuilder.EBuildStatus Status = 6;
1675+
repeated Ydb.Issue.IssueMessage Issues = 7;
1676+
1677+
// Row/byte counters for the upload and read sides.
optional uint64 UploadRows = 8;
1678+
optional uint64 UploadBytes = 9;
1679+
optional uint64 ReadRows = 10;
1680+
optional uint64 ReadBytes = 11;
1681+
}
1682+
16351683
message TEvCdcStreamScanRequest {
16361684
message TLimits {
16371685
optional uint32 BatchMaxBytes = 1 [default = 512000];

ydb/core/tx/datashard/datashard.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,9 @@ namespace TEvDataShard {
349349
EvForceDataCleanup,
350350
EvForceDataCleanupResult,
351351

352+
EvPrefixKMeansRequest,
353+
EvPrefixKMeansResponse,
354+
352355
EvEnd
353356
};
354357

@@ -1514,6 +1517,18 @@ namespace TEvDataShard {
15141517
TEvDataShard::EvLocalKMeansResponse> {
15151518
};
15161519

1520+
// Event type that binds the NKikimrTxDataShard::TEvPrefixKMeansRequest record to the
// EvPrefixKMeansRequest event id via TEventPB; it declares no members of its own.
struct TEvPrefixKMeansRequest
1521+
: public TEventPB<TEvPrefixKMeansRequest,
1522+
NKikimrTxDataShard::TEvPrefixKMeansRequest,
1523+
TEvDataShard::EvPrefixKMeansRequest> {
1524+
};
1525+
1526+
// Event type that binds the NKikimrTxDataShard::TEvPrefixKMeansResponse record to the
// EvPrefixKMeansResponse event id via TEventPB; it declares no members of its own.
struct TEvPrefixKMeansResponse
1527+
: public TEventPB<TEvPrefixKMeansResponse,
1528+
NKikimrTxDataShard::TEvPrefixKMeansResponse,
1529+
TEvDataShard::EvPrefixKMeansResponse> {
1530+
};
1531+
15171532
struct TEvKqpScan
15181533
: public TEventPB<TEvKqpScan,
15191534
NKikimrTxDataShard::TEvKqpScan,

ydb/core/tx/datashard/datashard_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,7 @@ class TDataShard
250250
class TTxHandleSafeBuildIndexScan;
251251
class TTxHandleSafeSampleKScan;
252252
class TTxHandleSafeLocalKMeansScan;
253+
class TTxHandleSafePrefixKMeansScan;
253254
class TTxHandleSafeReshuffleKMeansScan;
254255
class TTxHandleSafeStatisticsScan;
255256

@@ -1335,6 +1336,8 @@ class TDataShard
13351336
void HandleSafe(TEvDataShard::TEvReshuffleKMeansRequest::TPtr& ev, const TActorContext& ctx);
13361337
void Handle(TEvDataShard::TEvLocalKMeansRequest::TPtr& ev, const TActorContext& ctx);
13371338
void HandleSafe(TEvDataShard::TEvLocalKMeansRequest::TPtr& ev, const TActorContext& ctx);
1339+
void Handle(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, const TActorContext& ctx);
1340+
void HandleSafe(TEvDataShard::TEvPrefixKMeansRequest::TPtr& ev, const TActorContext& ctx);
13381341
void Handle(TEvDataShard::TEvCdcStreamScanRequest::TPtr& ev, const TActorContext& ctx);
13391342
void Handle(TEvPrivate::TEvCdcStreamScanRegistered::TPtr& ev, const TActorContext& ctx);
13401343
void Handle(TEvPrivate::TEvCdcStreamScanProgress::TPtr& ev, const TActorContext& ctx);
@@ -3206,6 +3209,7 @@ class TDataShard
32063209
HFunc(TEvDataShard::TEvSampleKRequest, Handle);
32073210
HFunc(TEvDataShard::TEvReshuffleKMeansRequest, Handle);
32083211
HFunc(TEvDataShard::TEvLocalKMeansRequest, Handle);
3212+
HFunc(TEvDataShard::TEvPrefixKMeansRequest, Handle);
32093213
HFunc(TEvDataShard::TEvCdcStreamScanRequest, Handle);
32103214
HFunc(TEvPrivate::TEvCdcStreamScanRegistered, Handle);
32113215
HFunc(TEvPrivate::TEvCdcStreamScanProgress, Handle);

0 commit comments

Comments
 (0)