Skip to content

Commit eb88635

Browse files
authored
Adjust vector index settings (#10542)
1 parent 0cd2faa commit eb88635

25 files changed

+419
-495
lines changed

ydb/core/kqp/provider/yql_kikimr_exec.cpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,19 +1697,24 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer<TKi
16971697
YQL_ENSURE(indexSettings.Maybe<TCoNameValueTupleList>());
16981698
for (const auto& vectorSetting : indexSettings.Cast<TCoNameValueTupleList>()) {
16991699
YQL_ENSURE(vectorSetting.Value().Maybe<TCoAtom>());
1700+
auto parseU32 = [] (const char* key, const TString& value) {
1701+
ui32 num = 0;
1702+
YQL_ENSURE(TryFromString(value, num), "Wrong " << key << ": " << value);
1703+
return num;
1704+
};
1705+
const auto value = vectorSetting.Value().Cast<TCoAtom>().StringValue();
17001706
if (vectorSetting.Name().Value() == "distance") {
1701-
protoVectorSettings.set_distance(VectorIndexSettingsParseDistance(vectorSetting.Value().Cast<TCoAtom>().StringValue()));
1707+
protoVectorSettings.mutable_settings()->set_metric(VectorIndexSettingsParseDistance(value));
17021708
} else if (vectorSetting.Name().Value() == "similarity") {
1703-
protoVectorSettings.set_similarity(VectorIndexSettingsParseSimilarity(vectorSetting.Value().Cast<TCoAtom>().StringValue()));
1709+
protoVectorSettings.mutable_settings()->set_metric(VectorIndexSettingsParseSimilarity(value));
17041710
} else if (vectorSetting.Name().Value() == "vector_type") {
1705-
protoVectorSettings.set_vector_type(VectorIndexSettingsParseVectorType(vectorSetting.Value().Cast<TCoAtom>().StringValue()));
1711+
protoVectorSettings.mutable_settings()->set_vector_type(VectorIndexSettingsParseVectorType(value));
17061712
} else if (vectorSetting.Name().Value() == "vector_dimension") {
1707-
auto parseInt = [] (const TString vectorDimensionStr) {
1708-
ui32 vectorDimension;
1709-
YQL_ENSURE(TryFromString(vectorDimensionStr, vectorDimension), "Wrong vector_dimension: " << vectorDimensionStr);
1710-
return vectorDimension;
1711-
};
1712-
protoVectorSettings.set_vector_dimension(parseInt(vectorSetting.Value().Cast<TCoAtom>().StringValue()));
1713+
protoVectorSettings.mutable_settings()->set_vector_dimension(parseU32("vector_dimension", value));
1714+
} else if (vectorSetting.Name().Value() == "clusters") {
1715+
protoVectorSettings.set_clusters(parseU32("clusters", value));
1716+
} else if (vectorSetting.Name().Value() == "levels") {
1717+
protoVectorSettings.set_levels(parseU32("levels", value));
17131718
} else {
17141719
YQL_ENSURE(false, "Wrong vector setting name: " << vectorSetting.Name().Value());
17151720
}

ydb/core/kqp/provider/yql_kikimr_provider.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,7 @@ void TableDescriptionToTableInfo(const TKikimrTableDescription& desc, TYdbOperat
831831
TableDescriptionToTableInfoImpl(desc, op, std::back_inserter(infos));
832832
}
833833

834-
Ydb::Table::VectorIndexSettings_Distance VectorIndexSettingsParseDistance(std::string_view distance) {
834+
Ydb::Table::VectorIndexSettings_Metric VectorIndexSettingsParseDistance(std::string_view distance) {
835835
if (distance == "cosine")
836836
return Ydb::Table::VectorIndexSettings::DISTANCE_COSINE;
837837
else if (distance == "manhattan")
@@ -842,7 +842,7 @@ Ydb::Table::VectorIndexSettings_Distance VectorIndexSettingsParseDistance(std::s
842842
YQL_ENSURE(false, "Wrong index setting distance: " << distance);
843843
};
844844

845-
Ydb::Table::VectorIndexSettings_Similarity VectorIndexSettingsParseSimilarity(std::string_view similarity) {
845+
Ydb::Table::VectorIndexSettings_Metric VectorIndexSettingsParseSimilarity(std::string_view similarity) {
846846
if (similarity == "cosine")
847847
return Ydb::Table::VectorIndexSettings::SIMILARITY_COSINE;
848848
else if (similarity == "inner_product")

ydb/core/kqp/provider/yql_kikimr_provider_impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,8 @@ void TableDescriptionToTableInfo(const TKikimrTableDescription& desc, TYdbOperat
243243
void TableDescriptionToTableInfo(const TKikimrTableDescription& desc, TYdbOperation op,
244244
TVector<NKqpProto::TKqpTableInfo>& infos);
245245

246-
Ydb::Table::VectorIndexSettings_Distance VectorIndexSettingsParseDistance(std::string_view distance);
247-
Ydb::Table::VectorIndexSettings_Similarity VectorIndexSettingsParseSimilarity(std::string_view similarity);
246+
Ydb::Table::VectorIndexSettings_Metric VectorIndexSettingsParseDistance(std::string_view distance);
247+
Ydb::Table::VectorIndexSettings_Metric VectorIndexSettingsParseSimilarity(std::string_view similarity);
248248
Ydb::Table::VectorIndexSettings_VectorType VectorIndexSettingsParseVectorType(std::string_view vectorType);
249249

250250
bool IsPgNullExprNode(const NNodes::TExprBase& maybeLiteral);

ydb/core/kqp/provider/yql_kikimr_type_ann.cpp

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -831,28 +831,33 @@ class TKiSinkTypeAnnotationTransformer : public TKiSinkVisitorTransformer
831831
return TStatus::Ok;
832832
}
833833

834-
Ydb::Table::VectorIndexSettings SerializeVectorIndexSettingsToProto(const TCoNameValueTupleList& indexSettings) {
835-
Ydb::Table::VectorIndexSettings proto;
834+
Ydb::Table::KMeansTreeSettings SerializeVectorIndexSettingsToProto(const TCoNameValueTupleList& indexSettings) {
835+
Ydb::Table::KMeansTreeSettings proto;
836836

837837
for (const auto& indexSetting : indexSettings) {
838838
const auto& name = indexSetting.Name().Value();
839839
const auto& value = indexSetting.Value().Cast<TCoAtom>().StringValue();
840840

841-
if (name == "distance")
842-
proto.set_distance(VectorIndexSettingsParseDistance(value));
843-
else if (name =="similarity")
844-
proto.set_similarity(VectorIndexSettingsParseSimilarity(value));
845-
else if (name =="vector_type")
846-
proto.set_vector_type(VectorIndexSettingsParseVectorType(value));
847-
else if (name =="vector_dimension")
848-
proto.set_vector_dimension(FromString<ui32>(value));
849-
else
841+
if (name == "distance") {
842+
proto.mutable_settings()->set_metric(VectorIndexSettingsParseDistance(value));
843+
} else if (name == "similarity") {
844+
proto.mutable_settings()->set_metric(VectorIndexSettingsParseSimilarity(value));
845+
} else if (name =="vector_type") {
846+
proto.mutable_settings()->set_vector_type(VectorIndexSettingsParseVectorType(value));
847+
} else if (name =="vector_dimension") {
848+
proto.mutable_settings()->set_vector_dimension(FromString<ui32>(value));
849+
} else if (name =="clusters") {
850+
proto.set_clusters(FromString<ui32>(value));
851+
} else if (name =="levels") {
852+
proto.set_levels(FromString<ui32>(value));
853+
} else {
850854
YQL_ENSURE(false, "Wrong index setting name: " << name);
855+
}
851856
}
852857

853-
YQL_ENSURE(proto.metric_case() != Ydb::Table::VectorIndexSettings::METRIC_NOT_SET, "Missed index setting distance or similarity");
854-
YQL_ENSURE(proto.vector_type() != Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UNSPECIFIED, "Missed index setting vector_type");
855-
YQL_ENSURE(proto.vector_dimension(), "Missed index setting vector_dimension");
858+
YQL_ENSURE(proto.settings().metric() != Ydb::Table::VectorIndexSettings::METRIC_UNSPECIFIED, "Missed index setting metric");
859+
YQL_ENSURE(proto.settings().vector_type() != Ydb::Table::VectorIndexSettings::VECTOR_TYPE_UNSPECIFIED, "Missed index setting vector_type");
860+
YQL_ENSURE(proto.settings().vector_dimension(), "Missed index setting vector_dimension");
856861

857862
return proto;
858863
}
@@ -991,9 +996,8 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over
991996

992997
TIndexDescription::TSpecializedIndexDescription specializedIndexDescription;
993998
if (indexType == TIndexDescription::EType::GlobalSyncVectorKMeansTree) {
994-
NKikimrKqp::TVectorIndexKmeansTreeDescription vectorIndexDescription;
995-
*vectorIndexDescription.MutableSettings() = SerializeVectorIndexSettingsToProto(index.IndexSettings());
996-
specializedIndexDescription = vectorIndexDescription;
999+
*specializedIndexDescription.emplace<NKikimrKqp::TVectorIndexKmeansTreeDescription>()
1000+
.MutableSettings() = SerializeVectorIndexSettingsToProto(index.IndexSettings());
9971001
}
9981002

9991003
// IndexState and version, pathId are ignored for create table with index request

ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2379,8 +2379,8 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
23792379
UNIT_ASSERT_VALUES_EQUAL(indexDesc.back().GetDataColumns().size(), 0);
23802380

23812381
if (type == EIndexTypeSql::GlobalVectorKMeansTree) {
2382-
const TVectorIndexSettings& vectorIndexSettings = *indexDesc.back().GetVectorIndexSettings();
2383-
UNIT_ASSERT_VALUES_EQUAL(std::get<TVectorIndexSettings::ESimilarity>(vectorIndexSettings.Metric), TVectorIndexSettings::ESimilarity::InnerProduct);
2382+
const auto& vectorIndexSettings = std::get<TKMeansTreeSettings>(indexDesc.back().GetVectorIndexSettings()).Settings;
2383+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.Metric, TVectorIndexSettings::EMetric::InnerProduct);
23842384
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorType, TVectorIndexSettings::EVectorType::Float);
23852385
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorDimension, 1024);
23862386
}
@@ -2421,8 +2421,8 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
24212421
UNIT_ASSERT_VALUES_EQUAL(indexDesc.back().GetDataColumns().size(), 1);
24222422

24232423
if (type == EIndexTypeSql::GlobalVectorKMeansTree) {
2424-
const TVectorIndexSettings& vectorIndexSettings = *indexDesc.back().GetVectorIndexSettings();
2425-
UNIT_ASSERT_VALUES_EQUAL(std::get<TVectorIndexSettings::ESimilarity>(vectorIndexSettings.Metric), TVectorIndexSettings::ESimilarity::InnerProduct);
2424+
const auto& vectorIndexSettings = std::get<TKMeansTreeSettings>(indexDesc.back().GetVectorIndexSettings()).Settings;
2425+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.Metric, TVectorIndexSettings::EMetric::InnerProduct);
24262426
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorType, TVectorIndexSettings::EVectorType::Float);
24272427
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorDimension, 1024);
24282428
}
@@ -2775,9 +2775,10 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
27752775
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns().size(), 1);
27762776
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns()[0], "Embedding");
27772777
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns().size(), 0);
2778-
UNIT_ASSERT_VALUES_EQUAL(std::get<NYdb::NTable::TVectorIndexSettings::ESimilarity>(indexDesc.GetVectorIndexSettings()->Metric), NYdb::NTable::TVectorIndexSettings::ESimilarity::InnerProduct);
2779-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2780-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorDimension, 1024);
2778+
const auto& vectorIndexSettings = std::get<TKMeansTreeSettings>(indexDesc.GetVectorIndexSettings()).Settings;
2779+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.Metric, NYdb::NTable::TVectorIndexSettings::EMetric::InnerProduct);
2780+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2781+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorDimension, 1024);
27812782
}
27822783
{
27832784
auto describeLevelTable = session.DescribeTable("/Root/TestTable/vector_idx/indexImplLevelTable").GetValueSync();
@@ -2826,9 +2827,10 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
28262827
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns()[0], "Embedding");
28272828
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns().size(), 1);
28282829
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns()[0], "Covered");
2829-
UNIT_ASSERT_VALUES_EQUAL(std::get<NYdb::NTable::TVectorIndexSettings::ESimilarity>(indexDesc.GetVectorIndexSettings()->Metric), NYdb::NTable::TVectorIndexSettings::ESimilarity::InnerProduct);
2830-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2831-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorDimension, 1024);
2830+
const auto& vectorIndexSettings = std::get<TKMeansTreeSettings>(indexDesc.GetVectorIndexSettings()).Settings;
2831+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.Metric, NYdb::NTable::TVectorIndexSettings::EMetric::InnerProduct);
2832+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2833+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorDimension, 1024);
28322834
}
28332835
}
28342836

@@ -2894,10 +2896,11 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
28942896
.AddNullableColumn("Key", EPrimitiveType::Uint64)
28952897
.AddNullableColumn("Embedding", EPrimitiveType::String)
28962898
.SetPrimaryKeyColumn("Key")
2897-
.AddVectorKMeansTreeSecondaryIndex("vector_idx", {"Embedding"},
2898-
{ NYdb::NTable::TVectorIndexSettings::EDistance::Cosine,
2899-
NYdb::NTable::TVectorIndexSettings::EVectorType::Float,
2900-
1024});
2899+
.AddVectorKMeansTreeIndex("vector_idx", {"Embedding"}, {TVectorIndexSettings{
2900+
NYdb::NTable::TVectorIndexSettings::EMetric::CosineDistance,
2901+
NYdb::NTable::TVectorIndexSettings::EVectorType::Float,
2902+
1024,
2903+
}});
29012904

29022905
auto result = session.CreateTable("/Root/TestTable", builder.Build()).ExtractValueSync();
29032906
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString());
@@ -2913,9 +2916,10 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
29132916
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns().size(), 1);
29142917
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns()[0], "Embedding");
29152918
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns().size(), 0);
2916-
UNIT_ASSERT_VALUES_EQUAL(std::get<NYdb::NTable::TVectorIndexSettings::EDistance>(indexDesc.GetVectorIndexSettings()->Metric), NYdb::NTable::TVectorIndexSettings::EDistance::Cosine);
2917-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2918-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorDimension, 1024);
2919+
const auto& vectorIndexSettings = std::get<TKMeansTreeSettings>(indexDesc.GetVectorIndexSettings()).Settings;
2920+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.Metric, NYdb::NTable::TVectorIndexSettings::EMetric::CosineDistance);
2921+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2922+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorDimension, 1024);
29192923
}
29202924
}
29212925

@@ -2932,10 +2936,11 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
29322936
.AddNullableColumn("Embedding", EPrimitiveType::String)
29332937
.AddNullableColumn("Covered", EPrimitiveType::String)
29342938
.SetPrimaryKeyColumn("Key")
2935-
.AddVectorKMeansTreeSecondaryIndex("vector_idx", {"Embedding"}, {"Covered"},
2936-
{ NYdb::NTable::TVectorIndexSettings::EDistance::Cosine,
2937-
NYdb::NTable::TVectorIndexSettings::EVectorType::Float,
2938-
1024});
2939+
.AddVectorKMeansTreeIndex("vector_idx", {"Embedding"}, {"Covered"}, {TVectorIndexSettings{
2940+
NYdb::NTable::TVectorIndexSettings::EMetric::CosineDistance,
2941+
NYdb::NTable::TVectorIndexSettings::EVectorType::Float,
2942+
1024,
2943+
}});
29392944

29402945
auto result = session.CreateTable("/Root/TestTable", builder.Build()).ExtractValueSync();
29412946
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString());
@@ -2952,9 +2957,10 @@ Y_UNIT_TEST_SUITE(KqpScheme) {
29522957
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetIndexColumns()[0], "Embedding");
29532958
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns().size(), 1);
29542959
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetDataColumns()[0], "Covered");
2955-
UNIT_ASSERT_VALUES_EQUAL(std::get<NYdb::NTable::TVectorIndexSettings::EDistance>(indexDesc.GetVectorIndexSettings()->Metric), NYdb::NTable::TVectorIndexSettings::EDistance::Cosine);
2956-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2957-
UNIT_ASSERT_VALUES_EQUAL(indexDesc.GetVectorIndexSettings()->VectorDimension, 1024);
2960+
const auto& vectorIndexSettings = std::get<TKMeansTreeSettings>(indexDesc.GetVectorIndexSettings()).Settings;
2961+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.Metric, NYdb::NTable::TVectorIndexSettings::EMetric::CosineDistance);
2962+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorType, NYdb::NTable::TVectorIndexSettings::EVectorType::Float);
2963+
UNIT_ASSERT_VALUES_EQUAL(vectorIndexSettings.VectorDimension, 1024);
29582964
}
29592965
}
29602966

ydb/core/protos/flat_scheme_op.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -934,7 +934,7 @@ enum EIndexState {
934934
}
935935

936936
message TVectorIndexKmeansTreeDescription {
937-
optional Ydb.Table.VectorIndexSettings Settings = 1;
937+
optional Ydb.Table.KMeansTreeSettings Settings = 1;
938938
}
939939

940940
message TIndexDescription {

ydb/core/protos/kqp.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ message TKqpPathIdProto {
126126
}
127127

128128
message TVectorIndexKmeansTreeDescription {
129-
optional Ydb.Table.VectorIndexSettings Settings = 1;
129+
optional Ydb.Table.KMeansTreeSettings Settings = 1;
130130
}
131131

132132
message TIndexDescriptionProto {

0 commit comments

Comments
 (0)