From 4245c5bed1f109993c2695aa0a0c559467b3f7c1 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Fri, 3 Jan 2025 14:20:55 +0800 Subject: [PATCH] fix: text match panics when enable_match is set to false (#38950) fix: https://github.com/milvus-io/milvus/issues/38949 --------- Signed-off-by: SpadeA-Tang --- internal/core/src/common/EasyAssert.h | 1 + internal/core/src/segcore/SegmentGrowingImpl.cpp | 6 +++++- internal/core/src/segcore/SegmentInterface.cpp | 7 +++++-- internal/parser/planparserv2/parser_visitor.go | 6 +++++- .../parser/planparserv2/plan_parser_v2_test.go | 16 ++++++++++++++++ pkg/util/typeutil/schema.go | 8 ++++++++ 6 files changed, 40 insertions(+), 4 deletions(-) diff --git a/internal/core/src/common/EasyAssert.h b/internal/core/src/common/EasyAssert.h index 22726938ffbdc..8258438b28052 100644 --- a/internal/core/src/common/EasyAssert.h +++ b/internal/core/src/common/EasyAssert.h @@ -69,6 +69,7 @@ enum ErrorCode { FollyCancel = 2038, OutOfRange = 2039, GcpNativeError = 2040, + TextIndexNotFound = 2041, KnowhereError = 2099 }; diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 0ab4825d0e10e..ccc793fbb65b0 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -863,7 +863,11 @@ SegmentGrowingImpl::AddTexts(milvus::FieldId field_id, int64_t offset_begin) { std::unique_lock lock(mutex_); auto iter = text_indexes_.find(field_id); - AssertInfo(iter != text_indexes_.end(), "text index not found"); + if (iter == text_indexes_.end()) { + throw SegcoreError( + ErrorCode::TextIndexNotFound, + fmt::format("text index not found for field {}", field_id.get())); + } iter->second->AddTexts(n, texts, texts_valid_data, offset_begin); } diff --git a/internal/core/src/segcore/SegmentInterface.cpp b/internal/core/src/segcore/SegmentInterface.cpp index ee31b16d5fab4..f9c9ab615790a 100644 --- 
a/internal/core/src/segcore/SegmentInterface.cpp +++ b/internal/core/src/segcore/SegmentInterface.cpp @@ -396,8 +396,11 @@ index::TextMatchIndex* SegmentInternalInterface::GetTextIndex(FieldId field_id) const { std::shared_lock lock(mutex_); auto iter = text_indexes_.find(field_id); - AssertInfo(iter != text_indexes_.end(), - "failed to get text index, text index not found"); + if (iter == text_indexes_.end()) { + throw SegcoreError( + ErrorCode::TextIndexNotFound, + fmt::format("text index not found for field {}", field_id.get())); + } return iter->second.get(); } diff --git a/internal/parser/planparserv2/parser_visitor.go b/internal/parser/planparserv2/parser_visitor.go index 9da5075f67c49..fa7363c39151d 100644 --- a/internal/parser/planparserv2/parser_visitor.go +++ b/internal/parser/planparserv2/parser_visitor.go @@ -486,6 +486,10 @@ func (v *ParserVisitor) VisitTextMatch(ctx *parser.TextMatchContext) interface{} if err != nil { return err } + columnInfo := toColumnInfo(column) + if !v.schema.IsFieldTextMatchEnabled(columnInfo.FieldId) { + return fmt.Errorf("field %v does not enable text match", columnInfo.FieldId) + } if !typeutil.IsStringType(column.dataType) { return fmt.Errorf("text match operation on non-string is unsupported") } @@ -499,7 +503,7 @@ func (v *ParserVisitor) VisitTextMatch(ctx *parser.TextMatchContext) interface{} expr: &planpb.Expr{ Expr: &planpb.Expr_UnaryRangeExpr{ UnaryRangeExpr: &planpb.UnaryRangeExpr{ - ColumnInfo: toColumnInfo(column), + ColumnInfo: columnInfo, Op: planpb.OpType_TextMatch, Value: NewString(queryText), }, diff --git a/internal/parser/planparserv2/plan_parser_v2_test.go b/internal/parser/planparserv2/plan_parser_v2_test.go index 50b3447dd86f3..4c3a665930885 100644 --- a/internal/parser/planparserv2/plan_parser_v2_test.go +++ b/internal/parser/planparserv2/plan_parser_v2_test.go @@ -10,6 +10,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + 
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/planpb" "github.com/milvus-io/milvus/pkg/common" @@ -53,6 +54,16 @@ func newTestSchema(EnableDynamicField bool) *schemapb.CollectionSchema { } } +func enableMatch(schema *schemapb.CollectionSchema) { + for _, field := range schema.Fields { + if typeutil.IsStringType(field.DataType) { + field.TypeParams = append(field.TypeParams, &commonpb.KeyValuePair{ + Key: "enable_match", Value: "True", + }) + } + } +} + func newTestSchemaHelper(t *testing.T) *typeutil.SchemaHelper { schema := newTestSchema(true) schemaHelper, err := typeutil.CreateSchemaHelper(schema) @@ -221,6 +232,11 @@ func TestExpr_TextMatch(t *testing.T) { exprStrs := []string{ `text_match(VarCharField, "query")`, } + for _, exprStr := range exprStrs { + assertInvalidExpr(t, helper, exprStr) + } + + enableMatch(schema) for _, exprStr := range exprStrs { assertValidExpr(t, helper, exprStr) } diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 8596628ad65a7..d0f89baab5295 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -389,6 +389,14 @@ func (helper *SchemaHelper) IsFieldLoaded(fieldID int64) bool { return helper.loadFields.Contain(fieldID) } +func (helper *SchemaHelper) IsFieldTextMatchEnabled(fieldId int64) bool { + sche, err := helper.GetFieldFromID(fieldId) + if err != nil { + return false + } + return CreateFieldSchemaHelper(sche).EnableMatch() +} + func (helper *SchemaHelper) getDefaultJSONField(fieldName string) (*schemapb.FieldSchema, error) { for _, f := range helper.schema.GetFields() { if f.DataType == schemapb.DataType_JSON && f.IsDynamic {