-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Add implicit ORDER BY Distance for VectorSearch translation #38086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
07fb69c
e8af99c
a1c2e5d
3d949f2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -147,12 +147,18 @@ var metric | |
| var valueProjectionMember = new ProjectionMember().Append(resultType.GetProperty(nameof(VectorSearchResult<>.Value))!); | ||
| var distanceProjectionMember = new ProjectionMember().Append(resultType.GetProperty(nameof(VectorSearchResult<>.Distance))!); | ||
|
|
||
| var distanceColumn = new ColumnExpression("Distance", vectorSearchFunction.Alias, typeof(double), _typeMappingSource.FindMapping(typeof(double)), nullable: false); | ||
|
|
||
| select.ReplaceProjection(new Dictionary<ProjectionMember, Expression> | ||
| { | ||
| [valueProjectionMember] = entityProjection, | ||
| [distanceProjectionMember] = new ColumnExpression("Distance", vectorSearchFunction.Alias, typeof(double), _typeMappingSource.FindMapping(typeof(double)), nullable: false) | ||
| [distanceProjectionMember] = distanceColumn | ||
| }); | ||
|
|
||
| // VECTOR_SEARCH() results are implicitly ordered by distance ascending; add this ordering so that | ||
| // users can compose with Take() without needing an explicit OrderBy(r => r.Distance). | ||
| select.AppendOrdering(new OrderingExpression(distanceColumn, ascending: true)); | ||
|
|
||
roji marked this conversation as resolved.
Show resolved
Hide resolved
roji marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| var shaper = Expression.New( | ||
| resultType.GetConstructors().Single(), | ||
| arguments: | ||
|
|
@@ -795,29 +801,92 @@ bool TryTranslate( | |
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// This is an internal API that supports the Entity Framework Core infrastructure and not subject to | ||
| /// the same compatibility standards as public APIs. It may be changed or removed without notice in | ||
| /// any release. You should only use it directly in your code with extreme caution and knowing that | ||
| /// doing so can result in application failures when updating to a new Entity Framework Core release. | ||
| /// </summary> | ||
| protected override ShapedQueryExpression? TranslateTake(ShapedQueryExpression source, Expression count) | ||
| { | ||
| var selectExpression = (SelectExpression)source.QueryExpression; | ||
|
|
||
| // When VECTOR_SEARCH() is present and an Offset has already been applied (i.e. Skip().Take()), we need to ensure that the | ||
| // generated SQL uses TOP WITH APPROXIMATE in a subquery, rather than the default OFFSET...FETCH which doesn't use the | ||
| // vector index. We push down a subquery with TOP(Skip + Take) WITH APPROXIMATE, and apply OFFSET...FETCH on the outer query. | ||
| if (selectExpression is { Offset: { } existingOffset } | ||
| && selectExpression.Tables.Any(t => t.UnwrapJoin() is TableValuedFunctionExpression { Name: "VECTOR_SEARCH" })) | ||
| { | ||
| var translation = TranslateExpression(count); | ||
| if (translation == null) | ||
| { | ||
| return null; | ||
| } | ||
|
|
||
| var combinedLimit = _sqlExpressionFactory.Add(existingOffset, translation); | ||
|
|
||
| #pragma warning disable EF1001 // Internal EF Core API usage. | ||
| // Clear the offset so the inner subquery uses TOP(M+N) instead of OFFSET...FETCH | ||
| selectExpression.SetOffset(null); | ||
|
Comment on lines
+826
to
+830
|
||
| selectExpression.SetLimit(combinedLimit); | ||
|
|
||
| // Push down: inner gets TOP(Skip+Take) WITH APPROXIMATE, outer is clean | ||
| selectExpression.PushdownIntoSubquery(); | ||
|
|
||
| // Apply the original offset and take on the outer query as OFFSET...FETCH | ||
| selectExpression.ApplyOffset(existingOffset); | ||
| selectExpression.SetLimit(translation); | ||
| #pragma warning restore EF1001 // Internal EF Core API usage. | ||
|
|
||
| return source; | ||
| } | ||
|
|
||
| return base.TranslateTake(source, count); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// This is an internal API that supports the Entity Framework Core infrastructure and not subject to | ||
| /// the same compatibility standards as public APIs. It may be changed or removed without notice in | ||
| /// any release. You should only use it directly in your code with extreme caution and knowing that | ||
| /// doing so can result in application failures when updating to a new Entity Framework Core release. | ||
| /// </summary> | ||
| protected override bool IsNaturallyOrdered(SelectExpression selectExpression) | ||
| => selectExpression is | ||
| => selectExpression switch | ||
| { | ||
| Tables: [SqlServerOpenJsonExpression openJsonExpression, ..], | ||
| Orderings: | ||
| [ | ||
| { | ||
| Expression: SqlUnaryExpression | ||
| // OPENJSON rows are naturally ordered by their "key" column (array index), ascending. | ||
| // EF adds this ordering implicitly when expanding JSON arrays; treat it as natural to avoid spurious | ||
| // "Distinct after OrderBy without row limiting operator" warnings. | ||
| { | ||
| Tables: [SqlServerOpenJsonExpression openJsonExpression, ..], | ||
| Orderings: | ||
| [ | ||
| { | ||
| OperatorType: ExpressionType.Convert, | ||
| Operand: ColumnExpression { Name: "key", TableAlias: var orderingTableAlias } | ||
| }, | ||
| IsAscending: true | ||
| } | ||
| ] | ||
| } | ||
| && orderingTableAlias == openJsonExpression.Alias; | ||
| Expression: SqlUnaryExpression | ||
| { | ||
| OperatorType: ExpressionType.Convert, | ||
| Operand: ColumnExpression { Name: "key", TableAlias: var openJsonOrderingTableAlias } | ||
| }, | ||
| IsAscending: true | ||
| } | ||
| ] | ||
| } when openJsonOrderingTableAlias == openJsonExpression.Alias => true, | ||
|
|
||
| // VECTOR_SEARCH() results are naturally ordered by Distance ascending. | ||
| // EF adds this ordering implicitly during VectorSearch translation; treat it as natural to avoid spurious | ||
| // "Distinct after OrderBy without row limiting operator" warnings. | ||
| { | ||
| Tables: [TableValuedFunctionExpression { Name: "VECTOR_SEARCH" } vectorSearchFunction, ..], | ||
| Orderings: | ||
| [ | ||
| { | ||
| Expression: ColumnExpression { Name: "Distance", TableAlias: var vectorSearchOrderingTableAlias }, | ||
| IsAscending: true | ||
| } | ||
| ] | ||
| } when vectorSearchOrderingTableAlias == vectorSearchFunction.Alias => true, | ||
|
|
||
| _ => false | ||
| }; | ||
|
|
||
| /// <summary> | ||
| /// This is an internal API that supports the Entity Framework Core infrastructure and not subject to | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding
SelectExpression.SetOffset(...)introduces a new (public) API surface area (even with[EntityFrameworkInternal]). The repo tracks public API inEFCore.Relational.baseline.json, so this PR likely needs the corresponding baseline update to keep API validation passing.