diff --git a/applications/tests/Evaluation.Tests/appsettings.json b/applications/tests/Evaluation.Tests/appsettings.json index 6b9c02d2d..f5fa9abe3 100644 --- a/applications/tests/Evaluation.Tests/appsettings.json +++ b/applications/tests/Evaluation.Tests/appsettings.json @@ -70,7 +70,18 @@ "APIKey": "", // Hybrid search is not enabled by default. Note that when using hybrid search // relevance scores are different, usually lower, than when using just vector search - "UseHybridSearch": false + "UseHybridSearch": false, + // Helps improve relevance score consistency for search services with multiple replicas by + // attempting to route a given request to the same replica for that session. Use this when + // favoring consistent scoring over lower latency. Can adversely affect performance. + // + // Whether to use sticky sessions, which can help get more consistent results. + // When using sticky sessions, a best-effort attempt will be made to target the same replica set. + // Be wary that reusing the same replica repeatedly can interfere with the load balancing of + // the requests across replicas and adversely affect the performance of the search service. + // + // See https://learn.microsoft.com/rest/api/searchservice/documents/search-post?view=rest-searchservice-2024-07-01&tabs=HTTP#request-body + "UseStickySessions": false }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) diff --git a/examples/002-dotnet-Serverless/appsettings.json b/examples/002-dotnet-Serverless/appsettings.json index 8e060a1df..c4a64ce23 100644 --- a/examples/002-dotnet-Serverless/appsettings.json +++ b/examples/002-dotnet-Serverless/appsettings.json @@ -70,7 +70,18 @@ "APIKey": "", // Hybrid search is not enabled by default. 
Note that when using hybrid search // relevance scores are different, usually lower, than when using just vector search - "UseHybridSearch": false + "UseHybridSearch": false, + // Helps improve relevance score consistency for search services with multiple replicas by + // attempting to route a given request to the same replica for that session. Use this when + // favoring consistent scoring over lower latency. Can adversely affect performance. + // + // Whether to use sticky sessions, which can help get more consistent results. + // When using sticky sessions, a best-effort attempt will be made to target the same replica set. + // Be wary that reusing the same replica repeatedly can interfere with the load balancing of + // the requests across replicas and adversely affect the performance of the search service. + // + // See https://learn.microsoft.com/rest/api/searchservice/documents/search-post?view=rest-searchservice-2024-07-01&tabs=HTTP#request-body + "UseStickySessions": false }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) diff --git a/examples/210-KM-without-builder/appsettings.json b/examples/210-KM-without-builder/appsettings.json index 83857dd2c..331025b97 100644 --- a/examples/210-KM-without-builder/appsettings.json +++ b/examples/210-KM-without-builder/appsettings.json @@ -248,7 +248,18 @@ "APIKey": "", // Hybrid search is not enabled by default. Note that when using hybrid search // relevance scores are different, usually lower, than when using just vector search - "UseHybridSearch": false + "UseHybridSearch": false, + // Helps improve relevance score consistency for search services with multiple replicas by + // attempting to route a given request to the same replica for that session. Use this when + // favoring consistent scoring over lower latency. Can adversely affect performance. + // + // Whether to use sticky sessions, which can help get more consistent results. 
+ // When using sticky sessions, a best-effort attempt will be made to target the same replica set. + // Be wary that reusing the same replica repeatedly can interfere with the load balancing of + // the requests across replicas and adversely affect the performance of the search service. + // + // See https://learn.microsoft.com/rest/api/searchservice/documents/search-post?view=rest-searchservice-2024-07-01&tabs=HTTP#request-body + "UseStickySessions": false }, "AzureAIDocIntel": { // "APIKey" or "AzureIdentity". diff --git a/examples/401-evaluation/appsettings.json b/examples/401-evaluation/appsettings.json index 1bb5005d3..f40ad6c98 100644 --- a/examples/401-evaluation/appsettings.json +++ b/examples/401-evaluation/appsettings.json @@ -70,7 +70,18 @@ "APIKey": "", // Hybrid search is not enabled by default. Note that when using hybrid search // relevance scores are different, usually lower, than when using just vector search - "UseHybridSearch": false + "UseHybridSearch": false, + // Helps improve relevance score consistency for search services with multiple replicas by + // attempting to route a given request to the same replica for that session. Use this when + // favoring consistent scoring over lower latency. Can adversely affect performance. + // + // Whether to use sticky sessions, which can help get more consistent results. + // When using sticky sessions, a best-effort attempt will be made to target the same replica set. + // Be wary that reusing the same replica repeatedly can interfere with the load balancing of + // the requests across replicas and adversely affect the performance of the search service. 
+ // + // See https://learn.microsoft.com/rest/api/searchservice/documents/search-post?view=rest-searchservice-2024-07-01&tabs=HTTP#request-body + "UseStickySessions": false }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) diff --git a/extensions/AzureAISearch/AzureAISearch.FunctionalTests/DefaultTests.cs b/extensions/AzureAISearch/AzureAISearch.FunctionalTests/DefaultTests.cs index c02bb50b3..3977ceb6d 100644 --- a/extensions/AzureAISearch/AzureAISearch.FunctionalTests/DefaultTests.cs +++ b/extensions/AzureAISearch/AzureAISearch.FunctionalTests/DefaultTests.cs @@ -14,14 +14,14 @@ public class DefaultTests : BaseFunctionalTestCase public DefaultTests(IConfiguration cfg, ITestOutputHelper output) : base(cfg, output) { Assert.False(string.IsNullOrEmpty(this.AzureAiSearchConfig.Endpoint)); - Assert.False(string.IsNullOrEmpty(this.AzureAiSearchConfig.APIKey)); + Assert.False(this.AzureAiSearchConfig.Auth == AzureAISearchConfig.AuthTypes.APIKey && string.IsNullOrEmpty(this.AzureAiSearchConfig.APIKey)); Assert.False(string.IsNullOrEmpty(this.OpenAiConfig.APIKey)); this._memory = new KernelMemoryBuilder() .With(new KernelMemoryConfig { DefaultIndexName = "default4tests" }) .WithSearchClientConfig(new SearchClientConfig { EmptyAnswer = NotFound }) .WithOpenAI(this.OpenAiConfig) - .WithAzureAISearchMemoryDb(this.AzureAiSearchConfig.Endpoint, this.AzureAiSearchConfig.APIKey) + .WithAzureAISearchMemoryDb(this.AzureAiSearchConfig) .Build(); } diff --git a/extensions/AzureAISearch/AzureAISearch.FunctionalTests/appsettings.json b/extensions/AzureAISearch/AzureAISearch.FunctionalTests/appsettings.json index d61b27fa1..5262d1964 100644 --- a/extensions/AzureAISearch/AzureAISearch.FunctionalTests/appsettings.json +++ b/extensions/AzureAISearch/AzureAISearch.FunctionalTests/appsettings.json @@ -11,7 +11,8 @@ // using the env vars AZURE_TENANT_ID, AZURE_CLIENT_ID, AZURE_CLIENT_SECRET. 
"Auth": "AzureIdentity", "Endpoint": "https://<...>", - "APIKey": "" + "APIKey": "", + "UseStickySessions": true }, "OpenAI": { // Name of the model used to generate text (text completion or chat completion) diff --git a/extensions/AzureAISearch/AzureAISearch/AzureAISearchConfig.cs b/extensions/AzureAISearch/AzureAISearch/AzureAISearchConfig.cs index d9023ab68..31990811d 100644 --- a/extensions/AzureAISearch/AzureAISearch/AzureAISearchConfig.cs +++ b/extensions/AzureAISearch/AzureAISearch/AzureAISearchConfig.cs @@ -31,6 +31,20 @@ public enum AuthTypes /// </summary> public bool UseHybridSearch { get; set; } = false; + /// <summary> + /// Helps improve relevance score consistency for search services with multiple replicas by + /// attempting to route a given request to the same replica for that session. Use this when + /// favoring consistent scoring over lower latency. Can adversely affect performance. + /// + /// Whether to use sticky sessions, which can help get more consistent results. + /// When using sticky sessions, a best-effort attempt will be made to target the same replica set. + /// Be wary that reusing the same replica repeatedly can interfere with the load balancing of + /// the requests across replicas and adversely affect the performance of the search service. 
+ /// + /// See https://learn.microsoft.com/rest/api/searchservice/documents/search-post?view=rest-searchservice-2024-07-01&amp;tabs=HTTP#request-body + /// </summary> + public bool UseStickySessions { get; set; } = false; + public void SetCredential(TokenCredential credential) { this.Auth = AuthTypes.ManualTokenCredential; diff --git a/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs b/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs index 82e28e5c7..10c97827d 100644 --- a/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs +++ b/extensions/AzureAISearch/AzureAISearch/AzureAISearchMemory.cs @@ -34,6 +34,7 @@ public class AzureAISearchMemory : IMemoryDb, IMemoryDbUpsertBatch private readonly ITextEmbeddingGenerator _embeddingGenerator; private readonly ILogger _log; private readonly bool _useHybridSearch; + private readonly bool _useStickySessions; /// <summary> /// Create a new instance @@ -49,6 +50,7 @@ public AzureAISearchMemory( this._embeddingGenerator = embeddingGenerator; this._log = (loggerFactory ?? DefaultLogger.Factory).CreateLogger(); this._useHybridSearch = config.UseHybridSearch; + this._useStickySessions = config.UseStickySessions; if (string.IsNullOrEmpty(config.Endpoint)) { @@ -190,22 +192,12 @@ await client.IndexDocumentsAsync( FilterMode = VectorFilterMode.PreFilter } }; - DefineFieldsToSelect(options, withEmbeddings); + options = this.PrepareSearchOptions(options, withEmbeddings, filters, limit); if (limit > 0) { vectorQuery.KNearestNeighborsCount = limit; - options.Size = limit; - this._log.LogDebug("KNearestNeighborsCount and max results: {0}", limit); - } - - // Remove empty filters - filters = filters?.Where(f => !f.IsEmpty()).ToList(); - - if (filters is { Count: > 0 }) - { - options.Filter = AzureAISearchFiltering.BuildSearchFilter(filters); - this._log.LogDebug("Filtering vectors, condition: {0}", options.Filter); + this._log.LogDebug("KNearestNeighborsCount: {0}", limit); } Response>? 
searchResult = null; @@ -253,33 +245,7 @@ public async IAsyncEnumerable GetListAsync( { var client = this.GetSearchClient(index); - SearchOptions options = new(); - DefineFieldsToSelect(options, withEmbeddings); - - if (limit > 0) - { - options.Size = limit; - this._log.LogDebug("Max results: {0}", limit); - } - - // Remove empty filters - filters = filters?.Where(f => !f.IsEmpty()).ToList(); - - if (filters is { Count: > 0 }) - { - options.Filter = AzureAISearchFiltering.BuildSearchFilter(filters); - this._log.LogDebug("Filtering vectors, condition: {0}", options.Filter); - } - - // See: https://learn.microsoft.com/azure/search/search-query-understand-collection-filters - // fieldValue = fieldValue.Replace("'", "''", StringComparison.Ordinal); - // var options = new SearchOptions - // { - // Filter = fieldIsCollection - // ? $"{fieldName}/any(s: s eq '{fieldValue}')" - // : $"{fieldName} eq '{fieldValue}')", - // Size = limit - // }; + SearchOptions options = this.PrepareSearchOptions(null, withEmbeddings, filters, limit); Response>? searchResult = null; try @@ -627,15 +593,71 @@ at Azure.Search.Documents.SearchClient.SearchInternal[T](SearchOptions options, return indexSchema; } - private static void DefineFieldsToSelect(SearchOptions options, bool withEmbeddings) + // Session ID reused by every search issued by this instance when sticky sessions are enabled. + // Azure AI Search targets the same replica set only for requests sharing the same session ID, + // so generating a new ID per request would never make two requests stick together. + private readonly string _stickySessionId = Guid.NewGuid().ToString("N"); + + /// <summary> + /// Populate the search options used by the search methods of this class: fields to select, + /// tag filters, max number of results and, when enabled, the sticky session ID. 
+ /// </summary> + /// <param name="options">Options to complete; a new instance is created when null</param> + /// <param name="withEmbeddings">Whether to fetch the embedding vector field</param> + /// <param name="filters">Optional filters; empty filters are ignored</param> + /// <param name="limit">Max number of results; values lower than 1 leave the size unchanged</param> + /// <returns>The options passed in, or the new instance, after being populated</returns> + private SearchOptions PrepareSearchOptions( + SearchOptions? options, + bool withEmbeddings, + ICollection? 
filters = null, + int limit = 1) { + options ??= new SearchOptions(); + + // Define which fields to fetch options.Select.Add(AzureAISearchMemoryRecord.IdField); options.Select.Add(AzureAISearchMemoryRecord.TagsField); options.Select.Add(AzureAISearchMemoryRecord.PayloadField); + + // Embeddings are fetched only when needed, to reduce latency and cost if (withEmbeddings) { options.Select.Add(AzureAISearchMemoryRecord.VectorField); } + + // Remove empty filters + filters = filters?.Where(f => !f.IsEmpty()).ToList(); + + if (filters is { Count: > 0 }) + { + options.Filter = AzureAISearchFiltering.BuildSearchFilter(filters); + this._log.LogDebug("Filtering vectors, condition: {0}", options.Filter); + } + + // See: https://learn.microsoft.com/azure/search/search-query-understand-collection-filters + // fieldValue = fieldValue.Replace("'", "''", StringComparison.Ordinal); + // var options = new SearchOptions + // { + // Filter = fieldIsCollection + // ? $"{fieldName}/any(s: s eq '{fieldValue}')" + // : $"{fieldName} eq '{fieldValue}')", + // Size = limit + // }; + + if (limit > 0) + { + options.Size = limit; + this._log.LogDebug("Max results: {0}", limit); + } + + // Reuse the same session ID across requests: stickiness is keyed on the session ID + if (this._useStickySessions) + { + options.SessionId = this._stickySessionId; + } + + return options; } private static double ScoreToCosineSimilarity(double score) diff --git a/service/Service/appsettings.json b/service/Service/appsettings.json index 49da0a3fb..83f71fcca 100644 --- a/service/Service/appsettings.json +++ b/service/Service/appsettings.json @@ -280,7 +280,18 @@ "APIKey": "", // Hybrid search is not enabled by default. 
Note that when using hybrid search // relevance scores are different, usually lower, than when using just vector search - "UseHybridSearch": false + "UseHybridSearch": false, + // Helps improve relevance score consistency for search services with multiple replicas by + // attempting to route a given request to the same replica for that session. Use this when + // favoring consistent scoring over lower latency. Can adversely affect performance. + // + // Whether to use sticky sessions, which can help get more consistent results. + // When using sticky sessions, a best-effort attempt will be made to target the same replica set. + // Be wary that reusing the same replica repeatedly can interfere with the load balancing of + // the requests across replicas and adversely affect the performance of the search service. + // + // See https://learn.microsoft.com/rest/api/searchservice/documents/search-post?view=rest-searchservice-2024-07-01&tabs=HTTP#request-body + "UseStickySessions": false }, "AzureAIDocIntel": { // "APIKey" or "AzureIdentity". 
diff --git a/service/tests/Abstractions.UnitTests/Diagnostics/SensitiveDataLoggerTests.cs b/service/tests/Abstractions.UnitTests/Diagnostics/SensitiveDataLoggerTests.cs index daa78cd1a..7ef6b1bba 100644 --- a/service/tests/Abstractions.UnitTests/Diagnostics/SensitiveDataLoggerTests.cs +++ b/service/tests/Abstractions.UnitTests/Diagnostics/SensitiveDataLoggerTests.cs @@ -10,6 +10,7 @@ public sealed class SensitiveDataLoggerTests : IDisposable private const string DotNetEnvVar = "DOTNET_ENVIRONMENT"; [Fact] + [Trait("Category", "UnitTest")] public void ItIsDisabledByDefault() { // Assert diff --git a/service/tests/Core.FunctionalTests/appsettings.json b/service/tests/Core.FunctionalTests/appsettings.json index 2b0e90b31..19a342d60 100644 --- a/service/tests/Core.FunctionalTests/appsettings.json +++ b/service/tests/Core.FunctionalTests/appsettings.json @@ -12,7 +12,8 @@ "Auth": "AzureIdentity", "Endpoint": "https://<...>", "APIKey": "", - "UseHybridSearch": false + "UseHybridSearch": false, + "UseStickySessions": true }, "LlamaSharp": { "TextModel": { diff --git a/tools/InteractiveSetup/Services/AzureAISearch.cs b/tools/InteractiveSetup/Services/AzureAISearch.cs index 15962c2e4..2e929e869 100644 --- a/tools/InteractiveSetup/Services/AzureAISearch.cs +++ b/tools/InteractiveSetup/Services/AzureAISearch.cs @@ -22,6 +22,7 @@ public static void Setup(Context ctx, bool force = false) { "Auth", "ApiKey" }, { "APIKey", "" }, { "UseHybridSearch", false }, + { "UseStickySessions", false } }; } @@ -45,5 +46,6 @@ public static void Setup(Context ctx, bool force = false) AppSettings.Change(x => x.Services[ServiceName]["Endpoint"] = SetupUI.AskOpenQuestion("Azure AI Search ", config["Endpoint"].ToString())); AppSettings.Change(x => x.Services[ServiceName]["UseHybridSearch"] = SetupUI.AskBoolean("Use hybrid search (yes/no)?", (bool)config["UseHybridSearch"])); + AppSettings.Change(x => x.Services[ServiceName]["UseStickySessions"] = SetupUI.AskBoolean("Use sticky sessions (yes/no)?", 
(bool)config["UseStickySessions"])); } }