Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
using System.ClientModel.Primitives;
using Cellm.Models.Prompts;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.DependencyInjection;
using OpenAI;

namespace Cellm.Models.Providers.OpenAiCompatible;

internal class OpenAiCompatibleRequestHandler(HttpClient httpClient)
internal class OpenAiCompatibleRequestHandler([FromKeyedServices("ResilientHttpClient")] HttpClient httpClient)
: IModelRequestHandler<OpenAiCompatibleRequest, OpenAiCompatibleResponse>
{

Expand Down
30 changes: 23 additions & 7 deletions src/Cellm.Models/ServiceCollectionExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ public static IServiceCollection AddAnthropicChatClient(this IServiceCollection
{
anthropicHttpClient.BaseAddress = anthropicConfiguration.BaseAddress;
anthropicHttpClient.DefaultRequestHeaders.Add("anthropic-version", anthropicConfiguration.Version);
anthropicHttpClient.Timeout = TimeSpan.FromHours(1);
anthropicHttpClient.Timeout = TimeSpan.FromSeconds(configuration
.GetSection(nameof(ProviderConfiguration))
.GetValue<int>(nameof(ProviderConfiguration.HttpTimeoutInSeconds)));
})
.AddResilienceHandler($"{nameof(AnthropicRequestHandler)}", resiliencePipelineConfigurator.ConfigureResiliencePipeline);
.AddResilienceHandler($"{nameof(AnthropicRequestHandler)}{nameof(ResiliencePipelineConfigurator)}", resiliencePipelineConfigurator.ConfigureResiliencePipeline);

// TODO: Add IChatClient-compatible Anthropic client

Expand All @@ -36,20 +38,36 @@ public static IServiceCollection AddAnthropicChatClient(this IServiceCollection

public static IServiceCollection AddOllamaChatClient(this IServiceCollection services, IConfiguration configuration)
{
var resiliencePipelineConfigurator = new ResiliencePipelineConfigurator(configuration);

var ollamaConfiguration = configuration.GetRequiredSection(nameof(OllamaConfiguration)).Get<OllamaConfiguration>()
?? throw new NullReferenceException(nameof(OllamaConfiguration));

services
.AddKeyedChatClient(Provider.Ollama, serviceProvider => new OllamaChatClient(
ollamaConfiguration.BaseAddress,
ollamaConfiguration.DefaultModel))
ollamaConfiguration.DefaultModel,
serviceProvider.GetKeyedService<HttpClient>("ResilientHttpClient")))
.UseFunctionInvocation();

return services;
}

/// <summary>
/// Registers the shared "ResilientHttpClient" named <see cref="HttpClient"/> with a
/// configuration-driven timeout, exposes it as a keyed service, and wraps it in the
/// project's standard resilience pipeline (retry / circuit breaker / rate limiting).
/// </summary>
/// <param name="services">The service collection to register into.</param>
/// <param name="configuration">Application configuration supplying provider and resilience settings.</param>
/// <returns>The same <paramref name="services"/> instance, for fluent chaining.</returns>
public static IServiceCollection AddResilientHttpClient(this IServiceCollection services, IConfiguration configuration)
{
    // Builds the Polly-style pipeline (retry/circuit-breaker/rate-limiter) from configuration.
    var pipelineConfigurator = new ResiliencePipelineConfigurator(configuration);

    var clientBuilder = services.AddHttpClient("ResilientHttpClient", httpClient =>
    {
        // Read the timeout inside the configure callback so it is resolved each time
        // the factory builds a client, matching the original deferred evaluation.
        var timeoutSeconds = configuration
            .GetSection(nameof(ProviderConfiguration))
            .GetValue<int>(nameof(ProviderConfiguration.HttpTimeoutInSeconds));
        httpClient.Timeout = TimeSpan.FromSeconds(timeoutSeconds);
    });

    clientBuilder
        .AddAsKeyed()
        .AddResilienceHandler("ResilientHttpClientHandler", pipelineConfigurator.ConfigureResiliencePipeline);

    return services;
}

public static IServiceCollection AddSentryBehavior(this IServiceCollection services)
{
services
Expand Down Expand Up @@ -85,8 +103,6 @@ public static IServiceCollection AddTools(this IServiceCollection services, para

public static IServiceCollection AddTools(this IServiceCollection services, params Func<IServiceProvider, AIFunction>[] toolBuilders)
{


foreach (var toolBuilder in toolBuilders)
{
services.AddSingleton((serviceProvider) => toolBuilder(serviceProvider));
Expand Down
12 changes: 2 additions & 10 deletions src/Cellm/Services/ServiceLocator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,15 +104,8 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
// Add providers
services
.AddAnthropicChatClient(configuration)
.AddOllamaChatClient(configuration);

services
.AddHttpClient<IModelRequestHandler<OpenAiCompatibleRequest, OpenAiCompatibleResponse>>(openAiCompatibleHttpClient =>
{
openAiCompatibleHttpClient.Timeout = TimeSpan.FromSeconds(configuration
.GetSection(nameof(ProviderConfiguration))
.GetValue<int>(nameof(ProviderConfiguration.HttpTimeoutInSeconds)));
});
.AddOllamaChatClient(configuration)
.AddResilientHttpClient(configuration);

// Add provider middleware
services
Expand All @@ -124,7 +117,6 @@ private static IServiceCollection ConfigureServices(IServiceCollection services)
.AddSingleton<FileReaderFactory>()
.AddSingleton<IFileReader, PdfReader>()
.AddSingleton<IFileReader, TextReader>()
.AddSingleton<Functions>()
.AddTools(
serviceProvider => AIFunctionFactory.Create(serviceProvider.GetRequiredService<Functions>().GlobRequest),
serviceProvider => AIFunctionFactory.Create(serviceProvider.GetRequiredService<Functions>().FileReaderRequest));
Expand Down
1 change: 0 additions & 1 deletion src/Cellm/appsettings.Local.Llamafile.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"ProviderConfiguration": {
"BaseAddress": "http://127.0.0.1:8080",
"DefaultProvider": "Llamafile"
}
}
6 changes: 3 additions & 3 deletions src/Cellm/appsettings.Local.OpenAiCompatible.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"OpenAiCompatibleConfiguration": {
"BaseAddress": "YOUR_API_ENDPOINT",
"DefaultModel": "YOUR_DEFAULT_MODEL",
"ApiKey": "YOUR_API_KEY_OPTIONAL"
"BaseAddress": "BASE_ADDRESS",
"DefaultModel": "MODEL_NAME",
"ApiKey": "API_KEY"
},
"ProviderConfiguration": {
"DefaultProvider": "OpenAiCompatible"
Expand Down
18 changes: 9 additions & 9 deletions src/Cellm/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,28 +72,28 @@
"DefaultModel": "gemma2:2b",
"DefaultTemperature": 0,
"CacheTimeoutInSeconds": 3600,
"HttpTimeoutInSeconds": 3600,
"HttpTimeoutInSeconds": 900,
"EnableCache": true,
"EnableTools": {
"GlobRequest": false,
"FileReaderRequest": false
}
},
"RateLimiterConfiguration": {
"TokenLimit": 4,
"QueueLimit": 3600,
"ConcurrencyLimit": 4,
"ReplenishmentPeriodInSeconds": 2,
"TokenLimit": 2,
"QueueLimit": 16384,
"ConcurrencyLimit": 2,
"ReplenishmentPeriodInSeconds": 1,
"TokensPerPeriod": 1
},
"CircuitBreakerConfiguration": {
"FailureRatio": 0.2,
"FailureRatio": 0.3,
"SamplingDurationInSeconds": 30,
"MinimumThroughput": 10,
"BreakDurationInSeconds": 4
"MinimumThroughput": 4,
"BreakDurationInSeconds": 8
},
"RetryConfiguration": {
"MaxRetryAttempts": 4,
"MaxRetryAttempts": 3,
"DelayInSeconds": 4
},
"SentryConfiguration": {
Expand Down