Skip to content

Add Gemini file content #1064

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using BotSharp.Abstraction.Files.Converters;
using BotSharp.Abstraction.Instructs.Models;
using BotSharp.Abstraction.Instructs;
using BotSharp.Abstraction.Infrastructures;

namespace BotSharp.Core.Files.Services;

Expand All @@ -22,14 +21,24 @@ public async Task<string> ReadPdf(string text, List<InstructFileModel> files, In

try
{
var provider = options?.Provider ?? "openai";
var pdfFiles = await DownloadFiles(sessionDir, files);
var images = await ConvertPdfToImages(pdfFiles);
if (images.IsNullOrEmpty()) return content;

var targetFiles = pdfFiles;
if (provider != "google-ai")
{
targetFiles = await ConvertPdfToImages(pdfFiles);
}

if (targetFiles.IsNullOrEmpty())
{
return content;
}

var innerAgentId = options?.AgentId ?? Guid.Empty.ToString();
var instruction = await GetAgentTemplate(innerAgentId, options?.TemplateName);

var completion = CompletionProvider.GetChatCompletion(_services, provider: options?.Provider ?? "openai",
var completion = CompletionProvider.GetChatCompletion(_services, provider: provider,
model: options?.Model ?? "gpt-4o", multiModal: true);
var message = await completion.GetChatCompletions(new Agent()
{
Expand All @@ -39,7 +48,7 @@ public async Task<string> ReadPdf(string text, List<InstructFileModel> files, In
{
new RoleDialogModel(AgentRole.User, text)
{
Files = images.Select(x => new BotSharpFile { FileStorageUrl = x }).ToList()
Files = targetFiles.Select(x => new BotSharpFile { FileStorageUrl = x }).ToList()
}
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public async Task<bool> Execute(RoleDialogModel message)
{
Id = BuiltInAgentId.UtilityAssistant,
Name = "Utility Agent",
Instruction = fromAgent?.Instruction ?? args.UserRequest ?? "Please describe the image(s).",
Instruction = fromAgent?.Instruction ?? args?.UserRequest ?? "Please describe the image(s).",
TemplateDict = new Dictionary<string, object>()
};

Expand Down
29 changes: 21 additions & 8 deletions src/Plugins/BotSharp.Plugin.FileHandler/Functions/ReadPdfFn.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using BotSharp.Abstraction.Routing;

namespace BotSharp.Plugin.FileHandler.Functions;

public class ReadPdfFn : IFunctionCallback
Expand Down Expand Up @@ -25,20 +27,31 @@ public async Task<bool> Execute(RoleDialogModel message)
{
var args = JsonSerializer.Deserialize<LlmContextIn>(message.FunctionArgs);
var conv = _services.GetRequiredService<IConversationService>();
var routingCtx = _services.GetRequiredService<IRoutingContext>();
var agentService = _services.GetRequiredService<IAgentService>();

var wholeDialogs = conv.GetDialogHistory();
var dialogs = await AssembleFiles(conv.ConversationId, wholeDialogs);
var agent = await agentService.LoadAgent(BuiltInAgentId.UtilityAssistant);
var fileAgent = new Agent
Agent? fromAgent = null;
if (!string.IsNullOrEmpty(message.CurrentAgentId))
{
fromAgent = await agentService.LoadAgent(message.CurrentAgentId);
}

var agent = new Agent
{
Id = agent?.Id ?? Guid.Empty.ToString(),
Name = agent?.Name ?? "Unkown",
Instruction = !string.IsNullOrWhiteSpace(args?.UserRequest) ? args.UserRequest : "Please describe the pdf file(s).",
Id = BuiltInAgentId.UtilityAssistant,
Name = "Utility Agent",
Instruction = fromAgent?.Instruction ?? args?.UserRequest ?? "Please describe the pdf file(s).",
TemplateDict = new Dictionary<string, object>()
};

var response = await GetChatCompletion(fileAgent, dialogs);
var wholeDialogs = routingCtx.GetDialogs();
if (wholeDialogs.IsNullOrEmpty())
{
wholeDialogs = conv.GetDialogHistory();
}

var dialogs = await AssembleFiles(conv.ConversationId, wholeDialogs);
var response = await GetChatCompletion(agent, dialogs);
message.Content = response;
return true;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
using System.Text.Json.Nodes;
using BotSharp.Abstraction.Agents;
using BotSharp.Abstraction.Agents.Enums;
using BotSharp.Abstraction.Conversations;
using BotSharp.Abstraction.Files;
using BotSharp.Abstraction.Files.Utilities;
using BotSharp.Abstraction.Hooks;
using BotSharp.Abstraction.Loggers;
using GenerativeAI;
using GenerativeAI.Core;
using GenerativeAI.Types;
Expand Down Expand Up @@ -43,7 +40,7 @@ public async Task<RoleDialogModel> GetChatCompletions(Agent agent, List<RoleDial
}

var client = ProviderHelper.GetGeminiClient(Provider, _model, _services);
var aiModel = client.CreateGenerativeModel(_model);
var aiModel = client.CreateGenerativeModel(_model.ToModelId());
var (prompt, request) = PrepareOptions(aiModel, agent, conversations);

var response = await aiModel.GenerateContentAsync(request);
Expand Down Expand Up @@ -101,7 +98,7 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
}

var client = ProviderHelper.GetGeminiClient(Provider, _model, _services);
var chatClient = client.CreateGenerativeModel(_model);
var chatClient = client.CreateGenerativeModel(_model.ToModelId());
var (prompt, messages) = PrepareOptions(chatClient, agent, conversations);

var response = await chatClient.GenerateContentAsync(messages);
Expand Down Expand Up @@ -165,7 +162,7 @@ public async Task<bool> GetChatCompletionsAsync(Agent agent, List<RoleDialogMode
public async Task<bool> GetChatCompletionsStreamingAsync(Agent agent, List<RoleDialogModel> conversations, Func<RoleDialogModel, Task> onMessageReceived)
{
var client = ProviderHelper.GetGeminiClient(Provider, _model, _services);
var chatClient = client.CreateGenerativeModel(_model);
var chatClient = client.CreateGenerativeModel(_model.ToModelId());
var (prompt, messages) = PrepareOptions(chatClient,agent, conversations);

var asyncEnumerable = chatClient.StreamContentAsync(messages);
Expand Down Expand Up @@ -207,6 +204,10 @@ public void SetModelName(string model)
{
var agentService = _services.GetRequiredService<IAgentService>();
var googleSettings = _services.GetRequiredService<GoogleAiSettings>();
var fileStorage = _services.GetRequiredService<IFileStorageService>();
var settingsService = _services.GetRequiredService<ILlmProviderService>();
var settings = settingsService.GetSetting(Provider, _model);
var allowMultiModal = settings != null && settings.MultiModal;
renderedInstructions = [];

// Add settings
Expand Down Expand Up @@ -298,7 +299,50 @@ public void SetModelName(string model)
else if (message.Role == AgentRole.User)
{
var text = !string.IsNullOrWhiteSpace(message.Payload) ? message.Payload : message.Content;
contents.Add(new Content(text, AgentRole.User));
var contentParts = new List<Part> { new() { Text = text } };

if (allowMultiModal && !message.Files.IsNullOrEmpty())
{
foreach (var file in message.Files)
{
if (!string.IsNullOrEmpty(file.FileData))
{
var (contentType, bytes) = FileUtility.GetFileInfoFromData(file.FileData);
contentParts.Add(new Part()
{
InlineData = new()
{
MimeType = contentType,
Data = Convert.ToBase64String(bytes)
}
});
}
else if (!string.IsNullOrEmpty(file.FileStorageUrl))
{
var contentType = FileUtility.GetFileContentType(file.FileStorageUrl);
var bytes = fileStorage.GetFileBytes(file.FileStorageUrl);
contentParts.Add(new Part()
{
InlineData = new()
{
MimeType = contentType,
Data = Convert.ToBase64String(bytes)
}
});
}
else if (!string.IsNullOrEmpty(file.FileUrl))
{
contentParts.Add(new Part()
{
FileData = new()
{
FileUri = file.FileUrl
}
});
}
}
}
contents.Add(new Content(contentParts, AgentRole.User));
convPrompts.Add($"{AgentRole.User}: {text}");
}
else if (message.Role == AgentRole.Assistant)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ public static class ProviderHelper
public static GenerativeAI.GoogleAi GetGeminiClient(string provider, string model, IServiceProvider services)
{
var aiSettings = services.GetRequiredService<GoogleAiSettings>();
if (aiSettings == null || aiSettings.Gemini ==null || string.IsNullOrEmpty(aiSettings.Gemini.ApiKey))
if (string.IsNullOrEmpty(aiSettings?.Gemini?.ApiKey))
{
var settingsService = services.GetRequiredService<ILlmProviderService>();
var settings = settingsService.GetSetting(provider, model);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ await onMessageReceived(new RoleDialogModel(choice.Role?.ToString() ?? ChatMessa
protected (string, IEnumerable<ChatMessage>, ChatCompletionOptions) PrepareOptions(Agent agent, List<RoleDialogModel> conversations)
{
var agentService = _services.GetRequiredService<IAgentService>();
var state = _services.GetRequiredService<IConversationStateService>();
var fileStorage = _services.GetRequiredService<IFileStorageService>();
var settingsService = _services.GetRequiredService<ILlmProviderService>();
var settings = settingsService.GetSetting(Provider, _model);
Expand Down
Loading