Skip to content

Commit

Permalink
.Net Agents - Fix interoperability of assistant generated content (mi…
Browse files Browse the repository at this point in the history
…crosoft#7415)

### Motivation and Context
<!-- Thank you for your contribution to the semantic-kernel repo!
Please help reviewers and future users, providing the following
information:
  1. Why is this change required?
  2. What problem does it solve?
  3. What scenario does it contribute to?
  4. If it fixes an open issue, please link to the issue here.
-->
The `OpenAIAssistantAgent` produced messages that resulted in an
exception when processed by the chat-completion service.
- Code-Interpreter Output: `Tool` role implies a preceding function-call
- `FileReferenceContent`: Image output being isolated from accompanying
text

Fixed: microsoft#7414

### Description
<!-- Describe your changes, the overall approach, the underlying design.
These notes will help understanding how your code works. Thanks! -->

- Changed role of code-interpreter output to `assistant`
- Merged message-content into a single `ChatMessageContent` object

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [X] The code builds clean without any errors or warnings
- [X] The PR follows the [SK Contribution
Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md)
and the [pre-submission formatting
script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts)
raises no violations
- [X] All unit tests pass, and I have added new tests where possible
- [X] I didn't break anyone 😄
  • Loading branch information
crickman authored Jul 25, 2024
1 parent 699e1ae commit 5c7f9ba
Show file tree
Hide file tree
Showing 3 changed files with 254 additions and 78 deletions.
99 changes: 99 additions & 0 deletions dotnet/samples/Concepts/Agents/MixedChat_Files.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
// Copyright (c) Microsoft. All rights reserved.
using System.Text;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Agents;
using Microsoft.SemanticKernel.Agents.OpenAI;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;
using Resources;

namespace Agents;

/// <summary>
/// Demonstrates a <see cref="ChatCompletionAgent"/> interacting with an
/// <see cref="OpenAIAssistantAgent"/> when the assistant produces file output.
/// </summary>
public class MixedChat_Files(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Target OpenAI services.
    /// </summary>
    protected override bool ForceOpenAI => true;

    private const string SummaryInstructions = "Summarize the entire conversation for the user in natural language.";

    /// <summary>
    /// Uploads a text file, asks a code-interpreter enabled assistant to build a report from it,
    /// then asks a chat-completion agent to summarize the conversation.
    /// </summary>
    [Fact]
    public async Task AnalyzeFileAndGenerateReportAsync()
    {
        OpenAIFileService fileService = new(TestConfiguration.OpenAI.ApiKey);

        // Build everything that needs no remote clean-up first, so a failure here
        // cannot strand an uploaded file or an assistant.
        ChatCompletionAgent summaryAgent =
            new()
            {
                Instructions = SummaryInstructions,
                Kernel = this.CreateKernelWithChatCompletion(),
            };

        // Create a chat for agent interaction.
        AgentGroupChat chat = new();

        OpenAIFileReference uploadFile =
            await fileService.UploadContentAsync(
                new BinaryContent(await EmbeddedResource.ReadAllAsync("30-user-context.txt"), mimeType: "text/plain"),
                new OpenAIFileUploadExecutionSettings("30-user-context.txt", OpenAIFilePurpose.Assistants));

        // NOTE: The API key is intentionally never written to the output; it is a secret.

        try
        {
            // Define the assistant agent
            OpenAIAssistantAgent analystAgent =
                await OpenAIAssistantAgent.CreateAsync(
                    kernel: new(),
                    config: new(this.ApiKey, this.Endpoint),
                    new()
                    {
                        EnableCodeInterpreter = true, // Enable code-interpreter
                        ModelId = this.Model,
                        FileIds = [uploadFile.Id] // Associate uploaded file with assistant
                    });

            // Respond to user input
            try
            {
                await InvokeAgentAsync(
                    analystAgent,
                    """
                    Create a tab delimited file report of the ordered (descending) frequency distribution
                    of words in the file '30-user-context.txt' for any words used more than once.
                    """);
                await InvokeAgentAsync(summaryAgent);
            }
            finally
            {
                // Always delete the assistant, even when the conversation fails.
                await analystAgent.DeleteAsync();
            }
        }
        finally
        {
            // Always delete the uploaded file, even when assistant creation or deletion fails.
            await fileService.DeleteFileAsync(uploadFile.Id);
        }

        // Local function to invoke agent and display the conversation messages.
        async Task InvokeAgentAsync(Agent agent, string? input = null)
        {
            if (!string.IsNullOrWhiteSpace(input))
            {
                chat.AddChatMessage(new(AuthorRole.User, input));
                Console.WriteLine($"# {AuthorRole.User}: '{input}'");
            }

            await foreach (ChatMessageContent content in chat.InvokeAsync(agent))
            {
                Console.WriteLine($"\n# {content.Role} - {content.AuthorName ?? "*"}: '{content.Content}'");

                foreach (AnnotationContent annotation in content.Items.OfType<AnnotationContent>())
                {
                    Console.WriteLine($"\t* '{annotation.Quote}' => {annotation.FileId}");

                    // An annotation without a file identifier has nothing to download.
                    if (string.IsNullOrWhiteSpace(annotation.FileId))
                    {
                        continue;
                    }

                    BinaryContent fileContent = await fileService.GetFileContentAsync(annotation.FileId);
                    byte[] byteContent = fileContent.Data?.ToArray() ?? [];
                    Console.WriteLine($"\n{Encoding.Default.GetString(byteContent)}");
                }
            }
        }
    }
}
112 changes: 112 additions & 0 deletions dotnet/samples/Concepts/Agents/MixedChat_Images.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright (c) Microsoft. All rights reserved.
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Agents;
using Microsoft.SemanticKernel.Agents.OpenAI;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.OpenAI;

namespace Agents;

/// <summary>
/// Demonstrates a <see cref="ChatCompletionAgent"/> interacting with an
/// <see cref="OpenAIAssistantAgent"/> when the assistant produces image output.
/// </summary>
public class MixedChat_Images(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Target OpenAI services.
    /// </summary>
    protected override bool ForceOpenAI => true;

    private const string AnalystName = "Analyst";
    private const string AnalystInstructions = "Create charts as requested without explanation.";

    private const string SummarizerName = "Summarizer";
    private const string SummarizerInstructions = "Summarize the entire conversation for the user in natural language.";

    /// <summary>
    /// Asks a code-interpreter enabled assistant to chart the supplied data, saves any generated
    /// image to a local file, then asks a chat-completion agent to summarize the conversation.
    /// </summary>
    [Fact]
    public async Task AnalyzeDataAndGenerateChartAsync()
    {
        OpenAIFileService fileService = new(TestConfiguration.OpenAI.ApiKey);

        // Build everything that needs no remote clean-up first, so a failure here
        // cannot strand an assistant.
        ChatCompletionAgent summaryAgent =
            new()
            {
                Instructions = SummarizerInstructions,
                Name = SummarizerName,
                Kernel = this.CreateKernelWithChatCompletion(),
            };

        // Create a chat for agent interaction.
        AgentGroupChat chat = new();

        // Define the assistant agent
        OpenAIAssistantAgent analystAgent =
            await OpenAIAssistantAgent.CreateAsync(
                kernel: new(),
                config: new(this.ApiKey, this.Endpoint),
                new()
                {
                    Instructions = AnalystInstructions,
                    Name = AnalystName,
                    EnableCodeInterpreter = true,
                    ModelId = this.Model,
                });

        // Respond to user input
        try
        {
            await InvokeAgentAsync(
                analystAgent,
                """
                Graph the percentage of storm events by state using a pie chart:
                State, StormCount
                TEXAS, 4701
                KANSAS, 3166
                IOWA, 2337
                ILLINOIS, 2022
                MISSOURI, 2016
                GEORGIA, 1983
                MINNESOTA, 1881
                WISCONSIN, 1850
                NEBRASKA, 1766
                NEW YORK, 1750
                """);

            await InvokeAgentAsync(summaryAgent);
        }
        finally
        {
            // Always delete the assistant, even when the conversation fails.
            await analystAgent.DeleteAsync();
        }

        // Local function to invoke agent and display the conversation messages.
        async Task InvokeAgentAsync(Agent agent, string? input = null)
        {
            if (!string.IsNullOrWhiteSpace(input))
            {
                chat.AddChatMessage(new(AuthorRole.User, input));
                Console.WriteLine($"# {AuthorRole.User}: '{input}'");
            }

            await foreach (ChatMessageContent message in chat.InvokeAsync(agent))
            {
                if (!string.IsNullOrWhiteSpace(message.Content))
                {
                    Console.WriteLine($"\n# {message.Role} - {message.AuthorName ?? "*"}: '{message.Content}'");
                }

                foreach (FileReferenceContent fileReference in message.Items.OfType<FileReferenceContent>())
                {
                    Console.WriteLine($"\t* Generated image - @{fileReference.FileId}");

                    // A reference without a file identifier has nothing to download.
                    if (string.IsNullOrWhiteSpace(fileReference.FileId))
                    {
                        continue;
                    }

                    BinaryContent fileContent = await fileService.GetFileContentAsync(fileReference.FileId);
                    byte[] byteContent = fileContent.Data?.ToArray() ?? [];

                    // Build a unique ".png" path in the temp folder. Path.GetTempFileName is avoided
                    // because it creates a ".tmp" file on disk that would be left behind.
                    string filePath = Path.Combine(Path.GetTempPath(), Path.ChangeExtension(Path.GetRandomFileName(), ".png"));

                    // Write to the exact path that is reported below.
                    await File.WriteAllBytesAsync(filePath, byteContent);
                    Console.WriteLine($"\t* Local path - {filePath}");
                }
            }
        }
    }
}
121 changes: 43 additions & 78 deletions dotnet/src/Agents/OpenAI/AssistantThreadActions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,7 @@ internal static class AssistantThreadActions
/// <throws><see cref="KernelException"/> if a system message is present, without taking any other action</throws>
public static async Task CreateMessageAsync(AssistantsClient client, string threadId, ChatMessageContent message, CancellationToken cancellationToken)
{
if (string.IsNullOrEmpty(message.Content) ||
message.Items.Any(i => i is FunctionCallContent))
if (message.Items.Any(i => i is FunctionCallContent))
{
return;
}
Expand Down Expand Up @@ -76,8 +75,6 @@ public static async IAsyncEnumerable<ChatMessageContent> GetMessagesAsync(Assist
messages = await client.GetMessagesAsync(threadId, limit: 100, ListSortOrder.Descending, after: lastId, null, cancellationToken).ConfigureAwait(false);
foreach (ThreadMessage message in messages)
{
AuthorRole role = new(message.Role.ToString());

string? assistantName = null;
if (!string.IsNullOrWhiteSpace(message.AssistantId) &&
!agentNames.TryGetValue(message.AssistantId, out assistantName))
Expand All @@ -91,23 +88,11 @@ public static async IAsyncEnumerable<ChatMessageContent> GetMessagesAsync(Assist

assistantName ??= message.AssistantId;

foreach (MessageContent item in message.ContentItems)
{
ChatMessageContent? content = null;

if (item is MessageTextContent contentMessage)
{
content = GenerateTextMessageContent(assistantName, role, contentMessage);
}
else if (item is MessageImageFileContent contentImage)
{
content = GenerateImageFileContent(assistantName, role, contentImage);
}
ChatMessageContent content = GenerateMessageContent(assistantName, message);

if (content is not null)
{
yield return content;
}
if (content.Items.Count > 0)
{
yield return content;
}

lastId = message.Id;
Expand Down Expand Up @@ -247,29 +232,13 @@ public static async IAsyncEnumerable<ChatMessageContent> GetMessagesAsync(Assist

if (message is not null)
{
AuthorRole role = new(message.Role.ToString());
ChatMessageContent content = GenerateMessageContent(agent.GetName(), message);

foreach (MessageContent itemContent in message.ContentItems)
if (content.Items.Count > 0)
{
ChatMessageContent? content = null;

// Process text content
if (itemContent is MessageTextContent contentMessage)
{
content = GenerateTextMessageContent(agent.GetName(), role, contentMessage);
}
// Process image content
else if (itemContent is MessageImageFileContent contentImage)
{
content = GenerateImageFileContent(agent.GetName(), role, contentImage);
}

if (content is not null)
{
++messageCount;

yield return (IsVisible: true, Message: content);
}
++messageCount;

yield return (IsVisible: true, Message: content);
}
}
}
Expand Down Expand Up @@ -375,6 +344,38 @@ IEnumerable<FunctionCallContent> ParseFunctionStep(OpenAIAssistantAgent agent, R
}
}

/// <summary>
/// Converts a thread message into a single <see cref="ChatMessageContent"/> whose items
/// hold the message's text, text annotations, and image file references.
/// </summary>
/// <param name="assistantName">The name assigned as the author of the resulting content (may be null).</param>
/// <param name="message">The thread message to convert.</param>
/// <returns>
/// The converted content. Its item collection is empty when the message has no
/// non-blank text and no image content.
/// </returns>
private static ChatMessageContent GenerateMessageContent(string? assistantName, ThreadMessage message)
{
    AuthorRole role = new(message.Role.ToString());

    ChatMessageContent content =
        new(role, content: null)
        {
            AuthorName = assistantName,
        };

    foreach (MessageContent itemContent in message.ContentItems)
    {
        // Process text content
        if (itemContent is MessageTextContent contentMessage)
        {
            string textContent = contentMessage.Text.Trim();

            // Skip blank text so an otherwise empty message keeps an empty item collection.
            if (string.IsNullOrWhiteSpace(textContent))
            {
                continue;
            }

            content.Items.Add(new TextContent(textContent));

            foreach (MessageTextAnnotation annotation in contentMessage.Annotations)
            {
                content.Items.Add(GenerateAnnotationContent(annotation));
            }
        }
        // Process image content
        else if (itemContent is MessageImageFileContent contentImage)
        {
            content.Items.Add(new FileReferenceContent(contentImage.FileId));
        }
    }

    return content;
}

private static AnnotationContent GenerateAnnotationContent(MessageTextAnnotation annotation)
{
string? fileId = null;
Expand All @@ -397,47 +398,11 @@ private static AnnotationContent GenerateAnnotationContent(MessageTextAnnotation
};
}

/// <summary>
/// Wraps an image file reference in a <see cref="ChatMessageContent"/> authored by the given agent.
/// </summary>
/// <param name="agentName">The name assigned as the author of the resulting content.</param>
/// <param name="role">The role assigned to the resulting content.</param>
/// <param name="contentImage">The image content whose file identifier is referenced.</param>
/// <returns>Content holding a single <see cref="FileReferenceContent"/> item.</returns>
private static ChatMessageContent GenerateImageFileContent(string agentName, AuthorRole role, MessageImageFileContent contentImage) =>
    new(role, [new FileReferenceContent(contentImage.FileId)])
    {
        AuthorName = agentName,
    };

/// <summary>
/// Builds a <see cref="ChatMessageContent"/> from text content, attaching one item per text annotation.
/// </summary>
/// <param name="agentName">The name assigned as the author of the resulting content.</param>
/// <param name="role">The role assigned to the resulting content.</param>
/// <param name="contentMessage">The text content to convert.</param>
/// <returns>The converted content, or null when the trimmed text is blank.</returns>
private static ChatMessageContent? GenerateTextMessageContent(string agentName, AuthorRole role, MessageTextContent contentMessage)
{
    string trimmedText = contentMessage.Text.Trim();

    // Nothing to produce for blank text.
    if (string.IsNullOrWhiteSpace(trimmedText))
    {
        return null;
    }

    ChatMessageContent result = new(role, trimmedText) { AuthorName = agentName };

    foreach (MessageTextAnnotation textAnnotation in contentMessage.Annotations)
    {
        result.Items.Add(GenerateAnnotationContent(textAnnotation));
    }

    return result;
}

private static ChatMessageContent GenerateCodeInterpreterContent(string agentName, RunStepCodeInterpreterToolCall contentCodeInterpreter)
{
return
new ChatMessageContent(
AuthorRole.Tool,
AuthorRole.Assistant,
[
new TextContent(contentCodeInterpreter.Input)
])
Expand Down

0 comments on commit 5c7f9ba

Please sign in to comment.