Skip to content

Commit 8505901

Browse files
authored
Image generation tool (#6749)
* Prototype of using ImageGenerationTool * Handle DataContent returned from ImageGen * React to rename and improve metadata * Handle image_generation tool content from streaming * Add handling for combining updates with images * Add tests for new ChatResponseUpdateExtensions * Rename ImageGenerationTool to HostedImageGenerationTool * Remove ChatResponseUpdateCoalescingOptions * Add ImageGeneratingChatClient * Fix namespace of tool * Replace traces of function calling * More namespace fix * Enable editing * Update to preview OpenAI with image tool support * Temporary OpenAI feed * Fix tests * Add integration tests for ImageGeneratingChatClient * Remove ChatRole.Tool -> Assistant workaround * Remove use of private reflection for Image results * Add ChatResponseUpdate.Clone * Move all mutable state into RequestState object * Adjust prompt to improve integration test reliability * Refactor tool initialization I verified that the tool creation is cached by ReflectionAIFunctionDescriptor This change includes a small optimization to avoid additional allocation around inserting tools into the options. * Add integration tests for streaming Fixes the removal of tool content - this was broken for streaming when I changed removal to be based on callId. We don't have the CallId yet in the streaming case so we have to remove by name. * React to changes and fix tests * Address feedback * Fix SkipTestException from ConditionalTheory * Fix formatting * Add back image replacement coalescing (removed in merge) * Fix template tests and use new OpenAI * Remove use of temporary staging nuget feed * Address feedback * Make ImageGeneratingChatClient use ImageGenerationTool*Content * Remove ApplyUpdates and Coalesce ImageResults instead of DataContent. * Workaround OpenAI issue where image data is not read for partial images. 
openai/openai-dotnet#809 * Improved workaround * Return ImageGenerationToolCallContent from OpenAI * Add OpenAI image tool tests with representation of real traffic * Correct the event sequence for streaming single image * Fix some docs and refactor for clarity
1 parent a8aad1a commit 8505901

File tree

20 files changed

+2422
-25
lines changed

20 files changed

+2422
-25
lines changed

src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseExtensions.cs

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,47 @@ static async Task<ChatResponse> ToChatResponseAsync(
184184
}
185185
}
186186

187+
/// <summary>
188+
/// Coalesces image result content elements in the provided list of <see cref="AIContent"/> items.
189+
/// Unlike other content coalescing methods, this will coalesce non-sequential items based on their <see cref="ImageGenerationToolResultContent.ImageId"/>,
190+
/// and a later item replaces the earlier item with the same ID, taking over the earlier item's position in the list.
191+
/// </summary>
192+
private static void CoalesceImageResultContent(IList<AIContent> contents)
193+
{
194+
Dictionary<string, int>? imageResultIndexById = null;
195+
bool hasRemovals = false;
196+
197+
for (int i = 0; i < contents.Count; i++)
198+
{
199+
if (contents[i] is ImageGenerationToolResultContent imageResult && !string.IsNullOrEmpty(imageResult.ImageId))
200+
{
201+
// Check if there's an existing ImageGenerationToolResultContent with the same ImageId to replace
202+
if (imageResultIndexById is null)
203+
{
204+
imageResultIndexById = new(StringComparer.Ordinal);
205+
}
206+
207+
if (imageResultIndexById.TryGetValue(imageResult.ImageId!, out int existingIndex))
208+
{
209+
// Replace the existing imageResult with the new one
210+
contents[existingIndex] = imageResult;
211+
contents[i] = null!; // Mark the current one for removal, then remove in single O(n) pass
212+
hasRemovals = true;
213+
}
214+
else
215+
{
216+
imageResultIndexById[imageResult.ImageId!] = i;
217+
}
218+
}
219+
}
220+
221+
// Remove all of the null slots left over from the coalescing process.
222+
if (hasRemovals)
223+
{
224+
RemoveNullContents(contents);
225+
}
226+
}
227+
187228
/// <summary>Coalesces sequential <see cref="AIContent"/> content elements.</summary>
188229
internal static void CoalesceContent(IList<AIContent> contents)
189230
{
@@ -219,6 +260,8 @@ internal static void CoalesceContent(IList<AIContent> contents)
219260
return content;
220261
});
221262

263+
CoalesceImageResultContent(contents);
264+
222265
Coalesce<DataContent>(
223266
contents,
224267
mergeSingle: false,
@@ -394,29 +437,35 @@ static bool TryAsCoalescable(AIContent content, [NotNullWhen(true)] out TContent
394437
}
395438

396439
// Remove all of the null slots left over from the coalescing process.
397-
if (contents is List<AIContent> contentsList)
398-
{
399-
_ = contentsList.RemoveAll(u => u is null);
400-
}
401-
else
402-
{
403-
int nextSlot = 0;
404-
int contentsCount = contents.Count;
405-
for (int i = 0; i < contentsCount; i++)
406-
{
407-
if (contents[i] is { } content)
408-
{
409-
contents[nextSlot++] = content;
410-
}
411-
}
440+
RemoveNullContents(contents);
441+
}
442+
}
412443

413-
for (int i = contentsCount - 1; i >= nextSlot; i--)
444+
/// <summary>Removes every <see langword="null"/> entry from <paramref name="contents"/> in place, keeping the remaining items in their original order.</summary>
private static void RemoveNullContents<T>(IList<T> contents)
445+
where T : class
446+
{
447+
// NOTE(review): this tests for List<AIContent>, not List<T>; for any other T the RemoveAll fast path is skipped and the manual compaction below runs instead - confirm this is intended.
if (contents is List<AIContent> contentsList)
448+
{
449+
_ = contentsList.RemoveAll(u => u is null);
450+
}
451+
else
452+
{
453+
int nextSlot = 0;
454+
int contentsCount = contents.Count;
455+
for (int i = 0; i < contentsCount; i++)
456+
{
457+
if (contents[i] is { } content)
414458
{
415-
contents.RemoveAt(i);
459+
contents[nextSlot++] = content;
416460
}
461+
}
417462

418-
Debug.Assert(nextSlot == contents.Count, "Expected final count to equal list length.");
463+
for (int i = contentsCount - 1; i >= nextSlot; i--)
464+
{
465+
contents.RemoveAt(i);
419466
}
467+
468+
Debug.Assert(nextSlot == contents.Count, "Expected final count to equal list length.");
420469
}
421470
}
422471

src/Libraries/Microsoft.Extensions.AI.Abstractions/ChatCompletion/ChatResponseUpdate.cs

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace Microsoft.Extensions.AI;
2020
/// </para>
2121
/// <para>
2222
/// The relationship between <see cref="ChatResponse"/> and <see cref="ChatResponseUpdate"/> is
23-
/// codified in the <see cref="ChatResponseExtensions.ToChatResponseAsync"/> and
23+
/// codified in the <see cref="ChatResponseExtensions.ToChatResponseAsync(IAsyncEnumerable{ChatResponseUpdate}, System.Threading.CancellationToken)"/> and
2424
/// <see cref="ChatResponse.ToChatResponseUpdates"/>, which enable bidirectional conversions
2525
/// between the two. Note, however, that the provided conversions might be lossy, for example, if multiple
2626
/// updates all have different <see cref="RawRepresentation"/> objects whereas there's only one slot for
@@ -58,6 +58,29 @@ public ChatResponseUpdate(ChatRole? role, IList<AIContent>? contents)
5858
_contents = contents;
5959
}
6060

61+
/// <summary>
62+
/// Creates a new <see cref="ChatResponseUpdate"/> instance that is a copy of the current object.
63+
/// </summary>
64+
/// <remarks>The cloned object is a shallow copy; reference-type properties such as <see cref="Contents"/> and
65+
/// <see cref="AdditionalProperties"/> are assigned from the original rather than copied, so both instances reference the same objects.
66+
/// Assigning a property on the clone does not affect the original instance.</remarks>
67+
/// <returns>A new <see cref="ChatResponseUpdate"/> with the same property values as the current instance.</returns>
68+
public ChatResponseUpdate Clone() =>
69+
new()
70+
{
71+
AdditionalProperties = AdditionalProperties,
72+
AuthorName = AuthorName,
73+
Contents = Contents,
74+
CreatedAt = CreatedAt,
75+
ConversationId = ConversationId,
76+
FinishReason = FinishReason,
77+
MessageId = MessageId,
78+
ModelId = ModelId,
79+
RawRepresentation = RawRepresentation,
80+
ResponseId = ResponseId,
81+
Role = Role,
82+
};
83+
6184
/// <summary>Gets or sets the name of the author of the response update.</summary>
6285
public string? AuthorName
6386
{
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics.CodeAnalysis;
5+
6+
namespace Microsoft.Extensions.AI;
7+
8+
/// <summary>
9+
/// Represents the invocation of an image generation tool call by a hosted service; the generated outputs are carried by <see cref="ImageGenerationToolResultContent"/>.
10+
/// </summary>
11+
[Experimental("MEAI001")]
12+
public sealed class ImageGenerationToolCallContent : AIContent
13+
{
14+
/// <summary>
15+
/// Initializes a new instance of the <see cref="ImageGenerationToolCallContent"/> class.
16+
/// </summary>
17+
public ImageGenerationToolCallContent()
18+
{
19+
}
20+
21+
/// <summary>
22+
/// Gets or sets the unique identifier of the image generation item.
23+
/// </summary>
24+
public string? ImageId { get; set; }
25+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Collections.Generic;
5+
using System.Diagnostics.CodeAnalysis;
6+
7+
namespace Microsoft.Extensions.AI;
8+
9+
/// <summary>
10+
/// Represents the result of an image generation tool call performed by a hosted service.
11+
/// </summary>
12+
/// <remarks>
13+
/// This content type carries the outputs produced when a hosted AI service invokes an image generation tool.
14+
/// It represents the result of the call, not the call itself; the call is represented by <see cref="ImageGenerationToolCallContent"/>.
15+
/// </remarks>
16+
[Experimental("MEAI001")]
17+
public sealed class ImageGenerationToolResultContent : AIContent
18+
{
19+
/// <summary>
20+
/// Initializes a new instance of the <see cref="ImageGenerationToolResultContent"/> class.
21+
/// </summary>
22+
public ImageGenerationToolResultContent()
23+
{
24+
}
25+
26+
/// <summary>
27+
/// Gets or sets the unique identifier of the image generation item.
28+
/// </summary>
29+
public string? ImageId { get; set; }
30+
31+
/// <summary>
32+
/// Gets or sets the generated content items.
33+
/// </summary>
34+
/// <remarks>
35+
/// Content is typically <see cref="DataContent"/> for images streamed from the tool, or <see cref="UriContent"/> for remotely hosted images, but
36+
/// can also be provider-specific content types that represent the generated images.
37+
/// </remarks>
38+
public IList<AIContent>? Outputs { get; set; }
39+
}

src/Libraries/Microsoft.Extensions.AI.Abstractions/Image/ImageGenerationOptions.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ protected ImageGenerationOptions(ImageGenerationOptions? other)
8181
/// </summary>
8282
public ImageGenerationResponseFormat? ResponseFormat { get; set; }
8383

84+
/// <summary>
85+
/// Gets or sets the number of intermediate streaming images to generate.
86+
/// </summary>
87+
public int? StreamingCount { get; set; }
88+
8489
/// <summary>Gets or sets any additional properties associated with the options.</summary>
8590
public AdditionalPropertiesDictionary? AdditionalProperties { get; set; }
8691

src/Libraries/Microsoft.Extensions.AI.Abstractions/Microsoft.Extensions.AI.Abstractions.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,10 @@
12471247
"Member": "Microsoft.Extensions.AI.ChatResponseUpdate.ChatResponseUpdate(Microsoft.Extensions.AI.ChatRole? role, System.Collections.Generic.IList<Microsoft.Extensions.AI.AIContent>? contents);",
12481248
"Stage": "Stable"
12491249
},
1250+
{
1251+
"Member": "Microsoft.Extensions.AI.ChatResponseUpdate Microsoft.Extensions.AI.ChatResponseUpdate.Clone();",
1252+
"Stage": "Stable"
1253+
},
12501254
{
12511255
"Member": "override string Microsoft.Extensions.AI.ChatResponseUpdate.ToString();",
12521256
"Stage": "Stable"
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
using System.Diagnostics.CodeAnalysis;
5+
6+
namespace Microsoft.Extensions.AI;
7+
8+
/// <summary>Represents a hosted tool that can be specified to an AI service to enable it to perform image generation.</summary>
9+
/// <remarks>
10+
/// This tool does not itself implement image generation. It is a marker that can be used to inform a service
11+
/// that the service is allowed to perform image generation if the service is capable of doing so.
12+
/// </remarks>
13+
[Experimental("MEAI001")]
14+
public class HostedImageGenerationTool : AITool
15+
{
16+
/// <summary>
17+
/// Initializes a new instance of the <see cref="HostedImageGenerationTool"/> class. No options are supplied here; set <see cref="Options"/> separately.
18+
/// </summary>
19+
public HostedImageGenerationTool()
20+
{
21+
}
22+
23+
/// <summary>
24+
/// Gets or sets the options used to configure image generation.
25+
/// </summary>
26+
public ImageGenerationOptions? Options { get; set; }
27+
}

0 commit comments

Comments
 (0)