
Commit a0dce99

Added Feature to pass document to Mistral AI's CHAT Models (#10921)
### Description

I implemented the feature to ask questions about a document provided to chat models as content, which was recently added to Mistral AI's models and is supported by all of them: https://mistral.ai/news/mistral-ocr

```
curl https://api.mistral.ai/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer ${MISTRAL_API_KEY}" \
  -d '{
    "model": "mistral-small-latest",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "what is the last sentence in the document" },
          { "type": "document_url", "document_url": "https://arxiv.org/pdf/1805.04770" }
        ]
      }
    ],
    "document_image_limit": 8,
    "document_page_limit": 64
  }'
```

### Contribution Checklist

- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [x] I didn't break anyone 😄

Co-authored-by: Roger Barreto <19890735+RogerBarreto@users.noreply.github.com>
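Beyond the raw API call above, here is a minimal sketch of how the feature can be consumed through the connector, modeled on the unit tests added in this commit. The model id, API key, and document URL are placeholders, and the `AddMistralAIChatCompletion` registration comes from the existing connector setup rather than this diff:

```csharp
// Sketch only, based on the tests in this commit; model id, API key and URL are placeholders.
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.ChatCompletion;
using Microsoft.SemanticKernel.Connectors.MistralAI;

var kernel = Kernel.CreateBuilder()
    .AddMistralAIChatCompletion(modelId: "mistral-small-latest", apiKey: "<MISTRAL_API_KEY>")
    .Build();
var chat = kernel.GetRequiredService<IChatCompletionService>();

// A user message that mixes text with a URL-referenced document.
// The Uri-based BinaryContent is mapped to a "document_url" content chunk.
var chatHistory = new ChatHistory();
chatHistory.AddUserMessage(new ChatMessageContentItemCollection
{
    new TextContent("Summarize the document for me."),
    new BinaryContent(new Uri("https://arxiv.org/pdf/1805.04770"))
});

// The new settings cap how many pages/images of the document are OCR'd.
var settings = new MistralAIPromptExecutionSettings
{
    DocumentPageLimit = 64,
    DocumentImageLimit = 8
};

var reply = await chat.GetChatMessageContentAsync(chatHistory, settings, kernel);
Console.WriteLine(reply.Content);
```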
1 parent bf8a773 commit a0dce99

8 files changed: +246 / -9 lines changed


dotnet/src/Connectors/Connectors.MistralAI.UnitTests/Client/MistralClientTests.cs

Lines changed: 149 additions & 0 deletions
```diff
@@ -206,6 +206,8 @@ public async Task ValidateChatMessageRequestWithToolsAsync()
         Assert.NotNull(request);
         var chatRequest = JsonSerializer.Deserialize<ChatCompletionRequest>(request);
         Assert.NotNull(chatRequest);
+        Assert.Null(chatRequest.DocumentPageLimit);
+        Assert.Null(chatRequest.DocumentImageLimit);
         Assert.Equal("auto", chatRequest.ToolChoice);
         Assert.NotNull(chatRequest.Tools);
         Assert.Single(chatRequest.Tools);
@@ -622,6 +624,153 @@ public void ValidateCloneMistralAIPromptExecutionSettings()
         Assert.Equal(settings.ResponseFormat, clonedMistralAISettings.ResponseFormat);
     }
 
+    [Fact]
+    public void ToMistralChatMessagesWithArrayOfByteBinaryContentShouldThrow()
+    {
+        // Arrange
+        using var httpClient = new HttpClient();
+        var client = new MistralClient("mistral-large-latest", httpClient, "key");
+        var chatMessage = new ChatMessageContent()
+        {
+            Role = AuthorRole.User,
+            Items =
+            [
+                new BinaryContent(data: new byte[] { 1, 2, 3 }, mimeType: "application/pdf")
+            ],
+        };
+
+        // Act
+        // Assert
+        Assert.Throws<NotSupportedException>(() => client.ToMistralChatMessages(chatMessage, default));
+    }
+
+    [Fact]
+    public void ToMistralChatMessagesWithBase64BinaryContentShouldThrow()
+    {
+        // Arrange
+        using var httpClient = new HttpClient();
+        var client = new MistralClient("mistral-large-latest", httpClient, "key");
+        var chatMessage = new ChatMessageContent()
+        {
+            Role = AuthorRole.User,
+            Items =
+            [
+                new BinaryContent(dataUri: "data:application/pdf:base64,sdfghjyswedfghjjhertgiutdgbg")
+            ],
+        };
+
+        // Act
+        // Assert
+        Assert.Throws<NotSupportedException>(() => client.ToMistralChatMessages(chatMessage, default));
+    }
+
+    [Fact]
+    public void ValidateToMistralChatMessagesWithUrlBinaryContent()
+    {
+        // Arrange
+        using var httpClient = new HttpClient();
+        var client = new MistralClient("mistral-large-latest", httpClient, "key");
+        var chatMessage = new ChatMessageContent()
+        {
+            Role = AuthorRole.User,
+            Items =
+            [
+                new BinaryContent(new Uri("https://arxiv.org/pdf/1805.04770"))
+            ],
+        };
+
+        // Act
+        var message = client.ToMistralChatMessages(chatMessage, default);
+        var contents = message[0].Content as List<ContentChunk>;
+        var content = contents![0] as DocumentUrlChunk;
+
+        // Assert
+        Assert.NotNull(message);
+        Assert.Single(message);
+        Assert.IsType<MistralChatMessage>(message[0]);
+        Assert.Equal("user", message[0].Role);
+
+        Assert.IsType<List<ContentChunk>>(message[0].Content);
+        Assert.NotNull(contents);
+        Assert.Single(contents);
+
+        Assert.IsType<DocumentUrlChunk>(content);
+        Assert.NotNull(content);
+        Assert.Equal("https://arxiv.org/pdf/1805.04770", content.DocumentUrl);
+        Assert.Equal("document_url", content.Type);
+    }
+
+    [Fact]
+    public async Task ValidateToMistralChatMessagesWithDocumentRequestAsync()
+    {
+        // Arrange
+        var client = this.CreateMistralClient("mistral-small-latest", "https://api.mistral.ai/v1/chat/completions", "chat_completions_response_with_document.json");
+
+        var chatHistory = new ChatHistory
+        {
+            new ChatMessageContent(
+                AuthorRole.User,
+                [
+                    new TextContent("Summarize the document for me."),
+                    new BinaryContent(new Uri("https://arxiv.org/pdf/1805.04770"))
+                ]),
+        };
+
+        // Act
+        var executionSettings = new MistralAIPromptExecutionSettings { DocumentPageLimit = 64, DocumentImageLimit = 8 };
+        await client.GetChatMessageContentsAsync(chatHistory, default, executionSettings);
+        var request = this.DelegatingHandler!.RequestContent;
+
+        // Assert
+        Assert.NotNull(request);
+        var chatRequest = JsonSerializer.Deserialize<ChatCompletionRequest>(request);
+        Assert.NotNull(chatRequest);
+        Assert.Equal("mistral-small-latest", chatRequest.Model);
+        Assert.Single(chatRequest.Messages);
+        Assert.Equal("user", chatRequest.Messages[0].Role);
+        Assert.NotNull(chatRequest.Messages[0].Content);
+        Assert.Equal(64, chatRequest.DocumentPageLimit);
+        Assert.Equal(8, chatRequest.DocumentImageLimit);
+
+        // Assert
+        var content = JsonSerializer.Serialize(chatRequest.Messages[0].Content);
+        string json = """[{"text":"Summarize the document for me.","type":"text"},{"document_url":"https://arxiv.org/pdf/1805.04770","type":"document_url"}]""";
+        Assert.Equal(json, content);
+    }
+
+    [Fact]
+    public async Task ValidateToMistralChatMessagesWithDocumentResponseAsync()
+    {
+        // Arrange
+        var client = this.CreateMistralClient("mistral-small-latest", "https://api.mistral.ai/v1/chat/completions", "chat_completions_response_with_document.json");
+
+        var chatHistory = new ChatHistory
+        {
+            new ChatMessageContent(
+                AuthorRole.User,
+                [
+                    new TextContent("Summarize the document for me."),
+                    new BinaryContent(new Uri("https://arxiv.org/pdf/1805.04770"))
+                ]),
+        };
+
+        // Act
+        var executionSettings = new MistralAIPromptExecutionSettings { DocumentPageLimit = 64, DocumentImageLimit = 8 };
+        var response = await client.GetChatMessageContentsAsync(chatHistory, default, executionSettings);
+
+        // Assert
+        Assert.NotNull(response);
+        Assert.Single(response);
+        Assert.Contains("The document titled \"Born-Again Neural Networks\"", response[0].Content);
+        Assert.Equal("mistral-small-latest", response[0].ModelId);
+        Assert.Equal(AuthorRole.Assistant, response[0].Role);
+        Assert.NotNull(response[0].Metadata);
+        Assert.Equal(7, response[0].Metadata?.Count);
+        Assert.NotNull(response[0].Metadata?["Usage"]);
+        Assert.NotNull(response[0].InnerContent);
+        Assert.IsType<MistralChatChoice>(response[0].InnerContent);
+    }
+
     public sealed class WeatherPlugin
     {
         [KernelFunction]
```
chat_completions_response_with_document.json (new test-data file referenced by the tests above)

Lines changed: 22 additions & 0 deletions

```diff
@@ -0,0 +1,22 @@
+{
+  "id": "cfa5bc963e1640ebbd6c25f9671e5398",
+  "object": "chat.completion",
+  "created": 1741974042,
+  "model": "mistral-small-latest",
+  "choices": [
+    {
+      "index": 0,
+      "message": {
+        "role": "assistant",
+        "tool_calls": null,
+        "content": "The document titled \"Born-Again Neural Networks\" explores the concept of Knowledge Distillation."
+      },
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 12999,
+    "total_tokens": 13488,
+    "completion_tokens": 489
+  }
+}
```

dotnet/src/Connectors/Connectors.MistralAI/Client/ChatCompletionRequest.cs

Lines changed: 8 additions & 0 deletions
```diff
@@ -60,6 +60,14 @@ internal sealed class ChatCompletionRequest
     [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
     public IList<string>? Stop { get; set; }
 
+    [JsonPropertyName("document_image_limit")]
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    public int? DocumentImageLimit { get; set; }
+
+    [JsonPropertyName("document_page_limit")]
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    public int? DocumentPageLimit { get; set; }
+
     /// <summary>
     /// Construct an instance of <see cref="ChatCompletionRequest"/>.
     /// </summary>
```

dotnet/src/Connectors/Connectors.MistralAI/Client/ContentChunk.cs

Lines changed: 1 addition & 0 deletions
```diff
@@ -6,6 +6,7 @@ namespace Microsoft.SemanticKernel.Connectors.MistralAI.Client;
 
 [JsonDerivedType(typeof(TextChunk))]
 [JsonDerivedType(typeof(ImageUrlChunk))]
+[JsonDerivedType(typeof(DocumentUrlChunk))]
 internal abstract class ContentChunk(ContentChunkType type)
 {
     [JsonPropertyName("type")]
```

dotnet/src/Connectors/Connectors.MistralAI/Client/ContentChunkType.cs

Lines changed: 2 additions & 0 deletions
```diff
@@ -12,6 +12,8 @@ namespace Microsoft.SemanticKernel.Connectors.MistralAI.Client;
 
     public static ContentChunkType ImageUrl { get; } = new("image_url");
 
+    public static ContentChunkType DocumentUrl { get; } = new("document_url");
+
     public string Type { get; }
 
     /// <summary>
```
dotnet/src/Connectors/Connectors.MistralAI/Client/DocumentUrlChunk.cs (new file)

Lines changed: 11 additions & 0 deletions

```diff
@@ -0,0 +1,11 @@
+// Copyright (c) Microsoft. All rights reserved.
+
+using System.Text.Json.Serialization;
+
+namespace Microsoft.SemanticKernel.Connectors.MistralAI.Client;
+
+internal class DocumentUrlChunk(string documentUrl) : ContentChunk(ContentChunkType.DocumentUrl)
+{
+    [JsonPropertyName("document_url")]
+    public string DocumentUrl { get; set; } = documentUrl;
+}
```

dotnet/src/Connectors/Connectors.MistralAI/Client/MistralClient.cs

Lines changed: 19 additions & 9 deletions
```diff
@@ -698,6 +698,8 @@ private ChatCompletionRequest CreateChatCompletionRequest(string modelId, bool s
             FrequencyPenalty = executionSettings.FrequencyPenalty,
             PresencePenalty = executionSettings.PresencePenalty,
             Stop = executionSettings.Stop,
+            DocumentImageLimit = executionSettings.DocumentImageLimit,
+            DocumentPageLimit = executionSettings.DocumentPageLimit
         };
 
         executionSettings.ToolCallBehavior?.ConfigureRequest(kernel, request);
@@ -737,10 +739,12 @@ internal List<MistralChatMessage> ToMistralChatMessages(ChatMessageContent chatM
                 };
                 toolCalls.Add(callRequest.Id, toolCall);
             }
+
             if (toolCalls.Count > 0)
             {
                 message.ToolCalls = [.. toolCalls.Values];
             }
+
             return [message];
         }
 
@@ -764,12 +768,9 @@ internal List<MistralChatMessage> ToMistralChatMessages(ChatMessageContent chatM
                     ToolCallId = resultContent.CallId
                 });
             }
-            if (messages is not null)
-            {
-                return messages;
-            }
 
-            throw new NotSupportedException("No function result provided in the tool message.");
+            return messages
+                ?? throw new NotSupportedException("No function result provided in the tool message.");
         }
 
         if (chatMessage.Items.Count == 1 && chatMessage.Items[0] is TextContent text)
@@ -783,22 +784,31 @@ internal List<MistralChatMessage> ToMistralChatMessages(ChatMessageContent chatM
             if (item is TextContent textContent && !string.IsNullOrEmpty(textContent.Text))
            {
                 content.Add(new TextChunk(textContent.Text!));
+                continue;
            }
-            else if (item is ImageContent imageContent)
+
+            if (item is ImageContent imageContent)
             {
                 if (imageContent.Uri is not null)
                 {
                     content.Add(new ImageUrlChunk(imageContent.Uri.ToString()));
+                    continue;
                 }
-                else if (imageContent.DataUri is not null)
+
+                if (imageContent.DataUri is not null)
                 {
                     content.Add(new ImageUrlChunk(imageContent.DataUri));
+                    continue;
                 }
             }
-            else
+
+            if (item is BinaryContent binaryContent && binaryContent.Uri is not null)
             {
-                throw new NotSupportedException("Invalid message content, only text and image url are supported.");
+                content.Add(new DocumentUrlChunk(binaryContent.Uri.ToString()));
+                continue;
             }
+
+            throw new NotSupportedException("Invalid message content, only text, image url and document url are supported.");
         }
 
         return [new MistralChatMessage(chatMessage.Role.ToString(), content)];
```
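To make the new mapping concrete, here is a small sketch of how `ToMistralChatMessages` treats `BinaryContent`. This is an internal API, reachable only the way the unit tests reach it; the behavior is summarized from this diff and the model id and key are placeholders:

```csharp
// Sketch of the mapping behavior added above (internal API, as exercised by the unit tests).
using var httpClient = new HttpClient();
var client = new MistralClient("mistral-large-latest", httpClient, "key");

// A Uri-based BinaryContent becomes a single DocumentUrlChunk ("document_url").
var withUrl = new ChatMessageContent(AuthorRole.User,
    [new BinaryContent(new Uri("https://arxiv.org/pdf/1805.04770"))]);
var messages = client.ToMistralChatMessages(withUrl, default);
// messages[0].Content is a List<ContentChunk> holding one DocumentUrlChunk.

// Byte-array or data-URI BinaryContent carries no Uri, so it falls through to the throw:
// "Invalid message content, only text, image url and document url are supported."
var withBytes = new ChatMessageContent(AuthorRole.User,
    [new BinaryContent(data: new byte[] { 1, 2, 3 }, mimeType: "application/pdf")]);
// Assert.Throws<NotSupportedException>(() => client.ToMistralChatMessages(withBytes, default));
```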

dotnet/src/Connectors/Connectors.MistralAI/MistralAIPromptExecutionSettings.cs

Lines changed: 34 additions & 0 deletions
```diff
@@ -240,6 +240,38 @@ public double? FrequencyPenalty
         }
     }
 
+    /// <summary>
+    /// Limit Image OCR in document
+    /// </summary>
+    [JsonPropertyName("document_image_limit")]
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    public int? DocumentImageLimit
+    {
+        get => this._documentImageLimit;
+
+        set
+        {
+            this.ThrowIfFrozen();
+            this._documentImageLimit = value;
+        }
+    }
+
+    /// <summary>
+    /// Limit Pages up to which OCR will be done
+    /// </summary>
+    [JsonPropertyName("document_page_limit")]
+    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
+    public int? DocumentPageLimit
+    {
+        get => this._documentPageLimit;
+
+        set
+        {
+            this.ThrowIfFrozen();
+            this._documentPageLimit = value;
+        }
+    }
+
     /// <inheritdoc/>
     public override void Freeze()
     {
@@ -313,6 +345,8 @@ public static MistralAIPromptExecutionSettings FromExecutionSettings(PromptExecu
     private double? _presencePenalty;
    private double? _frequencyPenalty;
     private IList<string>? _stop;
+    private int? _documentImageLimit;
+    private int? _documentPageLimit;
 
     #endregion
 }
```
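A brief sketch of the new settings in isolation, inferred from this diff: both properties are nullable, omitted from the request when null, and frozen together with the rest of the settings.

```csharp
// Sketch based on the properties added above.
var settings = new MistralAIPromptExecutionSettings
{
    DocumentPageLimit = 64,  // OCR at most the first 64 pages of the document
    DocumentImageLimit = 8   // OCR at most 8 images in the document
};

// After Freeze() (e.g. once the settings have been used), further mutation throws,
// exactly as for the other Mistral execution settings guarded by ThrowIfFrozen().
settings.Freeze();
// settings.DocumentPageLimit = 128; // would throw: the instance is frozen

// When left null, "document_page_limit" / "document_image_limit" are simply omitted
// from the serialized chat completion request (JsonIgnoreCondition.WhenWritingNull).
```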
