@@ -7,76 +7,140 @@
 // constructor syntax.
 
 using System;
+using System.Text.Json.Serialization;
 
 namespace Microsoft.Extensions.AI.Evaluation.Reporting;
 
 /// <summary>
 /// A class that records details related to a particular LLM chat conversation turn involved in the execution of a
 /// particular <see cref="ScenarioRun"/>.
 /// </summary>
-/// <param name="latency">
-/// The duration between the time when the request was sent to the LLM and the time when the response was received for
-/// the chat conversation turn.
-/// </param>
-/// <param name="model">
-/// The model that was used in the creation of the response for the chat conversation turn. Can be
-/// <see langword="null"/> if this information was not available via <see cref="ChatResponse.ModelId"/>.
-/// </param>
-/// <param name="usage">
-/// Usage details for the chat conversation turn (including input and output token counts). Can be
-/// <see langword="null"/> if usage details were not available via <see cref="ChatResponse.Usage"/>.
-/// </param>
-/// <param name="cacheKey">
-/// The cache key for the cached model response for the chat conversation turn if response caching was enabled;
-/// <see langword="null"/> otherwise.
-/// </param>
-/// <param name="cacheHit">
-/// <see langword="true"/> if response caching was enabled and the model response for the chat conversation turn was
-/// retrieved from the cache; <see langword="false"/> if response caching was enabled and the model response was not
-/// retrieved from the cache; <see langword="null"/> if response caching was disabled.
-/// </param>
-public sealed class ChatTurnDetails(
-    TimeSpan latency,
-    string? model = null,
-    UsageDetails? usage = null,
-    string? cacheKey = null,
-    bool? cacheHit = null)
+public sealed class ChatTurnDetails
 {
     /// <summary>
     /// Gets or sets the duration between the time when the request was sent to the LLM and the time when the response
     /// was received for the chat conversation turn.
     /// </summary>
-    public TimeSpan Latency { get; set; } = latency;
+    public TimeSpan Latency { get; set; }
 
     /// <summary>
     /// Gets or sets the model that was used in the creation of the response for the chat conversation turn.
     /// </summary>
     /// <remarks>
     /// Returns <see langword="null"/> if this information was not available via <see cref="ChatResponse.ModelId"/>.
     /// </remarks>
-    public string? Model { get; set; } = model;
+    public string? Model { get; set; }
+
+    /// <summary>
+    /// Gets or sets the name of the provider for the model identified by <see cref="Model"/>.
+    /// </summary>
+    /// <remarks>
+    /// Returns <see langword="null"/> if this information was not available via the
+    /// <see cref="ChatClientMetadata.ProviderName"/> property for the <see cref="IChatClient"/>.
+    /// </remarks>
+    public string? ModelProvider { get; set; }
 
     /// <summary>
     /// Gets or sets usage details for the chat conversation turn (including input and output token counts).
     /// </summary>
     /// <remarks>
     /// Returns <see langword="null"/> if usage details were not available via <see cref="ChatResponse.Usage"/>.
     /// </remarks>
-    public UsageDetails? Usage { get; set; } = usage;
+    public UsageDetails? Usage { get; set; }
 
     /// <summary>
     /// Gets or sets the cache key for the cached model response for the chat conversation turn.
     /// </summary>
     /// <remarks>
     /// Returns <see langword="null"/> if response caching was disabled.
     /// </remarks>
-    public string? CacheKey { get; set; } = cacheKey;
+    public string? CacheKey { get; set; }
 
     /// <summary>
     /// Gets or sets a value indicating whether the model response was retrieved from the cache.
     /// </summary>
     /// <remarks>
     /// Returns <see langword="null"/> if response caching was disabled.
     /// </remarks>
-    public bool? CacheHit { get; set; } = cacheHit;
+    public bool? CacheHit { get; set; }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ChatTurnDetails"/> class.
+    /// </summary>
+    /// <param name="latency">
+    /// The duration between the time when the request was sent to the LLM and the time when the response was received
+    /// for the chat conversation turn.
+    /// </param>
+    /// <param name="model">
+    /// The model that was used in the creation of the response for the chat conversation turn. Can be
+    /// <see langword="null"/> if this information was not available via <see cref="ChatResponse.ModelId"/>.
+    /// </param>
+    /// <param name="usage">
+    /// Usage details for the chat conversation turn (including input and output token counts). Can be
+    /// <see langword="null"/> if usage details were not available via <see cref="ChatResponse.Usage"/>.
+    /// </param>
+    /// <param name="cacheKey">
+    /// The cache key for the cached model response for the chat conversation turn if response caching was enabled;
+    /// <see langword="null"/> otherwise.
+    /// </param>
+    /// <param name="cacheHit">
+    /// <see langword="true"/> if response caching was enabled and the model response for the chat conversation turn
+    /// was retrieved from the cache; <see langword="false"/> if response caching was enabled and the model response
+    /// was not retrieved from the cache; <see langword="null"/> if response caching was disabled.
+    /// </param>
+    public ChatTurnDetails(
+        TimeSpan latency,
+        string? model = null,
+        UsageDetails? usage = null,
+        string? cacheKey = null,
+        bool? cacheHit = null)
+        : this(latency, model, modelProvider: null, usage, cacheKey, cacheHit)
+    {
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ChatTurnDetails"/> class.
+    /// </summary>
+    /// <param name="latency">
+    /// The duration between the time when the request was sent to the LLM and the time when the response was received
+    /// for the chat conversation turn.
+    /// </param>
+    /// <param name="model">
+    /// The model that was used in the creation of the response for the chat conversation turn. Can be
+    /// <see langword="null"/> if this information was not available via <see cref="ChatResponse.ModelId"/>.
+    /// </param>
+    /// <param name="modelProvider">
+    /// The name of the provider for the model identified by <paramref name="model"/>. Can be
+    /// <see langword="null"/> if this information was not available via the
+    /// <see cref="ChatClientMetadata.ProviderName"/> property for the <see cref="IChatClient"/>.
+    /// </param>
+    /// <param name="usage">
+    /// Usage details for the chat conversation turn (including input and output token counts). Can be
+    /// <see langword="null"/> if usage details were not available via <see cref="ChatResponse.Usage"/>.
+    /// </param>
+    /// <param name="cacheKey">
+    /// The cache key for the cached model response for the chat conversation turn if response caching was enabled;
+    /// <see langword="null"/> otherwise.
+    /// </param>
+    /// <param name="cacheHit">
+    /// <see langword="true"/> if response caching was enabled and the model response for the chat conversation turn
+    /// was retrieved from the cache; <see langword="false"/> if response caching was enabled and the model response
+    /// was not retrieved from the cache; <see langword="null"/> if response caching was disabled.
+    /// </param>
+    [JsonConstructor]
+    public ChatTurnDetails(
+        TimeSpan latency,
+        string? model,
+        string? modelProvider,
+        UsageDetails? usage = null,
+        string? cacheKey = null,
+        bool? cacheHit = null)
+    {
+        Latency = latency;
+        Model = model;
+        ModelProvider = modelProvider;
+        Usage = usage;
+        CacheKey = cacheKey;
+        CacheHit = cacheHit;
+    }
 }
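
Reviewer note: a minimal sketch of how the reworked surface is consumed, assuming default reflection-based System.Text.Json serialization (the reporting library may configure its own serializer options); the model and provider values are hypothetical:

    using System;
    using System.Text.Json;
    using Microsoft.Extensions.AI;
    using Microsoft.Extensions.AI.Evaluation.Reporting;

    // The new overload records the provider name alongside the model id.
    var turn = new ChatTurnDetails(
        latency: TimeSpan.FromSeconds(1.4),
        model: "gpt-4o-mini",        // hypothetical model id
        modelProvider: "openai",     // hypothetical provider name
        usage: new UsageDetails { InputTokenCount = 120, OutputTokenCount = 48 });

    // [JsonConstructor] on that overload is what lets deserialization pick the
    // six-parameter constructor, so ModelProvider survives a round trip.
    string json = JsonSerializer.Serialize(turn);
    ChatTurnDetails? roundTripped = JsonSerializer.Deserialize<ChatTurnDetails>(json);
    Console.WriteLine(roundTripped?.ModelProvider); // prints "openai"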
@@ -1,5 +1,5 @@
 {
-  "Name": "Microsoft.Extensions.AI.Evaluation.Reporting, Version=9.6.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35",
+  "Name": "Microsoft.Extensions.AI.Evaluation.Reporting, Version=9.9.0.0, Culture=neutral, PublicKeyToken=31bf3856ad364e35",
   "Types": [
     {
       "Type": "sealed class Microsoft.Extensions.AI.Evaluation.Reporting.ChatDetails",
@@ -40,15 +40,16 @@
       ]
     },
     {
-      // After generating the baseline, manually edit this file to remove primary constructor portion
-      // This is needed until ICSharpCode.Decompiler adds support for primary constructors
-      // See: https://github.com/icsharpcode/ILSpy/issues/829
       "Type": "sealed class Microsoft.Extensions.AI.Evaluation.Reporting.ChatTurnDetails",
       "Stage": "Stable",
       "Methods": [
         {
           "Member": "Microsoft.Extensions.AI.Evaluation.Reporting.ChatTurnDetails.ChatTurnDetails(System.TimeSpan latency, string? model = null, Microsoft.Extensions.AI.UsageDetails? usage = null, string? cacheKey = null, bool? cacheHit = null);",
           "Stage": "Stable"
-        }
+        },
+        {
+          "Member": "Microsoft.Extensions.AI.Evaluation.Reporting.ChatTurnDetails.ChatTurnDetails(System.TimeSpan latency, string? model, string? modelProvider, Microsoft.Extensions.AI.UsageDetails? usage = null, string? cacheKey = null, bool? cacheHit = null);",
+          "Stage": "Stable"
+        }
       ],
@@ -68,6 +69,10 @@
     {
       "Member": "string? Microsoft.Extensions.AI.Evaluation.Reporting.ChatTurnDetails.Model { get; set; }",
       "Stage": "Stable"
     },
+    {
+      "Member": "string? Microsoft.Extensions.AI.Evaluation.Reporting.ChatTurnDetails.ModelProvider { get; set; }",
+      "Stage": "Stable"
+    },
     {
       "Member": "Microsoft.Extensions.AI.UsageDetails? Microsoft.Extensions.AI.Evaluation.Reporting.ChatTurnDetails.Usage { get; set; }",
       "Stage": "Stable"
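
Reviewer note on the two constructor entries in the baseline above: in the new overload, `model` and `modelProvider` are positional with no default values, which keeps overload resolution unambiguous for existing callers. This is an inference from the signatures, illustrated with hypothetical values (continuing with the usings from the previous sketch):

    // Existing two-argument call sites keep binding to the original overload;
    // the new one requires at least three arguments.
    var a = new ChatTurnDetails(TimeSpan.FromSeconds(0.8), "gpt-4o-mini");

    // A third string argument cannot convert to UsageDetails?, so this call
    // unambiguously selects the (latency, model, modelProvider, ...) overload.
    var b = new ChatTurnDetails(TimeSpan.FromSeconds(0.8), "gpt-4o-mini", "openai");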
@@ -14,6 +14,7 @@ internal sealed class ResponseCachingChatClient : DistributedCachingChatClient
 {
     private readonly ChatDetails _chatDetails;
     private readonly ConcurrentDictionary<string, Stopwatch> _stopWatches;
+    private readonly ChatClientMetadata? _metadata;
 
     internal ResponseCachingChatClient(
         IChatClient originalChatClient,
@@ -23,8 +24,10 @@ internal ResponseCachingChatClient(
         : base(originalChatClient, cache)
     {
         CacheKeyAdditionalValues = [.. cachingKeys];
+
         _chatDetails = chatDetails;
         _stopWatches = new ConcurrentDictionary<string, Stopwatch>();
+        _metadata = this.GetService<ChatClientMetadata>();
     }
 
     protected override async Task<ChatResponse?> ReadCacheAsync(string key, CancellationToken cancellationToken)
@@ -45,6 +48,7 @@ internal ResponseCachingChatClient(
             new ChatTurnDetails(
                 latency: stopwatch.Elapsed,
                 model: response.ModelId,
+                modelProvider: _metadata?.ProviderName,
                 usage: response.Usage,
                 cacheKey: key,
                 cacheHit: true));
@@ -75,6 +79,7 @@ internal ResponseCachingChatClient(
             new ChatTurnDetails(
                 latency: stopwatch.Elapsed,
                 model: response.ModelId,
+                modelProvider: _metadata?.ProviderName,
                 usage: response.Usage,
                 cacheKey: key,
                 cacheHit: true));
@@ -95,6 +100,7 @@ protected override async Task WriteCacheAsync(string key, ChatResponse value, Ca
             new ChatTurnDetails(
                 latency: stopwatch.Elapsed,
                 model: value.ModelId,
+                modelProvider: _metadata?.ProviderName,
                 usage: value.Usage,
                 cacheKey: key,
                 cacheHit: false));
@@ -117,6 +123,7 @@ protected override async Task WriteCacheStreamingAsync(
             new ChatTurnDetails(
                 latency: stopwatch.Elapsed,
                 model: response.ModelId,
+                modelProvider: _metadata?.ProviderName,
                 usage: response.Usage,
                 cacheKey: key,
                 cacheHit: false));
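
Reviewer note: `this.GetService<ChatClientMetadata>()` in the constructor above forwards the query down the delegating-client chain, so the wrapper picks up the innermost client's metadata. A sketch of the same lookup from calling code, assuming a hypothetical `CreateConfiguredChatClient()` factory:

    using Microsoft.Extensions.AI;

    IChatClient client = CreateConfiguredChatClient(); // hypothetical factory

    // Returns null rather than throwing when no client in the chain publishes
    // metadata, which is why the field above is declared ChatClientMetadata?
    // and consumed as _metadata?.ProviderName.
    ChatClientMetadata? metadata = client.GetService<ChatClientMetadata>();
    string? providerName = metadata?.ProviderName; // e.g. "openai"; null if unavailable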
@@ -12,11 +12,13 @@ namespace Microsoft.Extensions.AI.Evaluation.Reporting;
 internal sealed class SimpleChatClient : DelegatingChatClient
 {
     private readonly ChatDetails _chatDetails;
+    private readonly ChatClientMetadata? _metadata;
 
     internal SimpleChatClient(IChatClient originalChatClient, ChatDetails chatDetails)
         : base(originalChatClient)
     {
         _chatDetails = chatDetails;
+        _metadata = this.GetService<ChatClientMetadata>();
     }
 
     public async override Task<ChatResponse> GetResponseAsync(
@@ -41,6 +43,7 @@ public async override Task<ChatResponse> GetResponseAsync(
             new ChatTurnDetails(
                 latency: stopwatch.Elapsed,
                 model: response.ModelId,
+                modelProvider: _metadata?.ProviderName,
                 usage: response.Usage));
     }
 }
@@ -78,6 +81,7 @@ public override async IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseA
             new ChatTurnDetails(
                 latency: stopwatch.Elapsed,
                 model: response.ModelId,
+                modelProvider: _metadata?.ProviderName,
                 usage: response.Usage));
     }
 }
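
Reviewer note: the wrapper pattern is easier to see outside the diff. A simplified, non-streaming sketch of what `SimpleChatClient` does per turn; the real implementation also handles streaming and appends each turn to `ChatDetails`:

    using System.Collections.Generic;
    using System.Diagnostics;
    using System.Threading;
    using System.Threading.Tasks;
    using Microsoft.Extensions.AI;
    using Microsoft.Extensions.AI.Evaluation.Reporting;

    static async Task<(ChatResponse Response, ChatTurnDetails Turn)> GetTimedResponseAsync(
        IChatClient client,
        IEnumerable<ChatMessage> messages,
        CancellationToken cancellationToken = default)
    {
        // Resolve provider metadata once; null when the inner client does not expose it.
        ChatClientMetadata? metadata = client.GetService<ChatClientMetadata>();

        var stopwatch = Stopwatch.StartNew();
        ChatResponse response = await client.GetResponseAsync(messages, cancellationToken: cancellationToken);
        stopwatch.Stop();

        // Mirror what the reporting wrapper records for this turn.
        var turn = new ChatTurnDetails(
            latency: stopwatch.Elapsed,
            model: response.ModelId,
            modelProvider: metadata?.ProviderName,
            usage: response.Usage);

        return (response, turn);
    }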
@@ -15,7 +15,8 @@ export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails;
 
     const hasCacheKey = chatDetails.turnDetails.some(turn => turn.cacheKey !== undefined);
     const hasCacheStatus = chatDetails.turnDetails.some(turn => turn.cacheHit !== undefined);
-    const hasModelInfo = chatDetails.turnDetails.some(turn => turn.model !== undefined);
+    const hasModel = chatDetails.turnDetails.some(turn => turn.model !== undefined);
+    const hasModelProvider = chatDetails.turnDetails.some(turn => turn.modelProvider !== undefined);
     const hasInputTokens = chatDetails.turnDetails.some(turn => turn.usage?.inputTokenCount !== undefined);
     const hasOutputTokens = chatDetails.turnDetails.some(turn => turn.usage?.outputTokenCount !== undefined);
     const hasTotalTokens = chatDetails.turnDetails.some(turn => turn.usage?.totalTokenCount !== undefined);
@@ -42,13 +43,14 @@ export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails;
     {isExpanded && (
         <div className={classes.sectionContainer}>
             <div className={classes.tableContainer}>
-                <Table>
+                <Table className={classes.autoWidthTable}>
                     <TableHeader>
                         <TableRow>
                             {hasCacheKey && <TableHeaderCell className={classes.tableHeaderCell}>Cache Key</TableHeaderCell>}
                             {hasCacheStatus && <TableHeaderCell className={classes.tableHeaderCell}>Cache Status</TableHeaderCell>}
                             <TableHeaderCell className={classes.tableHeaderCell}>Latency (s)</TableHeaderCell>
-                            {hasModelInfo && <TableHeaderCell className={classes.tableHeaderCell}>Model Used</TableHeaderCell>}
+                            {hasModel && <TableHeaderCell className={classes.tableHeaderCell}>Model</TableHeaderCell>}
+                            {hasModelProvider && <TableHeaderCell className={classes.tableHeaderCell}>Model Provider</TableHeaderCell>}
                             {hasInputTokens && <TableHeaderCell className={classes.tableHeaderCell}>Input Tokens</TableHeaderCell>}
                             {hasOutputTokens && <TableHeaderCell className={classes.tableHeaderCell}>Output Tokens</TableHeaderCell>}
                             {hasTotalTokens && <TableHeaderCell className={classes.tableHeaderCell}>Total Tokens</TableHeaderCell>}
@@ -92,7 +94,8 @@ export const ChatDetailsSection = ({ chatDetails }: { chatDetails: ChatDetails;
                             </TableCell>
                         )}
                         <TableCell>{turn.latency.toFixed(2)}</TableCell>
-                        {hasModelInfo && <TableCell>{turn.model || '-'}</TableCell>}
+                        {hasModel && <TableCell>{turn.model || '-'}</TableCell>}
+                        {hasModelProvider && <TableCell>{turn.modelProvider || '-'}</TableCell>}
                        {hasInputTokens && <TableCell>{turn.usage?.inputTokenCount || '-'}</TableCell>}
                        {hasOutputTokens && <TableCell>{turn.usage?.outputTokenCount || '-'}</TableCell>}
                        {hasTotalTokens && <TableCell>{turn.usage?.totalTokenCount || '-'}</TableCell>}
@@ -39,6 +39,7 @@ type ChatDetails = {
 type ChatTurnDetails = {
     latency: number;
     model?: string;
+    modelProvider?: string;
     usage?: UsageDetails;
     cacheKey?: string;
     cacheHit?: boolean;