7 files changed: +24 −22 lines

Projects touched:
Microsoft.Extensions.AI.Evaluation.NLP
Microsoft.Extensions.AI.Evaluation.Quality
lines changed Original file line number Diff line number Diff line change @@ -20,11 +20,11 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
20
20
/// </summary>
21
21
/// <remarks>
22
22
/// <para>
23
- /// The <see cref="BLEUEvaluator"/> computes the BLEU score of a response ("hypothesis") compared to a reference
24
- /// supplied via <see cref="BLEUEvaluatorContext.References"/>. The score is returned in a <see cref="NumericMetric"/>
25
- /// with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates a perfect match.
26
- /// By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher is
27
- /// passing and a score below 0.5 is failing.
23
+ /// The <see cref="BLEUEvaluator"/> computes the BLEU score of a response ("hypothesis") compared to one or more
24
+ /// reference responses supplied via <see cref="BLEUEvaluatorContext.References"/>. The score is returned in a
25
+ /// <see cref="NumericMetric"/> with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates
26
+ /// a perfect match. By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher
27
+ /// is passing and a score below 0.5 is failing.
28
28
/// </para>
29
29
/// </remarks>
30
30
public sealed class BLEUEvaluator : IEvaluator
BLEUEvaluatorContext (Microsoft.Extensions.AI.Evaluation.NLP):

@@ -15,8 +15,9 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
 /// Contextual information that the <see cref="BLEUEvaluator"/> uses to compute the BLEU score for a response.
 /// </summary>
 /// <remarks>
-/// <see cref="BLEUEvaluator"/> measures the BLEU score of a response compared to a reference. BLEU (Bilingual Evaluation Understudy)
-/// is a metric used to evaluate the quality of machine-generated text.
+/// <see cref="BLEUEvaluator"/> measures the BLEU score of a response compared to one or more reference responses
+/// supplied via <see cref="References"/>. BLEU (Bilingual Evaluation Understudy) is a metric used to evaluate the
+/// quality of machine-generated text.
 /// </remarks>
 public sealed class BLEUEvaluatorContext : EvaluationContext
 {
@@ -31,7 +32,7 @@ public sealed class BLEUEvaluatorContext : EvaluationContext
 /// </summary>
 /// <remarks>
 /// The <see cref="BLEUEvaluator"/> measures the degree to which the response being evaluated is similar to
-/// the response supplied via <see cref="References"/>. The metric will be reported as a BLEU score.
+/// the responses supplied via <see cref="References"/>. The metric will be reported as a BLEU score.
 /// </remarks>
 public IReadOnlyList<string> References { get; }
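For context, a minimal usage sketch showing how the multiple references described above flow into the evaluator. The EvaluateAsync call shape comes from IEvaluator; the assumption that BLEUEvaluatorContext accepts the references as params strings is mine, not something this diff shows.

using System;
using System.Linq;
using Microsoft.Extensions.AI;
using Microsoft.Extensions.AI.Evaluation;
using Microsoft.Extensions.AI.Evaluation.NLP;

// Sketch only: assumes BLEUEvaluatorContext takes the reference responses as
// params strings. BLEU is computed locally, so no ChatConfiguration is supplied.
IEvaluator evaluator = new BLEUEvaluator();

var context = new BLEUEvaluatorContext(
    "The quick brown fox jumps over the lazy dog.",
    "A quick brown fox jumped over the lazy dog.");

EvaluationResult result = await evaluator.EvaluateAsync(
    messages: [new ChatMessage(ChatRole.User, "Translate the sentence to English.")],
    modelResponse: new ChatResponse(new ChatMessage(ChatRole.Assistant,
        "The quick brown fox jumps over a lazy dog.")),
    additionalContext: [context]);

// The score arrives as a NumericMetric between 0.0 and 1.0; per the updated
// docs above, the default pass/fail interpretation cuts off at 0.5.
NumericMetric bleu = result.Metrics.Values.OfType<NumericMetric>().Single();
Console.WriteLine($"BLEU: {bleu.Value}");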
GLEUEvaluator (Microsoft.Extensions.AI.Evaluation.NLP):

@@ -20,11 +20,11 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
 /// </summary>
 /// <remarks>
 /// <para>
-/// The <see cref="GLEUEvaluator"/> computes the GLEU score of a response ("hypothesis") compared to a reference
-/// supplied via <see cref="GLEUEvaluatorContext.References"/>. The score is returned in a <see cref="NumericMetric"/>
-/// with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates a perfect match.
-/// By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher is
-/// passing and a score below 0.5 is failing.
+/// The <see cref="GLEUEvaluator"/> computes the GLEU score of a response ("hypothesis") compared to one or more
+/// reference responses supplied via <see cref="GLEUEvaluatorContext.References"/>. The score is returned in a
+/// <see cref="NumericMetric"/> with a value between 0.0 and 1.0 where 0.0 represents no match at all and 1.0 indicates
+/// a perfect match. By default, the score is interpreted with a pass/fail cutoff of 0.5. So a score of 0.5 or higher
+/// is passing and a score below 0.5 is failing.
 /// </para>
 /// </remarks>
 public sealed class GLEUEvaluator : IEvaluator
GLEUEvaluatorContext (Microsoft.Extensions.AI.Evaluation.NLP):

@@ -16,7 +16,8 @@ namespace Microsoft.Extensions.AI.Evaluation.NLP;
 /// </summary>
 /// <remarks>
 /// <see cref="GLEUEvaluator"/> measures the GLEU score of a response compared to one or more reference responses
-/// supplied via <see cref="References"/>. GLEU (Google-BLEU) is a metric used to evaluate the quality of machine-generated text.
+/// supplied via <see cref="References"/>. GLEU (Google-BLEU) is a metric used to evaluate the quality of
+/// machine-generated text.
 /// </remarks>
 public sealed class GLEUEvaluatorContext : EvaluationContext
 {
@@ -27,11 +28,11 @@ public sealed class GLEUEvaluatorContext : EvaluationContext
 public static string ReferencesContextName => "References (GLEU)";

 /// <summary>
-/// Gets the reference against which the provided response will be scored.
+/// Gets the references against which the provided response will be scored.
 /// </summary>
 /// <remarks>
 /// The <see cref="GLEUEvaluator"/> measures the degree to which the response being evaluated is similar to
-/// the response supplied via <see cref="References"/>. The metric will be reported as a GLEU score.
+/// the responses supplied via <see cref="References"/>. The metric will be reported as a GLEU score.
 /// </remarks>
 public IReadOnlyList<string> References { get; }
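GLEU evaluation mirrors the BLEU sketch above, with only the evaluator and context types swapped (same usings; the constructor taking an IEnumerable<string> is again an assumption):

// Sketch: assumed IEnumerable<string> overload, mirroring the BLEU example.
IEvaluator gleuEvaluator = new GLEUEvaluator();
string[] references =
[
    "The cat sat on the mat.",
    "There is a cat on the mat.",
];
var gleuContext = new GLEUEvaluatorContext(references);
// Passed via additionalContext: [gleuContext] in the same EvaluateAsync shape.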
IntentResolutionEvaluatorContext (Microsoft.Extensions.AI.Evaluation.Quality):

@@ -40,7 +40,7 @@ public sealed class IntentResolutionEvaluatorContext : EvaluationContext
 /// </para>
 /// </param>
 public IntentResolutionEvaluatorContext(params AITool[] toolDefinitions)
-    : base(name: IntentResolutionContextName, contents: [new TextContent(toolDefinitions.RenderAsJson())])
+    : base(name: ToolDefinitionsContextName, contents: [new TextContent(toolDefinitions.RenderAsJson())])
 {
     ToolDefinitions = [.. toolDefinitions];
 }
@@ -67,7 +67,7 @@ public IntentResolutionEvaluatorContext(IEnumerable<AITool> toolDefinitions)
 /// Gets the unique <see cref="EvaluationContext.Name"/> that is used for
 /// <see cref="IntentResolutionEvaluatorContext"/>.
 /// </summary>
-public static string IntentResolutionContextName => "Tool Definitions (Intent Resolution)";
+public static string ToolDefinitionsContextName => "Tool Definitions (Intent Resolution)";

 /// <summary>
 /// Gets set of tool definitions (see <see cref="ChatOptions.Tools"/>) that were used when generating the model
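The rename is source-breaking for any caller that referenced the old property name, while the underlying string value is unchanged. A hypothetical before/after for consuming code (AIFunctionFactory comes from Microsoft.Extensions.AI; the weather function is invented for illustration):

// Before this change (no longer compiles):
//   string name = IntentResolutionEvaluatorContext.IntentResolutionContextName;
// After:
string name = IntentResolutionEvaluatorContext.ToolDefinitionsContextName;
// name == "Tool Definitions (Intent Resolution)", the same value as before.

// Constructing the context itself is unaffected by the rename:
var intentContext = new IntentResolutionEvaluatorContext(
    AIFunctionFactory.Create((string city) => $"Sunny in {city}", "get_weather"));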
TaskAdherenceEvaluatorContext (Microsoft.Extensions.AI.Evaluation.Quality):

@@ -41,7 +41,7 @@ public sealed class TaskAdherenceEvaluatorContext : EvaluationContext
 /// </para>
 /// </param>
 public TaskAdherenceEvaluatorContext(params AITool[] toolDefinitions)
-    : base(name: TaskAdherenceContextName, contents: [new TextContent(toolDefinitions.RenderAsJson())])
+    : base(name: ToolDefinitionsContextName, contents: [new TextContent(toolDefinitions.RenderAsJson())])
 {
     ToolDefinitions = [.. toolDefinitions];
 }
@@ -68,7 +68,7 @@ public TaskAdherenceEvaluatorContext(IEnumerable<AITool> toolDefinitions)
 /// Gets the unique <see cref="EvaluationContext.Name"/> that is used for
 /// <see cref="TaskAdherenceEvaluatorContext"/>.
 /// </summary>
-public static string TaskAdherenceContextName => "Tool Definitions (Task Adherence)";
+public static string ToolDefinitionsContextName => "Tool Definitions (Task Adherence)";

 /// <summary>
 /// Gets set of tool definitions (see <see cref="ChatOptions.Tools"/>) that were used when generating the model
ToolCallAccuracyEvaluatorContext (Microsoft.Extensions.AI.Evaluation.Quality):

@@ -42,7 +42,7 @@ public sealed class ToolCallAccuracyEvaluatorContext : EvaluationContext
 /// </para>
 /// </param>
 public ToolCallAccuracyEvaluatorContext(params AITool[] toolDefinitions)
-    : base(name: ToolCallAccuracyContextName, contents: [new TextContent(toolDefinitions.RenderAsJson())])
+    : base(name: ToolDefinitionsContextName, contents: [new TextContent(toolDefinitions.RenderAsJson())])
 {
     ToolDefinitions = [.. toolDefinitions];
 }
@@ -69,7 +69,7 @@ public ToolCallAccuracyEvaluatorContext(IEnumerable<AITool> toolDefinitions)
 /// Gets the unique <see cref="EvaluationContext.Name"/> that is used for
 /// <see cref="ToolCallAccuracyEvaluatorContext"/>.
 /// </summary>
-public static string ToolCallAccuracyContextName => "Tool Definitions (Tool Call Accuracy)";
+public static string ToolDefinitionsContextName => "Tool Definitions (Tool Call Accuracy)";

 /// <summary>
 /// Gets set of tool definitions (see <see cref="ChatOptions.Tools"/>) that were used when generating the model
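Taken together, the three renames give every tool-definition context the same property name, matching the "Tool Definitions (...)" strings they already returned. A small sketch of the resulting uniformity (types from Microsoft.Extensions.AI.Evaluation.Quality):

// The per-evaluator string values stay distinct; only the property name is
// now uniform across the three context types.
string[] toolDefinitionContextNames =
[
    IntentResolutionEvaluatorContext.ToolDefinitionsContextName, // "Tool Definitions (Intent Resolution)"
    TaskAdherenceEvaluatorContext.ToolDefinitionsContextName,    // "Tool Definitions (Task Adherence)"
    ToolCallAccuracyEvaluatorContext.ToolDefinitionsContextName, // "Tool Definitions (Tool Call Accuracy)"
];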