Skip to content

Commit 8ee5f7e

Browse files
authored
Merge pull request #177 from salesforcecli/wr/customEval
Wr/custom eval W-19120805
2 parents f8e6962 + 3fa8517 commit 8ee5f7e

File tree

2 files changed

+193
-0
lines changed

2 files changed

+193
-0
lines changed

src/commands/agent/generate/test-spec.ts

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,15 @@ type TestCase = {
2626
expectedActions: string[];
2727
expectedTopic: string;
2828
expectedOutcome: string;
29+
customEvaluations?: Array<{
30+
label: string;
31+
name: string;
32+
parameters: Array<
33+
| { name: 'operator'; value: string; isReference: false }
34+
| { name: 'actual'; value: string; isReference: true }
35+
| { name: 'expected'; value: string; isReference: boolean }
36+
>;
37+
}>;
2938
};
3039

3140
/**
@@ -38,13 +47,15 @@ type TestCase = {
3847
* - expectedTopic: The expected topic for classification
3948
* - expectedActions: Array of expected action names
4049
* - expectedOutcome: Expected outcome string
50+
* - customEvaluations: Optional array of custom evaluations, each with a label, a comparison name, and operator/actual (JSONPath)/expected parameters
4151
*
4252
* @remarks
4353
* This function guides users through creating a test case by:
4454
* 1. Prompting for an utterance
4555
* 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle)
4656
* 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin)
4757
* 4. Defining an expected outcome
58+
* 5. Optionally adding custom evaluations (label, JSONPath, comparison operator, and expected value)
4859
*/
4960
async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunctions: string[]): Promise<TestCase> {
5061
const utterance = await input({
@@ -104,14 +115,119 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
104115
theme,
105116
});
106117

118+
const customEvaluations = await promptForCustomEvaluations();
119+
107120
return {
108121
utterance,
109122
expectedTopic,
110123
expectedActions,
111124
expectedOutcome,
125+
customEvaluations,
126+
};
127+
}
128+
129+
/**
 * Creates a custom evaluation object with the provided parameters.
 *
 * The evaluation is named `numeric_comparison` when `expectedValue` (ignoring surrounding
 * whitespace) converts to a finite number, and `string_comparison` otherwise. Blank input and
 * non-finite values such as `Infinity` or `-Infinity` are therefore treated as strings.
 *
 * @param label - Descriptive label for the evaluation
 * @param jsonPath - JSONPath for the actual value; emitted as a reference parameter
 * @param operator - Comparison operator; emitted as a literal parameter
 * @param expectedValue - Expected value to compare against; emitted as a literal parameter
 * @returns Custom evaluation object in the expected format
 */
export function createCustomEvaluation(
  label: string,
  jsonPath: string,
  operator: string,
  expectedValue: string
): NonNullable<TestCase['customEvaluations']>[0] {
  // Number('') and Number('   ') are both 0, so blank input has to be ruled out before converting.
  const trimmed = expectedValue.trim();
  // Number.isFinite rejects NaN (non-numeric text) as well as +/-Infinity.
  const isNumeric = trimmed.length > 0 && Number.isFinite(Number(trimmed));

  return {
    label,
    name: isNumeric ? 'numeric_comparison' : 'string_comparison',
    parameters: [
      { name: 'operator', value: operator, isReference: false },
      { name: 'actual', value: jsonPath, isReference: true },
      { name: 'expected', value: expectedValue, isReference: false },
    ],
  };
}
114155

156+
/**
 * Interactively collects zero or more custom evaluations from the user.
 *
 * The user is first asked whether to add a custom evaluation (default: no). For each evaluation
 * the prompts are, in order: label, JSONPath (must be non-empty and start with `$`), comparison
 * operator, and expected value. After each one the user is asked whether to add another.
 *
 * @returns The evaluations built from the answers, in entry order; empty if the user declines
 */
export async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['customEvaluations']>> {
  // Builds a validator that rejects an empty answer with the supplied message.
  const rejectEmpty =
    (emptyMessage: string) =>
    (answer: string): boolean | string =>
      answer.length ? true : emptyMessage;

  // Yes/no question that defaults to "no".
  const askYesNo = async (message: string): Promise<boolean> => confirm({ message, default: false, theme });

  const operatorChoices = [
    { name: 'Equals ', value: 'equals' },
    { name: 'Greater than or equals (>=)', value: 'greater_than_or_equal' },
    { name: 'Greater than (>)', value: 'greater_than' },
    { name: 'Less than (<)', value: 'less_than' },
    { name: 'Less than or equals (<=)', value: 'less_than_or_equal' },
  ];

  const collected: NonNullable<TestCase['customEvaluations']> = [];

  if (!(await askYesNo('Do you want to add a custom evaluation'))) {
    return collected;
  }

  // Prompts are inherently sequential: each answer is gathered before the next question is shown.
  /* eslint-disable no-await-in-loop */
  do {
    const label = await input({
      message: 'Custom evaluation label (descriptive name)',
      validate: rejectEmpty('Label cannot be empty'),
      theme,
    });

    const jsonPath = await input({
      message: 'Custom evaluation JSONPath (starts with $)',
      validate: (answer: string): boolean | string => {
        if (!answer.length) {
          return 'JSONPath cannot be empty';
        }
        return answer.startsWith('$') ? true : 'JSONPath must start with $';
      },
      theme,
    });

    const operator = await select<string>({
      message: 'Comparison operator',
      choices: operatorChoices,
      theme,
    });

    const expectedValue = await input({
      message: 'Expected value',
      validate: rejectEmpty('Expected value cannot be empty'),
      theme,
    });

    collected.push(createCustomEvaluation(label, jsonPath, operator, expectedValue));
  } while (await askYesNo('Do you want to add another custom evaluation'));
  /* eslint-enable no-await-in-loop */

  return collected;
}
230+
115231
export function getMetadataFilePaths(cs: ComponentSet, type: string): Record<string, string> {
116232
return [...cs.filter((component) => component.type.name === type && component.fullName !== '*')].reduce<
117233
Record<string, string>

test/commands/agent/generate/test-spec.test.ts

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import {
1616
ensureYamlExtension,
1717
getMetadataFilePaths,
1818
getPluginsAndFunctions,
19+
createCustomEvaluation,
1920
} from '../../../../src/commands/agent/generate/test-spec.js';
2021

2122
describe('AgentGenerateTestSpec Helper Methods', () => {
@@ -378,4 +379,80 @@ describe('AgentGenerateTestSpec Helper Methods', () => {
378379
expect(result).to.not.have.property('*');
379380
});
380381
});
382+
383+
describe('createCustomEvaluation', () => {
  // Builds the operator/actual/expected parameter triple an evaluation is expected to carry.
  const parametersFor = (
    operator: string,
    actual: string,
    expected: string
  ): Array<{ name: string; value: string; isReference: boolean }> => [
    { name: 'operator', value: operator, isReference: false },
    { name: 'actual', value: actual, isReference: true },
    { name: 'expected', value: expected, isReference: false },
  ];

  it('should create correct structure for string comparison', () => {
    const result = createCustomEvaluation('Test Label', '$.response.message', 'equals', 'expected text');

    expect(result).to.deep.equal({
      label: 'Test Label',
      name: 'string_comparison',
      parameters: parametersFor('equals', '$.response.message', 'expected text'),
    });
  });

  it('should create correct structure for numeric comparison', () => {
    const result = createCustomEvaluation('Numeric Test', '$.metrics.score', 'greater_than_or_equal', '85');

    expect(result).to.deep.equal({
      label: 'Numeric Test',
      name: 'numeric_comparison',
      parameters: parametersFor('greater_than_or_equal', '$.metrics.score', '85'),
    });
  });

  it('should handle all supported operators', () => {
    const supported = ['equals', 'greater_than_or_equal', 'greater_than', 'less_than', 'less_than_or_equal'];

    for (const operator of supported) {
      const result = createCustomEvaluation(`Test ${operator}`, '$.test.value', operator, '100');

      // The operator is always emitted first, as a literal (non-reference) parameter.
      expect(result.parameters[0]).to.deep.equal({
        name: 'operator',
        value: operator,
        isReference: false,
      });
    }
  });

  it('should always set correct isReference flags', () => {
    const { parameters } = createCustomEvaluation('Reference Test', '$.actual.path', 'equals', 'expected');

    expect(parameters[0].isReference).to.be.false;
    // The actual value is a JSONPath, so it is always a reference.
    expect(parameters[1].isReference).to.be.true;
    // The expected value is always a literal.
    expect(parameters[2].isReference).to.be.false;
  });

  it('should correctly determine comparison type based on expected value', () => {
    const comparisonNameFor = (expectedValue: string): string =>
      createCustomEvaluation('Test', '$.path', 'equals', expectedValue).name;

    expect(comparisonNameFor('42')).to.equal('numeric_comparison');
    expect(comparisonNameFor('text')).to.equal('string_comparison');
  });

  it('should handle complex JSONPaths and values', () => {
    const complexPath = '$.response.data[0].nested["special-key"].value';
    const result = createCustomEvaluation('Complex Test', complexPath, 'less_than', '3.14159');

    expect(result.label).to.equal('Complex Test');
    expect(result.name).to.equal('numeric_comparison');
    expect(result.parameters[1].value).to.equal('$.response.data[0].nested["special-key"].value');
    expect(result.parameters[2].value).to.equal('3.14159');
  });
});
381458
});

0 commit comments

Comments
 (0)