Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions src/commands/agent/generate/test-spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ type TestCase = {
expectedActions: string[];
expectedTopic: string;
expectedOutcome: string;
customEvaluations?: Array<{
label: string;
name: string;
parameters: Array<
| { name: 'operator'; value: string; isReference: false }
| { name: 'actual'; value: string; isReference: true }
| { name: 'expected'; value: string; isReference: boolean }
>;
}>;
};

/**
Expand All @@ -38,13 +47,15 @@ type TestCase = {
* - expectedTopic: The expected topic for classification
* - expectedActions: Array of expected action names
* - expectedOutcome: Expected outcome string
* - customEvaluations: Optional array of custom evaluations, each with a label, a comparison name, and operator / actual (JSONPath) / expected parameters
*
* @remarks
* This function guides users through creating a test case by:
* 1. Prompting for an utterance
* 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle)
* 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin)
* 4. Defining an expected outcome
* 5. Optionally adding custom evaluations (label, JSONPath, comparison operator, and expected value)
*/
async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunctions: string[]): Promise<TestCase> {
const utterance = await input({
Expand Down Expand Up @@ -104,14 +115,119 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
theme,
});

const customEvaluations = await promptForCustomEvaluations();

return {
utterance,
expectedTopic,
expectedActions,
expectedOutcome,
customEvaluations,
};
}

/**
 * Builds a single custom evaluation entry for a test case.
 *
 * The comparison kind is derived from `expectedValue`: when the string parses as a
 * number the entry is named `numeric_comparison`, otherwise `string_comparison`.
 * The `actual` parameter is always emitted as a reference (the JSONPath), while
 * `operator` and `expected` are always emitted as literal values.
 *
 * @param label - Descriptive label for the evaluation
 * @param jsonPath - JSONPath that locates the actual value
 * @param operator - Comparison operator
 * @param expectedValue - Value the actual result is compared against
 * @returns The custom evaluation object with its three parameters in operator, actual, expected order
 */
export function createCustomEvaluation(
  label: string,
  jsonPath: string,
  operator: string,
  expectedValue: string
): NonNullable<TestCase['customEvaluations']>[0] {
  // Both conversions must succeed: Number() alone treats '' and whitespace as 0,
  // while parseFloat() alone tolerates trailing garbage such as '12abc'.
  const failsNumberConversion = Number.isNaN(Number(expectedValue));
  const failsFloatParse = Number.isNaN(parseFloat(expectedValue));
  const comparisonName = failsNumberConversion || failsFloatParse ? 'string_comparison' : 'numeric_comparison';

  const evaluation: NonNullable<TestCase['customEvaluations']>[0] = {
    label,
    name: comparisonName,
    parameters: [
      { name: 'operator', value: operator, isReference: false },
      { name: 'actual', value: jsonPath, isReference: true },
      { name: 'expected', value: expectedValue, isReference: false },
    ],
  };
  return evaluation;
}

/**
 * Interactively collects zero or more custom evaluations.
 *
 * The user is first asked whether to add a custom evaluation. For each one they
 * are prompted, in order, for a label, a JSONPath, a comparison operator and an
 * expected value, and then asked whether to add another.
 *
 * @returns The evaluations in the order they were entered; an empty array when the user declines
 */
export async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['customEvaluations']>> {
  // Builds a validator that rejects empty input with the supplied message.
  const rejectEmpty =
    (emptyMessage: string) =>
    (answer: string): boolean | string =>
      answer.length > 0 ? true : emptyMessage;

  // The empty check runs first so an empty answer reports "cannot be empty" rather than the `$` hint.
  const validateJsonPath = (answer: string): boolean | string => {
    if (answer.length === 0) {
      return 'JSONPath cannot be empty';
    }
    return answer.startsWith('$') ? true : 'JSONPath must start with $';
  };

  // Runs the four prompts for one evaluation and assembles the result.
  const promptForOne = async (): Promise<NonNullable<TestCase['customEvaluations']>[0]> => {
    const label = await input({
      message: 'Custom evaluation label (descriptive name)',
      validate: rejectEmpty('Label cannot be empty'),
      theme,
    });

    const jsonPath = await input({
      message: 'Custom evaluation JSONPath (starts with $)',
      validate: validateJsonPath,
      theme,
    });

    const operator = await select<string>({
      message: 'Comparison operator',
      choices: [
        { name: 'Equals ', value: 'equals' },
        { name: 'Greater than or equals (>=)', value: 'greater_than_or_equal' },
        { name: 'Greater than (>)', value: 'greater_than' },
        { name: 'Less than (<)', value: 'less_than' },
        { name: 'Less than or equals (<=)', value: 'less_than_or_equal' },
      ],
      theme,
    });

    const expectedValue = await input({
      message: 'Expected value',
      validate: rejectEmpty('Expected value cannot be empty'),
      theme,
    });

    return createCustomEvaluation(label, jsonPath, operator, expectedValue);
  };

  const collected: NonNullable<TestCase['customEvaluations']> = [];

  let addMore = await confirm({
    message: 'Do you want to add a custom evaluation',
    default: false,
    theme,
  });

  // Prompts are inherently sequential, so awaiting inside the loop is intentional.
  while (addMore) {
    // eslint-disable-next-line no-await-in-loop
    collected.push(await promptForOne());

    // eslint-disable-next-line no-await-in-loop
    addMore = await confirm({
      message: 'Do you want to add another custom evaluation',
      default: false,
      theme,
    });
  }

  return collected;
}

export function getMetadataFilePaths(cs: ComponentSet, type: string): Record<string, string> {
return [...cs.filter((component) => component.type.name === type && component.fullName !== '*')].reduce<
Record<string, string>
Expand Down
77 changes: 77 additions & 0 deletions test/commands/agent/generate/test-spec.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
ensureYamlExtension,
getMetadataFilePaths,
getPluginsAndFunctions,
createCustomEvaluation,
} from '../../../../src/commands/agent/generate/test-spec.js';

describe('AgentGenerateTestSpec Helper Methods', () => {
Expand Down Expand Up @@ -378,4 +379,80 @@ describe('AgentGenerateTestSpec Helper Methods', () => {
expect(result).to.not.have.property('*');
});
});

// Unit tests for createCustomEvaluation: output shape, comparison-type detection, and reference flags.
describe('createCustomEvaluation', () => {
  it('should create correct structure for string comparison', () => {
    // A non-numeric expected value must yield a string comparison.
    const expectedShape = {
      label: 'Test Label',
      name: 'string_comparison',
      parameters: [
        { name: 'operator', value: 'equals', isReference: false },
        { name: 'actual', value: '$.response.message', isReference: true },
        { name: 'expected', value: 'expected text', isReference: false },
      ],
    };

    const actual = createCustomEvaluation('Test Label', '$.response.message', 'equals', 'expected text');

    expect(actual).to.deep.equal(expectedShape);
  });

  it('should create correct structure for numeric comparison', () => {
    // A numeric-looking expected value must yield a numeric comparison; the value itself stays a string.
    const expectedShape = {
      label: 'Numeric Test',
      name: 'numeric_comparison',
      parameters: [
        { name: 'operator', value: 'greater_than_or_equal', isReference: false },
        { name: 'actual', value: '$.metrics.score', isReference: true },
        { name: 'expected', value: '85', isReference: false },
      ],
    };

    const actual = createCustomEvaluation('Numeric Test', '$.metrics.score', 'greater_than_or_equal', '85');

    expect(actual).to.deep.equal(expectedShape);
  });

  it('should handle all supported operators', () => {
    const supportedOperators = ['equals', 'greater_than_or_equal', 'greater_than', 'less_than', 'less_than_or_equal'];

    // The operator is passed through untouched as the first parameter.
    for (const operator of supportedOperators) {
      const { parameters } = createCustomEvaluation(`Test ${operator}`, '$.test.value', operator, '100');

      expect(parameters[0]).to.deep.equal({
        name: 'operator',
        value: operator,
        isReference: false,
      });
    }
  });

  it('should always set correct isReference flags', () => {
    const { parameters } = createCustomEvaluation('Reference Test', '$.actual.path', 'equals', 'expected');

    expect(parameters[0].isReference).to.be.false; // operator is a literal
    expect(parameters[1].isReference).to.be.true; // actual is always a reference (JSONPath)
    expect(parameters[2].isReference).to.be.false; // expected is always a literal value
  });

  it('should correctly determine comparison type based on expected value', () => {
    const { name: numericName } = createCustomEvaluation('Test', '$.path', 'equals', '42');
    expect(numericName).to.equal('numeric_comparison');

    const { name: stringName } = createCustomEvaluation('Test', '$.path', 'equals', 'text');
    expect(stringName).to.equal('string_comparison');
  });

  it('should handle complex JSONPaths and values', () => {
    const complexPath = '$.response.data[0].nested["special-key"].value';
    const decimalValue = '3.14159';

    const { label, name, parameters } = createCustomEvaluation('Complex Test', complexPath, 'less_than', decimalValue);

    expect(label).to.equal('Complex Test');
    expect(name).to.equal('numeric_comparison');
    expect(parameters[1].value).to.equal('$.response.data[0].nested["special-key"].value');
    expect(parameters[2].value).to.equal('3.14159');
  });
});
});
Loading