Merge pull request #177 from salesforcecli/wr/customEval

shetzel · web-flow · commit 8ee5f7e4dc42 · 2025-07-28T15:26:25.000-06:00
Wr/custom eval W-19120805
diff --git a/src/commands/agent/generate/test-spec.ts b/src/commands/agent/generate/test-spec.ts
@@ -26,6 +26,15 @@ type TestCase = {
   expectedActions: string[];
   expectedTopic: string;
   expectedOutcome: string;
+  customEvaluations?: Array<{
+    label: string;
+    name: string;
+    parameters: Array<
+      | { name: 'operator'; value: string; isReference: false }
+      | { name: 'actual'; value: string; isReference: true }
+      | { name: 'expected'; value: string; isReference: boolean }
+    >;
+  }>;
 };
 
 /**
@@ -38,13 +47,15 @@ type TestCase = {
  * - expectedTopic: The expected topic for classification
  * - expectedActions: Array of expected action names
  * - expectedOutcome: Expected outcome string
+ * - customEvaluations: Optional array of custom evaluations (label, name, and operator/actual/expected parameters)
  *
  * @remarks
  * This function guides users through creating a test case by:
  * 1. Prompting for an utterance
  * 2. Selecting an expected topic (from GenAiPlugins specified in the Bot's GenAiPlannerBundle)
  * 3. Choosing expected actions (from GenAiFunctions in the GenAiPlannerBundle or GenAiPlugin)
  * 4. Defining an expected outcome
+ * 5. Optionally adding one or more custom evaluations (label, JSONPath, operator, and expected value)
  */
 async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunctions: string[]): Promise<TestCase> {
   const utterance = await input({
@@ -104,14 +115,119 @@ async function promptForTestCase(genAiPlugins: Record<string, string>, genAiFunc
     theme,
   });
 
+  const customEvaluations = await promptForCustomEvaluations();
+
   return {
     utterance,
     expectedTopic,
     expectedActions,
     expectedOutcome,
+    customEvaluations,
+  };
+}
+
+/**
+ * Creates a custom evaluation named 'numeric_comparison' or 'string_comparison' depending on expectedValue
+ *
+ * @param label - Descriptive label for the evaluation
+ * @param jsonPath - JSONPath for the actual value; emitted as the 'actual' parameter with isReference: true
+ * @param operator - Comparison operator; emitted as a literal (isReference: false)
+ * @param expectedValue - Expected value; numeric only if neither Number() nor parseFloat() yields NaN for it
+ * @returns Custom evaluation object with operator, actual, and expected parameters, in that order
+ */
+export function createCustomEvaluation(
+  label: string,
+  jsonPath: string,
+  operator: string,
+  expectedValue: string
+): NonNullable<TestCase['customEvaluations']>[0] {
+  return {
+    label,
+    name:
+      !isNaN(Number(expectedValue)) && !isNaN(parseFloat(expectedValue)) ? 'numeric_comparison' : 'string_comparison',
+    parameters: [
+      { name: 'operator', value: operator, isReference: false },
+      { name: 'actual', value: jsonPath, isReference: true },
+      { name: 'expected', value: expectedValue, isReference: false },
+    ],
   };
 }
 
+export async function promptForCustomEvaluations(): Promise<NonNullable<TestCase['customEvaluations']>> {
+  const customEvaluations: NonNullable<TestCase['customEvaluations']> = [];
+  let wantsCustomEvaluation = await confirm({
+    message: 'Do you want to add a custom evaluation',
+    default: false,
+    theme,
+  });
+
+  // Each pass collects one evaluation (label, JSONPath, operator, expected value), then asks whether to add another
+  while (wantsCustomEvaluation) {
+    // eslint-disable-next-line no-await-in-loop
+    const label = await input({
+      message: 'Custom evaluation label (descriptive name)',
+      validate: (d: string): boolean | string => {
+        if (!d.length) {
+          return 'Label cannot be empty';
+        }
+        return true;
+      },
+      theme,
+    });
+
+    // eslint-disable-next-line no-await-in-loop
+    const jsonPath = await input({
+      message: 'Custom evaluation JSONPath (starts with $)',
+      validate: (d: string): boolean | string => {
+        if (!d.length) {
+          return 'JSONPath cannot be empty';
+        }
+        if (!d.startsWith('$')) {
+          return 'JSONPath must start with $';
+        }
+        return true;
+      },
+      theme,
+    });
+
+    // eslint-disable-next-line no-await-in-loop
+    const operator = await select<string>({
+      message: 'Comparison operator',
+      choices: [
+        { name: 'Equals ', value: 'equals' },
+        { name: 'Greater than or equals (>=)', value: 'greater_than_or_equal' },
+        { name: 'Greater than (>)', value: 'greater_than' },
+        { name: 'Less than (<)', value: 'less_than' },
+        { name: 'Less than or equals (<=)', value: 'less_than_or_equal' },
+      ],
+      theme,
+    });
+
+    // eslint-disable-next-line no-await-in-loop
+    const expectedValue = await input({
+      message: 'Expected value',
+      validate: (d: string): boolean | string => {
+        if (!d.length) {
+          return 'Expected value cannot be empty';
+        }
+        return true;
+      },
+      theme,
+    });
+
+    customEvaluations.push(createCustomEvaluation(label, jsonPath, operator, expectedValue));
+
+    // eslint-disable-next-line no-await-in-loop
+    wantsCustomEvaluation = await confirm({
+      message: 'Do you want to add another custom evaluation',
+      default: false,
+      theme,
+    });
+  }
+
+  return customEvaluations;
+}
+
 export function getMetadataFilePaths(cs: ComponentSet, type: string): Record<string, string> {
   return [...cs.filter((component) => component.type.name === type && component.fullName !== '*')].reduce<
     Record<string, string>
diff --git a/test/commands/agent/generate/test-spec.test.ts b/test/commands/agent/generate/test-spec.test.ts
@@ -16,6 +16,7 @@ import {
   ensureYamlExtension,
   getMetadataFilePaths,
   getPluginsAndFunctions,
+  createCustomEvaluation,
 } from '../../../../src/commands/agent/generate/test-spec.js';
 
 describe('AgentGenerateTestSpec Helper Methods', () => {
@@ -378,4 +379,80 @@ describe('AgentGenerateTestSpec Helper Methods', () => {
       expect(result).to.not.have.property('*');
     });
   });
+
+  describe('createCustomEvaluation', () => {
+    it('should create correct structure for string comparison', () => {
+      const evaluation = createCustomEvaluation('Test Label', '$.response.message', 'equals', 'expected text');
+
+      expect(evaluation).to.deep.equal({
+        label: 'Test Label',
+        name: 'string_comparison',
+        parameters: [
+          { name: 'operator', value: 'equals', isReference: false },
+          { name: 'actual', value: '$.response.message', isReference: true },
+          { name: 'expected', value: 'expected text', isReference: false },
+        ],
+      });
+    });
+
+    it('should create correct structure for numeric comparison', () => {
+      const evaluation = createCustomEvaluation('Numeric Test', '$.metrics.score', 'greater_than_or_equal', '85');
+
+      expect(evaluation).to.deep.equal({
+        label: 'Numeric Test',
+        name: 'numeric_comparison',
+        parameters: [
+          { name: 'operator', value: 'greater_than_or_equal', isReference: false },
+          { name: 'actual', value: '$.metrics.score', isReference: true },
+          { name: 'expected', value: '85', isReference: false },
+        ],
+      });
+    });
+
+    it('should handle all supported operators', () => {
+      const operators = ['equals', 'greater_than_or_equal', 'greater_than', 'less_than', 'less_than_or_equal'];
+
+      operators.forEach((operator) => {
+        const evaluation = createCustomEvaluation(`Test ${operator}`, '$.test.value', operator, '100');
+
+        expect(evaluation.parameters[0]).to.deep.equal({
+          name: 'operator',
+          value: operator,
+          isReference: false,
+        });
+      });
+    });
+
+    it('should always set correct isReference flags', () => {
+      const evaluation = createCustomEvaluation('Reference Test', '$.actual.path', 'equals', 'expected');
+
+      const [operatorParam, actualParam, expectedParam] = evaluation.parameters;
+
+      expect(operatorParam.isReference).to.be.false;
+      expect(actualParam.isReference).to.be.true; // actual carries the JSONPath, so it is emitted as a reference
+      expect(expectedParam.isReference).to.be.false; // expected is emitted as a literal value, never as a reference
+    });
+
+    it('should correctly determine comparison type based on expected value', () => {
+      const numericEvaluation = createCustomEvaluation('Test', '$.path', 'equals', '42');
+      expect(numericEvaluation.name).to.equal('numeric_comparison');
+
+      const stringEvaluation = createCustomEvaluation('Test', '$.path', 'equals', 'text');
+      expect(stringEvaluation.name).to.equal('string_comparison');
+    });
+
+    it('should handle complex JSONPaths and values', () => {
+      const evaluation = createCustomEvaluation(
+        'Complex Test',
+        '$.response.data[0].nested["special-key"].value',
+        'less_than',
+        '3.14159'
+      );
+
+      expect(evaluation.label).to.equal('Complex Test');
+      expect(evaluation.name).to.equal('numeric_comparison');
+      expect(evaluation.parameters[1].value).to.equal('$.response.data[0].nested["special-key"].value');
+      expect(evaluation.parameters[2].value).to.equal('3.14159');
+    });
+  });
 });