formatting

sparticleinc · Oct 10, 2023 · 0604a1a
1 parent a264792
commit 0604a1a
Show file tree

Hide file tree

Showing 25 changed files with 574 additions and 305 deletions.
diff --git a/README.md b/README.md
@@ -100,8 +100,8 @@ See [Test assertions](https://promptfoo.dev/docs/configuration/expected-outputs)
 | `starts-with`             | output starts with string                                                       |
 | `contains-any`            | output contains any of the listed substrings                                    |
 | `contains-all`            | output contains all list of substrings                                          |
-| `icontains-any`           | output contains any of the listed substrings, case insensitive                                    |
-| `icontains-all`           | output contains all list of substrings, case insensitive                                          |
+| `icontains-any`           | output contains any of the listed substrings, case insensitive                  |
+| `icontains-all`           | output contains all list of substrings, case insensitive                        |
 | `is-json`                 | output is valid json (optional json schema validation)                          |
 | `contains-json`           | output contains valid json (optional json schema validation)                    |
 | `javascript`              | provided Javascript function validates the output                               |

diff --git a/examples/mistral-llama-comparison/README.md b/examples/mistral-llama-comparison/README.md
@@ -3,6 +3,7 @@ To get started, set your HF_API_TOKEN and REPLICATE_API_KEY environment variable
 Next, change a few of the text files in prompts/ and edit promptfooconfig.yaml.
 
 Then run:
+
 ```
 promptfoo eval
 ```

diff --git a/examples/ollama-comparison/README.md b/examples/ollama-comparison/README.md
@@ -3,6 +3,7 @@ To get started, set your OPENAI_API_KEY environment variable and start Ollama.
 Next, change a few of the prompts in prompts.txt and edit promptfooconfig.yaml.
 
 Then run:
+
 ```
 promptfoo eval
 ```

diff --git a/src/assertions.ts b/src/assertions.ts
@@ -255,7 +255,9 @@ export async function runAssertion(
       Array.isArray(renderedValue),
       '"icontains-any" assertion type must have an array value',
     );
-    pass = renderedValue.some((value) => output.toLowerCase().includes(String(value).toLowerCase())) !== inverse;
+    pass =
+      renderedValue.some((value) => output.toLowerCase().includes(String(value).toLowerCase())) !==
+      inverse;
     return {
       pass,
       score: pass ? 1 : 0,
@@ -289,7 +291,9 @@ export async function runAssertion(
       Array.isArray(renderedValue),
       '"icontains-all" assertion type must have an array value',
     );
-    pass = renderedValue.every((value) => output.toLowerCase().includes(String(value).toLowerCase())) !== inverse;
+    pass =
+      renderedValue.every((value) => output.toLowerCase().includes(String(value).toLowerCase())) !==
+      inverse;
     return {
       pass,
       score: pass ? 1 : 0,
@@ -391,7 +395,8 @@ export async function runAssertion(
           const functionString = assertion.value.toString();
           ret.assertion = {
             type: 'javascript',
-            value: functionString.length > 50 ? functionString.slice(0, 50) + '...' : functionString,
+            value:
+              functionString.length > 50 ? functionString.slice(0, 50) + '...' : functionString,
           };
         }
         return ret;
@@ -751,7 +756,12 @@ export function assertionFromString(expected: string): Assertion {
     const fullType = notPrefix ? `not-${type}` : type;
     const threshold = parseFloat(thresholdStr);
 
-    if (type === 'contains-any' || type === 'contains-all' || type === 'icontains-any' || type === 'icontains-all') {
+    if (
+      type === 'contains-any' ||
+      type === 'contains-all' ||
+      type === 'icontains-any' ||
+      type === 'icontains-all'
+    ) {
       return {
         type: fullType as AssertionType,
         value: value.split(',').map((s) => s.trim()),

diff --git a/src/commands/list.ts b/src/commands/list.ts
@@ -9,7 +9,8 @@ import telemetry from '../telemetry';
 export function listCommand(program: Command) {
   const listCommand = program.command('list').description('List various resources');
 
-  listCommand.command('evals')
+  listCommand
+    .command('evals')
     .description('List evaluations.')
     .action(async () => {
       telemetry.maybeShowNotice();
@@ -19,21 +20,26 @@ export function listCommand(program: Command) {
       await telemetry.send();
 
       const evals = getEvals();
-      const tableData = evals.map(evl => ({
+      const tableData = evals.map((evl) => ({
         'Eval ID': evl.id.slice(0, 6),
         Filename: evl.filePath,
-        Prompts: evl.results.table.head.prompts.map(p => sha256(p.raw).slice(0, 6)).join(', '),
-        Vars: evl.results.table.head.vars.map(v => v).join(', '),
+        Prompts: evl.results.table.head.prompts.map((p) => sha256(p.raw).slice(0, 6)).join(', '),
+        Vars: evl.results.table.head.vars.map((v) => v).join(', '),
       }));
 
       logger.info(wrapTable(tableData));
       printBorder();
 
-      logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
-      logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
+      logger.info(
+        `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
+      );
+      logger.info(
+        `Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`,
+      );
     });
 
-  listCommand.command('prompts')
+  listCommand
+    .command('prompts')
     .description('List prompts used')
     .action(async () => {
       telemetry.maybeShowNotice();
@@ -43,20 +49,25 @@ export function listCommand(program: Command) {
       await telemetry.send();
 
       const prompts = getPrompts().sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
-      const tableData = prompts.map(prompt => ({
+      const tableData = prompts.map((prompt) => ({
         'Prompt ID': prompt.id.slice(0, 6),
-        'Raw': prompt.prompt.raw.slice(0, 100) + (prompt.prompt.raw.length > 100 ? '...' : ''),
+        Raw: prompt.prompt.raw.slice(0, 100) + (prompt.prompt.raw.length > 100 ? '...' : ''),
         '# evals': prompt.count,
         'Most recent eval': prompt.recentEvalId.slice(0, 6),
       }));
 
       logger.info(wrapTable(tableData));
       printBorder();
-      logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
-      logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
+      logger.info(
+        `Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`,
+      );
+      logger.info(
+        `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
+      );
     });
 
-  listCommand.command('datasets')
+  listCommand
+    .command('datasets')
     .description('List datasets used')
     .action(async () => {
       telemetry.maybeShowNotice();
@@ -66,18 +77,26 @@ export function listCommand(program: Command) {
       await telemetry.send();
 
       const datasets = getTestCases().sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
-      const tableData = datasets.map(dataset => ({
+      const tableData = datasets.map((dataset) => ({
         'Dataset ID': dataset.id.slice(0, 6),
-        'Highest scoring prompt': dataset.prompts.sort((a, b) => (b.prompt.metrics?.score || 0) - (a.prompt.metrics?.score || 0))[0].id.slice(0, 6),
+        'Highest scoring prompt': dataset.prompts
+          .sort((a, b) => (b.prompt.metrics?.score || 0) - (a.prompt.metrics?.score || 0))[0]
+          .id.slice(0, 6),
         '# evals': dataset.count,
         '# prompts': dataset.prompts.length,
         'Most recent eval': dataset.recentEvalId.slice(0, 6),
       }));
 
       logger.info(wrapTable(tableData));
       printBorder();
-      logger.info(`Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`);
-      logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
-      logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
+      logger.info(
+        `Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`,
+      );
+      logger.info(
+        `Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`,
+      );
+      logger.info(
+        `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
+      );
     });
 }
diff --git a/src/commands/show.ts b/src/commands/show.ts
@@ -7,33 +7,42 @@ import logger from '../logger';
 import telemetry from '../telemetry';
 
 export function showCommand(program: Command) {
-  const showCommand = program.command('show <id>').description('Show details of a specific resource').action(async (id: string) => {
-    const evl = getEvalFromHash(id);
-    if (evl) {
-      return handleEval(id);
-    }
-
-    const prompt = getPromptFromHash(id);
-    if (prompt) {
-      return handlePrompt(id);
-    }
-
-    const dataset = getDatasetFromHash(id);
-    if (dataset) {
-      return handleDataset(id);
-    }
-
-    logger.error(`No resource found with ID ${id}`);
-  });
+  const showCommand = program
+    .command('show <id>')
+    .description('Show details of a specific resource')
+    .action(async (id: string) => {
+      const evl = getEvalFromHash(id);
+      if (evl) {
+        return handleEval(id);
+      }
+
+      const prompt = getPromptFromHash(id);
+      if (prompt) {
+        return handlePrompt(id);
+      }
+
+      const dataset = getDatasetFromHash(id);
+      if (dataset) {
+        return handleDataset(id);
+      }
+
+      logger.error(`No resource found with ID ${id}`);
+    });
 
   showCommand
     .command('eval <id>')
     .description('Show details of a specific evaluation')
     .action(handleEval);
 
-  showCommand.command('prompt <id>').description('Show details of a specific prompt').action(handlePrompt);
+  showCommand
+    .command('prompt <id>')
+    .description('Show details of a specific prompt')
+    .action(handlePrompt);
 
-  showCommand.command('dataset <id>').description('Show details of a specific dataset').action(handleDataset);
+  showCommand
+    .command('dataset <id>')
+    .description('Show details of a specific dataset')
+    .action(handleDataset);
 }
 
 async function handleEval(id: string) {
@@ -61,7 +70,11 @@ async function handleEval(id: string) {
   printBorder();
   // TODO(ian): List prompt ids
   logger.info(`${prompts.length} prompts`);
-  logger.info(`${vars.length} variables: ${vars.slice(0, 5).join(', ')}${vars.length > 5 ? ` (and ${vars.length - 5} more...)` : ''}`);
+  logger.info(
+    `${vars.length} variables: ${vars.slice(0, 5).join(', ')}${
+      vars.length > 5 ? ` (and ${vars.length - 5} more...)` : ''
+    }`,
+  );
 }
 
 async function handlePrompt(id: string) {
@@ -90,48 +103,73 @@ async function handlePrompt(id: string) {
       'Eval ID': evl.id.slice(0, 6),
       'Dataset ID': evl.datasetId.slice(0, 6),
       'Raw score': evl.metrics?.score.toFixed(2) || '-',
-      'Pass rate': evl.metrics && evl.metrics.testPassCount + evl.metrics.testFailCount > 0 ? `${(evl.metrics.testPassCount / (evl.metrics.testPassCount + evl.metrics.testFailCount) * 100).toFixed(2)}%` : '-',
+      'Pass rate':
+        evl.metrics && evl.metrics.testPassCount + evl.metrics.testFailCount > 0
+          ? `${(
+              (evl.metrics.testPassCount /
+                (evl.metrics.testPassCount + evl.metrics.testFailCount)) *
+              100
+            ).toFixed(2)}%`
+          : '-',
       'Pass count': evl.metrics?.testPassCount || '-',
       'Fail count': evl.metrics?.testFailCount || '-',
     });
   }
   logger.info(wrapTable(table));
   printBorder();
-  logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
-  logger.info(`Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`);
+  logger.info(
+    `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
+  );
+  logger.info(
+    `Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`,
+  );
 }
 
 async function handleDataset(id: string) {
-    telemetry.maybeShowNotice();
-    telemetry.record('command_used', {
-      name: 'show dataset',
+  telemetry.maybeShowNotice();
+  telemetry.record('command_used', {
+    name: 'show dataset',
+  });
+  await telemetry.send();
+
+  const dataset = getDatasetFromHash(id);
+  if (!dataset) {
+    logger.error(`Dataset with ID ${id} not found.`);
+    return;
+  }
+
+  printBorder();
+  logger.info(chalk.bold(`Dataset ${id}`));
+  printBorder();
+
+  logger.info(`This dataset is used in the following evals:`);
+  const table = [];
+  for (const prompt of dataset.prompts
+    .sort((a, b) => b.evalId.localeCompare(a.evalId))
+    .slice(0, 10)) {
+    table.push({
+      'Eval ID': prompt.evalId.slice(0, 6),
+      'Prompt ID': prompt.id.slice(0, 6),
+      'Raw score': prompt.prompt.metrics?.score.toFixed(2) || '-',
+      'Pass rate':
+        prompt.prompt.metrics &&
+        prompt.prompt.metrics.testPassCount + prompt.prompt.metrics.testFailCount > 0
+          ? `${(
+              (prompt.prompt.metrics.testPassCount /
+                (prompt.prompt.metrics.testPassCount + prompt.prompt.metrics.testFailCount)) *
+              100
+            ).toFixed(2)}%`
+          : '-',
+      'Pass count': prompt.prompt.metrics?.testPassCount || '-',
+      'Fail count': prompt.prompt.metrics?.testFailCount || '-',
     });
-    await telemetry.send();
-
-    const dataset = getDatasetFromHash(id);
-    if (!dataset) {
-      logger.error(`Dataset with ID ${id} not found.`);
-      return;
-    }
-
-    printBorder();
-    logger.info(chalk.bold(`Dataset ${id}`));
-    printBorder();
-
-    logger.info(`This dataset is used in the following evals:`);
-    const table = [];
-    for (const prompt of dataset.prompts.sort((a, b) => b.evalId.localeCompare(a.evalId)).slice(0, 10)) {
-      table.push({
-        'Eval ID': prompt.evalId.slice(0, 6),
-        'Prompt ID': prompt.id.slice(0, 6),
-        'Raw score': prompt.prompt.metrics?.score.toFixed(2) || '-',
-        'Pass rate': prompt.prompt.metrics && prompt.prompt.metrics.testPassCount + prompt.prompt.metrics.testFailCount > 0 ? `${(prompt.prompt.metrics.testPassCount / (prompt.prompt.metrics.testPassCount + prompt.prompt.metrics.testFailCount) * 100).toFixed(2)}%` : '-',
-        'Pass count': prompt.prompt.metrics?.testPassCount || '-',
-        'Fail count': prompt.prompt.metrics?.testFailCount || '-',
-      });
-    }
-    logger.info(wrapTable(table));
-    printBorder();
-    logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
-    logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
   }
+  logger.info(wrapTable(table));
+  printBorder();
+  logger.info(
+    `Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`,
+  );
+  logger.info(
+    `Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`,
+  );
+}
diff --git a/src/evaluator.ts b/src/evaluator.ts
@@ -562,8 +562,10 @@ class Evaluator {
         metrics.score += row.score;
         metrics.testPassCount += row.success ? 1 : 0;
         metrics.testFailCount += row.success ? 0 : 1;
-        metrics.assertPassCount += row.gradingResult?.componentResults?.filter(r => r.pass).length || 0;
-        metrics.assertFailCount += row.gradingResult?.componentResults?.filter(r => !r.pass).length || 0;
+        metrics.assertPassCount +=
+          row.gradingResult?.componentResults?.filter((r) => r.pass).length || 0;
+        metrics.assertFailCount +=
+          row.gradingResult?.componentResults?.filter((r) => !r.pass).length || 0;
       },
     );
 

diff --git a/src/providers.ts b/src/providers.ts
@@ -23,7 +23,10 @@ import {
   AzureOpenAiCompletionProvider,
   AzureOpenAiEmbeddingProvider,
 } from './providers/azureopenai';
-import { HuggingfaceFeatureExtractionProvider, HuggingfaceTextGenerationProvider } from './providers/huggingface';
+import {
+  HuggingfaceFeatureExtractionProvider,
+  HuggingfaceTextGenerationProvider,
+} from './providers/huggingface';
 
 import type {
   ApiProvider,

diff --git a/src/providers/azureopenai.ts b/src/providers/azureopenai.ts
@@ -42,7 +42,8 @@ class AzureOpenAiGenericProvider implements ApiProvider {
     this.deploymentName = deploymentName;
 
     this.apiKey = config?.apiKey || env?.AZURE_OPENAI_API_KEY || process.env.AZURE_OPENAI_API_KEY;
-    this.apiHost = config?.apiHost || env?.AZURE_OPENAI_API_HOST || process.env.AZURE_OPENAI_API_HOST;
+    this.apiHost =
+      config?.apiHost || env?.AZURE_OPENAI_API_HOST || process.env.AZURE_OPENAI_API_HOST;
 
     this.config = config || {};
     this.id = id ? () => id : this.id;
-Original file line number
+Diff line change
@@ Expand Up @@
     Next, change a few of the text files in prompts/ and edit promptfooconfig.yaml.
     Then run:
     ```
     promptfoo eval
     ```
@@ Expand Down @@