forked from promptfoo/promptfoo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add CLI commands for listing and showing evals, prompts, and datasets (promptfoo#218)

Adds the following:

```
promptfoo list evals
promptfoo list prompts
promptfoo list datasets
promptfoo show <id>
promptfoo show eval <id>
promptfoo show prompt <id>
promptfoo show dataset <id>
```
- Loading branch information
Showing
10 changed files
with
386 additions
and
62 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import chalk from 'chalk'; | ||
import { Command } from 'commander'; | ||
|
||
import { getEvals, getPrompts, getTestCases, printBorder, sha256 } from '../util'; | ||
import { wrapTable } from '../table'; | ||
import logger from '../logger'; | ||
import telemetry from '../telemetry'; | ||
|
||
/**
 * Registers the `list` command and its `evals`, `prompts` and `datasets`
 * subcommands on the given commander program.
 *
 * Each subcommand records a `command_used` telemetry event, logs a table of
 * the matching resources (IDs are shortened to their first six characters),
 * prints a border, and then logs hints pointing at the related `show` commands.
 *
 * @param program - The commander program the `list` command is attached to.
 */
export function listCommand(program: Command) {
  // Named `list` so the local does not shadow the enclosing function.
  const list = program.command('list').description('List various resources');

  list
    .command('evals')
    .description('List evaluations.')
    .action(async () => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'list evals',
      });
      await telemetry.send();

      const evals = getEvals();
      const tableData = evals.map(evl => ({
        'Eval ID': evl.id.slice(0, 6),
        Filename: evl.filePath,
        // Prompts are identified by the first six characters of the hash of their raw text.
        Prompts: evl.results.table.head.prompts.map(p => sha256(p.raw).slice(0, 6)).join(', '),
        Vars: evl.results.table.head.vars.join(', '),
      }));

      logger.info(wrapTable(tableData));
      printBorder();

      logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
      logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
    });

  list
    .command('prompts')
    .description('List prompts used')
    .action(async () => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'list prompts',
      });
      await telemetry.send();

      // Descending order by the ID of the most recent eval that used each prompt.
      const prompts = getPrompts().sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
      const tableData = prompts.map(prompt => ({
        'Prompt ID': prompt.id.slice(0, 6),
        // Raw text is truncated to 100 characters, with an ellipsis when cut.
        'Raw': prompt.prompt.raw.slice(0, 100) + (prompt.prompt.raw.length > 100 ? '...' : ''),
        '# evals': prompt.count,
        'Most recent eval': prompt.recentEvalId.slice(0, 6),
      }));

      logger.info(wrapTable(tableData));
      printBorder();
      logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
      logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
    });

  list
    .command('datasets')
    .description('List datasets used')
    .action(async () => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'list datasets',
      });
      await telemetry.send();

      // Descending order by the ID of the most recent eval that used each dataset.
      const datasets = getTestCases().sort((a, b) => b.recentEvalId.localeCompare(a.recentEvalId));
      const tableData = datasets.map(dataset => {
        // Sort a copy so the dataset's own prompt order is left untouched.
        // `|| 0` (rather than `??`) also maps a NaN score to 0 for the comparator.
        const topPrompt = [...dataset.prompts].sort(
          (a, b) => (b.prompt.metrics?.score || 0) - (a.prompt.metrics?.score || 0),
        )[0];

        return {
          'Dataset ID': dataset.id.slice(0, 6),
          // A dataset with no prompts has no top prompt; show a placeholder instead of throwing.
          'Highest scoring prompt': topPrompt ? topPrompt.id.slice(0, 6) : '-',
          '# evals': dataset.count,
          '# prompts': dataset.prompts.length,
          'Most recent eval': dataset.recentEvalId.slice(0, 6),
        };
      });

      logger.info(wrapTable(tableData));
      printBorder();
      logger.info(`Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`);
      logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
      logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
    });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
import chalk from 'chalk'; | ||
import { Command } from 'commander'; | ||
|
||
import { getEvalFromHash, getPromptFromHash, getDatasetFromHash, printBorder } from '../util'; | ||
import { generateTable, wrapTable } from '../table'; | ||
import logger from '../logger'; | ||
import telemetry from '../telemetry'; | ||
|
||
/**
 * Registers the `show <id>` command together with its `eval <id>`,
 * `prompt <id>` and `dataset <id>` subcommands on the given commander program.
 *
 * The bare `show <id>` form looks the ID up as an eval, then as a prompt, then
 * as a dataset, and hands it to the handler for the first kind that matches.
 * When nothing matches, an error is logged.
 *
 * @param program - The commander program the `show` command is attached to.
 */
export function showCommand(program: Command) {
  const show = program
    .command('show <id>')
    .description('Show details of a specific resource')
    .action(async (id: string) => {
      // Lookup order matters: evals take precedence over prompts, prompts over datasets.
      if (getEvalFromHash(id)) {
        return handleEval(id);
      }

      if (getPromptFromHash(id)) {
        return handlePrompt(id);
      }

      if (getDatasetFromHash(id)) {
        return handleDataset(id);
      }

      logger.error(`No resource found with ID ${id}`);
    });

  show.command('eval <id>').description('Show details of a specific evaluation').action(handleEval);

  show.command('prompt <id>').description('Show details of a specific prompt').action(handlePrompt);

  show.command('dataset <id>').description('Show details of a specific dataset').action(handleDataset);
}
|
||
/**
 * Handles `show eval <id>`: logs the eval's results table followed by a short
 * summary of its prompts and variables.
 *
 * Records a `command_used` telemetry event first. If no eval matches `id`, an
 * error is logged and nothing else is printed.
 *
 * @param id - The eval ID (hash) to look up.
 */
async function handleEval(id: string) {
  telemetry.maybeShowNotice();
  telemetry.record('command_used', { name: 'show eval' });
  await telemetry.send();

  const evl = getEvalFromHash(id);
  if (!evl) {
    logger.error(`No evaluation found with ID ${id}`);
    return;
  }

  // Row limit passed to generateTable and used for the "more rows" notice below.
  const maxRows = 25;
  const { results } = evl;
  const { prompts, vars } = results.table.head;

  logger.info(generateTable(results, 100, maxRows));

  const hiddenRows = results.table.body.length - maxRows;
  if (hiddenRows > 0) {
    const plural = hiddenRows === 1 ? '' : 's';
    logger.info(`... ${hiddenRows} more row${plural} not shown ...\n`);
  }

  printBorder();
  logger.info(chalk.cyan(`Eval ${id}`));
  printBorder();
  // TODO(ian): List prompt ids
  logger.info(`${prompts.length} prompts`);

  // Only the first five variable names are listed; the rest are summarized by count.
  const listedVars = vars.slice(0, 5).join(', ');
  const remainder = vars.length > 5 ? ` (and ${vars.length - 5} more...)` : '';
  logger.info(`${vars.length} variables: ${listedVars}${remainder}`);
}
|
||
/**
 * Handles `show prompt <id>`: logs the prompt's raw text and a table of up to
 * ten evals that used it, ordered by eval ID descending.
 *
 * Records a `command_used` telemetry event first. If no prompt matches `id`,
 * an error is logged and nothing else is printed.
 *
 * @param id - The prompt ID (hash) to look up.
 */
async function handlePrompt(id: string) {
  telemetry.maybeShowNotice();
  telemetry.record('command_used', {
    name: 'show prompt',
  });
  await telemetry.send();

  const prompt = getPromptFromHash(id);
  if (!prompt) {
    logger.error(`Prompt with ID ${id} not found.`);
    return;
  }

  printBorder();
  logger.info(chalk.cyan(prompt.prompt.raw));
  printBorder();
  logger.info(chalk.bold(`Prompt ${id}`));
  printBorder();

  logger.info(`This prompt is used in the following evals:`);
  // Sort a copy so the looked-up prompt's `evals` array is not reordered in place.
  const table = [...prompt.evals]
    .sort((a, b) => b.id.localeCompare(a.id))
    .slice(0, 10)
    .map(evl => {
      const metrics = evl.metrics;
      const totalTests = metrics ? metrics.testPassCount + metrics.testFailCount : 0;

      return {
        'Eval ID': evl.id.slice(0, 6),
        'Dataset ID': evl.datasetId.slice(0, 6),
        'Raw score': metrics?.score.toFixed(2) ?? '-',
        // Guard against division by zero when no tests were recorded.
        'Pass rate':
          metrics && totalTests > 0 ? `${((metrics.testPassCount / totalTests) * 100).toFixed(2)}%` : '-',
        // `??` rather than `||` so a genuine count of 0 is shown as 0, not '-'.
        'Pass count': metrics?.testPassCount ?? '-',
        'Fail count': metrics?.testFailCount ?? '-',
      };
    });
  logger.info(wrapTable(table));
  printBorder();
  logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
  logger.info(`Run ${chalk.green('promptfoo show dataset <id>')} to see details of a specific dataset.`);
}
|
||
/**
 * Handles `show dataset <id>`: logs a table of up to ten prompt entries
 * recorded for the dataset, ordered by eval ID descending.
 *
 * Records a `command_used` telemetry event first. If no dataset matches `id`,
 * an error is logged and nothing else is printed.
 *
 * @param id - The dataset ID (hash) to look up.
 */
async function handleDataset(id: string) {
  telemetry.maybeShowNotice();
  telemetry.record('command_used', {
    name: 'show dataset',
  });
  await telemetry.send();

  const dataset = getDatasetFromHash(id);
  if (!dataset) {
    logger.error(`Dataset with ID ${id} not found.`);
    return;
  }

  printBorder();
  logger.info(chalk.bold(`Dataset ${id}`));
  printBorder();

  logger.info(`This dataset is used in the following evals:`);
  // Sort a copy so the looked-up dataset's `prompts` array is not reordered in place.
  const table = [...dataset.prompts]
    .sort((a, b) => b.evalId.localeCompare(a.evalId))
    .slice(0, 10)
    .map(prompt => {
      const metrics = prompt.prompt.metrics;
      const totalTests = metrics ? metrics.testPassCount + metrics.testFailCount : 0;

      return {
        'Eval ID': prompt.evalId.slice(0, 6),
        'Prompt ID': prompt.id.slice(0, 6),
        'Raw score': metrics?.score.toFixed(2) ?? '-',
        // Guard against division by zero when no tests were recorded.
        'Pass rate':
          metrics && totalTests > 0 ? `${((metrics.testPassCount / totalTests) * 100).toFixed(2)}%` : '-',
        // `??` rather than `||` so a genuine count of 0 is shown as 0, not '-'.
        'Pass count': metrics?.testPassCount ?? '-',
        'Fail count': metrics?.testFailCount ?? '-',
      };
    });
  logger.info(wrapTable(table));
  printBorder();
  logger.info(`Run ${chalk.green('promptfoo show prompt <id>')} to see details of a specific prompt.`);
  logger.info(`Run ${chalk.green('promptfoo show eval <id>')} to see details of a specific evaluation.`);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.