feat: add label field to provider options (promptfoo#563)
typpo authored Mar 17, 2024
1 parent 47e23e6 commit 5efb6fe
Showing 9 changed files with 112 additions and 87 deletions.
16 changes: 8 additions & 8 deletions examples/gpt-3.5-temperature-comparison/promptfooconfig.yaml
@@ -2,14 +2,14 @@ prompts:
   - 'Respond to the following instruction: {{message}}'
 
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-lowtemp
-      config:
-        temperature: 0
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-hightemp
-      config:
-        temperature: 1
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-lowtemp
+    config:
+      temperature: 0
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-hightemp
+    config:
+      temperature: 1
 
 tests:
   - vars:
35 changes: 18 additions & 17 deletions examples/llama-gpt-comparison/promptfooconfig.yaml
@@ -3,23 +3,24 @@ prompts:
   prompts/completion_prompt.txt: completion_prompt
 
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-lowtemp
-      prompts: chat_prompt
-      config:
-        temperature: 0
-        max_tokens: 128
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-hightemp
-      prompts: chat_prompt
-      config:
-        temperature: 1
-        max_tokens: 128
-  - replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48:
-      prompts: completion_prompt
-      config:
-        temperature: 0.01 # minimum temperature
-        max_length: 128
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-lowtemp
+    prompts: chat_prompt
+    config:
+      temperature: 0
+      max_tokens: 128
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-hightemp
+    prompts: chat_prompt
+    config:
+      temperature: 1
+      max_tokens: 128
+  - id: replicate:meta/llama70b-v2-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3
+    label: llama70b-v2-chat
+    prompts: completion_prompt
+    config:
+      temperature: 0.01 # minimum temperature
+      max_length: 128
 
 tests:
   - vars:
13 changes: 7 additions & 6 deletions site/docs/configuration/parameters.md
@@ -131,12 +131,13 @@ prompts:
   prompts/llama_completion_prompt.txt: llama_completion_prompt
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      prompts: gpt_chat_prompt
-  - openai:gpt-4-turbo-0613:
-      prompts: gpt_chat_prompt
-  - replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48:
-      prompts: llama_completion_prompt
+  - id: openai:gpt-3.5-turbo-0613
+    prompts: gpt_chat_prompt
+  - id: openai:gpt-4-turbo-0613
+    prompts: gpt_chat_prompt
+  - id: replicate:meta/llama70b-v2-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3
+    label: llama70b-v2-chat
+    prompts: llama_completion_prompt
 ```
 
 In this configuration, the `gpt_chat_prompt` is used for both GPT-3.5 and GPT-4 models, while the `llama_completion_prompt` is used for the Llama v2 model. The prompts are defined in separate files within the `prompts` directory.
57 changes: 30 additions & 27 deletions site/docs/guides/compare-llama2-vs-gpt.md
@@ -69,12 +69,15 @@ prompts:
   prompts/completion_prompt.txt: completion_prompt
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      prompts: chat_prompt
-  - openai:gpt-4-0613:
-      prompts: chat_prompt
-  - replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48:
-      prompts: completion_prompt
+  - id: openai:gpt-3.5-turbo-0613
+    label: gpt-3.5
+    prompts: chat_prompt
+  - id: openai:gpt-4-0613
+    label: gpt-4
+    prompts: chat_prompt
+  - id: replicate:meta/llama70b-v2-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3
+    label: llama70b-v2-chat
+    prompts: completion_prompt
 ```
 
 :::info
@@ -161,27 +164,27 @@ Each model has a `config` field where you can specify additional parameters. Let
 
 ```yaml title=promptfooconfig.yaml
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      prompts: chat_prompt
-      // highlight-start
-      config:
-        temperature: 0
-        max_tokens: 128
-      // highlight-end
-  - openai:gpt-4-0613:
-      prompts: chat_prompt
-      // highlight-start
-      config:
-        temperature: 0
-        max_tokens: 128
-      // highlight-end
-  - replicate:replicate/llama70b-v2-chat:e951f18578850b652510200860fc4ea62b3b16fac280f83ff32282f87bbd2e48:
-      prompts: completion_prompt
-      // highlight-start
-      config:
-        temperature: 0.01 # minimum temperature
-        max_length: 128
-      // highlight-end
+  - id: openai:gpt-3.5-turbo-0613
+    prompts: chat_prompt
+    // highlight-start
+    config:
+      temperature: 0
+      max_tokens: 128
+    // highlight-end
+  - id: openai:gpt-4-0613
+    prompts: chat_prompt
+    // highlight-start
+    config:
+      temperature: 0
+      max_tokens: 128
+    // highlight-end
+  - id: replicate:meta/llama70b-v2-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3
+    prompts: completion_prompt
+    // highlight-start
+    config:
+      temperature: 0.01 # minimum temperature
+      max_length: 128
+    // highlight-end
 ```
 
 Here's what each parameter means:
40 changes: 20 additions & 20 deletions site/docs/guides/evaluate-llm-temperature.md
@@ -39,14 +39,14 @@ prompts:
   - 'Respond to the following instruction: {{message}}'
 
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-lowtemp
-      config:
-        temperature: 0.2
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-hightemp
-      config:
-        temperature: 0.9
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-lowtemp
+    config:
+      temperature: 0.2
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-hightemp
+    config:
+      temperature: 0.9
 
 tests:
   - vars:
@@ -130,18 +130,18 @@ Set a constant seed in the provider config:
 
 ```yaml
 providers:
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-lowtemp
-      config:
-        temperature: 0.2
-        // highlight-next-line
-        seed: 0
-  - openai:gpt-3.5-turbo-0613:
-      id: openai-gpt-3.5-turbo-hightemp
-      config:
-        temperature: 0.9
-        // highlight-next-line
-        seed: 0
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-lowtemp
+    config:
+      temperature: 0.2
+      // highlight-next-line
+      seed: 0
+  - id: openai:gpt-3.5-turbo-0613
+    label: openai-gpt-3.5-turbo-hightemp
+    config:
+      temperature: 0.9
+      // highlight-next-line
+      seed: 0
 ```
 
 The `eval` command also has a parameter, `repeat`, which runs each test multiple times:
14 changes: 8 additions & 6 deletions src/evaluator.ts
@@ -211,7 +211,7 @@ class Evaluator {
 
     // Set up the special _conversation variable
     const vars = test.vars || {};
-    const conversationKey = `${provider.id()}:${prompt.id}`;
+    const conversationKey = `${provider.label || provider.id()}:${prompt.id}`;
     const usesConversation = prompt.raw.includes('_conversation');
     if (
       !process.env.PROMPTFOO_DISABLE_CONVERSATION_VAR &&
@@ -232,6 +232,7 @@ class Evaluator {
     const setup = {
       provider: {
         id: provider.id(),
+        label: provider.label,
       },
       prompt: {
         raw: renderedPrompt,
@@ -416,10 +417,10 @@ class Evaluator {
             continue;
           }
         }
-        prompts.push({
+        const completedPrompt = {
           ...prompt,
           id: sha256(typeof prompt.raw === 'object' ? JSON.stringify(prompt.raw) : prompt.raw),
-          provider: provider.id(),
+          provider: provider.label || provider.id(),
           display: prompt.display,
           metrics: {
             score: 0,
@@ -437,7 +438,8 @@ class Evaluator {
             namedScores: {},
             cost: 0,
           },
-        });
+        };
+        prompts.push(completedPrompt);
       }
     }
 
@@ -624,7 +626,7 @@ class Evaluator {
       numComplete++;
       if (progressbar) {
         progressbar.increment({
-          provider: evalStep.provider.id(),
+          provider: evalStep.provider.label || evalStep.provider.id(),
           prompt: evalStep.prompt.raw.slice(0, 10).replace(/\n/g, ' '),
           vars: Object.entries(evalStep.test.vars || {})
             .map(([k, v]) => `${k}=${v}`)
@@ -686,7 +688,7 @@ class Evaluator {
         namedScores: row.namedScores,
         text: resultText,
         prompt: row.prompt.raw,
-        provider: row.provider.id,
+        provider: row.provider.label || row.provider.id,
         latencyMs: row.latencyMs,
         tokenUsage: row.response?.tokenUsage,
         gradingResult: row.gradingResult,
3 changes: 3 additions & 0 deletions src/prompts.ts
@@ -54,6 +54,9 @@ export function readProviderPromptMap(
         'You must specify an `id` on the Provider when you override options.prompts',
       );
       ret[rawProvider.id] = rawProvider.prompts || allPrompts;
+      if (rawProvider.label) {
+        ret[rawProvider.label] = rawProvider.prompts || allPrompts;
+      }
     } else {
       const rawProvider = provider as ProviderOptionsMap;
       const originalId = Object.keys(rawProvider)[0];
5 changes: 3 additions & 2 deletions src/providers.ts
@@ -112,8 +112,9 @@ export async function loadApiProvider(
   } = {},
 ): Promise<ApiProvider> {
   const { options = {}, basePath, env } = context;
-  const providerOptions = {
-    id: options.id,
+  const providerOptions: ProviderOptions = {
+    // Hack(ian): Override id with label. This makes it so that debug and display info, which rely on id, will use the label instead.
+    id: options.label || options.id,
     config: {
       ...options.config,
       basePath,
16 changes: 15 additions & 1 deletion src/types.ts
@@ -60,8 +60,10 @@ export interface EnvOverrides {
 
 export interface ProviderOptions {
   id?: ProviderId;
+  label?: ProviderLabel;
   config?: any;
   prompts?: string[]; // List of prompt display strings
+  env?: EnvOverrides;
 }
 
 export interface CallApiContextParams {
@@ -73,14 +75,24 @@ export interface CallApiOptionsParams {
 }
 
 export interface ApiProvider {
+  // Unique identifier for the provider
   id: () => string;
+
+  // Text generation function
   callApi: (
     prompt: string,
     context?: CallApiContextParams,
     options?: CallApiOptionsParams,
   ) => Promise<ProviderResponse>;
+
+  // Embedding function
   callEmbeddingApi?: (prompt: string) => Promise<ProviderEmbeddingResponse>;
+
+  // Classification function
   callClassificationApi?: (prompt: string) => Promise<ProviderClassificationResponse>;
+
+  // Shown on output
+  label?: ProviderLabel;
 }
 
 export interface ApiEmbeddingProvider extends ApiProvider {
@@ -216,7 +228,7 @@ export interface PromptWithMetadata {
 }
 
 export interface EvaluateResult {
-  provider: Pick<ProviderOptions, 'id'>;
+  provider: Pick<ProviderOptions, 'id' | 'label'>;
   prompt: Prompt;
   vars: Record<string, string | object>;
   response?: ProviderResponse;
@@ -463,6 +475,8 @@ export interface TestSuite {
 
 export type ProviderId = string;
 
+export type ProviderLabel = string;
+
 export type ProviderFunction = ApiProvider['callApi'];
 
 export type ProviderOptionsMap = Record<ProviderId, ProviderOptions>;
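The updated configs above show the new shape: `id` names the underlying provider implementation, while the optional `label` is the name surfaced in eval output, progress display, and prompt metrics, falling back to the `id` when absent (see `loadApiProvider` in `src/providers.ts`). A minimal sketch, reusing the provider ids and labels from the examples changed in this commit:

```yaml
# Sketch based on the updated example configs: two providers share the same
# underlying model id but are told apart in results by their labels.
providers:
  - id: openai:gpt-3.5-turbo-0613
    label: openai-gpt-3.5-turbo-lowtemp # shown in output instead of the raw id
    config:
      temperature: 0
  - id: openai:gpt-3.5-turbo-0613
    label: openai-gpt-3.5-turbo-hightemp
    config:
      temperature: 1
```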
