diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 837894bfb..bc3f36214 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "4.92.1" + ".": "4.93.0" } diff --git a/.stats.yml b/.stats.yml index aebb90c8c..c39ce1186 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 82 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-4bce8217a697c729ac98046d4caf2c9e826b54c427fb0ab4f98e549a2e0ce31c.yml -openapi_spec_hash: 7996d2c34cc44fe2ce9ffe93c0ab774e -config_hash: bcd2cacdcb9fae9938f273cd167f613c +configured_endpoints: 97 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-32de3bc513663c5fac922c49be41c222b6ee8c0b841d8966bcdfa489d441daa3.yml +openapi_spec_hash: ea86343b5e9858a74e85da8ab2c532f6 +config_hash: 43dc8df20ffec9d1503f91866cb2b7d9 diff --git a/CHANGELOG.md b/CHANGELOG.md index 105627c5b..e6a402af8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # Changelog +## 4.93.0 (2025-04-08) + +Full Changelog: [v4.92.1...v4.93.0](https://github.com/openai/openai-node/compare/v4.92.1...v4.93.0) + +### Features + +* **api:** Add evalapi to sdk ([#1456](https://github.com/openai/openai-node/issues/1456)) ([ee917e3](https://github.com/openai/openai-node/commit/ee917e3335fcf44e87a28e54ce8ddfdcdfab1652)) + + +### Chores + +* **internal:** fix examples ([#1457](https://github.com/openai/openai-node/issues/1457)) ([a3dd0dd](https://github.com/openai/openai-node/commit/a3dd0dde3e8ad9cc7a02cf203d4550f91d31a2ae)) +* **internal:** skip broken test ([#1458](https://github.com/openai/openai-node/issues/1458)) ([4d2f815](https://github.com/openai/openai-node/commit/4d2f815ba5f6c426f9c21f4c3db443166389bbf8)) +* **tests:** improve enum examples ([#1454](https://github.com/openai/openai-node/issues/1454)) ([ecabce2](https://github.com/openai/openai-node/commit/ecabce282a9fb60122310942f3b647dfefae5403)) + ## 4.92.1 (2025-04-07) Full Changelog: [v4.92.0...v4.92.1](https://github.com/openai/openai-node/compare/v4.92.0...v4.92.1) diff --git a/api.md b/api.md index cf464cf63..2eb54b34a 100644 --- a/api.md +++ b/api.md @@ -235,6 +235,22 @@ Methods: - client.fineTuning.jobs.checkpoints.list(fineTuningJobId, { ...params }) -> FineTuningJobCheckpointsPage +## Checkpoints + +### Permissions + +Types: + +- PermissionCreateResponse +- PermissionRetrieveResponse +- PermissionDeleteResponse + +Methods: + +- client.fineTuning.checkpoints.permissions.create(fineTunedModelCheckpoint, { ...params }) -> PermissionCreateResponsesPage +- client.fineTuning.checkpoints.permissions.retrieve(fineTunedModelCheckpoint, { ...params }) -> PermissionRetrieveResponse +- client.fineTuning.checkpoints.permissions.del(fineTunedModelCheckpoint) -> PermissionDeleteResponse + # VectorStores Types: @@ -643,3 +659,59 @@ Types: Methods: - client.responses.inputItems.list(responseId, { ...params }) -> ResponseItemsPage + +# Evals + +Types: + +- EvalCustomDataSourceConfig +- EvalLabelModelGrader +- EvalStoredCompletionsDataSourceConfig +- EvalStringCheckGrader +- EvalTextSimilarityGrader +- EvalCreateResponse +- EvalRetrieveResponse +- EvalUpdateResponse +- EvalListResponse +- EvalDeleteResponse + +Methods: + +- client.evals.create({ ...params }) -> EvalCreateResponse +- client.evals.retrieve(evalId) -> EvalRetrieveResponse +- client.evals.update(evalId, { ...params }) -> EvalUpdateResponse +- client.evals.list({ ...params }) -> EvalListResponsesPage +- 
client.evals.del(evalId) -> EvalDeleteResponse + +## Runs + +Types: + +- CreateEvalCompletionsRunDataSource +- CreateEvalJSONLRunDataSource +- EvalAPIError +- RunCreateResponse +- RunRetrieveResponse +- RunListResponse +- RunDeleteResponse +- RunCancelResponse + +Methods: + +- client.evals.runs.create(evalId, { ...params }) -> RunCreateResponse +- client.evals.runs.retrieve(evalId, runId) -> RunRetrieveResponse +- client.evals.runs.list(evalId, { ...params }) -> RunListResponsesPage +- client.evals.runs.del(evalId, runId) -> RunDeleteResponse +- client.evals.runs.cancel(evalId, runId) -> RunCancelResponse + +### OutputItems + +Types: + +- OutputItemRetrieveResponse +- OutputItemListResponse + +Methods: + +- client.evals.runs.outputItems.retrieve(evalId, runId, outputItemId) -> OutputItemRetrieveResponse +- client.evals.runs.outputItems.list(evalId, runId, { ...params }) -> OutputItemListResponsesPage diff --git a/jsr.json b/jsr.json index b986198a1..b5e49671a 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@openai/openai", - "version": "4.92.1", + "version": "4.93.0", "exports": { ".": "./index.ts", "./helpers/zod": "./helpers/zod.ts", diff --git a/package.json b/package.json index 58c231fda..b9316cbe3 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openai", - "version": "4.92.1", + "version": "4.93.0", "description": "The official TypeScript library for the OpenAI API", "author": "OpenAI ", "types": "dist/index.d.ts", diff --git a/src/index.ts b/src/index.ts index 931894f2f..9e8d7ce37 100644 --- a/src/index.ts +++ b/src/index.ts @@ -66,6 +66,23 @@ import { import { Audio, AudioModel, AudioResponseFormat } from './resources/audio/audio'; import { Beta } from './resources/beta/beta'; import { Chat } from './resources/chat/chat'; +import { + EvalCreateParams, + EvalCreateResponse, + EvalCustomDataSourceConfig, + EvalDeleteResponse, + EvalLabelModelGrader, + EvalListParams, + EvalListResponse, + EvalListResponsesPage, + EvalRetrieveResponse, + EvalStoredCompletionsDataSourceConfig, + EvalStringCheckGrader, + EvalTextSimilarityGrader, + EvalUpdateParams, + EvalUpdateResponse, + Evals, +} from './resources/evals/evals'; import { FineTuning } from './resources/fine-tuning/fine-tuning'; import { Responses } from './resources/responses/responses'; import { @@ -293,6 +310,7 @@ export class OpenAI extends Core.APIClient { batches: API.Batches = new API.Batches(this); uploads: API.Uploads = new API.Uploads(this); responses: API.Responses = new API.Responses(this); + evals: API.Evals = new API.Evals(this); protected override defaultQuery(): Core.DefaultQuery | undefined { return this._options.defaultQuery; @@ -356,6 +374,8 @@ OpenAI.Batches = Batches; OpenAI.BatchesPage = BatchesPage; OpenAI.Uploads = UploadsAPIUploads; OpenAI.Responses = Responses; +OpenAI.Evals = Evals; +OpenAI.EvalListResponsesPage = EvalListResponsesPage; export declare namespace OpenAI { export type RequestOptions = Core.RequestOptions; @@ -508,6 +528,24 @@ export declare namespace OpenAI { export { Responses as Responses }; + export { + Evals as Evals, + type EvalCustomDataSourceConfig as EvalCustomDataSourceConfig, + type EvalLabelModelGrader as EvalLabelModelGrader, + type EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig, + type EvalStringCheckGrader as EvalStringCheckGrader, + type EvalTextSimilarityGrader as EvalTextSimilarityGrader, + type EvalCreateResponse as EvalCreateResponse, + type EvalRetrieveResponse as EvalRetrieveResponse, + type 
EvalUpdateResponse as EvalUpdateResponse,
+ type EvalListResponse as EvalListResponse,
+ type EvalDeleteResponse as EvalDeleteResponse,
+ EvalListResponsesPage as EvalListResponsesPage,
+ type EvalCreateParams as EvalCreateParams,
+ type EvalUpdateParams as EvalUpdateParams,
+ type EvalListParams as EvalListParams,
+ };
+
 export type AllModels = API.AllModels;
 export type ChatModel = API.ChatModel;
 export type ComparisonFilter = API.ComparisonFilter;
diff --git a/src/resources/evals.ts b/src/resources/evals.ts
new file mode 100644
index 000000000..b611710e1
--- /dev/null
+++ b/src/resources/evals.ts
@@ -0,0 +1,3 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+export * from './evals/index';
diff --git a/src/resources/evals/evals.ts b/src/resources/evals/evals.ts
new file mode 100644
index 000000000..84ff6d1bb
--- /dev/null
+++ b/src/resources/evals/evals.ts
@@ -0,0 +1,783 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import { APIResource } from '../../resource';
+import { isRequestOptions } from '../../core';
+import * as Core from '../../core';
+import * as Shared from '../shared';
+import * as RunsAPI from './runs/runs';
+import {
+ CreateEvalCompletionsRunDataSource,
+ CreateEvalJSONLRunDataSource,
+ EvalAPIError,
+ RunCancelResponse,
+ RunCreateParams,
+ RunCreateResponse,
+ RunDeleteResponse,
+ RunListParams,
+ RunListResponse,
+ RunListResponsesPage,
+ RunRetrieveResponse,
+ Runs,
+} from './runs/runs';
+import { CursorPage, type CursorPageParams } from '../../pagination';
+
+export class Evals extends APIResource {
+ runs: RunsAPI.Runs = new RunsAPI.Runs(this._client);
+
+ /**
+ * Create the structure of an evaluation that can be used to test a model's
+ * performance. An evaluation is a set of testing criteria and a datasource. After
+ * creating an evaluation, you can run it on different models and model parameters.
+ * We support several types of graders and datasources. For more information, see
+ * the [Evals guide](https://platform.openai.com/docs/guides/evals).
+ */
+ create(body: EvalCreateParams, options?: Core.RequestOptions): Core.APIPromise<EvalCreateResponse> {
+ return this._client.post('/evals', { body, ...options });
+ }
+
+ /**
+ * Get an evaluation by ID.
+ */
+ retrieve(evalId: string, options?: Core.RequestOptions): Core.APIPromise<EvalRetrieveResponse> {
+ return this._client.get(`/evals/${evalId}`, options);
+ }
+
+ /**
+ * Update certain properties of an evaluation.
+ */
+ update(
+ evalId: string,
+ body: EvalUpdateParams,
+ options?: Core.RequestOptions,
+ ): Core.APIPromise<EvalUpdateResponse> {
+ return this._client.post(`/evals/${evalId}`, { body, ...options });
+ }
+
+ /**
+ * List evaluations for a project.
+ */
+ list(
+ query?: EvalListParams,
+ options?: Core.RequestOptions,
+ ): Core.PagePromise<EvalListResponsesPage, EvalListResponse>;
+ list(options?: Core.RequestOptions): Core.PagePromise<EvalListResponsesPage, EvalListResponse>;
+ list(
+ query: EvalListParams | Core.RequestOptions = {},
+ options?: Core.RequestOptions,
+ ): Core.PagePromise<EvalListResponsesPage, EvalListResponse> {
+ if (isRequestOptions(query)) {
+ return this.list({}, query);
+ }
+ return this._client.getAPIList('/evals', EvalListResponsesPage, { query, ...options });
+ }
+
+ /**
+ * Delete an evaluation.
+ */
+ del(evalId: string, options?: Core.RequestOptions): Core.APIPromise<EvalDeleteResponse> {
+ return this._client.delete(`/evals/${evalId}`, options);
+ }
+}
+
+export class EvalListResponsesPage extends CursorPage<EvalListResponse> {}
+
+/**
+ * A CustomDataSourceConfig which specifies the schema of your `item` and
+ * optionally `sample` namespaces. The response schema defines the shape of the
+ * data that will be:
+ *
+ * - Used to define your testing criteria and
+ * - What data is required when creating a run
+ */
+export interface EvalCustomDataSourceConfig {
+ /**
+ * The JSON schema for the run data source items. Learn how to build JSON schemas
+ * [here](https://json-schema.org/).
+ */
+ schema: Record<string, unknown>;
+
+ /**
+ * The type of data source. Always `custom`.
+ */
+ type: 'custom';
+}
+
+/**
+ * A LabelModelGrader object which uses a model to assign labels to each item in
+ * the evaluation.
+ */
+export interface EvalLabelModelGrader {
+ input: Array<EvalLabelModelGrader.InputMessage | EvalLabelModelGrader.Assistant>;
+
+ /**
+ * The labels to assign to each item in the evaluation.
+ */
+ labels: Array<string>;
+
+ /**
+ * The model to use for the evaluation. Must support structured outputs.
+ */
+ model: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The labels that indicate a passing result. Must be a subset of labels.
+ */
+ passing_labels: Array<string>;
+
+ /**
+ * The object type, which is always `label_model`.
+ */
+ type: 'label_model';
+}
+
+export namespace EvalLabelModelGrader {
+ export interface InputMessage {
+ content: InputMessage.Content;
+
+ /**
+ * The role of the message. One of `user`, `system`, or `developer`.
+ */
+ role: 'user' | 'system' | 'developer';
+
+ /**
+ * The type of item, which is always `message`.
+ */
+ type: 'message';
+ }
+
+ export namespace InputMessage {
+ export interface Content {
+ /**
+ * The text content.
+ */
+ text: string;
+
+ /**
+ * The type of content, which is always `input_text`.
+ */
+ type: 'input_text';
+ }
+ }
+
+ export interface Assistant {
+ content: Assistant.Content;
+
+ /**
+ * The role of the message. Must be `assistant` for output.
+ */
+ role: 'assistant';
+
+ /**
+ * The type of item, which is always `message`.
+ */
+ type: 'message';
+ }
+
+ export namespace Assistant {
+ export interface Content {
+ /**
+ * The text content.
+ */
+ text: string;
+
+ /**
+ * The type of content, which is always `output_text`.
+ */
+ type: 'output_text';
+ }
+ }
+}
+
+/**
+ * A StoredCompletionsDataSourceConfig which specifies the metadata property of
+ * your stored completions query. This is usually metadata like `usecase=chatbot`
+ * or `prompt-version=v2`, etc. The schema returned by this data source config is
+ * used to define what variables are available in your evals. `item` and `sample`
+ * are both defined when using this data source config.
+ */
+export interface EvalStoredCompletionsDataSourceConfig {
+ /**
+ * The JSON schema for the run data source items. Learn how to build JSON schemas
+ * [here](https://json-schema.org/).
+ */
+ schema: Record<string, unknown>;
+
+ /**
+ * The type of data source. Always `stored_completions`.
+ */
+ type: 'stored_completions';
+
+ /**
+ * Set of 16 key-value pairs that can be attached to an object. This can be useful
+ * for storing additional information about the object in a structured format, and
+ * querying for objects via API or the dashboard.
+ *
+ * Keys are strings with a maximum length of 64 characters. Values are strings with
+ * a maximum length of 512 characters.
+ */
+ metadata?: Shared.Metadata | null;
+}
+
+/**
+ * A StringCheckGrader object that performs a string comparison between input and
+ * reference using a specified operation.
+ */
+export interface EvalStringCheckGrader {
+ /**
+ * The input text. This may include template strings.
+ */
+ input: string;
+
+ /**
+ * The name of the grader.
+ */
+ name: string;
+
+ /**
+ * The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`.
+ */
+ operation: 'eq' | 'ne' | 'like' | 'ilike';
+
+ /**
+ * The reference text. This may include template strings.
+ */
+ reference: string;
+
+ /**
+ * The object type, which is always `string_check`.
+ */
+ type: 'string_check';
+}
+
+/**
+ * A TextSimilarityGrader object which grades text based on similarity metrics.
+ */
+export interface EvalTextSimilarityGrader {
+ /**
+ * The evaluation metric to use. One of `cosine`, `fuzzy_match`, `bleu`, `gleu`,
+ * `meteor`, `rouge_1`, `rouge_2`, `rouge_3`, `rouge_4`, `rouge_5`, or `rouge_l`.
+ */
+ evaluation_metric:
+ | 'fuzzy_match'
+ | 'bleu'
+ | 'gleu'
+ | 'meteor'
+ | 'rouge_1'
+ | 'rouge_2'
+ | 'rouge_3'
+ | 'rouge_4'
+ | 'rouge_5'
+ | 'rouge_l'
+ | 'cosine';
+
+ /**
+ * The text being graded.
+ */
+ input: string;
+
+ /**
+ * The threshold for a passing grade; a score greater than or equal to this value
+ * indicates a passing grade.
+ */
+ pass_threshold: number;
+
+ /**
+ * The text being graded against.
+ */
+ reference: string;
+
+ /**
+ * The type of grader.
+ */
+ type: 'text_similarity';
+
+ /**
+ * The name of the grader.
+ */
+ name?: string;
+}
+
+/**
+ * An Eval object with a data source config and testing criteria. An Eval
+ * represents a task to be done for your LLM integration. Like:
+ *
+ * - Improve the quality of my chatbot
+ * - See how well my chatbot handles customer support
+ * - Check if o3-mini is better at my usecase than gpt-4o
+ */
+export interface EvalCreateResponse {
+ /**
+ * Unique identifier for the evaluation.
+ */
+ id: string;
+
+ /**
+ * The Unix timestamp (in seconds) for when the eval was created.
+ */
+ created_at: number;
+
+ /**
+ * Configuration of data sources used in runs of the evaluation.
+ */
+ data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig;
+
+ /**
+ * Set of 16 key-value pairs that can be attached to an object. This can be useful
+ * for storing additional information about the object in a structured format, and
+ * querying for objects via API or the dashboard.
+ *
+ * Keys are strings with a maximum length of 64 characters. Values are strings with
+ * a maximum length of 512 characters.
+ */
+ metadata: Shared.Metadata | null;
+
+ /**
+ * The name of the evaluation.
+ */
+ name: string;
+
+ /**
+ * The object type.
+ */
+ object: 'eval';
+
+ /**
+ * Indicates whether the evaluation is shared with OpenAI.
+ */
+ share_with_openai: boolean;
+
+ /**
+ * A list of testing criteria.
+ */
+ testing_criteria: Array<EvalLabelModelGrader | EvalStringCheckGrader | EvalTextSimilarityGrader>;
+}
+
+/**
+ * An Eval object with a data source config and testing criteria. An Eval
+ * represents a task to be done for your LLM integration. Like:
+ *
+ * - Improve the quality of my chatbot
+ * - See how well my chatbot handles customer support
+ * - Check if o3-mini is better at my usecase than gpt-4o
+ */
+export interface EvalRetrieveResponse {
+ /**
+ * Unique identifier for the evaluation.
+ */
+ id: string;
+
+ /**
+ * The Unix timestamp (in seconds) for when the eval was created.
+ */
+ created_at: number;
+
+ /**
+ * Configuration of data sources used in runs of the evaluation.
+ */
+ data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig;
+
+ /**
+ * Set of 16 key-value pairs that can be attached to an object. This can be useful
+ * for storing additional information about the object in a structured format, and
+ * querying for objects via API or the dashboard.
+ *
+ * Keys are strings with a maximum length of 64 characters. Values are strings with
+ * a maximum length of 512 characters.
+ */ + metadata: Shared.Metadata | null; + + /** + * The name of the evaluation. + */ + name: string; + + /** + * The object type. + */ + object: 'eval'; + + /** + * Indicates whether the evaluation is shared with OpenAI. + */ + share_with_openai: boolean; + + /** + * A list of testing criteria. + */ + testing_criteria: Array; +} + +/** + * An Eval object with a data source config and testing criteria. An Eval + * represents a task to be done for your LLM integration. Like: + * + * - Improve the quality of my chatbot + * - See how well my chatbot handles customer support + * - Check if o3-mini is better at my usecase than gpt-4o + */ +export interface EvalUpdateResponse { + /** + * Unique identifier for the evaluation. + */ + id: string; + + /** + * The Unix timestamp (in seconds) for when the eval was created. + */ + created_at: number; + + /** + * Configuration of data sources used in runs of the evaluation. + */ + data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * The name of the evaluation. + */ + name: string; + + /** + * The object type. + */ + object: 'eval'; + + /** + * Indicates whether the evaluation is shared with OpenAI. + */ + share_with_openai: boolean; + + /** + * A list of testing criteria. + */ + testing_criteria: Array; +} + +/** + * An Eval object with a data source config and testing criteria. An Eval + * represents a task to be done for your LLM integration. Like: + * + * - Improve the quality of my chatbot + * - See how well my chatbot handles customer support + * - Check if o3-mini is better at my usecase than gpt-4o + */ +export interface EvalListResponse { + /** + * Unique identifier for the evaluation. + */ + id: string; + + /** + * The Unix timestamp (in seconds) for when the eval was created. + */ + created_at: number; + + /** + * Configuration of data sources used in runs of the evaluation. + */ + data_source_config: EvalCustomDataSourceConfig | EvalStoredCompletionsDataSourceConfig; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * The name of the evaluation. + */ + name: string; + + /** + * The object type. + */ + object: 'eval'; + + /** + * Indicates whether the evaluation is shared with OpenAI. + */ + share_with_openai: boolean; + + /** + * A list of testing criteria. + */ + testing_criteria: Array; +} + +export interface EvalDeleteResponse { + deleted: boolean; + + eval_id: string; + + object: string; +} + +export interface EvalCreateParams { + /** + * The configuration for the data source used for the evaluation runs. + */ + data_source_config: EvalCreateParams.Custom | EvalCreateParams.StoredCompletions; + + /** + * A list of graders for all eval runs in this group. 
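+ *
+ * @example A minimal sketch of a single string-check grader entry. The template
+ * variables and field values are illustrative, and assume the `item` / `sample`
+ * namespaces described by the data source config:
+ *
+ * ```ts
+ * const testingCriteria = [
+ *   {
+ *     type: 'string_check' as const,
+ *     name: 'Exact match',
+ *     input: '{{sample.output_text}}',
+ *     reference: '{{item.expected}}',
+ *     operation: 'eq' as const,
+ *   },
+ * ];
+ * ```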
+ */ + testing_criteria: Array; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata?: Shared.Metadata | null; + + /** + * The name of the evaluation. + */ + name?: string; + + /** + * Indicates whether the evaluation is shared with OpenAI. + */ + share_with_openai?: boolean; +} + +export namespace EvalCreateParams { + /** + * A CustomDataSourceConfig object that defines the schema for the data source used + * for the evaluation runs. This schema is used to define the shape of the data + * that will be: + * + * - Used to define your testing criteria and + * - What data is required when creating a run + */ + export interface Custom { + /** + * The json schema for the run data source items. + */ + item_schema: Record; + + /** + * The type of data source. Always `custom`. + */ + type: 'custom'; + + /** + * Whether to include the sample schema in the data source. + */ + include_sample_schema?: boolean; + } + + /** + * A data source config which specifies the metadata property of your stored + * completions query. This is usually metadata like `usecase=chatbot` or + * `prompt-version=v2`, etc. + */ + export interface StoredCompletions { + /** + * The type of data source. Always `stored_completions`. + */ + type: 'stored_completions'; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata?: Shared.Metadata | null; + } + + /** + * A LabelModelGrader object which uses a model to assign labels to each item in + * the evaluation. + */ + export interface LabelModel { + input: Array; + + /** + * The labels to classify to each item in the evaluation. + */ + labels: Array; + + /** + * The model to use for the evaluation. Must support structured outputs. + */ + model: string; + + /** + * The name of the grader. + */ + name: string; + + /** + * The labels that indicate a passing result. Must be a subset of labels. + */ + passing_labels: Array; + + /** + * The object type, which is always `label_model`. + */ + type: 'label_model'; + } + + export namespace LabelModel { + export interface SimpleInputMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role: string; + } + + export interface InputMessage { + content: InputMessage.Content; + + /** + * The role of the message. One of `user`, `system`, or `developer`. + */ + role: 'user' | 'system' | 'developer'; + + /** + * The type of item, which is always `message`. + */ + type: 'message'; + } + + export namespace InputMessage { + export interface Content { + /** + * The text content. + */ + text: string; + + /** + * The type of content, which is always `input_text`. + */ + type: 'input_text'; + } + } + + export interface OutputMessage { + content: OutputMessage.Content; + + /** + * The role of the message. Must be `assistant` for output. + */ + role: 'assistant'; + + /** + * The type of item, which is always `message`. 
+ */ + type: 'message'; + } + + export namespace OutputMessage { + export interface Content { + /** + * The text content. + */ + text: string; + + /** + * The type of content, which is always `output_text`. + */ + type: 'output_text'; + } + } + } +} + +export interface EvalUpdateParams { + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata?: Shared.Metadata | null; + + /** + * Rename the evaluation. + */ + name?: string; +} + +export interface EvalListParams extends CursorPageParams { + /** + * Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for + * descending order. + */ + order?: 'asc' | 'desc'; + + /** + * Evals can be ordered by creation time or last updated time. Use `created_at` for + * creation time or `updated_at` for last updated time. + */ + order_by?: 'created_at' | 'updated_at'; +} + +Evals.EvalListResponsesPage = EvalListResponsesPage; +Evals.Runs = Runs; +Evals.RunListResponsesPage = RunListResponsesPage; + +export declare namespace Evals { + export { + type EvalCustomDataSourceConfig as EvalCustomDataSourceConfig, + type EvalLabelModelGrader as EvalLabelModelGrader, + type EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig, + type EvalStringCheckGrader as EvalStringCheckGrader, + type EvalTextSimilarityGrader as EvalTextSimilarityGrader, + type EvalCreateResponse as EvalCreateResponse, + type EvalRetrieveResponse as EvalRetrieveResponse, + type EvalUpdateResponse as EvalUpdateResponse, + type EvalListResponse as EvalListResponse, + type EvalDeleteResponse as EvalDeleteResponse, + EvalListResponsesPage as EvalListResponsesPage, + type EvalCreateParams as EvalCreateParams, + type EvalUpdateParams as EvalUpdateParams, + type EvalListParams as EvalListParams, + }; + + export { + Runs as Runs, + type CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource, + type CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource, + type EvalAPIError as EvalAPIError, + type RunCreateResponse as RunCreateResponse, + type RunRetrieveResponse as RunRetrieveResponse, + type RunListResponse as RunListResponse, + type RunDeleteResponse as RunDeleteResponse, + type RunCancelResponse as RunCancelResponse, + RunListResponsesPage as RunListResponsesPage, + type RunCreateParams as RunCreateParams, + type RunListParams as RunListParams, + }; +} diff --git a/src/resources/evals/index.ts b/src/resources/evals/index.ts new file mode 100644 index 000000000..a246fe4e7 --- /dev/null +++ b/src/resources/evals/index.ts @@ -0,0 +1,33 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +export { + EvalListResponsesPage, + Evals, + type EvalCustomDataSourceConfig, + type EvalLabelModelGrader, + type EvalStoredCompletionsDataSourceConfig, + type EvalStringCheckGrader, + type EvalTextSimilarityGrader, + type EvalCreateResponse, + type EvalRetrieveResponse, + type EvalUpdateResponse, + type EvalListResponse, + type EvalDeleteResponse, + type EvalCreateParams, + type EvalUpdateParams, + type EvalListParams, +} from './evals'; +export { + RunListResponsesPage, + Runs, + type CreateEvalCompletionsRunDataSource, + type CreateEvalJSONLRunDataSource, + type EvalAPIError, + type RunCreateResponse, + type RunRetrieveResponse, + type RunListResponse, + type RunDeleteResponse, + type RunCancelResponse, + type RunCreateParams, + type RunListParams, +} from './runs/index'; diff --git a/src/resources/evals/runs.ts b/src/resources/evals/runs.ts new file mode 100644 index 000000000..a3cc2bc7f --- /dev/null +++ b/src/resources/evals/runs.ts @@ -0,0 +1,3 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +export * from './runs/index'; diff --git a/src/resources/evals/runs/index.ts b/src/resources/evals/runs/index.ts new file mode 100644 index 000000000..d0e18bff4 --- /dev/null +++ b/src/resources/evals/runs/index.ts @@ -0,0 +1,23 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +export { + OutputItemListResponsesPage, + OutputItems, + type OutputItemRetrieveResponse, + type OutputItemListResponse, + type OutputItemListParams, +} from './output-items'; +export { + RunListResponsesPage, + Runs, + type CreateEvalCompletionsRunDataSource, + type CreateEvalJSONLRunDataSource, + type EvalAPIError, + type RunCreateResponse, + type RunRetrieveResponse, + type RunListResponse, + type RunDeleteResponse, + type RunCancelResponse, + type RunCreateParams, + type RunListParams, +} from './runs'; diff --git a/src/resources/evals/runs/output-items.ts b/src/resources/evals/runs/output-items.ts new file mode 100644 index 000000000..ee947c60f --- /dev/null +++ b/src/resources/evals/runs/output-items.ts @@ -0,0 +1,410 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import { APIResource } from '../../../resource'; +import { isRequestOptions } from '../../../core'; +import * as Core from '../../../core'; +import * as RunsAPI from './runs'; +import { CursorPage, type CursorPageParams } from '../../../pagination'; + +export class OutputItems extends APIResource { + /** + * Get an evaluation run output item by ID. + */ + retrieve( + evalId: string, + runId: string, + outputItemId: string, + options?: Core.RequestOptions, + ): Core.APIPromise { + return this._client.get(`/evals/${evalId}/runs/${runId}/output_items/${outputItemId}`, options); + } + + /** + * Get a list of output items for an evaluation run. 
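+ *
+ * @example A sketch of auto-paginating over output items, assuming `client` is a
+ * configured `OpenAI` instance and the IDs are placeholders:
+ *
+ * ```ts
+ * for await (const outputItem of client.evals.runs.outputItems.list('eval_abc123', 'run_abc123')) {
+ *   console.log(outputItem.id, outputItem.status);
+ * }
+ * ```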
+ */ + list( + evalId: string, + runId: string, + query?: OutputItemListParams, + options?: Core.RequestOptions, + ): Core.PagePromise; + list( + evalId: string, + runId: string, + options?: Core.RequestOptions, + ): Core.PagePromise; + list( + evalId: string, + runId: string, + query: OutputItemListParams | Core.RequestOptions = {}, + options?: Core.RequestOptions, + ): Core.PagePromise { + if (isRequestOptions(query)) { + return this.list(evalId, runId, {}, query); + } + return this._client.getAPIList( + `/evals/${evalId}/runs/${runId}/output_items`, + OutputItemListResponsesPage, + { query, ...options }, + ); + } +} + +export class OutputItemListResponsesPage extends CursorPage {} + +/** + * A schema representing an evaluation run output item. + */ +export interface OutputItemRetrieveResponse { + /** + * Unique identifier for the evaluation run output item. + */ + id: string; + + /** + * Unix timestamp (in seconds) when the evaluation run was created. + */ + created_at: number; + + /** + * Details of the input data source item. + */ + datasource_item: Record; + + /** + * The identifier for the data source item. + */ + datasource_item_id: number; + + /** + * The identifier of the evaluation group. + */ + eval_id: string; + + /** + * The type of the object. Always "eval.run.output_item". + */ + object: 'eval.run.output_item'; + + /** + * A list of results from the evaluation run. + */ + results: Array>; + + /** + * The identifier of the evaluation run associated with this output item. + */ + run_id: string; + + /** + * A sample containing the input and output of the evaluation run. + */ + sample: OutputItemRetrieveResponse.Sample; + + /** + * The status of the evaluation run. + */ + status: string; +} + +export namespace OutputItemRetrieveResponse { + /** + * A sample containing the input and output of the evaluation run. + */ + export interface Sample { + /** + * An object representing an error response from the Eval API. + */ + error: RunsAPI.EvalAPIError; + + /** + * The reason why the sample generation was finished. + */ + finish_reason: string; + + /** + * An array of input messages. + */ + input: Array; + + /** + * The maximum number of tokens allowed for completion. + */ + max_completion_tokens: number; + + /** + * The model used for generating the sample. + */ + model: string; + + /** + * An array of output messages. + */ + output: Array; + + /** + * The seed used for generating the sample. + */ + seed: number; + + /** + * The sampling temperature used. + */ + temperature: number; + + /** + * The top_p value used for sampling. + */ + top_p: number; + + /** + * Token usage details for the sample. + */ + usage: Sample.Usage; + } + + export namespace Sample { + /** + * An input message. + */ + export interface Input { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message sender (e.g., system, user, developer). + */ + role: string; + } + + export interface Output { + /** + * The content of the message. + */ + content?: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role?: string; + } + + /** + * Token usage details for the sample. + */ + export interface Usage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. 
+ */ + total_tokens: number; + } + } +} + +/** + * A schema representing an evaluation run output item. + */ +export interface OutputItemListResponse { + /** + * Unique identifier for the evaluation run output item. + */ + id: string; + + /** + * Unix timestamp (in seconds) when the evaluation run was created. + */ + created_at: number; + + /** + * Details of the input data source item. + */ + datasource_item: Record; + + /** + * The identifier for the data source item. + */ + datasource_item_id: number; + + /** + * The identifier of the evaluation group. + */ + eval_id: string; + + /** + * The type of the object. Always "eval.run.output_item". + */ + object: 'eval.run.output_item'; + + /** + * A list of results from the evaluation run. + */ + results: Array>; + + /** + * The identifier of the evaluation run associated with this output item. + */ + run_id: string; + + /** + * A sample containing the input and output of the evaluation run. + */ + sample: OutputItemListResponse.Sample; + + /** + * The status of the evaluation run. + */ + status: string; +} + +export namespace OutputItemListResponse { + /** + * A sample containing the input and output of the evaluation run. + */ + export interface Sample { + /** + * An object representing an error response from the Eval API. + */ + error: RunsAPI.EvalAPIError; + + /** + * The reason why the sample generation was finished. + */ + finish_reason: string; + + /** + * An array of input messages. + */ + input: Array; + + /** + * The maximum number of tokens allowed for completion. + */ + max_completion_tokens: number; + + /** + * The model used for generating the sample. + */ + model: string; + + /** + * An array of output messages. + */ + output: Array; + + /** + * The seed used for generating the sample. + */ + seed: number; + + /** + * The sampling temperature used. + */ + temperature: number; + + /** + * The top_p value used for sampling. + */ + top_p: number; + + /** + * Token usage details for the sample. + */ + usage: Sample.Usage; + } + + export namespace Sample { + /** + * An input message. + */ + export interface Input { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message sender (e.g., system, user, developer). + */ + role: string; + } + + export interface Output { + /** + * The content of the message. + */ + content?: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role?: string; + } + + /** + * Token usage details for the sample. + */ + export interface Usage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. + */ + total_tokens: number; + } + } +} + +export interface OutputItemListParams extends CursorPageParams { + /** + * Sort order for output items by timestamp. Use `asc` for ascending order or + * `desc` for descending order. Defaults to `asc`. + */ + order?: 'asc' | 'desc'; + + /** + * Filter output items by status. Use `failed` to filter by failed output items or + * `pass` to filter by passed output items. 
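+ *
+ * @example Note that the accepted values are `fail` and `pass`, as typed below; a
+ * sketch with placeholder IDs:
+ *
+ * ```ts
+ * const failures = await client.evals.runs.outputItems.list('eval_abc123', 'run_abc123', {
+ *   status: 'fail',
+ * });
+ * ```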
+ */ + status?: 'fail' | 'pass'; +} + +OutputItems.OutputItemListResponsesPage = OutputItemListResponsesPage; + +export declare namespace OutputItems { + export { + type OutputItemRetrieveResponse as OutputItemRetrieveResponse, + type OutputItemListResponse as OutputItemListResponse, + OutputItemListResponsesPage as OutputItemListResponsesPage, + type OutputItemListParams as OutputItemListParams, + }; +} diff --git a/src/resources/evals/runs/runs.ts b/src/resources/evals/runs/runs.ts new file mode 100644 index 000000000..ca2b7f424 --- /dev/null +++ b/src/resources/evals/runs/runs.ts @@ -0,0 +1,1058 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import { APIResource } from '../../../resource'; +import { isRequestOptions } from '../../../core'; +import * as Core from '../../../core'; +import * as Shared from '../../shared'; +import * as OutputItemsAPI from './output-items'; +import { + OutputItemListParams, + OutputItemListResponse, + OutputItemListResponsesPage, + OutputItemRetrieveResponse, + OutputItems, +} from './output-items'; +import { CursorPage, type CursorPageParams } from '../../../pagination'; + +export class Runs extends APIResource { + outputItems: OutputItemsAPI.OutputItems = new OutputItemsAPI.OutputItems(this._client); + + /** + * Create a new evaluation run. This is the endpoint that will kick off grading. + */ + create( + evalId: string, + body: RunCreateParams, + options?: Core.RequestOptions, + ): Core.APIPromise { + return this._client.post(`/evals/${evalId}/runs`, { body, ...options }); + } + + /** + * Get an evaluation run by ID. + */ + retrieve( + evalId: string, + runId: string, + options?: Core.RequestOptions, + ): Core.APIPromise { + return this._client.get(`/evals/${evalId}/runs/${runId}`, options); + } + + /** + * Get a list of runs for an evaluation. + */ + list( + evalId: string, + query?: RunListParams, + options?: Core.RequestOptions, + ): Core.PagePromise; + list( + evalId: string, + options?: Core.RequestOptions, + ): Core.PagePromise; + list( + evalId: string, + query: RunListParams | Core.RequestOptions = {}, + options?: Core.RequestOptions, + ): Core.PagePromise { + if (isRequestOptions(query)) { + return this.list(evalId, {}, query); + } + return this._client.getAPIList(`/evals/${evalId}/runs`, RunListResponsesPage, { query, ...options }); + } + + /** + * Delete an eval run. + */ + del(evalId: string, runId: string, options?: Core.RequestOptions): Core.APIPromise { + return this._client.delete(`/evals/${evalId}/runs/${runId}`, options); + } + + /** + * Cancel an ongoing evaluation run. + */ + cancel(evalId: string, runId: string, options?: Core.RequestOptions): Core.APIPromise { + return this._client.post(`/evals/${evalId}/runs/${runId}`, options); + } +} + +export class RunListResponsesPage extends CursorPage {} + +/** + * A CompletionsRunDataSource object describing a model sampling configuration. + */ +export interface CreateEvalCompletionsRunDataSource { + input_messages: + | CreateEvalCompletionsRunDataSource.Template + | CreateEvalCompletionsRunDataSource.ItemReference; + + /** + * The name of the model to use for generating completions (e.g. "o3-mini"). + */ + model: string; + + /** + * A StoredCompletionsRunDataSource configuration describing a set of filters + */ + source: + | CreateEvalCompletionsRunDataSource.FileContent + | CreateEvalCompletionsRunDataSource.FileID + | CreateEvalCompletionsRunDataSource.StoredCompletions; + + /** + * The type of run data source. Always `completions`. 
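+ *
+ * @example For context, a sketch of a complete data source; the model, template
+ * text, and file ID are illustrative:
+ *
+ * ```ts
+ * const dataSource: CreateEvalCompletionsRunDataSource = {
+ *   type: 'completions',
+ *   model: 'gpt-4o-mini',
+ *   input_messages: {
+ *     type: 'template',
+ *     template: [{ role: 'user', content: 'Classify the sentiment of: {{item.text}}' }],
+ *   },
+ *   source: { type: 'file_id', id: 'file-abc123' },
+ * };
+ * ```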
+ */ + type: 'completions'; + + sampling_params?: CreateEvalCompletionsRunDataSource.SamplingParams; +} + +export namespace CreateEvalCompletionsRunDataSource { + export interface Template { + /** + * A list of chat messages forming the prompt or context. May include variable + * references to the "item" namespace, ie {{item.name}}. + */ + template: Array; + + /** + * The type of input messages. Always `template`. + */ + type: 'template'; + } + + export namespace Template { + export interface ChatMessage { + /** + * The content of the message. + */ + content: string; + + /** + * The role of the message (e.g. "system", "assistant", "user"). + */ + role: string; + } + + export interface InputMessage { + content: InputMessage.Content; + + /** + * The role of the message. One of `user`, `system`, or `developer`. + */ + role: 'user' | 'system' | 'developer'; + + /** + * The type of item, which is always `message`. + */ + type: 'message'; + } + + export namespace InputMessage { + export interface Content { + /** + * The text content. + */ + text: string; + + /** + * The type of content, which is always `input_text`. + */ + type: 'input_text'; + } + } + + export interface OutputMessage { + content: OutputMessage.Content; + + /** + * The role of the message. Must be `assistant` for output. + */ + role: 'assistant'; + + /** + * The type of item, which is always `message`. + */ + type: 'message'; + } + + export namespace OutputMessage { + export interface Content { + /** + * The text content. + */ + text: string; + + /** + * The type of content, which is always `output_text`. + */ + type: 'output_text'; + } + } + } + + export interface ItemReference { + /** + * A reference to a variable in the "item" namespace. Ie, "item.name" + */ + item_reference: string; + + /** + * The type of input messages. Always `item_reference`. + */ + type: 'item_reference'; + } + + export interface FileContent { + /** + * The content of the jsonl file. + */ + content: Array; + + /** + * The type of jsonl source. Always `file_content`. + */ + type: 'file_content'; + } + + export namespace FileContent { + export interface Content { + item: Record; + + sample?: Record; + } + } + + export interface FileID { + /** + * The identifier of the file. + */ + id: string; + + /** + * The type of jsonl source. Always `file_id`. + */ + type: 'file_id'; + } + + /** + * A StoredCompletionsRunDataSource configuration describing a set of filters + */ + export interface StoredCompletions { + /** + * An optional Unix timestamp to filter items created after this time. + */ + created_after: number | null; + + /** + * An optional Unix timestamp to filter items created before this time. + */ + created_before: number | null; + + /** + * An optional maximum number of items to return. + */ + limit: number | null; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * An optional model to filter by (e.g., 'gpt-4o'). + */ + model: string | null; + + /** + * The type of source. Always `stored_completions`. + */ + type: 'stored_completions'; + } + + export interface SamplingParams { + /** + * The maximum number of tokens in the generated output. 
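+ *
+ * @example A sketch of a full sampling configuration (all values illustrative):
+ *
+ * ```ts
+ * const samplingParams = { temperature: 0.2, max_completion_tokens: 256, seed: 42, top_p: 1 };
+ * ```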
+ */
+ max_completion_tokens?: number;
+
+ /**
+ * A seed value to initialize the randomness during sampling.
+ */
+ seed?: number;
+
+ /**
+ * A higher temperature increases randomness in the outputs.
+ */
+ temperature?: number;
+
+ /**
+ * An alternative to temperature for nucleus sampling; 1.0 includes all tokens.
+ */
+ top_p?: number;
+ }
+}
+
+/**
+ * A JsonlRunDataSource object that specifies a JSONL file that matches the eval.
+ */
+export interface CreateEvalJSONLRunDataSource {
+ source: CreateEvalJSONLRunDataSource.FileContent | CreateEvalJSONLRunDataSource.FileID;
+
+ /**
+ * The type of data source. Always `jsonl`.
+ */
+ type: 'jsonl';
+}
+
+export namespace CreateEvalJSONLRunDataSource {
+ export interface FileContent {
+ /**
+ * The content of the jsonl file.
+ */
+ content: Array<FileContent.Content>;
+
+ /**
+ * The type of jsonl source. Always `file_content`.
+ */
+ type: 'file_content';
+ }
+
+ export namespace FileContent {
+ export interface Content {
+ item: Record<string, unknown>;
+
+ sample?: Record<string, unknown>;
+ }
+ }
+
+ export interface FileID {
+ /**
+ * The identifier of the file.
+ */
+ id: string;
+
+ /**
+ * The type of jsonl source. Always `file_id`.
+ */
+ type: 'file_id';
+ }
+}
+
+/**
+ * An object representing an error response from the Eval API.
+ */
+export interface EvalAPIError {
+ /**
+ * The error code.
+ */
+ code: string;
+
+ /**
+ * The error message.
+ */
+ message: string;
+}
+
+/**
+ * A schema representing an evaluation run.
+ */
+export interface RunCreateResponse {
+ /**
+ * Unique identifier for the evaluation run.
+ */
+ id: string;
+
+ /**
+ * Unix timestamp (in seconds) when the evaluation run was created.
+ */
+ created_at: number;
+
+ /**
+ * Information about the run's data source.
+ */
+ data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource;
+
+ /**
+ * An object representing an error response from the Eval API.
+ */
+ error: EvalAPIError;
+
+ /**
+ * The identifier of the associated evaluation.
+ */
+ eval_id: string;
+
+ /**
+ * Set of 16 key-value pairs that can be attached to an object. This can be useful
+ * for storing additional information about the object in a structured format, and
+ * querying for objects via API or the dashboard.
+ *
+ * Keys are strings with a maximum length of 64 characters. Values are strings with
+ * a maximum length of 512 characters.
+ */
+ metadata: Shared.Metadata | null;
+
+ /**
+ * The model that is evaluated, if applicable.
+ */
+ model: string;
+
+ /**
+ * The name of the evaluation run.
+ */
+ name: string;
+
+ /**
+ * The type of the object. Always "eval.run".
+ */
+ object: 'eval.run';
+
+ /**
+ * Usage statistics for each model during the evaluation run.
+ */
+ per_model_usage: Array<RunCreateResponse.PerModelUsage>;
+
+ /**
+ * Results per testing criteria applied during the evaluation run.
+ */
+ per_testing_criteria_results: Array<RunCreateResponse.PerTestingCriteriaResult>;
+
+ /**
+ * The URL to the rendered evaluation run report on the UI dashboard.
+ */
+ report_url: string;
+
+ /**
+ * Counters summarizing the outcomes of the evaluation run.
+ */
+ result_counts: RunCreateResponse.ResultCounts;
+
+ /**
+ * The status of the evaluation run.
+ */
+ status: string;
+}
+
+export namespace RunCreateResponse {
+ export interface PerModelUsage {
+ /**
+ * The number of tokens retrieved from cache.
+ */
+ cached_tokens: number;
+
+ /**
+ * The number of completion tokens generated.
+ */
+ completion_tokens: number;
+
+ /**
+ * The number of invocations.
+ */
+ invocation_count: number;
+
+ /**
+ * The name of the model.
+ */ + model_name: string; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. + */ + total_tokens: number; + } + + export interface PerTestingCriteriaResult { + /** + * Number of tests failed for this criteria. + */ + failed: number; + + /** + * Number of tests passed for this criteria. + */ + passed: number; + + /** + * A description of the testing criteria. + */ + testing_criteria: string; + } + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + export interface ResultCounts { + /** + * Number of output items that resulted in an error. + */ + errored: number; + + /** + * Number of output items that failed to pass the evaluation. + */ + failed: number; + + /** + * Number of output items that passed the evaluation. + */ + passed: number; + + /** + * Total number of executed output items. + */ + total: number; + } +} + +/** + * A schema representing an evaluation run. + */ +export interface RunRetrieveResponse { + /** + * Unique identifier for the evaluation run. + */ + id: string; + + /** + * Unix timestamp (in seconds) when the evaluation run was created. + */ + created_at: number; + + /** + * Information about the run's data source. + */ + data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + + /** + * An object representing an error response from the Eval API. + */ + error: EvalAPIError; + + /** + * The identifier of the associated evaluation. + */ + eval_id: string; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * The model that is evaluated, if applicable. + */ + model: string; + + /** + * The name of the evaluation run. + */ + name: string; + + /** + * The type of the object. Always "eval.run". + */ + object: 'eval.run'; + + /** + * Usage statistics for each model during the evaluation run. + */ + per_model_usage: Array; + + /** + * Results per testing criteria applied during the evaluation run. + */ + per_testing_criteria_results: Array; + + /** + * The URL to the rendered evaluation run report on the UI dashboard. + */ + report_url: string; + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + result_counts: RunRetrieveResponse.ResultCounts; + + /** + * The status of the evaluation run. + */ + status: string; +} + +export namespace RunRetrieveResponse { + export interface PerModelUsage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of invocations. + */ + invocation_count: number; + + /** + * The name of the model. + */ + model_name: string; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. + */ + total_tokens: number; + } + + export interface PerTestingCriteriaResult { + /** + * Number of tests failed for this criteria. + */ + failed: number; + + /** + * Number of tests passed for this criteria. + */ + passed: number; + + /** + * A description of the testing criteria. + */ + testing_criteria: string; + } + + /** + * Counters summarizing the outcomes of the evaluation run. 
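+ *
+ * @example A sketch of deriving a pass rate from these counters; `client` and the
+ * IDs are placeholders:
+ *
+ * ```ts
+ * const run = await client.evals.runs.retrieve('eval_abc123', 'run_abc123');
+ * const { passed, total } = run.result_counts;
+ * console.log(`pass rate: ${total ? (100 * passed) / total : 0}%`);
+ * ```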
+ */ + export interface ResultCounts { + /** + * Number of output items that resulted in an error. + */ + errored: number; + + /** + * Number of output items that failed to pass the evaluation. + */ + failed: number; + + /** + * Number of output items that passed the evaluation. + */ + passed: number; + + /** + * Total number of executed output items. + */ + total: number; + } +} + +/** + * A schema representing an evaluation run. + */ +export interface RunListResponse { + /** + * Unique identifier for the evaluation run. + */ + id: string; + + /** + * Unix timestamp (in seconds) when the evaluation run was created. + */ + created_at: number; + + /** + * Information about the run's data source. + */ + data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + + /** + * An object representing an error response from the Eval API. + */ + error: EvalAPIError; + + /** + * The identifier of the associated evaluation. + */ + eval_id: string; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * The model that is evaluated, if applicable. + */ + model: string; + + /** + * The name of the evaluation run. + */ + name: string; + + /** + * The type of the object. Always "eval.run". + */ + object: 'eval.run'; + + /** + * Usage statistics for each model during the evaluation run. + */ + per_model_usage: Array; + + /** + * Results per testing criteria applied during the evaluation run. + */ + per_testing_criteria_results: Array; + + /** + * The URL to the rendered evaluation run report on the UI dashboard. + */ + report_url: string; + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + result_counts: RunListResponse.ResultCounts; + + /** + * The status of the evaluation run. + */ + status: string; +} + +export namespace RunListResponse { + export interface PerModelUsage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of invocations. + */ + invocation_count: number; + + /** + * The name of the model. + */ + model_name: string; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. + */ + total_tokens: number; + } + + export interface PerTestingCriteriaResult { + /** + * Number of tests failed for this criteria. + */ + failed: number; + + /** + * Number of tests passed for this criteria. + */ + passed: number; + + /** + * A description of the testing criteria. + */ + testing_criteria: string; + } + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + export interface ResultCounts { + /** + * Number of output items that resulted in an error. + */ + errored: number; + + /** + * Number of output items that failed to pass the evaluation. + */ + failed: number; + + /** + * Number of output items that passed the evaluation. + */ + passed: number; + + /** + * Total number of executed output items. + */ + total: number; + } +} + +export interface RunDeleteResponse { + deleted?: boolean; + + object?: string; + + run_id?: string; +} + +/** + * A schema representing an evaluation run. 
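+ *
+ * @example This is the shape returned when cancelling a run; a sketch with
+ * placeholder IDs:
+ *
+ * ```ts
+ * const cancelled = await client.evals.runs.cancel('eval_abc123', 'run_abc123');
+ * console.log(cancelled.status);
+ * ```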
+ */ +export interface RunCancelResponse { + /** + * Unique identifier for the evaluation run. + */ + id: string; + + /** + * Unix timestamp (in seconds) when the evaluation run was created. + */ + created_at: number; + + /** + * Information about the run's data source. + */ + data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + + /** + * An object representing an error response from the Eval API. + */ + error: EvalAPIError; + + /** + * The identifier of the associated evaluation. + */ + eval_id: string; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. + */ + metadata: Shared.Metadata | null; + + /** + * The model that is evaluated, if applicable. + */ + model: string; + + /** + * The name of the evaluation run. + */ + name: string; + + /** + * The type of the object. Always "eval.run". + */ + object: 'eval.run'; + + /** + * Usage statistics for each model during the evaluation run. + */ + per_model_usage: Array; + + /** + * Results per testing criteria applied during the evaluation run. + */ + per_testing_criteria_results: Array; + + /** + * The URL to the rendered evaluation run report on the UI dashboard. + */ + report_url: string; + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + result_counts: RunCancelResponse.ResultCounts; + + /** + * The status of the evaluation run. + */ + status: string; +} + +export namespace RunCancelResponse { + export interface PerModelUsage { + /** + * The number of tokens retrieved from cache. + */ + cached_tokens: number; + + /** + * The number of completion tokens generated. + */ + completion_tokens: number; + + /** + * The number of invocations. + */ + invocation_count: number; + + /** + * The name of the model. + */ + model_name: string; + + /** + * The number of prompt tokens used. + */ + prompt_tokens: number; + + /** + * The total number of tokens used. + */ + total_tokens: number; + } + + export interface PerTestingCriteriaResult { + /** + * Number of tests failed for this criteria. + */ + failed: number; + + /** + * Number of tests passed for this criteria. + */ + passed: number; + + /** + * A description of the testing criteria. + */ + testing_criteria: string; + } + + /** + * Counters summarizing the outcomes of the evaluation run. + */ + export interface ResultCounts { + /** + * Number of output items that resulted in an error. + */ + errored: number; + + /** + * Number of output items that failed to pass the evaluation. + */ + failed: number; + + /** + * Number of output items that passed the evaluation. + */ + passed: number; + + /** + * Total number of executed output items. + */ + total: number; + } +} + +export interface RunCreateParams { + /** + * Details about the run's data source. + */ + data_source: CreateEvalJSONLRunDataSource | CreateEvalCompletionsRunDataSource; + + /** + * Set of 16 key-value pairs that can be attached to an object. This can be useful + * for storing additional information about the object in a structured format, and + * querying for objects via API or the dashboard. + * + * Keys are strings with a maximum length of 64 characters. Values are strings with + * a maximum length of 512 characters. 
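+ *
+ * @example For context, a sketch of creating a run with metadata; the eval ID,
+ * file ID, and metadata values are illustrative:
+ *
+ * ```ts
+ * const run = await client.evals.runs.create('eval_abc123', {
+ *   name: 'nightly-regression',
+ *   metadata: { trigger: 'ci' },
+ *   data_source: { type: 'jsonl', source: { type: 'file_id', id: 'file-abc123' } },
+ * });
+ * ```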
+   */
+  metadata?: Shared.Metadata | null;
+
+  /**
+   * The name of the run.
+   */
+  name?: string;
+}
+
+export interface RunListParams extends CursorPageParams {
+  /**
+   * Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
+   * descending order. Defaults to `asc`.
+   */
+  order?: 'asc' | 'desc';
+
+  /**
+   * Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed` |
+   * `canceled`.
+   */
+  status?: 'queued' | 'in_progress' | 'completed' | 'canceled' | 'failed';
+}
+
+Runs.RunListResponsesPage = RunListResponsesPage;
+Runs.OutputItems = OutputItems;
+Runs.OutputItemListResponsesPage = OutputItemListResponsesPage;
+
+export declare namespace Runs {
+  export {
+    type CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource,
+    type CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource,
+    type EvalAPIError as EvalAPIError,
+    type RunCreateResponse as RunCreateResponse,
+    type RunRetrieveResponse as RunRetrieveResponse,
+    type RunListResponse as RunListResponse,
+    type RunDeleteResponse as RunDeleteResponse,
+    type RunCancelResponse as RunCancelResponse,
+    RunListResponsesPage as RunListResponsesPage,
+    type RunCreateParams as RunCreateParams,
+    type RunListParams as RunListParams,
+  };
+
+  export {
+    OutputItems as OutputItems,
+    type OutputItemRetrieveResponse as OutputItemRetrieveResponse,
+    type OutputItemListResponse as OutputItemListResponse,
+    OutputItemListResponsesPage as OutputItemListResponsesPage,
+    type OutputItemListParams as OutputItemListParams,
+  };
+}
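RunListParams combines cursor pagination (via CursorPageParams) with `order` and `status` filters. Because list pages in this SDK are async-iterable, walking every matching run might look like the following sketch (illustrative only, not part of the patch; `evalId` is a placeholder argument):

  import OpenAI from 'openai';

  const client = new OpenAI();

  async function listFailedRuns(evalId: string) {
    // `for await` transparently follows the pagination cursor,
    // yielding each failed run, newest first.
    for await (const run of client.evals.runs.list(evalId, { status: 'failed', order: 'desc' })) {
      console.log(run.id, run.report_url);
    }
  }

diff --git a/src/resources/fine-tuning/checkpoints.ts b/src/resources/fine-tuning/checkpoints.ts
new file mode 100644
index 000000000..eb09063f6
--- /dev/null
+++ b/src/resources/fine-tuning/checkpoints.ts
@@ -0,0 +1,3 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+export * from './checkpoints/index';
diff --git a/src/resources/fine-tuning/checkpoints/checkpoints.ts b/src/resources/fine-tuning/checkpoints/checkpoints.ts
new file mode 100644
index 000000000..08422aa64
--- /dev/null
+++ b/src/resources/fine-tuning/checkpoints/checkpoints.ts
@@ -0,0 +1,32 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.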
+
+import { APIResource } from '../../../resource';
+import * as PermissionsAPI from './permissions';
+import {
+  PermissionCreateParams,
+  PermissionCreateResponse,
+  PermissionCreateResponsesPage,
+  PermissionDeleteResponse,
+  PermissionRetrieveParams,
+  PermissionRetrieveResponse,
+  Permissions,
+} from './permissions';
+
+export class Checkpoints extends APIResource {
+  permissions: PermissionsAPI.Permissions = new PermissionsAPI.Permissions(this._client);
+}
+
+Checkpoints.Permissions = Permissions;
+Checkpoints.PermissionCreateResponsesPage = PermissionCreateResponsesPage;
+
+export declare namespace Checkpoints {
+  export {
+    Permissions as Permissions,
+    type PermissionCreateResponse as PermissionCreateResponse,
+    type PermissionRetrieveResponse as PermissionRetrieveResponse,
+    type PermissionDeleteResponse as PermissionDeleteResponse,
+    PermissionCreateResponsesPage as PermissionCreateResponsesPage,
+    type PermissionCreateParams as PermissionCreateParams,
+    type PermissionRetrieveParams as PermissionRetrieveParams,
+  };
+}
diff --git a/src/resources/fine-tuning/checkpoints/index.ts b/src/resources/fine-tuning/checkpoints/index.ts
new file mode 100644
index 000000000..51d1af9cf
--- /dev/null
+++ b/src/resources/fine-tuning/checkpoints/index.ts
@@ -0,0 +1,12 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+export { Checkpoints } from './checkpoints';
+export {
+  PermissionCreateResponsesPage,
+  Permissions,
+  type PermissionCreateResponse,
+  type PermissionRetrieveResponse,
+  type PermissionDeleteResponse,
+  type PermissionCreateParams,
+  type PermissionRetrieveParams,
+} from './permissions';
diff --git a/src/resources/fine-tuning/checkpoints/permissions.ts b/src/resources/fine-tuning/checkpoints/permissions.ts
new file mode 100644
index 000000000..500c3de81
--- /dev/null
+++ b/src/resources/fine-tuning/checkpoints/permissions.ts
@@ -0,0 +1,198 @@
+// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import { APIResource } from '../../../resource';
+import { isRequestOptions } from '../../../core';
+import * as Core from '../../../core';
+import { Page } from '../../../pagination';
+
+export class Permissions extends APIResource {
+  /**
+   * **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys).
+   *
+   * This enables organization owners to share fine-tuned models with other projects
+   * in their organization.
+   */
+  create(
+    fineTunedModelCheckpoint: string,
+    body: PermissionCreateParams,
+    options?: Core.RequestOptions,
+  ): Core.PagePromise<PermissionCreateResponsesPage, PermissionCreateResponse> {
+    return this._client.getAPIList(
+      `/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions`,
+      PermissionCreateResponsesPage,
+      { body, method: 'post', ...options },
+    );
+  }
+
+  /**
+   * **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+   *
+   * Organization owners can use this endpoint to view all permissions for a
+   * fine-tuned model checkpoint.
+   */
+  retrieve(
+    fineTunedModelCheckpoint: string,
+    query?: PermissionRetrieveParams,
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<PermissionRetrieveResponse>;
+  retrieve(
+    fineTunedModelCheckpoint: string,
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<PermissionRetrieveResponse>;
+  retrieve(
+    fineTunedModelCheckpoint: string,
+    query: PermissionRetrieveParams | Core.RequestOptions = {},
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<PermissionRetrieveResponse> {
+    if (isRequestOptions(query)) {
+      return this.retrieve(fineTunedModelCheckpoint, {}, query);
+    }
+    return this._client.get(`/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions`, {
+      query,
+      ...options,
+    });
+  }
+
+  /**
+   * **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+   *
+   * Organization owners can use this endpoint to delete a permission for a
+   * fine-tuned model checkpoint.
+   */
+  del(
+    fineTunedModelCheckpoint: string,
+    options?: Core.RequestOptions,
+  ): Core.APIPromise<PermissionDeleteResponse> {
+    return this._client.delete(`/fine_tuning/checkpoints/${fineTunedModelCheckpoint}/permissions`, options);
+  }
+}
+
+/**
+ * Note: no pagination actually occurs yet; this is for forwards-compatibility.
+ */
+export class PermissionCreateResponsesPage extends Page<PermissionCreateResponse> {}
+
+/**
+ * The `checkpoint.permission` object represents a permission for a fine-tuned
+ * model checkpoint.
+ */
+export interface PermissionCreateResponse {
+  /**
+   * The permission identifier, which can be referenced in the API endpoints.
+   */
+  id: string;
+
+  /**
+   * The Unix timestamp (in seconds) for when the permission was created.
+   */
+  created_at: number;
+
+  /**
+   * The object type, which is always "checkpoint.permission".
+   */
+  object: 'checkpoint.permission';
+
+  /**
+   * The project identifier that the permission is for.
+   */
+  project_id: string;
+}
+
+export interface PermissionRetrieveResponse {
+  data: Array<PermissionRetrieveResponse.Data>;
+
+  has_more: boolean;
+
+  object: 'list';
+
+  first_id?: string | null;
+
+  last_id?: string | null;
+}
+
+export namespace PermissionRetrieveResponse {
+  /**
+   * The `checkpoint.permission` object represents a permission for a fine-tuned
+   * model checkpoint.
+   */
+  export interface Data {
+    /**
+     * The permission identifier, which can be referenced in the API endpoints.
+     */
+    id: string;
+
+    /**
+     * The Unix timestamp (in seconds) for when the permission was created.
+     */
+    created_at: number;
+
+    /**
+     * The object type, which is always "checkpoint.permission".
+     */
+    object: 'checkpoint.permission';
+
+    /**
+     * The project identifier that the permission is for.
+     */
+    project_id: string;
+  }
+}
+
+export interface PermissionDeleteResponse {
+  /**
+   * The ID of the fine-tuned model checkpoint permission that was deleted.
+   */
+  id: string;
+
+  /**
+   * Whether the fine-tuned model checkpoint permission was successfully deleted.
+   */
+  deleted: boolean;
+
+  /**
+   * The object type, which is always "checkpoint.permission".
+   */
+  object: 'checkpoint.permission';
+}
+
+export interface PermissionCreateParams {
+  /**
+   * The project identifiers to grant access to.
+   */
+  project_ids: Array<string>;
+}
+
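The methods above cover granting (paginated create), listing (retrieve with filters), and revoking (del) checkpoint permissions. A minimal sketch of granting and then listing permissions, assuming an admin API key is configured and using placeholder checkpoint and project IDs (illustrative only, not part of the patch):

  import OpenAI from 'openai';

  const client = new OpenAI(); // assumed to be configured with an admin API key

  async function sharePermissions() {
    // Placeholder checkpoint and project IDs, modeled on the test fixtures.
    const checkpoint = 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd';

    // Grant a project access; the response is a page of created permissions.
    const page = await client.fineTuning.checkpoints.permissions.create(checkpoint, {
      project_ids: ['proj_abc123'],
    });
    for (const permission of page.data) {
      console.log(`granted ${permission.id} to project ${permission.project_id}`);
    }

    // Read back every permission on the checkpoint.
    const all = await client.fineTuning.checkpoints.permissions.retrieve(checkpoint, {
      order: 'ascending',
    });
    console.log(`${all.data.length} permission(s) on this checkpoint`);
  }

+export interface PermissionRetrieveParams {
+  /**
+   * Identifier for the last permission ID from the previous pagination request.
+   */
+  after?: string;
+
+  /**
+   * Number of permissions to retrieve.
+   */
+  limit?: number;
+
+  /**
+   * The order in which to retrieve permissions.
+   */
+  order?: 'ascending' | 'descending';
+
+  /**
+   * The ID of the project to get permissions for.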
+ */ + project_id?: string; +} + +Permissions.PermissionCreateResponsesPage = PermissionCreateResponsesPage; + +export declare namespace Permissions { + export { + type PermissionCreateResponse as PermissionCreateResponse, + type PermissionRetrieveResponse as PermissionRetrieveResponse, + type PermissionDeleteResponse as PermissionDeleteResponse, + PermissionCreateResponsesPage as PermissionCreateResponsesPage, + type PermissionCreateParams as PermissionCreateParams, + type PermissionRetrieveParams as PermissionRetrieveParams, + }; +} diff --git a/src/resources/fine-tuning/fine-tuning.ts b/src/resources/fine-tuning/fine-tuning.ts index df013c8ec..9b0a01992 100644 --- a/src/resources/fine-tuning/fine-tuning.ts +++ b/src/resources/fine-tuning/fine-tuning.ts @@ -1,6 +1,8 @@ // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. import { APIResource } from '../../resource'; +import * as CheckpointsAPI from './checkpoints/checkpoints'; +import { Checkpoints } from './checkpoints/checkpoints'; import * as JobsAPI from './jobs/jobs'; import { FineTuningJob, @@ -18,11 +20,13 @@ import { export class FineTuning extends APIResource { jobs: JobsAPI.Jobs = new JobsAPI.Jobs(this._client); + checkpoints: CheckpointsAPI.Checkpoints = new CheckpointsAPI.Checkpoints(this._client); } FineTuning.Jobs = Jobs; FineTuning.FineTuningJobsPage = FineTuningJobsPage; FineTuning.FineTuningJobEventsPage = FineTuningJobEventsPage; +FineTuning.Checkpoints = Checkpoints; export declare namespace FineTuning { export { @@ -38,4 +42,6 @@ export declare namespace FineTuning { type JobListParams as JobListParams, type JobListEventsParams as JobListEventsParams, }; + + export { Checkpoints as Checkpoints }; } diff --git a/src/resources/fine-tuning/index.ts b/src/resources/fine-tuning/index.ts index 4954406b8..d23161c62 100644 --- a/src/resources/fine-tuning/index.ts +++ b/src/resources/fine-tuning/index.ts @@ -1,5 +1,6 @@ // File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+export { Checkpoints } from './checkpoints/index'; export { FineTuning } from './fine-tuning'; export { FineTuningJobsPage, diff --git a/src/resources/index.ts b/src/resources/index.ts index 04c2c887b..0d8ec9220 100644 --- a/src/resources/index.ts +++ b/src/resources/index.ts @@ -29,6 +29,23 @@ export { type EmbeddingModel, type EmbeddingCreateParams, } from './embeddings'; +export { + EvalListResponsesPage, + Evals, + type EvalCustomDataSourceConfig, + type EvalLabelModelGrader, + type EvalStoredCompletionsDataSourceConfig, + type EvalStringCheckGrader, + type EvalTextSimilarityGrader, + type EvalCreateResponse, + type EvalRetrieveResponse, + type EvalUpdateResponse, + type EvalListResponse, + type EvalDeleteResponse, + type EvalCreateParams, + type EvalUpdateParams, + type EvalListParams, +} from './evals/evals'; export { FileObjectsPage, Files, diff --git a/src/version.ts b/src/version.ts index bfae301de..c385afc4c 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = '4.92.1'; // x-release-please-version +export const VERSION = '4.93.0'; // x-release-please-version diff --git a/tests/api-resources/beta/threads/runs/runs.test.ts b/tests/api-resources/beta/threads/runs/runs.test.ts index 13ae89a00..4b2b8030b 100644 --- a/tests/api-resources/beta/threads/runs/runs.test.ts +++ b/tests/api-resources/beta/threads/runs/runs.test.ts @@ -37,7 +37,7 @@ describe('resource runs', () => { max_completion_tokens: 256, max_prompt_tokens: 256, metadata: { foo: 'string' }, - model: 'gpt-4o', + model: 'string', parallel_tool_calls: true, reasoning_effort: 'low', response_format: 'auto', diff --git a/tests/api-resources/beta/threads/threads.test.ts b/tests/api-resources/beta/threads/threads.test.ts index f26d6ec44..bc92a0c8a 100644 --- a/tests/api-resources/beta/threads/threads.test.ts +++ b/tests/api-resources/beta/threads/threads.test.ts @@ -121,7 +121,7 @@ describe('resource threads', () => { max_completion_tokens: 256, max_prompt_tokens: 256, metadata: { foo: 'string' }, - model: 'gpt-4o', + model: 'string', parallel_tool_calls: true, response_format: 'auto', stream: false, diff --git a/tests/api-resources/evals/evals.test.ts b/tests/api-resources/evals/evals.test.ts new file mode 100644 index 000000000..fabc2602a --- /dev/null +++ b/tests/api-resources/evals/evals.test.ts @@ -0,0 +1,128 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import OpenAI from 'openai'; +import { Response } from 'node-fetch'; + +const client = new OpenAI({ + apiKey: 'My API Key', + baseURL: process.env['TEST_API_BASE_URL'] ?? 
'http://127.0.0.1:4010', +}); + +describe('resource evals', () => { + test('create: only required params', async () => { + const responsePromise = client.evals.create({ + data_source_config: { item_schema: { foo: 'bar' }, type: 'custom' }, + testing_criteria: [ + { + input: [{ content: 'content', role: 'role' }], + labels: ['string'], + model: 'model', + name: 'name', + passing_labels: ['string'], + type: 'label_model', + }, + ], + }); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('create: required and optional params', async () => { + const response = await client.evals.create({ + data_source_config: { item_schema: { foo: 'bar' }, type: 'custom', include_sample_schema: true }, + testing_criteria: [ + { + input: [{ content: 'content', role: 'role' }], + labels: ['string'], + model: 'model', + name: 'name', + passing_labels: ['string'], + type: 'label_model', + }, + ], + metadata: { foo: 'string' }, + name: 'name', + share_with_openai: true, + }); + }); + + test('retrieve', async () => { + const responsePromise = client.evals.retrieve('eval_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('retrieve: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect(client.evals.retrieve('eval_id', { path: '/_stainless_unknown_path' })).rejects.toThrow( + OpenAI.NotFoundError, + ); + }); + + test('update', async () => { + const responsePromise = client.evals.update('eval_id', {}); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('list', async () => { + const responsePromise = client.evals.list(); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('list: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect(client.evals.list({ path: '/_stainless_unknown_path' })).rejects.toThrow( + OpenAI.NotFoundError, + ); + }); + + test('list: request options and params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + 
await expect( + client.evals.list( + { after: 'after', limit: 0, order: 'asc', order_by: 'created_at' }, + { path: '/_stainless_unknown_path' }, + ), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('del', async () => { + const responsePromise = client.evals.del('eval_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('del: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect(client.evals.del('eval_id', { path: '/_stainless_unknown_path' })).rejects.toThrow( + OpenAI.NotFoundError, + ); + }); +}); diff --git a/tests/api-resources/evals/runs/output-items.test.ts b/tests/api-resources/evals/runs/output-items.test.ts new file mode 100644 index 000000000..ff075b404 --- /dev/null +++ b/tests/api-resources/evals/runs/output-items.test.ts @@ -0,0 +1,61 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import OpenAI from 'openai'; +import { Response } from 'node-fetch'; + +const client = new OpenAI({ + apiKey: 'My API Key', + baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', +}); + +describe('resource outputItems', () => { + test('retrieve', async () => { + const responsePromise = client.evals.runs.outputItems.retrieve('eval_id', 'run_id', 'output_item_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('retrieve: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.evals.runs.outputItems.retrieve('eval_id', 'run_id', 'output_item_id', { + path: '/_stainless_unknown_path', + }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('list', async () => { + const responsePromise = client.evals.runs.outputItems.list('eval_id', 'run_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('list: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.evals.runs.outputItems.list('eval_id', 'run_id', { path: '/_stainless_unknown_path' }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('list: request options and params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await 
expect( + client.evals.runs.outputItems.list( + 'eval_id', + 'run_id', + { after: 'after', limit: 0, order: 'asc', status: 'fail' }, + { path: '/_stainless_unknown_path' }, + ), + ).rejects.toThrow(OpenAI.NotFoundError); + }); +}); diff --git a/tests/api-resources/evals/runs/runs.test.ts b/tests/api-resources/evals/runs/runs.test.ts new file mode 100644 index 000000000..786df0ba1 --- /dev/null +++ b/tests/api-resources/evals/runs/runs.test.ts @@ -0,0 +1,118 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import OpenAI from 'openai'; +import { Response } from 'node-fetch'; + +const client = new OpenAI({ + apiKey: 'My API Key', + baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010', +}); + +describe('resource runs', () => { + test('create: only required params', async () => { + const responsePromise = client.evals.runs.create('eval_id', { + data_source: { source: { content: [{ item: { foo: 'bar' } }], type: 'file_content' }, type: 'jsonl' }, + }); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('create: required and optional params', async () => { + const response = await client.evals.runs.create('eval_id', { + data_source: { + source: { content: [{ item: { foo: 'bar' }, sample: { foo: 'bar' } }], type: 'file_content' }, + type: 'jsonl', + }, + metadata: { foo: 'string' }, + name: 'name', + }); + }); + + test('retrieve', async () => { + const responsePromise = client.evals.runs.retrieve('eval_id', 'run_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('retrieve: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.evals.runs.retrieve('eval_id', 'run_id', { path: '/_stainless_unknown_path' }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('list', async () => { + const responsePromise = client.evals.runs.list('eval_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('list: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect(client.evals.runs.list('eval_id', { path: '/_stainless_unknown_path' })).rejects.toThrow( + OpenAI.NotFoundError, + ); + }); + + test('list: request options and params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an 
error + await expect( + client.evals.runs.list( + 'eval_id', + { after: 'after', limit: 0, order: 'asc', status: 'queued' }, + { path: '/_stainless_unknown_path' }, + ), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('del', async () => { + const responsePromise = client.evals.runs.del('eval_id', 'run_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('del: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.evals.runs.del('eval_id', 'run_id', { path: '/_stainless_unknown_path' }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('cancel', async () => { + const responsePromise = client.evals.runs.cancel('eval_id', 'run_id'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('cancel: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.evals.runs.cancel('eval_id', 'run_id', { path: '/_stainless_unknown_path' }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); +}); diff --git a/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts b/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts new file mode 100644 index 000000000..e7aceae3e --- /dev/null +++ b/tests/api-resources/fine-tuning/checkpoints/permissions.test.ts @@ -0,0 +1,87 @@ +// File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +import OpenAI from 'openai'; +import { Response } from 'node-fetch'; + +const client = new OpenAI({ + apiKey: 'My API Key', + baseURL: process.env['TEST_API_BASE_URL'] ?? 
'http://127.0.0.1:4010', +}); + +describe('resource permissions', () => { + test('create: only required params', async () => { + const responsePromise = client.fineTuning.checkpoints.permissions.create( + 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', + { project_ids: ['string'] }, + ); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('create: required and optional params', async () => { + const response = await client.fineTuning.checkpoints.permissions.create( + 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', + { project_ids: ['string'] }, + ); + }); + + test('retrieve', async () => { + const responsePromise = client.fineTuning.checkpoints.permissions.retrieve('ft-AF1WoRqd3aJAHsqc9NY7iL8F'); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + test('retrieve: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.fineTuning.checkpoints.permissions.retrieve('ft-AF1WoRqd3aJAHsqc9NY7iL8F', { + path: '/_stainless_unknown_path', + }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + test('retrieve: request options and params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.fineTuning.checkpoints.permissions.retrieve( + 'ft-AF1WoRqd3aJAHsqc9NY7iL8F', + { after: 'after', limit: 0, order: 'ascending', project_id: 'project_id' }, + { path: '/_stainless_unknown_path' }, + ), + ).rejects.toThrow(OpenAI.NotFoundError); + }); + + // OpenAPI spec is slightly incorrect + test.skip('del', async () => { + const responsePromise = client.fineTuning.checkpoints.permissions.del( + 'ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', + ); + const rawResponse = await responsePromise.asResponse(); + expect(rawResponse).toBeInstanceOf(Response); + const response = await responsePromise; + expect(response).not.toBeInstanceOf(Response); + const dataAndResponse = await responsePromise.withResponse(); + expect(dataAndResponse.data).toBe(response); + expect(dataAndResponse.response).toBe(rawResponse); + }); + + // OpenAPI spec is slightly incorrect + test.skip('del: request options instead of params are passed correctly', async () => { + // ensure the request options are being passed correctly by passing an invalid HTTP method in order to cause an error + await expect( + client.fineTuning.checkpoints.permissions.del('ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd', { + path: '/_stainless_unknown_path', + }), + ).rejects.toThrow(OpenAI.NotFoundError); + }); +}); diff --git a/tests/api-resources/images.test.ts b/tests/api-resources/images.test.ts index 88eb97a93..4f15e20ac 100644 --- a/tests/api-resources/images.test.ts +++ b/tests/api-resources/images.test.ts @@ -25,10 +25,10 @@ 
describe('resource images', () => { test('createVariation: required and optional params', async () => { const response = await client.images.createVariation({ image: await toFile(Buffer.from('# my file contents'), 'README.md'), - model: 'dall-e-2', + model: 'string', n: 1, response_format: 'url', - size: '256x256', + size: '1024x1024', user: 'user-1234', }); }); @@ -52,10 +52,10 @@ describe('resource images', () => { image: await toFile(Buffer.from('# my file contents'), 'README.md'), prompt: 'A cute baby sea otter wearing a beret', mask: await toFile(Buffer.from('# my file contents'), 'README.md'), - model: 'dall-e-2', + model: 'string', n: 1, response_format: 'url', - size: '256x256', + size: '1024x1024', user: 'user-1234', }); }); @@ -74,11 +74,11 @@ describe('resource images', () => { test('generate: required and optional params', async () => { const response = await client.images.generate({ prompt: 'A cute baby sea otter', - model: 'dall-e-3', + model: 'string', n: 1, quality: 'standard', response_format: 'url', - size: '256x256', + size: '1024x1024', style: 'vivid', user: 'user-1234', }); diff --git a/tests/api-resources/moderations.test.ts b/tests/api-resources/moderations.test.ts index 64f9acf3c..107ce9974 100644 --- a/tests/api-resources/moderations.test.ts +++ b/tests/api-resources/moderations.test.ts @@ -21,9 +21,6 @@ describe('resource moderations', () => { }); test('create: required and optional params', async () => { - const response = await client.moderations.create({ - input: 'I want to kill them.', - model: 'omni-moderation-2024-09-26', - }); + const response = await client.moderations.create({ input: 'I want to kill them.', model: 'string' }); }); });
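Taken together, the new tests trace a complete evals workflow. As a closing sketch, the end-to-end flow using the same parameter shapes as the tests might look like this (illustrative only, not part of the patch; it assumes the create response exposes an `id` field, as the run objects defined earlier do):

  import OpenAI from 'openai';

  const client = new OpenAI();

  async function main() {
    // Create an eval with a custom data-source schema and a
    // label-model grader, mirroring the shapes in the tests above.
    const evaluation = await client.evals.create({
      data_source_config: { item_schema: { foo: 'bar' }, type: 'custom' },
      testing_criteria: [
        {
          input: [{ content: 'content', role: 'role' }],
          labels: ['string'],
          model: 'model',
          name: 'name',
          passing_labels: ['string'],
          type: 'label_model',
        },
      ],
    });

    // Start a run over inline JSONL content, then read it back.
    const run = await client.evals.runs.create(evaluation.id, {
      data_source: { source: { content: [{ item: { foo: 'bar' } }], type: 'file_content' }, type: 'jsonl' },
    });
    const latest = await client.evals.runs.retrieve(evaluation.id, run.id);
    console.log(latest.status, latest.report_url);
  }

  main().catch(console.error);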