Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions internal/apischema/openai/openai.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,8 @@ const (
// Model names for testing.
const (
// ModelGPT5Nano is the cheapest model usable with /chat/completions.
// Note: gpt-5-nano is also the cheapest reasoning model.
ModelGPT5Nano = "gpt-5-nano"
// ModelO3Mini is the cheapest reasoning model usable with /chat/completions.
ModelO3Mini = "o3-mini"
// ModelGPT4oMiniAudioPreview is the cheapest audio synthesis model usable with /chat/completions.
ModelGPT4oMiniAudioPreview = "gpt-4o-mini-audio-preview"
// ModelGPT4oAudioPreview is the cheapest audio transcription model usable with /chat/completions.
Expand Down
3 changes: 2 additions & 1 deletion tests/extproc/vcr/otel_chat_completions_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ func TestOtelOpenAIChatCompletions_metrics(t *testing.T) {
metrics := requireScopeMetrics(t, allMetrics)

// Get expected model names from span
requestModel := getInvocationModel(span.Attributes, "llm.invocation_parameters")
originalModel := getInvocationModel(span.Attributes, "llm.invocation_parameters")
requestModel := originalModel // in non-override cases, these are the same
responseModel := getSpanAttributeString(span.Attributes, "llm.model_name")

verifyTokenUsageMetrics(t, "chat", metrics, span, requestModel, responseModel, tc.isError)
Expand Down
8 changes: 2 additions & 6 deletions tests/extproc/vcr/otel_embeddings_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,7 @@ func TestOtelOpenAIEmbeddings_metrics(t *testing.T) {
metrics := requireScopeMetrics(t, allMetrics)

// Get expected model names from span
// TODO: these attributes are inconsistent and will be fixed soon.
// See https://github.com/Arize-ai/openinference/pull/2210
requestModel := getInvocationModel(span.Attributes, "llm.invocation_parameters")
requestModel := getInvocationModel(span.Attributes, "embedding.invocation_parameters")
responseModel := getSpanAttributeString(span.Attributes, "embedding.model_name")

// Verify each metric in separate functions.
Expand Down Expand Up @@ -117,9 +115,7 @@ func TestOtelOpenAIEmbeddings_metrics_modelNameOverride(t *testing.T) {
metrics := requireScopeMetrics(t, allMetrics)

// Get expected model names from span
// TODO: Until trace attribute recording is moved to the upstream filter,
// llm.invocation_parameters is the original model, not the override.
requestModel := "text-embedding-3-small" // overridden model
requestModel := getInvocationModel(span.Attributes, "embedding.invocation_parameters")
responseModel := getSpanAttributeString(span.Attributes, "embedding.model_name")

verifyTokenUsageMetrics(t, "embeddings", metrics, span, requestModel, responseModel, false)
Expand Down
15 changes: 10 additions & 5 deletions tests/extproc/vcr/otel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,16 @@ func verifyRequestDurationMetrics(t *testing.T, op string, metrics *metricsv1.Sc
require.NotEmpty(t, histogram.DataPoints)
for _, dp := range histogram.DataPoints {
attrs := getAttributeStringMap(dp.Attributes)
require.Equal(t, "_OTHER", attrs["error.type"])
require.Equal(t, op, attrs["gen_ai.operation.name"])
require.Equal(t, "openai", attrs["gen_ai.provider.name"])
require.Equal(t, requestModel, attrs["gen_ai.request.model"])
// Don't validate response model for errors
expected := map[string]string{
"error.type": "_OTHER", // we don't set specific error types yet
"gen_ai.operation.name": op,
"gen_ai.provider.name": "openai",
"gen_ai.request.model": requestModel,
// TODO: we can't verify the response model for errors until it is set consistently
// See https://github.com/envoyproxy/ai-gateway/issues/1224
"gen_ai.response.model": attrs["gen_ai.response.model"],
}
require.Equal(t, expected, attrs)
}
return
}
Expand Down
186 changes: 73 additions & 113 deletions tests/internal/testopenai/cassettes/chat-reasoning.yaml
Original file line number Diff line number Diff line change
@@ -1,116 +1,76 @@
---
version: 2
interactions:
- id: 0
request:
proto: HTTP/1.1
proto_major: 1
proto_minor: 1
content_length: 165
host: api.openai.com
body: |-
{
"messages": [
{
"content": "A bat and ball cost $1.10. Bat costs $1 more than ball. Ball cost?",
"role": "user"
}
],
"model": "o3-mini"
}
headers:
Accept-Encoding:
- gzip
Content-Length:
- "165"
Content-Type:
- application/json
User-Agent:
- Go-http-client/1.1
url: https://api.openai.com/v1/chat/completions
method: POST
response:
proto: HTTP/2.0
proto_major: 2
proto_minor: 0
content_length: 1137
body: |-
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"annotations": [],
"content": "Let the cost of the ball be x dollars. Then the bat costs x + 1 dollars (since it costs $1 more than the ball).\n\nThe total cost is given by:\n  x + (x + 1) = 1.10\n\nCombine like terms:\n  2x + 1 = 1.10\n\nSubtract 1 from both sides:\n  2x = 0.10\n\nDivide by 2:\n  x = 0.05\n\nSo, the ball costs $0.05 (5 cents) and the bat costs $1.05.",
"refusal": null,
"role": "assistant"
}
}
],
"created": 1755133862,
"id": "chatcmpl-C4GmcU7sPLtZ16jI8fqxAtXVj1FX7",
"model": "o3-mini-2025-01-31",
"object": "chat.completion",
"service_tier": "default",
"system_fingerprint": "fp_e20469f047",
"usage": {
"completion_tokens": 264,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 128,
"rejected_prediction_tokens": 0
},
"prompt_tokens": 27,
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
},
"total_tokens": 291
}
}
headers:
Access-Control-Expose-Headers:
- X-Request-ID
Alt-Svc:
- h3=":443"; ma=86400
Cf-Cache-Status:
- DYNAMIC
Cf-Ray:
- 96ec936f2b27e4fe-JHB
Content-Type:
- application/json
Date:
- Thu, 14 Aug 2025 01:11:05 GMT
Openai-Processing-Ms:
- "3309"
Openai-Project:
- proj_KYenqYOfeZsnXEVK8dXVBhez
Openai-Version:
- "2020-10-01"
Server:
- cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
X-Content-Type-Options:
- nosniff
X-Envoy-Upstream-Service-Time:
- "3345"
X-Ratelimit-Limit-Requests:
- "500"
X-Ratelimit-Limit-Tokens:
- "200000"
X-Ratelimit-Remaining-Requests:
- "499"
X-Ratelimit-Remaining-Tokens:
- "199981"
X-Ratelimit-Reset-Requests:
- 120ms
X-Ratelimit-Reset-Tokens:
- 5ms
X-Request-Id:
- req_1d8f72e7e26b41e7bd2a09bef17d6a4c
status: 200 OK
code: 200
duration: 3.649114291s
- id: 0
request:
proto: HTTP/1.1
proto_major: 1
proto_minor: 1
content_length: 168
host: api.openai.com
body: "{\n \"messages\": [\n {\n \"content\": \"A bat and ball cost $1.10. Bat costs $1 more than ball. Ball cost?\",\n \"role\": \"user\"\n }\n ],\n \"model\": \"gpt-5-nano\"\n}"
headers:
Accept-Encoding:
- gzip
Content-Length:
- "168"
Content-Type:
- application/json
User-Agent:
- Go-http-client/1.1
url: https://api.openai.com/v1/chat/completions
method: POST
response:
proto: HTTP/2.0
proto_major: 2
proto_minor: 0
content_length: 916
body: "{\n \"choices\": [\n {\n \"finish_reason\": \"stop\",\n \"index\": 0,\n \"message\": {\n \"annotations\": [],\n \"content\": \"Ball costs 5 cents.\\n\\nReason: Let ball = x dollars. Then bat = x + 1. Total: x + (x + 1) = 1.10 → 2x + 1 = 1.10 → 2x = 0.10 → x = 0.05.\",\n \"refusal\": null,\n \"role\": \"assistant\"\n }\n }\n ],\n \"created\": 1758778531,\n \"id\": \"chatcmpl-CJYvbsw8VZh862zGsIV5ZJtMnVFej\",\n \"model\": \"gpt-5-nano-2025-08-07\",\n \"object\": \"chat.completion\",\n \"service_tier\": \"default\",\n \"system_fingerprint\": null,\n \"usage\": {\n \"completion_tokens\": 267,\n \"completion_tokens_details\": {\n \"accepted_prediction_tokens\": 0,\n \"audio_tokens\": 0,\n \"reasoning_tokens\": 192,\n \"rejected_prediction_tokens\": 0\n },\n \"prompt_tokens\": 27,\n \"prompt_tokens_details\": {\n \"audio_tokens\": 0,\n \"cached_tokens\": 0\n },\n \"total_tokens\": 294\n }\n}"
headers:
Access-Control-Expose-Headers:
- X-Request-ID
Alt-Svc:
- h3=":443"; ma=86400
Cf-Cache-Status:
- DYNAMIC
Cf-Ray:
- 98482897efc67a7f-KUL
Content-Length:
- "916"
Content-Type:
- application/json
Date:
- Thu, 25 Sep 2025 05:35:33 GMT
Openai-Processing-Ms:
- "2387"
Openai-Project:
- proj_KYenqYOfeZsnXEVK8dXVBhez
Openai-Version:
- "2020-10-01"
Server:
- cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
X-Content-Type-Options:
- nosniff
X-Envoy-Upstream-Service-Time:
- "2685"
X-Openai-Proxy-Wasm:
- v0.1
X-Ratelimit-Limit-Requests:
- "500"
X-Ratelimit-Limit-Tokens:
- "200000"
X-Ratelimit-Remaining-Requests:
- "499"
X-Ratelimit-Remaining-Tokens:
- "199981"
X-Ratelimit-Reset-Requests:
- 120ms
X-Ratelimit-Reset-Tokens:
- 5ms
X-Request-Id:
- req_136fb62f8c814ae989fed221f6f44390
status: 200 OK
code: 200
duration: 3.369177041s
2 changes: 1 addition & 1 deletion tests/internal/testopenai/chat_requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ var chatRequests = map[Cassette]*openai.ChatCompletionRequest{
},
},
CassetteChatReasoning: {
Model: openai.ModelO3Mini,
Model: openai.ModelGPT5Nano,
Messages: []openai.ChatCompletionMessageParamUnion{
{
OfUser: &openai.ChatCompletionUserMessageParam{
Expand Down
Loading
Loading