envoyproxy · mathetake · Sep 25, 2025 · Sep 25, 2025
@@ -38,9 +38,8 @@ const (
 // Model names for testing.
 const (
 	// ModelGPT5Nano is the cheapest model usable with /chat/completions.
+	// Note: gpt-5-nano is also the cheapest reasoning model.
 	ModelGPT5Nano = "gpt-5-nano"
-	// ModelO3Mini is the cheapest reasoning model usable with /chat/completions.
-	ModelO3Mini = "o3-mini"
 	// ModelGPT4oMiniAudioPreview is the cheapest audio synthesis model usable with /chat/completions.
 	ModelGPT4oMiniAudioPreview = "gpt-4o-mini-audio-preview"
 	// ModelGPT4oAudioPreview is the cheapest audio transcription model usable with /chat/completions.

@@ -79,7 +79,8 @@ func TestOtelOpenAIChatCompletions_metrics(t *testing.T) {
 			metrics := requireScopeMetrics(t, allMetrics)
 
 			// Get expected model names from span
-			requestModel := getInvocationModel(span.Attributes, "llm.invocation_parameters")
+			originalModel := getInvocationModel(span.Attributes, "llm.invocation_parameters")
+			requestModel := originalModel // in non-override cases, these are the same
 			responseModel := getSpanAttributeString(span.Attributes, "llm.model_name")
 
 			verifyTokenUsageMetrics(t, "chat", metrics, span, requestModel, responseModel, tc.isError)

@@ -74,9 +74,7 @@ func TestOtelOpenAIEmbeddings_metrics(t *testing.T) {
 			metrics := requireScopeMetrics(t, allMetrics)
 
 			// Get expected model names from span
-			// TODO: these attributes are inconsistent and will be fixed soon.
-			// See https://github.com/Arize-ai/openinference/pull/2210
-			requestModel := getInvocationModel(span.Attributes, "llm.invocation_parameters")
+			requestModel := getInvocationModel(span.Attributes, "embedding.invocation_parameters")
 			responseModel := getSpanAttributeString(span.Attributes, "embedding.model_name")
 
 			// Verify each metric in separate functions.
@@ -117,9 +115,7 @@ func TestOtelOpenAIEmbeddings_metrics_modelNameOverride(t *testing.T) {
 	metrics := requireScopeMetrics(t, allMetrics)
 
 	// Get expected model names from span
-	// TODO: Until trace attribute recording is moved to the upstream filter,
-	// llm.invocation_parameters is the original model, not the override.
-	requestModel := "text-embedding-3-small" // overridden model
+	requestModel := getInvocationModel(span.Attributes, "embedding.invocation_parameters")
 	responseModel := getSpanAttributeString(span.Attributes, "embedding.model_name")
 
 	verifyTokenUsageMetrics(t, "embeddings", metrics, span, requestModel, responseModel, false)

@@ -112,11 +112,16 @@ func verifyRequestDurationMetrics(t *testing.T, op string, metrics *metricsv1.Sc
 				require.NotEmpty(t, histogram.DataPoints)
 				for _, dp := range histogram.DataPoints {
 					attrs := getAttributeStringMap(dp.Attributes)
-					require.Equal(t, "_OTHER", attrs["error.type"])
-					require.Equal(t, op, attrs["gen_ai.operation.name"])
-					require.Equal(t, "openai", attrs["gen_ai.provider.name"])
-					require.Equal(t, requestModel, attrs["gen_ai.request.model"])
-					// Don't validate response model for errors
+					expected := map[string]string{
+						"error.type":            "_OTHER", // we don't set specific error types yet
+						"gen_ai.operation.name": op,
+						"gen_ai.provider.name":  "openai",
+						"gen_ai.request.model":  requestModel,
+						// TODO: the response model is not yet set consistently for errors, so copy the actual value below to exclude it from verification.
+						// See https://github.com/envoyproxy/ai-gateway/issues/1224
+						"gen_ai.response.model": attrs["gen_ai.response.model"],
+					}
+					require.Equal(t, expected, attrs)
 				}
 				return
 			}

@@ -1,116 +1,76 @@
 ---
 version: 2
 interactions:
-    - id: 0
-      request:
-        proto: HTTP/1.1
-        proto_major: 1
-        proto_minor: 1
-        content_length: 165
-        host: api.openai.com
-        body: |-
-            {
-              "messages": [
-                {
-                  "content": "A bat and ball cost $1.10. Bat costs $1 more than ball. Ball cost?",
-                  "role": "user"
-                }
-              ],
-              "model": "o3-mini"
-            }
-        headers:
-            Accept-Encoding:
-                - gzip
-            Content-Length:
-                - "165"
-            Content-Type:
-                - application/json
-            User-Agent:
-                - Go-http-client/1.1
-        url: https://api.openai.com/v1/chat/completions
-        method: POST
-      response:
-        proto: HTTP/2.0
-        proto_major: 2
-        proto_minor: 0
-        content_length: 1137
-        body: |-
-            {
-              "choices": [
-                {
-                  "finish_reason": "stop",
-                  "index": 0,
-                  "message": {
-                    "annotations": [],
-                    "content": "Let the cost of the ball be x dollars. Then the bat costs x + 1 dollars (since it costs $1 more than the ball).\n\nThe total cost is given by:\n  x + (x + 1) = 1.10\n\nCombine like terms:\n  2x + 1 = 1.10\n\nSubtract 1 from both sides:\n  2x = 0.10\n\nDivide by 2:\n  x = 0.05\n\nSo, the ball costs $0.05 (5 cents) and the bat costs $1.05.",
-                    "refusal": null,
-                    "role": "assistant"
-                  }
-                }
-              ],
-              "created": 1755133862,
-              "id": "chatcmpl-C4GmcU7sPLtZ16jI8fqxAtXVj1FX7",
-              "model": "o3-mini-2025-01-31",
-              "object": "chat.completion",
-              "service_tier": "default",
-              "system_fingerprint": "fp_e20469f047",
-              "usage": {
-                "completion_tokens": 264,
-                "completion_tokens_details": {
-                  "accepted_prediction_tokens": 0,
-                  "audio_tokens": 0,
-                  "reasoning_tokens": 128,
-                  "rejected_prediction_tokens": 0
-                },
-                "prompt_tokens": 27,
-                "prompt_tokens_details": {
-                  "audio_tokens": 0,
-                  "cached_tokens": 0
-                },
-                "total_tokens": 291
-              }
-            }
-        headers:
-            Access-Control-Expose-Headers:
-                - X-Request-ID
-            Alt-Svc:
-                - h3=":443"; ma=86400
-            Cf-Cache-Status:
-                - DYNAMIC
-            Cf-Ray:
-                - 96ec936f2b27e4fe-JHB
-            Content-Type:
-                - application/json
-            Date:
-                - Thu, 14 Aug 2025 01:11:05 GMT
-            Openai-Processing-Ms:
-                - "3309"
-            Openai-Project:
-                - proj_KYenqYOfeZsnXEVK8dXVBhez
-            Openai-Version:
-                - "2020-10-01"
-            Server:
-                - cloudflare
-            Strict-Transport-Security:
-                - max-age=31536000; includeSubDomains; preload
-            X-Content-Type-Options:
-                - nosniff
-            X-Envoy-Upstream-Service-Time:
-                - "3345"
-            X-Ratelimit-Limit-Requests:
-                - "500"
-            X-Ratelimit-Limit-Tokens:
-                - "200000"
-            X-Ratelimit-Remaining-Requests:
-                - "499"
-            X-Ratelimit-Remaining-Tokens:
-                - "199981"
-            X-Ratelimit-Reset-Requests:
-                - 120ms
-            X-Ratelimit-Reset-Tokens:
-                - 5ms
-            X-Request-Id:
-                - req_1d8f72e7e26b41e7bd2a09bef17d6a4c
-        status: 200 OK
-        code: 200
-        duration: 3.649114291s
+- id: 0
+  request:
+    proto: HTTP/1.1
+    proto_major: 1
+    proto_minor: 1
+    content_length: 168
+    host: api.openai.com
+    body: "{\n  \"messages\": [\n    {\n      \"content\": \"A bat and ball cost $1.10. Bat costs $1 more than ball. Ball cost?\",\n      \"role\": \"user\"\n    }\n  ],\n  \"model\": \"gpt-5-nano\"\n}"
+    headers:
+      Accept-Encoding:
+      - gzip
+      Content-Length:
+      - "168"
+      Content-Type:
+      - application/json
+      User-Agent:
+      - Go-http-client/1.1
+    url: https://api.openai.com/v1/chat/completions
+    method: POST
+  response:
+    proto: HTTP/2.0
+    proto_major: 2
+    proto_minor: 0
+    content_length: 916
+    body: "{\n  \"choices\": [\n    {\n      \"finish_reason\": \"stop\",\n      \"index\": 0,\n      \"message\": {\n        \"annotations\": [],\n        \"content\": \"Ball costs 5 cents.\\n\\nReason: Let ball = x dollars. Then bat = x + 1. Total: x + (x + 1) = 1.10 → 2x + 1 = 1.10 → 2x = 0.10 → x = 0.05.\",\n        \"refusal\": null,\n        \"role\": \"assistant\"\n      }\n    }\n  ],\n  \"created\": 1758778531,\n  \"id\": \"chatcmpl-CJYvbsw8VZh862zGsIV5ZJtMnVFej\",\n  \"model\": \"gpt-5-nano-2025-08-07\",\n  \"object\": \"chat.completion\",\n  \"service_tier\": \"default\",\n  \"system_fingerprint\": null,\n  \"usage\": {\n    \"completion_tokens\": 267,\n    \"completion_tokens_details\": {\n      \"accepted_prediction_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"reasoning_tokens\": 192,\n      \"rejected_prediction_tokens\": 0\n    },\n    \"prompt_tokens\": 27,\n    \"prompt_tokens_details\": {\n      \"audio_tokens\": 0,\n      \"cached_tokens\": 0\n    },\n    \"total_tokens\": 294\n  }\n}"
+    headers:
+      Access-Control-Expose-Headers:
+      - X-Request-ID
+      Alt-Svc:
+      - h3=":443"; ma=86400
+      Cf-Cache-Status:
+      - DYNAMIC
+      Cf-Ray:
+      - 98482897efc67a7f-KUL
+      Content-Length:
+      - "916"
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 25 Sep 2025 05:35:33 GMT
+      Openai-Processing-Ms:
+      - "2387"
+      Openai-Project:
+      - proj_KYenqYOfeZsnXEVK8dXVBhez
+      Openai-Version:
+      - "2020-10-01"
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      X-Content-Type-Options:
+      - nosniff
+      X-Envoy-Upstream-Service-Time:
+      - "2685"
+      X-Openai-Proxy-Wasm:
+      - v0.1
+      X-Ratelimit-Limit-Requests:
+      - "500"
+      X-Ratelimit-Limit-Tokens:
+      - "200000"
+      X-Ratelimit-Remaining-Requests:
+      - "499"
+      X-Ratelimit-Remaining-Tokens:
+      - "199981"
+      X-Ratelimit-Reset-Requests:
+      - 120ms
+      X-Ratelimit-Reset-Tokens:
+      - 5ms
+      X-Request-Id:
+      - req_136fb62f8c814ae989fed221f6f44390
+    status: 200 OK
+    code: 200
+    duration: 3.369177041s
@@ -247,7 +247,7 @@ var chatRequests = map[Cassette]*openai.ChatCompletionRequest{
 		},
 	},
 	CassetteChatReasoning: {
-		Model: openai.ModelO3Mini,
+		Model: openai.ModelGPT5Nano,
 		Messages: []openai.ChatCompletionMessageParamUnion{
 			{
 				OfUser: &openai.ChatCompletionUserMessageParam{