Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 85 additions & 8 deletions cmd/eval/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ description: Testing JSON with failing evaluators
model: openai/gpt-4o
testData:
- input: "hello"
expected: "hello world"
messages:
- role: user
content: "{{input}}"
Expand Down Expand Up @@ -553,18 +554,94 @@ evaluators:

output := out.String()

// Verify JSON structure
var result EvaluationSummary
err = json.Unmarshal([]byte(output), &result)
require.NoError(t, err)

// Verify failing test is properly represented
require.Equal(t, 1, result.Summary.TotalTests)
require.Equal(t, 0, result.Summary.PassedTests)
require.Equal(t, 1, result.Summary.FailedTests)
require.Equal(t, 0.0, result.Summary.PassRate)
// Verify JSON doesn't contain human-readable text
require.NotContains(t, output, "Running evaluation:")
})

t.Run("eval with responseFormat and jsonSchema", func(t *testing.T) {
const yamlBody = `
name: JSON Schema Evaluation
description: Testing responseFormat and jsonSchema in eval
model: openai/gpt-4o
responseFormat: json_schema
jsonSchema:
name: response_schema
strict: true
schema:
type: object
properties:
message:
type: string
description: The response message
confidence:
type: number
description: Confidence score
required:
- message
additionalProperties: false
testData:
- input: "hello"
expected: "hello world"
messages:
- role: user
content: "Respond to: {{input}}"
evaluators:
- name: contains-message
string:
contains: "message"
`

require.Len(t, result.TestResults, 1)
require.False(t, result.TestResults[0].EvaluationResults[0].Passed)
require.Equal(t, 0.0, result.TestResults[0].EvaluationResults[0].Score)
tmpDir := t.TempDir()
promptFile := filepath.Join(tmpDir, "test.prompt.yml")
err := os.WriteFile(promptFile, []byte(yamlBody), 0644)
require.NoError(t, err)

client := azuremodels.NewMockClient()
var capturedRequest azuremodels.ChatCompletionOptions
client.MockGetChatCompletionStream = func(ctx context.Context, req azuremodels.ChatCompletionOptions, org string) (*azuremodels.ChatCompletionResponse, error) {
capturedRequest = req
response := `{"message": "hello world", "confidence": 0.95}`
reader := sse.NewMockEventReader([]azuremodels.ChatCompletion{
{
Choices: []azuremodels.ChatChoice{
{
Message: &azuremodels.ChatChoiceMessage{
Content: &response,
},
},
},
},
})
return &azuremodels.ChatCompletionResponse{Reader: reader}, nil
}

out := new(bytes.Buffer)
cfg := command.NewConfig(out, out, client, true, 100)

cmd := NewEvalCommand(cfg)
cmd.SetArgs([]string{promptFile})

err = cmd.Execute()
require.NoError(t, err)

// Verify that responseFormat and jsonSchema were included in the request
require.NotNil(t, capturedRequest.ResponseFormat)
require.Equal(t, "json_schema", capturedRequest.ResponseFormat.Type)
require.NotNil(t, capturedRequest.ResponseFormat.JsonSchema)

schema := *capturedRequest.ResponseFormat.JsonSchema
require.Equal(t, "response_schema", schema["name"])
require.Equal(t, true, schema["strict"])
require.Contains(t, schema, "schema")

// Verify the test passed
output := out.String()
require.Contains(t, output, "✓ PASSED")
require.Contains(t, output, "🎉 All tests passed!")
})
}
14 changes: 11 additions & 3 deletions cmd/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,17 @@ func NewRunCommand(cfg *command.Config) *cobra.Command {
}
}

req := azuremodels.ChatCompletionOptions{
Messages: conversation.GetMessages(),
Model: modelName,
var req azuremodels.ChatCompletionOptions
if pf != nil {
// Use the prompt file's BuildChatCompletionOptions method to include responseFormat and jsonSchema
req = pf.BuildChatCompletionOptions(conversation.GetMessages())
// Override the model name if provided via CLI
req.Model = modelName
} else {
req = azuremodels.ChatCompletionOptions{
Messages: conversation.GetMessages(),
Model: modelName,
}
}

mp.UpdateRequest(&req)
Expand Down
96 changes: 96 additions & 0 deletions cmd/run/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,102 @@ messages:
require.Equal(t, "System message", *capturedReq.Messages[0].Content)
require.Equal(t, "User message", *capturedReq.Messages[1].Content)
})

t.Run("--file with responseFormat and jsonSchema", func(t *testing.T) {
const yamlBody = `
name: JSON Schema Test
description: Test responseFormat and jsonSchema
model: openai/test-model
responseFormat: json_schema
jsonSchema:
name: person_schema
strict: true
schema:
type: object
properties:
name:
type: string
description: The name
age:
type: integer
description: The age
required:
- name
- age
additionalProperties: false
messages:
- role: system
content: You are a helpful assistant.
- role: user
content: "Generate a person"
`

tmp, err := os.CreateTemp(t.TempDir(), "*.prompt.yml")
require.NoError(t, err)
_, err = tmp.WriteString(yamlBody)
require.NoError(t, err)
require.NoError(t, tmp.Close())

client := azuremodels.NewMockClient()
modelSummary := &azuremodels.ModelSummary{
Name: "test-model",
Publisher: "openai",
Task: "chat-completion",
}
client.MockListModels = func(ctx context.Context) ([]*azuremodels.ModelSummary, error) {
return []*azuremodels.ModelSummary{modelSummary}, nil
}

var capturedRequest azuremodels.ChatCompletionOptions
client.MockGetChatCompletionStream = func(ctx context.Context, req azuremodels.ChatCompletionOptions, org string) (*azuremodels.ChatCompletionResponse, error) {
capturedRequest = req
reply := "hello this is a test response"
reader := sse.NewMockEventReader([]azuremodels.ChatCompletion{
{
Choices: []azuremodels.ChatChoice{
{
Message: &azuremodels.ChatChoiceMessage{
Content: &reply,
},
},
},
},
})
return &azuremodels.ChatCompletionResponse{Reader: reader}, nil
}

out := new(bytes.Buffer)
cfg := command.NewConfig(out, out, client, true, 100)

cmd := NewRunCommand(cfg)
cmd.SetArgs([]string{"--file", tmp.Name()})

err = cmd.Execute()
require.NoError(t, err)

// Verify that responseFormat and jsonSchema were included in the request
require.NotNil(t, capturedRequest.ResponseFormat)
require.Equal(t, "json_schema", capturedRequest.ResponseFormat.Type)
require.NotNil(t, capturedRequest.ResponseFormat.JsonSchema)

schema := *capturedRequest.ResponseFormat.JsonSchema
require.Contains(t, schema, "name")
require.Contains(t, schema, "schema")
require.Equal(t, "person_schema", schema["name"])

schemaContent := schema["schema"].(map[string]interface{})
require.Equal(t, "object", schemaContent["type"])
require.Contains(t, schemaContent, "properties")
require.Contains(t, schemaContent, "required")

properties := schemaContent["properties"].(map[string]interface{})
require.Contains(t, properties, "name")
require.Contains(t, properties, "age")

required := schemaContent["required"].([]interface{})
require.Contains(t, required, "name")
require.Contains(t, required, "age")
})
}

func TestParseTemplateVariables(t *testing.T) {
Expand Down
19 changes: 19 additions & 0 deletions examples/json_response_prompt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Example prompt file demonstrating `responseFormat: json_object`, which asks
# the model to reply with a free-form JSON object (no fixed schema enforced).
name: JSON Response Example
description: Example prompt demonstrating responseFormat with json
model: openai/gpt-4o
responseFormat: json_object
messages:
  - role: system
    content: You are a helpful assistant that responds in JSON format.
  - role: user
    content: "Provide a summary of {{topic}} in JSON format with title, description, and key_points array."
# Each testData entry supplies the {{topic}} template variable for one eval case.
testData:
  - topic: "artificial intelligence"
  - topic: "climate change"
# Evaluators run simple substring checks against the model output.
evaluators:
  - name: contains-json-structure
    string:
      contains: "{"
  - name: has-title
    string:
      contains: "title"
64 changes: 64 additions & 0 deletions examples/json_schema_prompt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Example prompt file demonstrating `responseFormat: json_schema` together with
# a `jsonSchema` block that constrains the structure of the model's output.
name: JSON Schema Response Example
description: Example prompt demonstrating responseFormat and jsonSchema usage
model: openai/gpt-4o
responseFormat: json_schema
jsonSchema:
  # Human-readable schema name forwarded to the API.
  name: Person Information Schema
  strict: true
  # NOTE(review): OpenAI strict mode typically requires every property to be
  # listed in `required` and `additionalProperties: false` at each object
  # level — confirm this example against the target API before relying on it.
  schema:
    type: object
    description: A structured response containing person information
    properties:
      name:
        type: string
        description: The full name of the person
      age:
        type: integer
        description: The age of the person in years
        minimum: 0
        maximum: 150
      email:
        type: string
        description: The email address of the person
        format: email
      skills:
        type: array
        description: A list of skills the person has
        items:
          type: string
      address:
        type: object
        description: The person's address
        properties:
          street:
            type: string
            description: Street address
          city:
            type: string
            description: City name
          country:
            type: string
            description: Country name
        required:
          - city
          - country
    required:
      - name
      - age
messages:
  - role: system
    content: You are a helpful assistant that provides structured information about people.
  - role: user
    content: "Generate information for a person named {{name}} who is {{age}} years old."
# Each testData entry supplies the {{name}} and {{age}} template variables.
testData:
  - name: "Alice Johnson"
    age: "30"
  - name: "Bob Smith"
    age: "25"
# Evaluators run simple substring checks against the model output.
evaluators:
  - name: has-required-fields
    string:
      contains: "name"
  - name: valid-json-structure
    string:
      contains: "age"
27 changes: 17 additions & 10 deletions internal/azuremodels/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,23 @@ import (
"github.com/github/gh-models/internal/sse"
)

// ChatCompletionOptions represents available options for a chat completion request.
type ChatCompletionOptions struct {
	// MaxTokens limits the number of generated tokens; nil omits the field
	// from the request so the service default applies.
	MaxTokens *int `json:"max_tokens,omitempty"`
	// Messages is the ordered conversation sent to the model.
	Messages []ChatMessage `json:"messages"`
	// Model identifies the model to run (e.g. "openai/gpt-4o").
	Model string `json:"model"`
	// Stream indicates whether a streaming (SSE) response is requested.
	Stream bool `json:"stream,omitempty"`
	// Temperature controls sampling randomness; nil omits the field.
	Temperature *float64 `json:"temperature,omitempty"`
	// TopP controls nucleus sampling; nil omits the field.
	TopP *float64 `json:"top_p,omitempty"`
	// ResponseFormat optionally constrains the shape of the model output
	// (e.g. "json_object" or "json_schema" with an attached schema).
	ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
}

// ResponseFormat represents the response format specification
type ResponseFormat struct {
	// Type names the requested output format, e.g. "json_object" or
	// "json_schema".
	Type string `json:"type"`
	// JsonSchema carries the raw schema payload (keys such as "name",
	// "strict", and "schema") when Type is "json_schema"; nil omits it from
	// the serialized request.
	// NOTE(review): a pointer to a map is unusual in Go — maps are already
	// nilable reference types. Kept as-is because existing callers
	// dereference this field directly (e.g. `schema := *opts.ResponseFormat.JsonSchema`);
	// consider flattening to a plain map in a follow-up that updates callers.
	JsonSchema *map[string]interface{} `json:"json_schema,omitempty"`
}

// ChatMessageRole represents the role of a chat message.
type ChatMessageRole string

Expand All @@ -24,16 +41,6 @@ type ChatMessage struct {
Role ChatMessageRole `json:"role"`
}

// ChatCompletionOptions represents available options for a chat completion request.
// NOTE(review): this is the pre-change declaration shown on the removed side of
// the diff; the updated declaration adds a ResponseFormat field.
type ChatCompletionOptions struct {
	// MaxTokens limits the number of generated tokens; nil omits the field.
	MaxTokens *int `json:"max_tokens,omitempty"`
	// Messages is the ordered conversation sent to the model.
	Messages []ChatMessage `json:"messages"`
	// Model identifies the model to run.
	Model string `json:"model"`
	// Stream indicates whether a streaming (SSE) response is requested.
	Stream bool `json:"stream,omitempty"`
	// Temperature controls sampling randomness; nil omits the field.
	Temperature *float64 `json:"temperature,omitempty"`
	// TopP controls nucleus sampling; nil omits the field.
	TopP *float64 `json:"top_p,omitempty"`
}

// ChatChoiceMessage is a message from a choice in a chat conversation.
type ChatChoiceMessage struct {
Content *string `json:"content,omitempty"`
Expand Down
Loading
Loading