Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 85 additions & 8 deletions cmd/eval/eval_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ description: Testing JSON with failing evaluators
model: openai/gpt-4o
testData:
- input: "hello"
expected: "hello world"
messages:
- role: user
content: "{{input}}"
Expand Down Expand Up @@ -553,18 +554,94 @@ evaluators:

output := out.String()

// Verify JSON structure
var result EvaluationSummary
err = json.Unmarshal([]byte(output), &result)
require.NoError(t, err)

// Verify failing test is properly represented
require.Equal(t, 1, result.Summary.TotalTests)
require.Equal(t, 0, result.Summary.PassedTests)
require.Equal(t, 1, result.Summary.FailedTests)
require.Equal(t, 0.0, result.Summary.PassRate)
// Verify JSON doesn't contain human-readable text
require.NotContains(t, output, "Running evaluation:")
})

t.Run("eval with responseFormat and jsonSchema", func(t *testing.T) {
const yamlBody = `
name: JSON Schema Evaluation
description: Testing responseFormat and jsonSchema in eval
model: openai/gpt-4o
responseFormat: json_schema
jsonSchema:
name: response_schema
strict: true
schema:
type: object
properties:
message:
type: string
description: The response message
confidence:
type: number
description: Confidence score
required:
- message
additionalProperties: false
testData:
- input: "hello"
expected: "hello world"
messages:
- role: user
content: "Respond to: {{input}}"
evaluators:
- name: contains-message
string:
contains: "message"
`

require.Len(t, result.TestResults, 1)
require.False(t, result.TestResults[0].EvaluationResults[0].Passed)
require.Equal(t, 0.0, result.TestResults[0].EvaluationResults[0].Score)
tmpDir := t.TempDir()
promptFile := filepath.Join(tmpDir, "test.prompt.yml")
err := os.WriteFile(promptFile, []byte(yamlBody), 0644)
require.NoError(t, err)

client := azuremodels.NewMockClient()
var capturedRequest azuremodels.ChatCompletionOptions
client.MockGetChatCompletionStream = func(ctx context.Context, req azuremodels.ChatCompletionOptions, org string) (*azuremodels.ChatCompletionResponse, error) {
capturedRequest = req
response := `{"message": "hello world", "confidence": 0.95}`
reader := sse.NewMockEventReader([]azuremodels.ChatCompletion{
{
Choices: []azuremodels.ChatChoice{
{
Message: &azuremodels.ChatChoiceMessage{
Content: &response,
},
},
},
},
})
return &azuremodels.ChatCompletionResponse{Reader: reader}, nil
}

out := new(bytes.Buffer)
cfg := command.NewConfig(out, out, client, true, 100)

cmd := NewEvalCommand(cfg)
cmd.SetArgs([]string{promptFile})

err = cmd.Execute()
require.NoError(t, err)

// Verify that responseFormat and jsonSchema were included in the request
require.NotNil(t, capturedRequest.ResponseFormat)
require.Equal(t, "json_schema", capturedRequest.ResponseFormat.Type)
require.NotNil(t, capturedRequest.ResponseFormat.JsonSchema)

schema := *capturedRequest.ResponseFormat.JsonSchema
require.Equal(t, "response_schema", schema["name"])
require.Equal(t, true, schema["strict"])
require.Contains(t, schema, "schema")

// Verify the test passed
output := out.String()
require.Contains(t, output, "✓ PASSED")
require.Contains(t, output, "🎉 All tests passed!")
})
}
14 changes: 11 additions & 3 deletions cmd/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,17 @@ func NewRunCommand(cfg *command.Config) *cobra.Command {
}
}

req := azuremodels.ChatCompletionOptions{
Messages: conversation.GetMessages(),
Model: modelName,
var req azuremodels.ChatCompletionOptions
if pf != nil {
// Use the prompt file's BuildChatCompletionOptions method to include responseFormat and jsonSchema
req = pf.BuildChatCompletionOptions(conversation.GetMessages())
// Override the model name if provided via CLI
req.Model = modelName
} else {
req = azuremodels.ChatCompletionOptions{
Messages: conversation.GetMessages(),
Model: modelName,
}
}

mp.UpdateRequest(&req)
Expand Down
96 changes: 96 additions & 0 deletions cmd/run/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,102 @@ messages:
require.Equal(t, "System message", *capturedReq.Messages[0].Content)
require.Equal(t, "User message", *capturedReq.Messages[1].Content)
})

t.Run("--file with responseFormat and jsonSchema", func(t *testing.T) {
const yamlBody = `
name: JSON Schema Test
description: Test responseFormat and jsonSchema
model: openai/test-model
responseFormat: json_schema
jsonSchema:
name: person_schema
strict: true
schema:
type: object
properties:
name:
type: string
description: The name
age:
type: integer
description: The age
required:
- name
- age
additionalProperties: false
messages:
- role: system
content: You are a helpful assistant.
- role: user
content: "Generate a person"
`

tmp, err := os.CreateTemp(t.TempDir(), "*.prompt.yml")
require.NoError(t, err)
_, err = tmp.WriteString(yamlBody)
require.NoError(t, err)
require.NoError(t, tmp.Close())

client := azuremodels.NewMockClient()
modelSummary := &azuremodels.ModelSummary{
Name: "test-model",
Publisher: "openai",
Task: "chat-completion",
}
client.MockListModels = func(ctx context.Context) ([]*azuremodels.ModelSummary, error) {
return []*azuremodels.ModelSummary{modelSummary}, nil
}

var capturedRequest azuremodels.ChatCompletionOptions
client.MockGetChatCompletionStream = func(ctx context.Context, req azuremodels.ChatCompletionOptions, org string) (*azuremodels.ChatCompletionResponse, error) {
capturedRequest = req
reply := "hello this is a test response"
reader := sse.NewMockEventReader([]azuremodels.ChatCompletion{
{
Choices: []azuremodels.ChatChoice{
{
Message: &azuremodels.ChatChoiceMessage{
Content: &reply,
},
},
},
},
})
return &azuremodels.ChatCompletionResponse{Reader: reader}, nil
}

out := new(bytes.Buffer)
cfg := command.NewConfig(out, out, client, true, 100)

cmd := NewRunCommand(cfg)
cmd.SetArgs([]string{"--file", tmp.Name()})

err = cmd.Execute()
require.NoError(t, err)

// Verify that responseFormat and jsonSchema were included in the request
require.NotNil(t, capturedRequest.ResponseFormat)
require.Equal(t, "json_schema", capturedRequest.ResponseFormat.Type)
require.NotNil(t, capturedRequest.ResponseFormat.JsonSchema)

schema := *capturedRequest.ResponseFormat.JsonSchema
require.Contains(t, schema, "name")
require.Contains(t, schema, "schema")
require.Equal(t, "person_schema", schema["name"])

schemaContent := schema["schema"].(map[string]interface{})
require.Equal(t, "object", schemaContent["type"])
require.Contains(t, schemaContent, "properties")
require.Contains(t, schemaContent, "required")

properties := schemaContent["properties"].(map[string]interface{})
require.Contains(t, properties, "name")
require.Contains(t, properties, "age")

required := schemaContent["required"].([]interface{})
require.Contains(t, required, "name")
require.Contains(t, required, "age")
})
}

func TestParseTemplateVariables(t *testing.T) {
Expand Down
19 changes: 19 additions & 0 deletions examples/json_response_prompt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Example prompt file demonstrating `responseFormat: json_object`, which asks
# the model to reply with a free-form JSON object (no fixed schema enforced).
name: JSON Response Example
description: Example prompt demonstrating responseFormat with json
model: openai/gpt-4o
responseFormat: json_object
messages:
  - role: system
    content: You are a helpful assistant that responds in JSON format.
  - role: user
    content: "Provide a summary of {{topic}} in JSON format with title, description, and key_points array."
# Each testData entry supplies the {{topic}} template variable for one eval case.
testData:
  - topic: "artificial intelligence"
  - topic: "climate change"
# Evaluators run simple substring checks against the model output.
evaluators:
  - name: contains-json-structure
    string:
      contains: "{"
  - name: has-title
    string:
      contains: "title"
64 changes: 64 additions & 0 deletions examples/json_schema_prompt.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Example prompt file demonstrating `responseFormat: json_schema` together with
# a `jsonSchema` block that constrains the structure of the model's output.
name: JSON Schema Response Example
description: Example prompt demonstrating responseFormat and jsonSchema usage
model: openai/gpt-4o
responseFormat: json_schema
jsonSchema:
  # Human-readable schema name forwarded to the API.
  name: Person Information Schema
  strict: true
  # NOTE(review): OpenAI strict mode typically requires every property to be
  # listed in `required` and `additionalProperties: false` at each object
  # level — confirm this example against the target API before relying on it.
  schema:
    type: object
    description: A structured response containing person information
    properties:
      name:
        type: string
        description: The full name of the person
      age:
        type: integer
        description: The age of the person in years
        minimum: 0
        maximum: 150
      email:
        type: string
        description: The email address of the person
        format: email
      skills:
        type: array
        description: A list of skills the person has
        items:
          type: string
      address:
        type: object
        description: The person's address
        properties:
          street:
            type: string
            description: Street address
          city:
            type: string
            description: City name
          country:
            type: string
            description: Country name
        required:
          - city
          - country
    required:
      - name
      - age
messages:
  - role: system
    content: You are a helpful assistant that provides structured information about people.
  - role: user
    content: "Generate information for a person named {{name}} who is {{age}} years old."
# Each testData entry supplies the {{name}} and {{age}} template variables.
testData:
  - name: "Alice Johnson"
    age: "30"
  - name: "Bob Smith"
    age: "25"
# Evaluators run simple substring checks against the model output.
evaluators:
  - name: has-required-fields
    string:
      contains: "name"
  - name: valid-json-structure
    string:
      contains: "age"
27 changes: 17 additions & 10 deletions internal/azuremodels/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,23 @@ import (
"github.com/github/gh-models/internal/sse"
)

// ChatCompletionOptions represents available options for a chat completion request.
type ChatCompletionOptions struct {
	// MaxTokens limits the number of generated tokens; nil omits the field
	// from the request so the service default applies.
	MaxTokens *int `json:"max_tokens,omitempty"`
	// Messages is the ordered conversation sent to the model.
	Messages []ChatMessage `json:"messages"`
	// Model identifies the model to run (e.g. "openai/gpt-4o").
	Model string `json:"model"`
	// Stream indicates whether a streaming (SSE) response is requested.
	Stream bool `json:"stream,omitempty"`
	// Temperature controls sampling randomness; nil omits the field.
	Temperature *float64 `json:"temperature,omitempty"`
	// TopP controls nucleus sampling; nil omits the field.
	TopP *float64 `json:"top_p,omitempty"`
	// ResponseFormat optionally constrains the shape of the model output
	// (e.g. "json_object" or "json_schema" with an attached schema).
	ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
}

// ResponseFormat represents the response format specification
type ResponseFormat struct {
	// Type names the requested output format, e.g. "json_object" or
	// "json_schema".
	Type string `json:"type"`
	// JsonSchema carries the raw schema payload (keys such as "name",
	// "strict", and "schema") when Type is "json_schema"; nil omits it from
	// the serialized request.
	// NOTE(review): a pointer to a map is unusual in Go — maps are already
	// nilable reference types. Kept as-is because existing callers
	// dereference this field directly (e.g. `schema := *opts.ResponseFormat.JsonSchema`);
	// consider flattening to a plain map in a follow-up that updates callers.
	JsonSchema *map[string]interface{} `json:"json_schema,omitempty"`
}

// ChatMessageRole represents the role of a chat message.
type ChatMessageRole string

Expand All @@ -24,16 +41,6 @@ type ChatMessage struct {
Role ChatMessageRole `json:"role"`
}

// ChatCompletionOptions represents available options for a chat completion request.
// NOTE(review): this is the pre-change declaration shown on the removed side of
// the diff; the updated declaration adds a ResponseFormat field.
type ChatCompletionOptions struct {
	// MaxTokens limits the number of generated tokens; nil omits the field.
	MaxTokens *int `json:"max_tokens,omitempty"`
	// Messages is the ordered conversation sent to the model.
	Messages []ChatMessage `json:"messages"`
	// Model identifies the model to run.
	Model string `json:"model"`
	// Stream indicates whether a streaming (SSE) response is requested.
	Stream bool `json:"stream,omitempty"`
	// Temperature controls sampling randomness; nil omits the field.
	Temperature *float64 `json:"temperature,omitempty"`
	// TopP controls nucleus sampling; nil omits the field.
	TopP *float64 `json:"top_p,omitempty"`
}

// ChatChoiceMessage is a message from a choice in a chat conversation.
type ChatChoiceMessage struct {
Content *string `json:"content,omitempty"`
Expand Down
Loading
Loading