Skip to content

.Net: Bug: Schema generation for structured output does not support validation attributes #12292

Open
@frederikrosenberg

Description

@frederikrosenberg

Describe the bug
When using structured output based on a C# class, the generated JSON schema does not reflect validation attributes such as RegularExpression, MinLength and so on. OpenAI supports most of the basic JSON Schema validation properties.

To Reproduce
Run the following

<Project Sdk="Microsoft.NET.Sdk">
    <!-- Build settings: an executable targeting .NET 9 with implicit usings and nullable reference types enabled. -->
    <PropertyGroup>
        <OutputType>Exe</OutputType>
        <TargetFramework>net9.0</TargetFramework>
        <ImplicitUsings>enable</ImplicitUsings>
        <Nullable>enable</Nullable>
    </PropertyGroup>

    <!-- Semantic Kernel packages, both pinned to 1.54.0 (the version the issue is reported against). -->
    <ItemGroup>
      <PackageReference Include="Microsoft.SemanticKernel" Version="1.54.0" />
      <PackageReference Include="Microsoft.SemanticKernel.Connectors.OpenAI" Version="1.54.0" />
    </ItemGroup>
</Project>
using System.ComponentModel;
using System.ComponentModel.DataAnnotations;
using System.Text.Json;
using System.Text.Json.Nodes;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.SemanticKernel;
using Microsoft.SemanticKernel.Connectors.OpenAI;

// Route every outgoing request through HttpLogger so the generated JSON schema is printed.
var loggingClient = new HttpClient(new HttpLogger());

// Register the kernel and the Azure OpenAI chat completion service that uses the logging client.
var services = new ServiceCollection();
services.AddKernel();
services.AddAzureOpenAIChatCompletion(..., httpClient: loggingClient);

var provider = services.BuildServiceProvider();
var kernel = provider.GetRequiredService<Kernel>();

// Request structured output shaped like StructuredOutputResult.
var executionSettings = new OpenAIPromptExecutionSettings
{
    ResponseFormat = typeof(StructuredOutputResult),
};

var function = kernel.CreateFunctionFromPrompt("Return random data for the given structure.");
var result = await kernel.InvokeAsync<string>(function, new KernelArguments(executionSettings));

Console.WriteLine($"Function result: {result}");

/// <summary>
/// Type passed as <c>ResponseFormat</c> in the repro; its attributes are the ones
/// expected to show up in the generated JSON schema.
/// </summary>
public class StructuredOutputResult
{
    /// <summary>String constrained by the regular expression <c>[a-z0-9_]+</c>.</summary>
    [RegularExpression("[a-z0-9_]+"), Description("A string")]
    public required string String { get; set; }

    /// <summary>String array annotated with a minimum length of one.</summary>
    [MinLength(1), Description("A list of strings")]
    public required string[] Strings { get; set; }

    /// <summary>GUID value; carries only a description attribute.</summary>
    [Description("A guid")]
    public required Guid Guid { get; set; }
}

/// <summary>
/// Diagnostic <see cref="HttpClientHandler"/> that writes each outgoing request body to the console
/// and then forwards the request unchanged.
/// </summary>
public class HttpLogger : HttpClientHandler
{
    // Shared across requests; creating a new JsonSerializerOptions per call is flagged by CA1869.
    private static readonly JsonSerializerOptions s_indentedOptions = new() { WriteIndented = true };

    /// <summary>
    /// Logs the request body (pretty-printed when it is JSON) and sends the request.
    /// </summary>
    /// <param name="request">The outgoing request; its content, if any, is read as a string for logging.</param>
    /// <param name="cancellationToken">Token passed to the content read and to the base handler.</param>
    /// <returns>The response produced by the base handler.</returns>
    protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
    {
        if (request.Content is not null)
        {
            var body = await request.Content.ReadAsStringAsync(cancellationToken);
            Console.WriteLine($"HTTP Body: {Prettify(body)}");
        }

        return await base.SendAsync(request, cancellationToken);
    }

    /// <summary>
    /// Re-serializes <paramref name="body"/> as indented JSON, or returns it untouched when it is not valid JSON.
    /// </summary>
    private static string Prettify(string body)
    {
        try
        {
            var json = JsonSerializer.Deserialize<JsonNode>(body);
            return JsonSerializer.Serialize(json, s_indentedOptions);
        }
        catch (JsonException)
        {
            // Logging must never fail the request: empty or non-JSON payloads are printed as-is.
            return body;
        }
    }
}

Observe that the generated JSON schema does not include `pattern`, `minItems` or `format`.

The request:

{
  "response_format": {
    "json_schema": {
      "name": "StructuredOutputResult",
      "strict": true,
      "schema": {
        "type": "object",
        "properties": {
          "String": {
            "description": "A string",
            "type": "string"
          },
          "Strings": {
            "description": "A list of strings",
            "type": "array",
            "items": {
              "type": "string"
            }
          },
          "Guid": {
            "description": "A guid",
            "type": "string"
          }
        },
        "required": [
          "String",
          "Strings",
          "Guid"
        ],
        "additionalProperties": false
      }
    },
    "type": "json_schema"
  },
  "messages": [
    {
      "role": "user",
      "content": "Return random data for the given structure."
    }
  ],
  "model": "..."
}

Expected behavior
The following request, where the generated schema includes the properties marked with `// <---`:

{
  "response_format": {
    "json_schema": {
      "name": "StructuredOutputResult",
      "strict": true,
      "schema": {
        "type": "object",
        "properties": {
          "String": {
            "description": "A string",
            "pattern": "[a-z0-9_]+", // <---
            "type": "string"
          },
          "Strings": {
            "description": "A list of strings",
            "type": "array",
            "minItems": 1, // <---
            "items": {
              "type": "string"
            }
          },
          "Guid": {
            "description": "A guid",
            "type": "string",
            "format": "uuid" // <---
          }
        },
        "required": [
          "String",
          "Strings",
          "Guid"
        ],
        "additionalProperties": false
      }
    },
    "type": "json_schema"
  },
  "messages": [
    {
      "role": "user",
      "content": "Return random data for the given structure."
    }
  ],
  "model": "..."
}

Platform

  • Language: C#
  • Source: NuGet: 1.54.0
  • AI model: Azure OpenAI: GPT-4.1
  • OS: Windows/Linux

Metadata

Metadata

Assignees

Labels

.NET (Issue or Pull requests regarding .NET code) · bug (Something isn't working)

Type

Projects

Status

Bug

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions