Skip to content

Created reply_in_json_interface and add json_interface_string utility #107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/agents/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
bullets,
header,
quoted,
reply_in_json,
reply_in_json_interface,
template,
stats,
)
Expand Down Expand Up @@ -95,7 +95,7 @@ def proof(self, instructions: str, article: str) -> EditorFeedback:
regarding quality and suitability for a travel book.
"""

prompt += reply_in_json(EditorFeedback)
prompt += reply_in_json_interface(EditorFeedback)

return self.ask(prompt, format=EditorFeedback)

Expand Down
2 changes: 2 additions & 0 deletions src/haverscript/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
code,
quoted,
reply_in_json,
reply_in_json_interface,
template,
xml_element,
markdown,
Expand Down Expand Up @@ -81,6 +82,7 @@
"code",
"quoted",
"reply_in_json",
"reply_in_json_interface",
"Reply",
"Request",
"ServiceProvider",
Expand Down
194 changes: 194 additions & 0 deletions src/haverscript/json_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
from typing import (
Any,
Type,
List,
Dict,
)
from pydantic import BaseModel


def json_interface_string(model_type: Type[BaseModel]) -> str:
"""
Generates a TypeScript-style interface string for a given Pydantic model.

Args:
model_type: The Pydantic model class.

Returns:
A string representing the TypeScript-style JSON interface.
"""
json_schema = model_type.model_json_schema()

return _json_interface_string(json_schema, root_schema=json_schema)


def _json_interface_string(
current_schema: Dict[str, Any], root_schema: Dict[str, Any], indent: int = 1
) -> str:
"""
Generates a TypeScript-style interface string for a given Pydantic model.

Args:
current_schema: The current part of the JSON schema being processed.
root_schema: The root of the entire JSON schema, used for resolving $refs.
indent: The current indentation level.

Returns:
A string representing the TypeScript-style JSON interface.
"""
ret_str: str = "{"

# Helper for indentation and new lines
def ind_nl(n: int) -> str:
return "\n" + "\t" * n

# Properties to iterate over. Could be in 'properties' or from 'allOf' etc.
properties_to_process: Dict[str, Any] = {}
required_fields: List[str] = current_schema.get("required", [])

if "properties" in current_schema:
properties_to_process.update(current_schema["properties"])

# Basic handling for 'allOf' - merges properties from referenced schemas
# A more robust solution would deeply merge schemas if necessary.
if "allOf" in current_schema:
for item in current_schema["allOf"]:
if "$ref" in item:
ref_path = item["$ref"].split("/")
ref_schema = _resolve_ref(ref_path, root_schema)
if "properties" in ref_schema:
properties_to_process.update(ref_schema["properties"])
if "required" in ref_schema: # Collect required fields from allOf parts
required_fields.extend(
r for r in ref_schema["required"] if r not in required_fields
)

if not properties_to_process:
# In the case of an "empty object" or a schema with no properties
# just assume it can be represented by "{}"
# TODO: Decide if this is the desired behavior.
return "{}"

for key, value_schema in properties_to_process.items():
ret_str += ind_nl(indent) + f"{key}"

is_required = key in required_fields
ret_str += " ?: " if not is_required else " : "

ret_str += (
f"{_typescript_type_from_schema(value_schema, root_schema, indent + 1)};"
)

if "description" in value_schema:
ret_str += f" // {value_schema['description']}"

ret_str += ind_nl(indent - 1) + "}"
return ret_str


def _resolve_ref(ref_path: List[str], root_schema: Dict[str, Any]) -> Dict[str, Any]:
"""
Resolves a $ref path against the root schema.
Example $ref: "#/$defs/MyModel" -> path: ["#", "$defs", "MyModel"]
"""
if not ref_path or ref_path[0] != "#":
raise ValueError(f"Invalid $ref format: {ref_path}")

current_level = root_schema
for part in ref_path[1:]: # Skip "#"
if part not in current_level:
raise ValueError(
f"Reference part '{part}' not found in schema for $ref {'/'.join(ref_path)}"
)
current_level = current_level[part]
return current_level


def _typescript_type_from_schema(
value_schema: Dict[str, Any], root_schema: Dict[str, Any], indent: int
) -> str:
"""
Determines the TypeScript type string for a given JSON schema definition.

Args:
value_schema: The JSON schema for the specific value/property.
root_schema: The root of the entire JSON schema, for $ref resolution.
indent: Current indentation level (for nested objects).

Returns:
TypeScript type string.
"""
if "$ref" in value_schema:
ref_path = value_schema["$ref"].split("/")
# Find the referenced schema in the root schema
resolved_schema = _resolve_ref(ref_path, root_schema)
# Recursively resolve the type from the referenced schema
return _typescript_type_from_schema(resolved_schema, root_schema, indent)

# Handle 'anyOf' for nullable types (e.g., string | null) or other unions
if "anyOf" in value_schema:
types = []
for sub_schema in value_schema["anyOf"]:
types.append(_typescript_type_from_schema(sub_schema, root_schema, indent))
# Filter out duplicates and join (sorted for deterministic output)
return " | ".join(sorted(list(set(types))))

# Handle 'enum' from Pydantic Literal/Enum (JSON schema 'enum' or 'const')
if "enum" in value_schema:
return " | ".join(
[f'"{v}"' if isinstance(v, str) else str(v) for v in value_schema["enum"]]
)
if "const" in value_schema: # For Literal['a'] (why would we do this though!?)
const_val = value_schema["const"]
return f'"{const_val}"' if isinstance(const_val, str) else str(const_val)

schema_type = value_schema.get("type")

if isinstance(schema_type, list):
ts_types = []
for t in schema_type:
if t == "null":
ts_types.append("null")
else:
# Create a temporary schema for the non-null type to recursively call
temp_schema_for_type = value_schema.copy()
temp_schema_for_type["type"] = t
# Remove array specific 'items' if we are just getting type for one of list elements
if "items" in temp_schema_for_type and t != "array":
del temp_schema_for_type["items"]
ts_types.append(
_typescript_type_from_schema(
temp_schema_for_type, root_schema, indent
)
)
return " | ".join(sorted(list(set(ts_types))))

if schema_type == "array":
if "items" not in value_schema:
# Default to any[] if items are not specified (although this is very unusual)
# TODO: Decide if this is the desired behavior.
return "any[]"
items_schema = value_schema["items"]
item_type_str = _typescript_type_from_schema(items_schema, root_schema, indent)
# Handle cases where item_type_str itself might be a union type (e.g., (string | number)[])
if " | " in item_type_str:
return f"({item_type_str})[]"
return f"{item_type_str}[]"

elif schema_type == "object":
# This is for inline object definitions (not $ref)
# Check if it has properties or additionalProperties to determine structure
if "properties" in value_schema or "additionalProperties" in value_schema:
return _json_interface_string(value_schema, root_schema, indent)
else:
# An object type with no specified properties, could be a generic object
# TODO: Decide if this is the desired behavior.
return "object"
else:
# Otherwise, we just assume it's a primitive type (or if not specified, "any")
# TODO: Decide if this is the desired behavior.
if schema_type == "string" and value_schema.get("pattern") is not None:
# Handle regex patterns for strings
# NOTE: Is this maybe excessive and confusing for Agents?
return f'string /* regex: "{value_schema["pattern"]}" */'
return schema_type if schema_type else "any"
17 changes: 17 additions & 0 deletions src/haverscript/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import re
from pydantic import BaseModel

from haverscript.json_utils import json_interface_string


class Markdown:
def __init__(
Expand Down Expand Up @@ -153,6 +155,21 @@ def table(headers: dict, rows: list[dict]) -> Markdown:
return "\n".join([header_row, separator] + data_rows)


def reply_in_json_interface(model: Type[BaseModel]) -> str:
"""
Instructions to reply in JSON using a specific interface.

Args:
model: The Pydantic model class.
Returns:
A string representing the prompt with the JSON interface.
"""
prompt: str = (
f"Reply with the following JSON interface:\n```\n{json_interface_string(model)}\n```"
)
return prompt


def reply_in_json(
model: Type[BaseModel], prefix: str = "Reply in JSON, using the following keys:"
) -> Markdown:
Expand Down