1 change: 1 addition & 0 deletions instructor/core/client.py
@@ -828,6 +828,7 @@ def from_openai(
instructor.Mode.MD_JSON,
instructor.Mode.TOOLS_STRICT,
instructor.Mode.JSON_O1,
instructor.Mode.YAML,
instructor.Mode.RESPONSES_TOOLS,
instructor.Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS,
}
2 changes: 2 additions & 0 deletions instructor/mode.py
@@ -23,6 +23,7 @@ class Mode(enum.Enum):
JSON_O1 = "json_o1"
MD_JSON = "markdown_json_mode"
JSON_SCHEMA = "json_schema_mode"
YAML = "yaml_mode"

# Add new modes to support responses api
RESPONSES_TOOLS = "responses_tools"
@@ -105,6 +106,7 @@ def json_modes(cls) -> set["Mode"]:
cls.JSON_O1,
cls.MD_JSON,
cls.JSON_SCHEMA,
cls.YAML,
cls.ANTHROPIC_JSON,
cls.VERTEXAI_JSON,
cls.GEMINI_JSON,
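With Mode.YAML added to the enum, to json_modes(), and to the modes accepted by from_openai, the new mode can be selected like any other. A minimal usage sketch, not part of this diff (model name and prompt are placeholders; assumes instructor and openai are installed and an API key is configured):

import instructor
from openai import OpenAI
from pydantic import BaseModel

class User(BaseModel):
    name: str
    age: int

# Select the new YAML mode when patching the OpenAI client.
client = instructor.from_openai(OpenAI(), mode=instructor.Mode.YAML)

user = client.chat.completions.create(
    model="gpt-4o-mini",  # placeholder model name
    response_model=User,
    messages=[{"role": "user", "content": "Jason is 25 years old."}],
)
# Expected result: a validated User instance, e.g. User(name='Jason', age=25)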
66 changes: 66 additions & 0 deletions instructor/processing/function_calls.py
@@ -238,6 +238,9 @@ def from_response(
}:
return cls.parse_json(completion, validation_context, strict)

if mode == Mode.YAML:
return cls.parse_yaml(completion, validation_context, strict)

raise ValueError(f"Invalid patch mode: {mode}")

@classmethod
@@ -639,6 +642,69 @@ def parse_json(
# Validate the model from the JSON
return _validate_model_from_json(cls, json_content, validation_context, strict)

@classmethod
def parse_yaml(
cls: type[BaseModel],
completion: ChatCompletion,
validation_context: Optional[dict[str, Any]] = None,
strict: Optional[bool] = None,
) -> BaseModel:
"""Parse YAML mode responses using optimized extraction and validation."""
import yaml

# Check for incomplete output
_handle_incomplete_output(completion)

# Extract text from the response
message = _extract_text_content(completion)
if not message:
# Fallback for OpenAI format if _extract_text_content doesn't handle it
message = completion.choices[0].message.content or ""

# Extract YAML from the text - first try to find YAML in a code block
yaml_content = message

# Look for YAML in code blocks
import re

yaml_match = re.search(
r"```(?:yaml|yml)\s*(.*?)\s*```", message, re.DOTALL | re.IGNORECASE
)
if yaml_match:
yaml_content = yaml_match.group(1).strip()
else:
# Look for generic code blocks that might contain YAML
code_match = re.search(r"```\s*(.*?)\s*```", message, re.DOTALL)
if code_match:
potential_yaml = code_match.group(1).strip()
# Simple heuristic: if it doesn't start with { or [, might be YAML
if not potential_yaml.startswith(("{", "[")):
yaml_content = potential_yaml

try:
# Parse YAML to dictionary
yaml_data = yaml.safe_load(yaml_content)

if yaml_data is None:
raise ValueError("YAML content is empty or invalid")

# Validate the model from the parsed data
if strict:
return cls.model_validate(
yaml_data, context=validation_context, strict=True
)
else:
return cls.model_validate(
yaml_data, context=validation_context, strict=False
)

except yaml.YAMLError as e:
logger.debug(f"YAML parse error: {e}")
raise ValueError(f"Failed to parse YAML: {e}") from e
except Exception as e:
logger.debug(f"Model validation error: {e}")
raise


def openai_schema(cls: type[BaseModel]) -> OpenAISchema:
"""
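parse_yaml prefers a fenced yaml/yml block, falls back to a generic fenced block that does not look like JSON, and otherwise treats the whole message as YAML before handing the parsed data to model_validate. A standalone sketch of that extraction heuristic on an invented sample response (illustration only, reusing the same regexes as above):

import re
import yaml

# Invented sample completion text for illustration.
message = "Here is the result:\n```yaml\nname: Jason\nage: 25\n```"

yaml_content = message
yaml_match = re.search(r"```(?:yaml|yml)\s*(.*?)\s*```", message, re.DOTALL | re.IGNORECASE)
if yaml_match:
    yaml_content = yaml_match.group(1).strip()
else:
    code_match = re.search(r"```\s*(.*?)\s*```", message, re.DOTALL)
    if code_match and not code_match.group(1).strip().startswith(("{", "[")):
        yaml_content = code_match.group(1).strip()

data = yaml.safe_load(yaml_content)
# data == {'name': 'Jason', 'age': 25}; cls.model_validate(data) then builds the pydantic model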
4 changes: 4 additions & 0 deletions instructor/processing/response.py
@@ -132,10 +132,12 @@ class User(BaseModel):
handle_responses_tools_with_inbuilt_tools,
handle_tools,
handle_tools_strict,
handle_yaml_mode,
reask_default,
reask_md_json,
reask_responses_tools,
reask_tools,
reask_yaml,
)

# Perplexity utils
@@ -431,6 +433,7 @@ def handle_response_model(
Mode.JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON), # type: ignore
Mode.MD_JSON: lambda rm, nk: handle_json_modes(rm, nk, Mode.MD_JSON), # type: ignore
Mode.JSON_SCHEMA: lambda rm, nk: handle_json_modes(rm, nk, Mode.JSON_SCHEMA), # type: ignore
Mode.YAML: handle_yaml_mode,
Mode.ANTHROPIC_TOOLS: handle_anthropic_tools,
Mode.ANTHROPIC_REASONING_TOOLS: handle_anthropic_reasoning_tools,
Mode.ANTHROPIC_JSON: handle_anthropic_json,
@@ -610,6 +613,7 @@ def handle_reask_kwargs(
Mode.JSON: reask_md_json,
Mode.MD_JSON: reask_md_json,
Mode.JSON_SCHEMA: reask_md_json,
Mode.YAML: reask_yaml,
Mode.PARALLEL_TOOLS: reask_tools,
Mode.RESPONSES_TOOLS: reask_responses_tools,
Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS: reask_responses_tools,
74 changes: 74 additions & 0 deletions instructor/providers/openai/utils.py
@@ -77,6 +77,31 @@ def reask_responses_tools(
return kwargs


def reask_yaml(
kwargs: dict[str, Any],
response: Any,
exception: Exception,
failed_attempts: list[Any] | None = None, # noqa: ARG001
):
"""
Handle reask for OpenAI YAML mode when validation fails.

Kwargs modifications:
- Adds: "messages" (user message requesting YAML correction)
"""
kwargs = kwargs.copy()
reask_msgs = [dump_message(response.choices[0].message)]

reask_msgs.append(
{
"role": "user",
"content": f"Correct your YAML ONLY RESPONSE, based on the following errors:\n{exception}",
}
)
kwargs["messages"].extend(reask_msgs)
return kwargs


def reask_md_json(
kwargs: dict[str, Any],
response: Any,
@@ -393,6 +418,51 @@ def handle_json_o1(
return response_model, new_kwargs


def handle_yaml_mode(
response_model: type[Any] | None, new_kwargs: dict[str, Any]
) -> tuple[type[Any] | None, dict[str, Any]]:
"""
Handle OpenAI YAML mode.

Kwargs modifications:
- When response_model is None: No modifications
- When response_model is provided:
- Modifies: "messages" (appends user message with YAML schema)
"""
if response_model is None:
return None, new_kwargs

message = dedent(
f"""
You must respond with YAML that matches the following schema:

{json.dumps(response_model.model_json_schema(), indent=2, ensure_ascii=False)}

Return only the YAML data instance matching this schema, not the schema itself.
Do not include any other text or explanation in your response.
"""
)

if new_kwargs["messages"][0]["role"] != "system":
new_kwargs["messages"].insert(
0,
{
"role": "system",
"content": message,
},
)
elif isinstance(new_kwargs["messages"][0]["content"], str):
new_kwargs["messages"][0]["content"] += f"\n\n{message}"
elif isinstance(new_kwargs["messages"][0]["content"], list):
new_kwargs["messages"][0]["content"][0]["text"] += f"\n\n{message}"
else:
raise ValueError(
"Invalid message format, must be a string or a list of messages"
)

return response_model, new_kwargs


def handle_json_modes(
response_model: type[Any] | None, new_kwargs: dict[str, Any], mode: Mode
) -> tuple[type[Any] | None, dict[str, Any]]:
@@ -512,6 +582,10 @@ def handle_openrouter_structured_outputs(
"reask": reask_md_json,
"response": handle_json_o1,
},
Mode.YAML: {
"reask": reask_yaml,
"response": handle_yaml_mode,
},
Mode.PARALLEL_TOOLS: {
"reask": reask_tools,
"response": handle_parallel_tools,
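Taken together, handle_yaml_mode injects (or extends) a system message carrying the response model's JSON schema plus a YAML-only instruction, and reask_yaml replays the failed assistant message with a correction request when validation fails. A quick illustration of the request shaping, calling the new helper directly (internal import path as added in this diff; illustration only):

from pydantic import BaseModel
from instructor.providers.openai.utils import handle_yaml_mode  # path as in this PR

class User(BaseModel):
    name: str
    age: int

_, new_kwargs = handle_yaml_mode(
    User, {"messages": [{"role": "user", "content": "Jason is 25 years old."}]}
)
# The first message is now a system message containing the JSON schema of User
# and the instruction to return only YAML matching that schema.
print(new_kwargs["messages"][0]["role"])  # -> "system"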