Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions docs/examples/document_segmentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: Learn effective document segmentation techniques using Cohere's LLM

# Document Segmentation

In this guide, we demonstrate how to do document segmentation using structured output from an LLM. We'll be using [command-a-reasoning-08-2025](https://docs.cohere.com/docs/command-a-reasoning) - one of Cohere's latest LLMs with 256k context length and testing the approach on an article explaining the Transformer architecture. Same approach to document segmentation can be applied to any other domain where we need to break down a complex long document into smaller chunks.
In this guide, we demonstrate how to do document segmentation using structured output from an LLM. We'll be using [command-a](https://docs.cohere.com/docs/command-a), one of Cohere's latest LLMs with a 256k context length, and testing the approach on an article explaining the Transformer architecture. The same approach to document segmentation can be applied to any other domain where we need to break down a long, complex document into smaller chunks.

!!! tips "Motivation"
Sometimes we need a way to split a document into meaningful parts that each center around a single key concept or idea. Simple length-based or rule-based text splitters are not reliable enough. Consider the cases where documents contain code snippets or math equations - we don't want to split those on `'\n\n'` or have to write extensive rules for different types of documents. It turns out that LLMs with sufficiently long context length are well suited for this task.
Expand Down Expand Up @@ -77,7 +77,7 @@ import cohere

# Apply the patch to the cohere client
# enables response_model keyword
client = instructor.from_cohere(cohere.Client())
client = instructor.from_cohere(cohere.ClientV2())


system_prompt = f"""\
Expand All @@ -89,7 +89,7 @@ Each line of the document is marked with its line number in square brackets (e.g

def get_structured_document(document_with_line_numbers) -> StructuredDocument:
return client.chat.completions.create(
model="command-a-reasoning-08-2025",
model="command-a-03-2025",
response_model=StructuredDocument,
messages=[
{
Expand Down Expand Up @@ -148,7 +148,7 @@ def doc_with_lines(document):
return document_with_line_numbers, line2text


client = instructor.from_cohere(cohere.Client())
client = instructor.from_cohere(cohere.ClientV2())


system_prompt = f"""\
Expand All @@ -172,7 +172,7 @@ class StructuredDocument(BaseModel):

def get_structured_document(document_with_line_numbers) -> StructuredDocument:
return client.chat.completions.create(
model="command-a-reasoning-08-2025",
model="command-a-03-2025",
response_model=StructuredDocument,
messages=[
{
Expand Down
4 changes: 2 additions & 2 deletions docs/integrations/cohere.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ import instructor

# Patching the Cohere client with the instructor for enhanced capabilities
client = instructor.from_provider(
"cohere/command-a-reasoning-08-2025",
"cohere/command-a-03-2025",
max_tokens=1000,
)

Expand Down Expand Up @@ -94,7 +94,7 @@ print(group.model_dump_json(indent=2))
import instructor

async_client = instructor.from_provider(
"cohere/command-a-reasoning-08-2025",
"cohere/command-a-03-2025",
async_client=True,
max_tokens=1000,
)
Expand Down
4 changes: 2 additions & 2 deletions docs/learning/getting_started/client_setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ For Cohere's models:
import instructor
import cohere

cohere_client = cohere.Client("YOUR_API_KEY")
cohere_client = cohere.ClientV2("YOUR_API_KEY")
client = instructor.from_cohere(cohere_client)
```

Expand Down Expand Up @@ -196,4 +196,4 @@ Now that you've set up your client, you can:
3. Add [validation](../validation/basics.md) to your models
4. Handle [optional fields](../patterns/optional_fields.md)

The following sections will guide you through these patterns with increasingly complex examples.
The following sections will guide you through these patterns with increasingly complex examples.
2 changes: 1 addition & 1 deletion docs/llms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ Available Modes:
import instructor
import cohere

client = instructor.from_cohere(cohere.Client())
client = instructor.from_cohere(cohere.ClientV2())
```

Available Modes:
Expand Down
6 changes: 3 additions & 3 deletions examples/caching/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,9 +449,9 @@ def calculate_cost_savings(baseline_stats: dict, cached_stats: dict) -> dict[str
"cost_savings_percent": savings_percent,
"time_saved": time_saved,
"time_savings_percent": time_savings_percent,
"speed_improvement": baseline_time / cached_time
if cached_time > 0
else float("inf"),
"speed_improvement": (
baseline_time / cached_time if cached_time > 0 else float("inf")
),
}


Expand Down
4 changes: 2 additions & 2 deletions examples/cohere/cohere.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

# Patching the Cohere client with the instructor for enhanced capabilities
client = instructor.from_cohere(
cohere.Client(),
cohere.ClientV2(),
max_tokens=1000,
model="command-a-reasoning-08-2025",
model="command-a-03-2025",
)


Expand Down
11 changes: 7 additions & 4 deletions instructor/auto_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,11 @@ def from_provider(
from instructor import from_cohere

client = (
cohere.AsyncClient(api_key=api_key)
cohere.AsyncClientV2(api_key=api_key)
if async_client
else cohere.Client(api_key=api_key)
else cohere.ClientV2(api_key=api_key)
)
result = from_cohere(client, **kwargs)
result = from_cohere(client, model=model_name, **kwargs)
logger.info(
"Client initialized",
extra={**provider_info, "status": "success"},
Expand Down Expand Up @@ -809,7 +809,10 @@ def from_provider(
"llama4",
"mistral-nemo",
"firefunction-v2",
"command-a-reasoning-08-2025",
"command-a",
"command-r",
"command-r-plus",
"command-r7b",
"qwen2.5",
"qwen2.5-coder",
"qwen3",
Expand Down
13 changes: 11 additions & 2 deletions instructor/core/retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
from json import JSONDecodeError
from typing import Any, Callable, TypeVar

from .exceptions import InstructorRetryException, AsyncValidationError, FailedAttempt, ValidationError as InstructorValidationError
from .exceptions import (
InstructorRetryException,
AsyncValidationError,
FailedAttempt,
ValidationError as InstructorValidationError,
)
from .hooks import Hooks
from ..mode import Mode
from ..processing.response import (
Expand Down Expand Up @@ -199,7 +204,11 @@ def retry_sync(
mode=mode,
stream=stream,
)
except (ValidationError, JSONDecodeError, InstructorValidationError) as e:
except (
ValidationError,
JSONDecodeError,
InstructorValidationError,
) as e:
logger.debug(f"Parse error: {e}")
hooks.emit_parse_error(e)

Expand Down
8 changes: 4 additions & 4 deletions instructor/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
"cohere/command-r7b-12-2024",
"cohere/command-a-translate-08-2025",
"cohere/command-a-reasoning-08-2025",
"cohere/command-r",
"cohere/command-r-03-2024",
"cohere/command-r", # deprecated 2025-09-15
"cohere/command-r-03-2024", # deprecated 2025-09-15
"cohere/command-r-08-2024",
"cohere/command-r-plus",
"cohere/command-r-plus-04-2024",
"cohere/command-r-plus", # deprecated 2025-09-15
"cohere/command-r-plus-04-2024", # deprecated 2025-09-15
"cohere/command-r-plus-08-2024",
"cohere/command-r7b-12-2024",
# OpenAI Models
Expand Down
106 changes: 99 additions & 7 deletions instructor/processing/function_calls.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,12 +289,38 @@ def parse_cohere_json_schema(
validation_context: Optional[dict[str, Any]] = None,
strict: Optional[bool] = None,
):
assert hasattr(completion, "text"), (
"Completion is not of type NonStreamedChatResponse"
)
return cls.model_validate_json(
completion.text, context=validation_context, strict=strict
)
# Handle both V1 and V2 response structures
if hasattr(completion, "text"):
# V1 format: direct text access
text = completion.text
elif hasattr(completion, "message") and hasattr(completion.message, "content"):
# V2 format: nested structure (message.content[].text)
# V2 responses may have multiple content items (thinking, text, etc.)
content_items = completion.message.content
if content_items and len(content_items) > 0:
# Find the text content item (skip thinking/other types)
# TODO handle these other content types
text = None
for item in content_items:
if (
hasattr(item, "type")
and item.type == "text"
and hasattr(item, "text")
):
text = item.text
break

if text is None:
raise ValueError("Cohere V2 response has no text content item")
else:
raise ValueError("Cohere V2 response has no content")
else:
raise ValueError(
f"Unsupported Cohere response format. Expected 'text' (V1) or "
f"'message.content[].text' (V2), got: {type(completion)}"
)

return cls.model_validate_json(text, context=validation_context, strict=strict)

@classmethod
def parse_anthropic_tools(
Expand Down Expand Up @@ -479,7 +505,73 @@ def parse_cohere_tools(
validation_context: Optional[dict[str, Any]] = None,
strict: Optional[bool] = None,
) -> BaseModel:
text = cast(str, completion.text) # type: ignore - TODO update with cohere specific types
"""
Parse Cohere tools response.

Supports:
- V1 native tool calls: completion.tool_calls[0].parameters
- V2 native tool calls: completion.message.tool_calls[0].function.arguments (JSON string)
- V1 text-based: completion.text (prompt-based approach)
- V2 text-based: completion.message.content[].text (prompt-based approach)
"""
# First, check for native Cohere tool calls (V1 and V2)
# V1: completion.tool_calls with tc.parameters (dict)
if hasattr(completion, "tool_calls") and completion.tool_calls:
# V1 tool call format
tool_call = completion.tool_calls[0]
# Parameters in V1 are already a dict
return cls.model_validate(
tool_call.parameters, context=validation_context, strict=strict
)

# V2: completion.message.tool_calls with tc.function.arguments (JSON string)
if (
hasattr(completion, "message")
and hasattr(completion.message, "tool_calls")
and completion.message.tool_calls
):
# V2 tool call format
tool_call = completion.message.tool_calls[0]
# Arguments in V2 are a JSON string
import json

arguments = json.loads(tool_call.function.arguments)
return cls.model_validate(
arguments, context=validation_context, strict=strict
)

# Fallback to text-based extraction (current prompt-based approach)
# Handle both V1 and V2 text response structures
if hasattr(completion, "text"):
# V1 format: direct text access
text = completion.text
elif hasattr(completion, "message") and hasattr(completion.message, "content"):
# V2 format: nested structure (message.content[].text)
# V2 responses may have multiple content items (thinking, text, etc.)
content_items = completion.message.content
if content_items and len(content_items) > 0:
# Find the text content item (skip thinking/other types)
text = None
for item in content_items:
if (
hasattr(item, "type")
and item.type == "text"
and hasattr(item, "text")
):
text = item.text
break

if text is None:
raise ValueError("Cohere V2 response has no text content item")
else:
raise ValueError("Cohere V2 response has no content")
else:
raise ValueError(
f"Unsupported Cohere response format. Expected tool_calls or text content. "
f"Got: {type(completion)}"
)

# Extract JSON from text (for prompt-based approach)
extra_text = extract_json_from_codeblock(text)
return cls.model_validate_json(
extra_text, context=validation_context, strict=strict
Expand Down
16 changes: 10 additions & 6 deletions instructor/processing/multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,18 +682,22 @@ def to_openai(self, mode: Mode) -> dict[str, Any]:
if mode in {Mode.RESPONSES_TOOLS, Mode.RESPONSES_TOOLS_WITH_INBUILT_TOOLS}:
return {
"type": input_file_type,
"filename": self.source
if isinstance(self.source, str)
else str(self.source),
"filename": (
self.source
if isinstance(self.source, str)
else str(self.source)
),
"file_data": f"data:{self.media_type};base64,{data}",
}
else:
return {
"type": input_file_type,
"file": {
"filename": self.source
if isinstance(self.source, str)
else str(self.source),
"filename": (
self.source
if isinstance(self.source, str)
else str(self.source)
),
"file_data": f"data:{self.media_type};base64,{data}",
},
}
Expand Down
1 change: 0 additions & 1 deletion instructor/processing/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,6 @@ class to parse the response into. Special DSL types supported:
return model.content

model._raw_response = response

return model


Expand Down
Loading
Loading