@@ -34,7 +34,7 @@
 )
 from livekit.agents.utils import is_given

-from .models import ChatModels
+from .models import ChatModels, _supports_thinking
 from .utils import CACHE_CONTROL_EPHEMERAL, to_fnc_ctx

@@ -48,14 +48,16 @@ class _LLMOptions:
     caching: NotGivenOr[Literal["ephemeral"]]
     top_k: NotGivenOr[int]
     max_tokens: NotGivenOr[int]
+    thinking_enabled: NotGivenOr[bool]
+    thinking_budget_tokens: NotGivenOr[int]
     """If set to "ephemeral", the system prompt, tools, and chat history will be cached."""


 class LLM(llm.LLM):
     def __init__(
         self,
         *,
-        model: str | ChatModels = "claude-3-5-sonnet-20241022",
+        model: str | ChatModels = "claude-haiku-4-5-20251001",
         api_key: NotGivenOr[str] = NOT_GIVEN,
         base_url: NotGivenOr[str] = NOT_GIVEN,
         user: NotGivenOr[str] = NOT_GIVEN,
@@ -66,14 +68,16 @@ def __init__(
         parallel_tool_calls: NotGivenOr[bool] = NOT_GIVEN,
         tool_choice: NotGivenOr[ToolChoice] = NOT_GIVEN,
         caching: NotGivenOr[Literal["ephemeral"]] = NOT_GIVEN,
+        thinking_enabled: NotGivenOr[bool] = NOT_GIVEN,
+        thinking_budget_tokens: NotGivenOr[int] = NOT_GIVEN,
     ) -> None:
         """
         Create a new instance of Anthropic LLM.

         ``api_key`` must be set to your Anthropic API key, either using the argument or by setting
         the ``ANTHROPIC_API_KEY`` environmental variable.

-        model (str | ChatModels): The model to use. Defaults to "claude-3-5-sonnet-20241022".
+        model (str | ChatModels): The model to use. Defaults to "claude-haiku-4-5-20251001".
         api_key (str, optional): The Anthropic API key. Defaults to the ANTHROPIC_API_KEY environment variable.
         base_url (str, optional): The base URL for the Anthropic API. Defaults to None.
         user (str, optional): The user for the Anthropic API. Defaults to None.
@@ -82,6 +86,8 @@ def __init__(
         parallel_tool_calls (bool, optional): Whether to parallelize tool calls. Defaults to None.
         tool_choice (ToolChoice, optional): The tool choice for the Anthropic API. Defaults to "auto".
         caching (Literal["ephemeral"], optional): If set to "ephemeral", caching will be enabled for the system prompt, tools, and chat history.
+        thinking_enabled (bool, optional): If True, enables extended thinking. Defaults to disabled.
+        thinking_budget_tokens (int, optional): The token budget for extended thinking. Must be less than max_tokens. Defaults to 1024 when thinking is enabled.
         """  # noqa: E501

         super().__init__()
@@ -95,6 +101,8 @@ def __init__(
             caching=caching,
             top_k=top_k,
             max_tokens=max_tokens,
+            thinking_enabled=thinking_enabled,
+            thinking_budget_tokens=thinking_budget_tokens,
         )
         anthropic_api_key = api_key if is_given(api_key) else os.environ.get("ANTHROPIC_API_KEY")
         if not anthropic_api_key:
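Taken together, the constructor hunks simply thread the two new options through to _LLMOptions; nothing else about construction changes. A minimal caller-side sketch, assuming the plugin is imported as livekit.plugins.anthropic (the import path is an assumption; the argument names and defaults come from this diff):

    from livekit.plugins import anthropic  # assumed import path

    llm = anthropic.LLM(
        model="claude-haiku-4-5-20251001",
        max_tokens=4096,                # leave headroom above the thinking budget
        thinking_enabled=True,          # opt in to extended thinking
        thinking_budget_tokens=2048,    # must stay below max_tokens
    )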
@@ -148,6 +156,24 @@ def chat(

         extra["max_tokens"] = self._opts.max_tokens if is_given(self._opts.max_tokens) else 1024

+        # Add extended thinking configuration if enabled and model supports it
+        if (
+            is_given(self._opts.thinking_enabled)
+            and self._opts.thinking_enabled
+            and _supports_thinking(self._opts.model)
+        ):
+            budget = (
+                self._opts.thinking_budget_tokens
+                if is_given(self._opts.thinking_budget_tokens)
+                else 1024
+            )
+            max_tokens = extra["max_tokens"]
+            if budget >= max_tokens:
+                raise ValueError(
+                    f"thinking_budget_tokens ({budget}) must be less than max_tokens ({max_tokens})"
+                )
+            extra["thinking"] = {"type": "enabled", "budget_tokens": budget}
+
         if tools:
             extra["tools"] = to_fnc_ctx(tools, self._opts.caching or None)
             tool_choice = (
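This guard mirrors the API-level constraint that the thinking budget must leave room for the visible completion. A standalone sketch of the same rule, runnable in isolation (the function name and signature are illustrative, not part of the diff):

    def _thinking_param(enabled: bool, budget: int | None, max_tokens: int) -> dict | None:
        # Same rule as the branch above: default the budget to 1024,
        # require budget < max_tokens, and build the Anthropic "thinking" param.
        if not enabled:
            return None
        budget = 1024 if budget is None else budget
        if budget >= max_tokens:
            raise ValueError(
                f"thinking_budget_tokens ({budget}) must be less than max_tokens ({max_tokens})"
            )
        return {"type": "enabled", "budget_tokens": budget}

    assert _thinking_param(True, None, 4096) == {"type": "enabled", "budget_tokens": 1024}
    assert _thinking_param(False, 512, 1024) is None

One sharp edge worth noting: max_tokens also defaults to 1024 when not given, so enabling thinking without setting max_tokens makes budget equal max_tokens and raises ValueError. Callers who enable thinking should set max_tokens explicitly or pass a smaller budget.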
@@ -243,6 +269,7 @@ def __init__(

         self._request_id: str = ""
         self._ignoring_cot = False  # ignore chain of thought
+        self._in_thinking_block = False  # ignore extended thinking content
         self._input_tokens = 0
         self._cache_creation_tokens = 0
         self._cache_read_tokens = 0
@@ -306,6 +333,8 @@ def _parse_event(self, event: anthropic.types.RawMessageStreamEvent) -> llm.Chat
                 self._tool_call_id = event.content_block.id
                 self._fnc_name = event.content_block.name
                 self._fnc_raw_arguments = ""
+            elif event.content_block.type == "thinking":
+                self._in_thinking_block = True
         elif event.type == "content_block_delta":
             delta = event.delta
             if delta.type == "text_delta":
@@ -329,8 +358,14 @@ def _parse_event(self, event: anthropic.types.RawMessageStreamEvent) -> llm.Chat
             elif delta.type == "input_json_delta":
                 assert self._fnc_raw_arguments is not None
                 self._fnc_raw_arguments += delta.partial_json
+            elif delta.type == "thinking_delta":
+                # ignore extended thinking content, don't emit to user
+                return None

         elif event.type == "content_block_stop":
+            if self._in_thinking_block:
+                self._in_thinking_block = False
+                return None
             if self._tool_call_id is not None:
                 assert self._fnc_name is not None
                 assert self._fnc_raw_arguments is not None
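For orientation: extended thinking arrives on the stream as its own content block, so the three additions above cover its full lifecycle. The flag is set on block start, every delta is swallowed, and the flag is cleared on block stop, all before any text block begins. A sketch of a typical event order (the event and delta type names are Anthropic's streaming types; the ordering shown is illustrative):

    # content_block_start   content_block.type == "thinking"  -> sets _in_thinking_block
    # content_block_delta   delta.type == "thinking_delta"    -> dropped, returns None
    # content_block_stop                                      -> clears the flag, returns None
    # content_block_start   content_block.type == "text"
    # content_block_delta   delta.type == "text_delta"        -> emitted to the caller
    # content_block_stop

The remaining hunks are in the plugin's models module, imported above as .models.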
@@ -1,4 +1,4 @@
-from typing import Literal
+from typing import Literal, Union

 # https://docs.anthropic.com/en/docs/about-claude/model-deprecations#model-status

@@ -12,4 +12,25 @@
     "claude-sonnet-4-20250514",
     "claude-opus-4-20250514",
     "claude-opus-4-1-20250805",
+    "claude-sonnet-4-5-20250929",
+    "claude-haiku-4-5-20251001",
+    "claude-opus-4-5-20251101",
 ]
+
+
+# Models that support extended thinking
+# https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#supported-models
+THINKING_MODELS: set[str] = {
+    "claude-3-7-sonnet-20250219",
+    "claude-sonnet-4-20250514",
+    "claude-opus-4-20250514",
+    "claude-opus-4-1-20250805",
+    "claude-sonnet-4-5-20250929",
+    "claude-haiku-4-5-20251001",
+    "claude-opus-4-5-20251101",
+}
+
+
+def _supports_thinking(model: Union[ChatModels, str]) -> bool:
+    """Check if the model supports extended thinking."""
+    return model in THINKING_MODELS
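Because the capability check is a literal set lookup, it is strict by design: any model string that is not an exact member, including a newer snapshot date, fails the check, and thinking is then silently skipped rather than rejected. A quick behavioral sketch, with the function above in scope (the model IDs are from the diff; the undated alias is hypothetical):

    assert _supports_thinking("claude-haiku-4-5-20251001")       # new default model
    assert not _supports_thinking("claude-3-5-sonnet-20241022")  # old default: no thinking support
    assert not _supports_thinking("claude-haiku-4-5")            # hypothetical undated alias: not in the set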