Commit b89eb97

Update Anthropic kwargs + docs

svilupp authored Sep 9, 2024
1 parent 5c198fb commit b89eb97
Showing 6 changed files with 177 additions and 11 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -9,6 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0)
### Added

### Fixed
## [0.53.0]

### Added
- Added beta headers to enable long outputs (up to 8K tokens) with Anthropic's Sonnet 3.5 (see `?anthropic_extra_headers`).
- Added an `aiprefill` kwarg to prefill AI responses with Anthropic's models to improve steerability (see `?aigenerate`).

### Updated
- Documentation of `aigenerate` to make it clear that if `streamcallback` is provided WITH `flavor` set, there is no automatic configuration and the user must provide the correct `api_kwargs`.
- Grouped Anthropic's beta headers as a comma-separated string as per the latest API specification.


## [0.52.0]

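A minimal usage sketch of the two additions (the `claudeh`/`claudes` model aliases and exact kwargs follow the package's docs and tests below; outputs are illustrative):

```julia
using PromptingTools

# Prefill steers the assistant's reply; it is prepended to the returned content
msg = aigenerate("Reply with one word. What is the capital of France?";
    aiprefill = "The capital is", model = "claudeh")

# Asking Sonnet 3.5 for more than 4096 tokens automatically adds the
# `max-tokens-3-5-sonnet-2024-07-15` beta header (see `anthropic_extra_headers`)
msg = aigenerate("Write a very long story."; model = "claudes",
    api_kwargs = (; max_tokens = 8192))
```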
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.52.0"
version = "0.53.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
60 changes: 54 additions & 6 deletions src/llm_anthropic.jl
@@ -7,6 +7,7 @@
"""
render(schema::AbstractAnthropicSchema,
messages::Vector{<:AbstractMessage};
aiprefill::Union{Nothing, AbstractString} = nothing,
conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[],
tools::Vector{<:Dict{String, <:Any}} = Dict{String, Any}[],
cache::Union{Nothing, Symbol} = nothing,
@@ -15,12 +16,14 @@
Builds a history of the conversation to provide the prompt to the API. All unspecified kwargs are passed as replacements such that `{{key}}=>value` in the template.
# Keyword Arguments
- `aiprefill`: A string to be used as a prefill for the AI response. This steers the AI response in a certain direction (and can potentially save output tokens).
- `conversation`: Past conversation to be included in the beginning of the prompt (for continued conversations).
- `tools`: A list of tools to be used in the conversation. Added to the end of the system prompt to enforce its use.
- `cache`: A symbol representing the caching strategy to be used. Currently only `nothing` (no caching), `:system`, `:tools`, `:last` and `:all` are supported.
"""
function render(schema::AbstractAnthropicSchema,
messages::Vector{<:AbstractMessage};
aiprefill::Union{Nothing, AbstractString} = nothing,
conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[],
tools::Vector{<:Dict{String, <:Any}} = Dict{String, Any}[],
cache::Union{Nothing, Symbol} = nothing,
@@ -79,16 +82,41 @@ function render(schema::AbstractAnthropicSchema,
## Sense check
@assert !isempty(conversation) "AbstractAnthropicSchema requires at least 1 User message, ie, no `prompt` provided!"

## Apply prefilling of responses
if !isnothing(aiprefill) && !isempty(aiprefill)
aimsg = AIMessage(aiprefill)
push!(conversation,
Dict("role" => role4render(schema, aimsg),
"content" => [Dict{String, Any}("type" => "text", "text" => aiprefill)]))
end
return (; system, conversation)
end
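
For orientation, a sketch of what `render` returns when a prefill is supplied (structure mirrors the tests further below; `schema` is assumed to be an `AnthropicSchema()`):

```julia
render(schema, [SystemMessage("Act as a helpful AI assistant"),
        UserMessage("Hello, what's your name?")];
    aiprefill = "My name is Claude")
# (; system = "Act as a helpful AI assistant",
#    conversation = [
#        Dict("role" => "user",
#            "content" => [Dict("type" => "text", "text" => "Hello, what's your name?")]),
#        Dict("role" => "assistant",
#            "content" => [Dict("type" => "text", "text" => "My name is Claude")])])
```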

-function anthropic_extra_headers(; has_tools = false, has_cache = false)
"""
anthropic_extra_headers(; has_tools = false, has_cache = false, has_long_output = false)
Adds API version and beta headers to the request.
# Kwargs / Beta headers
- `has_tools`: Enables tools in the conversation.
- `has_cache`: Enables prompt caching.
- `has_long_output`: Enables long outputs (up to 8K tokens) with Anthropic's Sonnet 3.5.
"""
function anthropic_extra_headers(;
has_tools = false, has_cache = false, has_long_output = false)
extra_headers = ["anthropic-version" => "2023-06-01"]
beta_headers = String[]
if has_tools
-push!(extra_headers, "anthropic-beta" => "tools-2024-04-04")
+push!(beta_headers, "tools-2024-04-04")
end
if has_cache
-push!(extra_headers, "anthropic-beta" => "prompt-caching-2024-07-31")
+push!(beta_headers, "prompt-caching-2024-07-31")
end
if has_long_output
push!(beta_headers, "max-tokens-3-5-sonnet-2024-07-15")
end
if !isempty(beta_headers)
extra_headers = [extra_headers..., "anthropic-beta" => join(beta_headers, ",")]
end
return extra_headers
end
Expand Down Expand Up @@ -146,7 +174,8 @@ function anthropic_api(
end
## Build the headers
extra_headers = anthropic_extra_headers(;
-has_tools = haskey(kwargs, :tools), has_cache = !isnothing(cache))
+has_tools = haskey(kwargs, :tools), has_cache = !isnothing(cache),
+has_long_output = (max_tokens > 4096 && model in ["claude-3-5-sonnet-20240620"]))
headers = auth_header(
api_key; bearer = false, x_api_key = true,
extra_headers)
@@ -174,7 +203,7 @@ function anthropic_api(prompt_schema::TestEchoAnthropicSchema,
cache::Union{Nothing, Symbol} = nothing,
model::String = "claude-3-haiku-20240307", kwargs...)
prompt_schema.model_id = model
-prompt_schema.inputs = (; system, messages)
+prompt_schema.inputs = (; system, messages = copy(messages))
return prompt_schema
end

@@ -185,6 +214,7 @@ end
return_all::Bool = false, dry_run::Bool = false,
conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[],
streamcallback::Any = nothing,
aiprefill::Union{Nothing, AbstractString} = nothing,
http_kwargs::NamedTuple = NamedTuple(), api_kwargs::NamedTuple = NamedTuple(),
cache::Union{Nothing, Symbol} = nothing,
kwargs...)
@@ -201,6 +231,8 @@ Generate an AI response based on a given prompt using the Anthropic API.
- `dry_run::Bool=false`: If `true`, skips sending the messages to the model (for debugging, often used with `return_all=true`).
- `conversation::AbstractVector{<:AbstractMessage}=[]`: Not allowed for this schema. Provided only for compatibility.
- `streamcallback::Any`: A callback function to handle streaming responses. Can be simply `stdout` or `StreamCallback` object. See `?StreamCallback` for details.
Note: We configure the `StreamCallback` (and necessary `api_kwargs`) for you, unless you specify the `flavor`. See `?configure_callback!` for details.
- `aiprefill::Union{Nothing, AbstractString}`: A string to be used as a prefill for the AI response. This steers the AI response in a certain direction (and can potentially save output tokens). It MUST NOT end with a trailing space. Useful for JSON formatting.
- `http_kwargs::NamedTuple`: Additional keyword arguments for the HTTP request. Defaults to empty `NamedTuple`.
- `api_kwargs::NamedTuple`: Additional keyword arguments for the Anthropic API. Defaults to an empty `NamedTuple`.
- `max_tokens::Int`: The maximum number of tokens to generate. Defaults to 2048, because it's a required parameter for the API.
@@ -281,6 +313,13 @@ msg = aigenerate("Count from 1 to 10."; streamcallback, model="claudeh")
```
Note: Streaming support is only for Anthropic models; it doesn't yet support tool calling and a few other features (logprobs, refusals, etc.)
You can also provide a prefill for the AI response to steer the response in a certain direction (eg, formatting, style):
```julia
msg = aigenerate("Sum up 1 to 100."; aiprefill = "I'd be happy to answer in one number without any additional text. The answer is:", model="claudeh")
```
Note: It MUST NOT end with a trailing space. You'll get an API error if you do.
"""
function aigenerate(
prompt_schema::AbstractAnthropicSchema, prompt::ALLOWED_PROMPT_TYPE;
@@ -290,24 +329,33 @@ function aigenerate(
return_all::Bool = false, dry_run::Bool = false,
conversation::AbstractVector{<:AbstractMessage} = AbstractMessage[],
streamcallback::Any = nothing,
aiprefill::Union{Nothing, AbstractString} = nothing,
http_kwargs::NamedTuple = NamedTuple(), api_kwargs::NamedTuple = NamedTuple(),
cache::Union{Nothing, Symbol} = nothing,
kwargs...)
##
global MODEL_ALIASES
@assert (isnothing(cache)||cache in [:system, :tools, :last, :all]) "Currently only `:system`, `:tools`, `:last` and `:all` are supported for Anthropic Prompt Caching"
@assert (isnothing(aiprefill)||!isempty(strip(aiprefill))) "`aiprefill` must not be empty"
## Find the unique ID for the model alias provided
model_id = get(MODEL_ALIASES, model, model)
-conv_rendered = render(prompt_schema, prompt; conversation, cache, kwargs...)
+conv_rendered = render(prompt_schema, prompt; aiprefill, conversation, cache, kwargs...)

if !dry_run
@info conv_rendered.conversation
time = @elapsed resp = anthropic_api(
prompt_schema, conv_rendered.conversation; api_key,
conv_rendered.system, endpoint = "messages", model = model_id, streamcallback, http_kwargs, cache,
api_kwargs...)
tokens_prompt = get(resp.response[:usage], :input_tokens, 0)
tokens_completion = get(resp.response[:usage], :output_tokens, 0)
content = mapreduce(x -> get(x, :text, ""), *, resp.response[:content]) |> strip
## add aiprefill to the content
if !isnothing(aiprefill) && !isempty(aiprefill)
content = aiprefill * content
## remove the prefill from the end of the conversation
pop!(conv_rendered.conversation)
end
## Build metadata
extras = Dict{Symbol, Any}()
haskey(resp.response[:usage], :cache_creation_input_tokens) &&
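To make the prefill round-trip concrete, a hedged sketch (the `claudeh` alias is assumed from the package's model registry; output is illustrative):

```julia
# Prefilling "{" nudges the model to answer with raw JSON; since the prefill is
# prepended to the returned content, msg.content will start with "{"
msg = aigenerate("List the first 3 primes as JSON with the key `primes`.";
    aiprefill = "{", model = "claudeh")
```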
1 change: 1 addition & 0 deletions src/llm_openai.jl
@@ -460,6 +460,7 @@ Generate an AI response based on a given prompt using the OpenAI API.
- `dry_run::Bool=false`: If `true`, skips sending the messages to the model (for debugging, often used with `return_all=true`).
- `conversation`: An optional vector of `AbstractMessage` objects representing the conversation history. If not provided, it is initialized as an empty vector.
- `streamcallback`: A callback function to handle streaming responses. Can be simply `stdout` or a `StreamCallback` object. See `?StreamCallback` for details.
Note: We configure the `StreamCallback` (and necessary `api_kwargs`) for you, unless you specify the `flavor`. See `?configure_callback!` for details.
- `http_kwargs`: A named tuple of HTTP keyword arguments.
- `api_kwargs`: A named tuple of API keyword arguments. Useful parameters include:
- `temperature`: A float representing the temperature for sampling (ie, the amount of "creativity"). Often defaults to `0.7`.
10 changes: 8 additions & 2 deletions src/streaming.jl
@@ -85,6 +85,9 @@ msg = aigenerate("Count from 1 to 100."; streamcallback)
streamcallback = PT.StreamCallback(; verbose=true, throw_on_error=true)
msg = aigenerate("Count from 1 to 10."; streamcallback)
```
Note: If you provide a `StreamCallback` object to `aigenerate`, we will configure it and necessary `api_kwargs` via `configure_callback!` unless you specify the `flavor` field.
If you provide a `StreamCallback` with a specific `flavor`, we leave all configuration to the user (eg, you need to provide the correct `api_kwargs`).
"""
@kwdef mutable struct StreamCallback{
T1 <: Any, T2 <: Union{AbstractStreamFlavor, Nothing}} <:
@@ -111,8 +114,11 @@ Base.length(cb::AbstractStreamCallback) = length(cb.chunks)
api_kwargs...)
Configures the callback `cb` for streaming with a given prompt schema.
If no `cb.flavor` is provided, adjusts the `flavor` and the provided `api_kwargs` as necessary.
Eg, for most schemas, we add kwargs like `stream = true` to the `api_kwargs`.
If `cb.flavor` is provided, both `callback` and `api_kwargs` are left unchanged! You need to configure them yourself!
"""
function configure_callback!(cb::T, schema::AbstractPromptSchema;
api_kwargs...) where {T <: StreamCallback}
@@ -216,7 +222,7 @@ Returns a list of `StreamChunk` and the next spillover (if message was incomplet
try
JSON3.read(data)
catch e
verbose && @warn "Cannot parse JSON: $raw_chunk"
verbose && @warn "Cannot parse JSON: $data"
nothing
end
else
@@ -242,7 +248,7 @@ Returns a list of `StreamChunk` and the next spillover (if message was incomplet
try
JSON3.read(data)
catch e
verbose && @warn "Cannot parse JSON: $raw_chunk"
verbose && @warn "Cannot parse JSON: $data"
nothing
end
else
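To make the `flavor` caveat concrete, a minimal sketch (assuming the `AnthropicStream` flavor is accessible via PromptingTools and the `claudeh` alias is registered):

```julia
using PromptingTools
const PT = PromptingTools

# Because `flavor` is set explicitly, `configure_callback!` leaves the callback and
# `api_kwargs` untouched, so required kwargs (eg, `stream = true`) must be passed manually
streamcallback = PT.StreamCallback(; out = stdout, flavor = PT.AnthropicStream())
msg = aigenerate("Count from 1 to 10."; streamcallback,
    model = "claudeh", api_kwargs = (; stream = true))
```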
105 changes: 103 additions & 2 deletions test/llm_anthropic.jl
Original file line number Diff line number Diff line change
@@ -160,6 +160,53 @@ using PromptingTools: call_cost, anthropic_api, function_call_signature,
"content" => [Dict("type" => "text", "text" => "Hello, my name is John",
"cache_control" => Dict("type" => "ephemeral"))])])
@test conversation == expected_output

# Test aiprefill functionality
messages = [
SystemMessage("Act as a helpful AI assistant"),
UserMessage("Hello, what's your name?")
]

# Test with aiprefill
conversation = render(schema, messages; aiprefill = "My name is Claude")
expected_output = (;
system = "Act as a helpful AI assistant",
conversation = [
Dict("role" => "user",
"content" => [Dict("type" => "text", "text" => "Hello, what's your name?")]),
Dict("role" => "assistant",
"content" => [Dict("type" => "text", "text" => "My name is Claude")])
])
@test conversation == expected_output

# Test without aiprefill
conversation_without_prefill = render(schema, messages)
expected_output_without_prefill = (;
system = "Act as a helpful AI assistant",
conversation = [
Dict("role" => "user",
"content" => [Dict("type" => "text", "text" => "Hello, what's your name?")])
])
@test conversation_without_prefill == expected_output_without_prefill

# Test with empty aiprefill
conversation_empty_prefill = render(schema, messages; aiprefill = "")
@test conversation_empty_prefill == expected_output_without_prefill

# Test aiprefill with cache
conversation_with_cache = render(
schema, messages; aiprefill = "My name is Claude", cache = :all)
expected_output_with_cache = (;
system = Dict{String, Any}[Dict("cache_control" => Dict("type" => "ephemeral"),
"text" => "Act as a helpful AI assistant", "type" => "text")],
conversation = [
Dict("role" => "user",
"content" => [Dict("type" => "text", "text" => "Hello, what's your name?",
"cache_control" => Dict("type" => "ephemeral"))]),
Dict("role" => "assistant",
"content" => [Dict("type" => "text", "text" => "My name is Claude")])
])
@test conversation_with_cache == expected_output_with_cache
end

@testset "anthropic_extra_headers" begin
@@ -177,8 +224,12 @@

@test anthropic_extra_headers(has_tools = true, has_cache = true) == [
"anthropic-version" => "2023-06-01",
"anthropic-beta" => "tools-2024-04-04",
"anthropic-beta" => "prompt-caching-2024-07-31"
"anthropic-beta" => "tools-2024-04-04,prompt-caching-2024-07-31"
]
@test anthropic_extra_headers(
has_tools = true, has_cache = true, has_long_output = true) == [
"anthropic-version" => "2023-06-01",
"anthropic-beta" => "tools-2024-04-04,prompt-caching-2024-07-31,max-tokens-3-5-sonnet-2024-07-15"
]
end

@@ -243,6 +294,41 @@ end
"role" => "user", "content" => [Dict("type" => "text", "text" => "Hello World")])]
@test schema2.model_id == "claude-3-5-sonnet-20240620"

# Test aiprefill functionality
schema2 = TestEchoAnthropicSchema(;
response = Dict(
:content => [Dict(:text => "The answer is 42")],
:stop_reason => "stop",
:usage => Dict(:input_tokens => 5, :output_tokens => 4)),
status = 200)

aiprefill = "The answer to the ultimate question of life, the universe, and everything is:"
msg = aigenerate(schema2, UserMessage("What is the answer to everything?"),
model = "claudes", http_kwargs = (; verbose = 3), api_kwargs = (; temperature = 0),
aiprefill = aiprefill)

expected_output = AIMessage(;
content = aiprefill * "The answer is 42" |> strip,
status = 200,
tokens = (5, 4),
finish_reason = "stop",
cost = msg.cost,
run_id = msg.run_id,
sample_id = msg.sample_id,
extras = Dict{Symbol, Any}(),
elapsed = msg.elapsed)

@test msg.content == expected_output.content
@test schema2.inputs.system == "Act as a helpful AI assistant"
@test schema2.inputs.messages == [
Dict("role" => "user",
"content" => [Dict(
"type" => "text", "text" => "What is the answer to everything?")]),
Dict("role" => "assistant",
"content" => [Dict("type" => "text", "text" => aiprefill)])
]
@test schema2.model_id == "claude-3-5-sonnet-20240620"

# With caching
response3 = Dict(
:content => [
@@ -276,6 +362,21 @@ end
## Bad cache
@test_throws AssertionError aigenerate(
schema3, UserMessage("Hello {{name}}"); model = "claudeo", cache = :bad)

# Test that an error is thrown if aiprefill is an empty string
@test_throws AssertionError aigenerate(
AnthropicSchema(),
"Hello World";
model = "claudeh",
aiprefill = ""
)

@test_throws AssertionError aigenerate(
AnthropicSchema(),
"Hello World";
model = "claudeh",
aiprefill = " " # Only whitespace
)
end

@testset "aiextract-Anthropic" begin

2 comments on commit b89eb97

@svilupp (Owner, Author) commented on b89eb97 · Sep 9, 2024


@JuliaRegistrator register

Release notes:

Added

  • Added beta headers to enable long outputs (up to 8K tokens) with Anthropic's Sonnet 3.5 (see ?anthropic_extra_headers).
  • Added an aiprefill kwarg to prefill AI responses with Anthropic's models to improve steerability (see ?aigenerate).

Updated

  • Documentation of aigenerate to make it clear that if streamcallback is provided WITH flavor set, there is no automatic configuration and the user must provide the correct api_kwargs.
  • Grouped Anthropic's beta headers as a comma-separated string as per the latest API specification.

Commits

@JuliaRegistrator commented

Registration pull request created: JuliaRegistries/General/114808

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.53.0 -m "<description of version>" b89eb970f6efa2e19d47faae694e94c77c830c8d
git push origin v0.53.0
