
Commit 289aa48

Move JSON grammar -> regex grammar conversion to the router (#2772)
* Move JSON grammar -> regex grammar conversion to the router

  This change moves the JSON grammar -> regex grammar conversion to the router by adding a
  dependency on the `outlines-core` Rust crate. In contrast to the Python implementation, the
  conversions are not LRU-cached, since they seem to be fast enough:

  simple schema     time:   [5.8293 µs 5.8307 µs 5.8320 µs]
                    change: [-13.166% -12.884% -12.641%] (p = 0.00 < 0.05)
                    Performance has improved.

  complex schema    time:   [14.875 µs 14.881 µs 14.887 µs]
                    change: [-2.1637% -1.9914% -1.7852%] (p = 0.00 < 0.05)
                    Performance has improved.

  Using the schemas from:
  https://github.com/dottxt-ai/outlines-core/blob/main/benchmarks/bench_json_schema.py
1 parent c637d68 commit 289aa48

15 files changed, +108 −64 lines
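The timings quoted in the commit message above are in criterion's report format. As a rough illustration of how such a measurement could be reproduced, here is a hypothetical bench sketch; the criterion harness, the bench target, and the placeholder schema are assumptions and are not part of this commit — only the `outlines_core::json_schema::to_regex` call mirrors the router change further down in this diff.

// Hypothetical criterion bench (illustrative only). The schema below is a
// placeholder, not the "simple schema" from bench_json_schema.py, and the
// criterion dev-dependency is assumed rather than taken from this commit.
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use outlines_core::json_schema::to_regex;
use serde_json::json;

fn bench_simple_schema(c: &mut Criterion) {
    let schema = json!({
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "id": { "type": "integer" }
        },
        "required": ["name", "id"]
    });
    c.bench_function("simple schema", |b| {
        // Time only the schema -> regex conversion, which is what the commit
        // message's numbers describe.
        b.iter(|| to_regex(black_box(&schema), None, black_box(&schema)).unwrap())
    });
}

criterion_group!(benches, bench_simple_schema);
criterion_main!(benches);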

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@ repos:
     hooks:
       - id: check-yaml
       - id: end-of-file-fixer
+        exclude: crate-hashes.json
       - id: trailing-whitespace
         exclude: docs/source/reference/launcher.md
   - repo: https://github.com/psf/black

Cargo.lock

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default.

crate-hashes.json

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+{
+    "git+https://github.com/dottxt-ai/outlines-core.git?rev=ba10c619fc9bf3c487e43f49bdecb95a24bb465c#outlines-core@0.1.0": "1j9dcd831b0bmmjk2n4aag3x47qnqmkpg4gqpvwwyic7744llbfm"
+}
@@ -1,23 +1,23 @@
 {
   "choices": [
     {
-      "finish_reason": "eos_token",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": "{ \"temperature\": [ 26, 30, 33, 29 ] ,\"unit\": \"Fahrenheit\" }",
+        "content": "{ \"unit\": \"fahrenheit\", \"temperature\": [ 72, 79, 88 ] }",
         "role": "assistant"
       }
     }
   ],
-  "created": 1718044128,
+  "created": 1732525803,
   "id": "",
   "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "2.0.5-dev0-native",
+  "object": "chat.completion",
+  "system_fingerprint": "2.4.1-dev0-native",
   "usage": {
-    "completion_tokens": 39,
+    "completion_tokens": 29,
     "prompt_tokens": 136,
-    "total_tokens": 175
+    "total_tokens": 165
   }
 }
Lines changed: 10 additions & 10 deletions
@@ -1,7 +1,7 @@
 {
   "choices": [
     {
-      "finish_reason": "eos_token",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null,
       "message": {
@@ -13,27 +13,27 @@
             "function": {
               "arguments": {
                 "format": "celsius",
-                "location": "Brooklyn"
+                "location": "Brooklyn, New York"
               },
               "description": null,
               "name": "get_current_weather"
             },
-            "id": 0,
+            "id": "0",
             "type": "function"
           }
         ]
       },
       "usage": null
     }
   ],
-  "created": 1712782670,
+  "created": 1732293383,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "2.0.1-native",
+  "model": "meta-llama/Llama-3.1-8B-Instruct",
+  "object": "chat.completion",
+  "system_fingerprint": "2.4.1-dev0-native",
   "usage": {
-    "completion_tokens": 37,
-    "prompt_tokens": 524,
-    "total_tokens": 561
+    "completion_tokens": 30,
+    "prompt_tokens": 615,
+    "total_tokens": 645
   }
 }
Lines changed: 10 additions & 10 deletions
@@ -1,7 +1,7 @@
 {
   "choices": [
     {
-      "finish_reason": "eos_token",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null,
       "message": {
@@ -13,27 +13,27 @@
             "function": {
               "arguments": {
                 "format": "celsius",
-                "location": "Brooklyn"
+                "location": "Brooklyn, New York"
               },
               "description": null,
               "name": "get_current_weather"
             },
-            "id": 0,
+            "id": "0",
             "type": "function"
           }
         ]
       },
       "usage": null
     }
   ],
-  "created": 1712787937,
+  "created": 1732293384,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "2.0.1-native",
+  "model": "meta-llama/Llama-3.1-8B-Instruct",
+  "object": "chat.completion",
+  "system_fingerprint": "2.4.1-dev0-native",
   "usage": {
-    "completion_tokens": 37,
-    "prompt_tokens": 524,
-    "total_tokens": 561
+    "completion_tokens": 30,
+    "prompt_tokens": 615,
+    "total_tokens": 645
   }
 }

integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_function_object.json

Lines changed: 2 additions & 2 deletions
@@ -18,10 +18,10 @@
       "logprobs": null
     }
   ],
-  "created": 1729084854,
+  "created": 1732293254,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.3.2-dev0-native",
+  "system_fingerprint": "2.4.1-dev0-native",
   "usage": null
 }

integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_sea_creatures_stream_required.json

Lines changed: 2 additions & 2 deletions
@@ -19,10 +19,10 @@
       "logprobs": null
     }
   ],
-  "created": 1729084850,
+  "created": 1732293246,
   "id": "",
   "model": "meta-llama/Llama-3.1-8B-Instruct",
   "object": "chat.completion.chunk",
-  "system_fingerprint": "2.3.2-dev0-native",
+  "system_fingerprint": "2.4.1-dev0-native",
   "usage": null
 }

integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json

Lines changed: 7 additions & 6 deletions
@@ -6,22 +6,23 @@
         "role": "assistant",
         "tool_calls": {
           "function": {
-            "arguments": "</s>",
+            "arguments": "<|eot_id|>",
             "name": null
           },
           "id": "",
           "index": 0,
           "type": "function"
         }
       },
-      "finish_reason": "eos_token",
+      "finish_reason": "stop",
       "index": 0,
       "logprobs": null
     }
   ],
-  "created": 1712788218,
+  "created": 1732293235,
   "id": "",
-  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-  "object": "text_completion",
-  "system_fingerprint": "2.0.1-native"
+  "model": "meta-llama/Llama-3.1-8B-Instruct",
+  "object": "chat.completion.chunk",
+  "system_fingerprint": "2.4.1-dev0-native",
+  "usage": null
 }

integration-tests/models/test_grammar_response_format_llama.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@ class Weather(BaseModel):
     called = chat_completion["choices"][0]["message"]["content"]

     assert response.status_code == 200
-    assert called == '{ "temperature": [ 26, 30, 33, 29 ] ,"unit": "Fahrenheit" }'
+    assert called == '{ "unit": "fahrenheit", "temperature": [ 72, 79, 88 ] }'
     assert chat_completion == response_snapshot

integration-tests/models/test_tools_llama.py

Lines changed: 5 additions & 5 deletions
@@ -101,7 +101,7 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
             "function": {
                 "description": None,
                 "name": "get_current_weather",
-                "arguments": {"format": "celsius", "location": "Brooklyn, NY"},
+                "arguments": {"format": "celsius", "location": "Brooklyn, New York"},
             },
         }
     ]
@@ -138,7 +138,7 @@ async def test_flash_llama_grammar_tools_auto(
             "function": {
                 "description": None,
                 "name": "get_current_weather",
-                "arguments": {"format": "celsius", "location": "Brooklyn, NY"},
+                "arguments": {"format": "celsius", "location": "Brooklyn, New York"},
             },
         }
     ]
@@ -219,7 +219,7 @@ async def test_flash_llama_grammar_tools_stream(

     assert (
         tool_calls_generated
-        == '{"function": {"_name": "get_current_weather", "format": "celsius", "location": "Paris, France"}}<|eot_id|>'
+        == '{"function": {"_name": "get_current_weather", "location": "Paris, France", "format": "celsius"}}<|eot_id|>'
     )
     assert count == 28
     assert last_response == response_snapshot
@@ -366,7 +366,7 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_required(
     assert count == 29
     assert (
         tool_calls_generated
-        == '{"function": {"_name": "get_current_weather", "format": "celsius", "location": "San Francisco, CA"}}<|eot_id|>'
+        == '{"function": {"_name": "get_current_weather", "location": "San Francisco, CA", "format": "celsius"}}<|eot_id|>'
     )
     assert last_response == response_snapshot

@@ -465,6 +465,6 @@ async def test_flash_llama_grammar_tools_sea_creatures_stream_function_object(
     assert count == 39
     assert (
         tool_calls_generated
-        == '{"function": {"_name": "get_n_day_weather_forecast", "format": "celsius", "location": "San Francisco, CA", "num_days":3}}<|eot_id|>'
+        == '{"function": {"_name": "get_n_day_weather_forecast", "location": "San Francisco, CA", "format": "celsius", "num_days":3}}<|eot_id|>'
     )
     assert last_response == response_snapshot
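A note on the reordered expected strings above: "location" now precedes "format" in the generated tool-call JSON, matching the order in which the tool schema declares its properties (compare the re-serialized schema in the router/src/infer/chat_template.rs hunk below). The sketch below is illustrative only — it assumes the pinned outlines-core revision from router/Cargo.toml and a hypothetical two-property schema, and the exact regex text is not asserted.

// Illustrative sketch: the regex produced by to_regex appears to constrain
// object keys in the schema's declared order, which would explain why
// "location" now comes before "format" in the streamed tool-call JSON above.
// Placeholder schema, not the integration tests' actual tool definition.
use outlines_core::json_schema::to_regex;
use serde_json::json;

fn main() -> anyhow::Result<()> {
    let schema = json!({
        "type": "object",
        "properties": {
            "location": { "type": "string" },
            "format": { "type": "string", "enum": ["celsius", "fahrenheit"] }
        },
        "required": ["location", "format"]
    });

    let regex = to_regex(&schema, None, &schema)?;
    // Inspect the output: the "location" key pattern shows up before "format".
    println!("{regex}");
    Ok(())
}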

router/Cargo.toml

Lines changed: 2 additions & 0 deletions
@@ -8,6 +8,7 @@ authors.workspace = true
 homepage.workspace = true

 [dependencies]
+anyhow = "1"
 async-trait = "0.1.74"
 async-stream = "0.3.5"
 axum = { version = "0.7", features = ["json"] }
@@ -22,6 +23,7 @@ metrics-exporter-prometheus = { workspace = true }
 nohash-hasher = "0.2.0"
 opentelemetry = { version = "0.20.0", features = ["rt-tokio"] }
 opentelemetry-otlp = "0.13.0"
+outlines-core = { git = "https://github.com/dottxt-ai/outlines-core.git", rev = "ba10c619fc9bf3c487e43f49bdecb95a24bb465c" }
 rand = "0.8.5"
 reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"

router/src/infer/chat_template.rs

Lines changed: 1 addition & 1 deletion
@@ -804,7 +804,7 @@ mod tests {
         let tool_prompt = "This default prompt will be used".to_string();
         let tools_and_prompt = Some((tools, tool_prompt));
         let result = ct.apply(msgs, tools_and_prompt);
-        let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"properties\":{\"format\":{\"description\":\"The temperature unit to use. Infer this from the users location.\",\"enum\":[\"celsius\",\"fahrenheit\"],\"type\":\"string\"},\"location\":{\"description\":\"The city and state, e.g. San Francisco, CA\",\"type\":\"string\"}},\"required\":[\"location\",\"format\"],\"type\":\"object\"}}}]\nThis default prompt will be used [/INST]".to_string();
+        let expected = "<s>[INST] I'd like to show off how chat templating works! [/INST]Great! How can I help you today?</s> [INST] Just testing\n---\n[{\"type\":\"function\",\"function\":{\"description\":\"Get the current weather\",\"name\":\"get_current_weather\",\"arguments\":{\"type\":\"object\",\"properties\":{\"location\":{\"type\":\"string\",\"description\":\"The city and state, e.g. San Francisco, CA\"},\"format\":{\"type\":\"string\",\"enum\":[\"celsius\",\"fahrenheit\"],\"description\":\"The temperature unit to use. Infer this from the users location.\"}},\"required\":[\"location\",\"format\"]}}}]\nThis default prompt will be used [/INST]".to_string();
         assert_eq!(result.unwrap(), expected);
     }


router/src/validation.rs

Lines changed: 10 additions & 5 deletions
@@ -9,6 +9,7 @@ use crate::{PyTokenizer, Tokenizer};
 use base64::{engine::general_purpose::STANDARD, Engine};
 use image::{ImageFormat, ImageReader};
 use jsonschema::{Draft, JSONSchema};
+use outlines_core::json_schema::to_regex as json_schema_to_regex;
 use rand::{thread_rng, Rng};
 use serde_json::Value;
 use std::io::Cursor;
@@ -351,11 +352,13 @@ impl Validation {
                         "Grammar must have a 'properties' field".to_string(),
                     ))?;

-                    // Serialize json to string
-                    ValidGrammar::Json(
-                        serde_json::to_string(&json)
-                            .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?,
-                    )
+                    // Do compilation in the router for performance. In the future, we
+                    // should also move regex -> automaton compilation in the router,
+                    // but this is not yet supported in pure Rust by outlines-core.
+                    let grammar_regex = json_schema_to_regex(&json, None, &json)
+                        .map_err(ValidationError::RegexFromSchema)?;
+
+                    ValidGrammar::Regex(grammar_regex.to_string())
                 }
                 GrammarType::Regex(regex) => ValidGrammar::Regex(regex),
             };
@@ -810,6 +813,8 @@ pub enum ValidationError {
     Grammar,
     #[error("grammar is not valid: {0}")]
     InvalidGrammar(String),
+    #[error("cannot compile regex from schema: {0}")]
+    RegexFromSchema(anyhow::Error),
     #[error("base64 encoding is invalid: {0}")]
     InvalidBase64(#[from] base64::DecodeError),
     #[error("invalid image: {0}")]
