Skip to content

Commit

Permalink
Fix oneOf implementation of the json schema spec
Browse files Browse the repository at this point in the history
Implements XOR regex using negative lookaheads.
  • Loading branch information
bparis committed Dec 1, 2023
1 parent 12392f2 commit 7ad7d43
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
11 changes: 10 additions & 1 deletion outlines/text/json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,16 @@ def to_regex(resolver: Resolver, instance: dict):
# one of the given subschemas.
elif "oneOf" in instance:
subregexes = [to_regex(resolver, t) for t in instance["oneOf"]]
return rf"({'|'.join(subregexes)})"

xor_patterns = []
# JSON schema validation ensures there are no overlapping schemas in oneOf
for subregex in subregexes:
other_subregexes = filter(lambda r: r != subregex, subregexes)
other_subregexes_str = "|".join([f"{s}" for s in other_subregexes])
negative_lookahead = f"(?!.*({other_subregexes_str}))"
xor_patterns.append(f"({subregex}){negative_lookahead}")

return rf"({'|'.join(xor_patterns)})"

# The enum keyword is used to restrict a value to a fixed set of values. It
# must be an array with at least one element, where each element is unique.
Expand Down
15 changes: 12 additions & 3 deletions tests/text/test_json_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,19 @@ def test_match_number(pattern, does_match):
(
{
"title": "Foo",
"oneOf": [{"type": "string"}, {"type": "number"}],
"oneOf": [{"type": "string"}, {"type": "number"}, {"type": "boolean"}],
},
rf"({STRING}|{NUMBER})",
[("12.3", True), ('"a"', True), ('1.3"a"', False)],
rf"(({STRING})(?!.*({NUMBER}|{BOOLEAN}))|({NUMBER})(?!.*({STRING}|{BOOLEAN}))|({BOOLEAN})(?!.*({STRING}|{NUMBER})))",
[
("12.3", True),
("true", True),
('"a"', True),
("null", False),
("", False),
("12true", False),
('1.3"a"', False),
('12.3true"a"', False),
],
),
# anyOf
(
Expand Down

0 comments on commit 7ad7d43

Please sign in to comment.