Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

grammars: x{min,max} repetition operator #6640

Merged
merged 36 commits into from
Jun 6, 2024
Merged
Changes from 1 commit
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
0160469
grammars: x{min,max} repetition operator + tweak +/*/? to avoid dupli…
ochafik Apr 12, 2024
f2030e3
grammars: handle `x{n}` and fix `x{n,n}`
ochafik Apr 12, 2024
de0fd3f
grammars: document new repetition operators
ochafik Apr 12, 2024
9d9b5a3
grammars: nit
ochafik Apr 12, 2024
6b5518c
grammars: uniform use of int for min & max
ochafik Apr 12, 2024
0ceb69a
grammars: refactor parser test
ochafik Apr 12, 2024
8938a05
grammar: parsing tests w/ natural pretty print of updated expectations
ochafik Apr 12, 2024
0d7347f
grammars: much prettier print of expectations (+ TEST_GRAMMAR_PARSER_…
ochafik Apr 12, 2024
2e2df72
grammars: improve test pretty print again
ochafik Apr 12, 2024
ffe321d
grammars: pretty print rules and chars
ochafik Apr 12, 2024
a9351b8
grammars: fix copy rule skipping
ochafik Apr 12, 2024
9d8efa5
grammars: disallow `a{,}` (not allowed in regexps)
ochafik Apr 12, 2024
2d98ebf
Update common/grammar-parser.cpp
ochafik Apr 12, 2024
ec91342
grammars: fix copy rule skipping (again) & display of expectations
ochafik Apr 12, 2024
22faba6
grammars: more test cases
ochafik Apr 12, 2024
1fb7787
Merge remote-tracking branch 'origin/master' into grammar-reps
ochafik Apr 15, 2024
15585e0
grammars: update reps parsing to bring ? / * / + closer to before
ochafik Apr 19, 2024
93b754e
json: use new GBNF repetitions{m,n} syntax
ochafik Apr 19, 2024
2ecc2ae
grammars: update performance gotchas w/ repetition advice
ochafik Apr 20, 2024
a9a2983
Merge remote-tracking branch 'origin/master' into grammar-reps
ochafik Apr 21, 2024
d47f537
Update examples/json_schema_to_grammar.py
ochafik Apr 24, 2024
724f879
Update examples/server/public/json-schema-to-grammar.mjs
ochafik Apr 24, 2024
a61281f
grammars: comment on rule repetitions
ochafik Apr 24, 2024
d03c98e
grammars: ensure unambiguous number alternatives
ochafik Apr 24, 2024
21bac1e
grammar: nit typo switched error msgs
ochafik Apr 24, 2024
0c74ad3
grammar: nit numbering in comment
ochafik Apr 24, 2024
218f41f
json: update numeric rule to be unambiguous
ochafik Apr 24, 2024
2813835
Apply suggestions from code review
ochafik Apr 24, 2024
46fe648
Update examples/server/public/json-schema-to-grammar.mjs
ochafik Apr 24, 2024
eb7ccd8
json: fix integral-part
ochafik Apr 24, 2024
3c02508
Merge branch 'grammar-reps' of https://github.com/ochafik/llama.cpp i…
ochafik Apr 24, 2024
476c97d
Merge remote-tracking branch 'origin/master' into grammar-reps
ochafik Apr 30, 2024
990bf57
grammar: add repetition tests
ochafik Apr 30, 2024
d070aee
Merge remote-tracking branch 'origin/master' into grammar-reps
ochafik May 18, 2024
8266b7c
Merge remote-tracking branch 'origin/master' into grammar-reps
ochafik May 21, 2024
2b79d47
Merge remote-tracking branch 'origin/master' into grammar-reps
ochafik Jun 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
grammar: parsing tests w/ natural pretty print of updated expectations
  • Loading branch information
ochafik committed Apr 12, 2024
commit 8938a050ccdab4b18e0fa7869a4639eaa578c811
177 changes: 145 additions & 32 deletions tests/test-grammar-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,37 @@

#include <cassert>

// Returns the enumerator name of a grammar element type as a C string, so
// that test diagnostics print a readable label instead of a raw integer.
// Any value not listed below yields "?".
static const char * type_str(llama_gretype type) {
    if (type == LLAMA_GRETYPE_CHAR) {
        return "LLAMA_GRETYPE_CHAR";
    }
    if (type == LLAMA_GRETYPE_CHAR_NOT) {
        return "LLAMA_GRETYPE_CHAR_NOT";
    }
    if (type == LLAMA_GRETYPE_CHAR_ALT) {
        return "LLAMA_GRETYPE_CHAR_ALT";
    }
    if (type == LLAMA_GRETYPE_CHAR_RNG_UPPER) {
        return "LLAMA_GRETYPE_CHAR_RNG_UPPER";
    }
    if (type == LLAMA_GRETYPE_RULE_REF) {
        return "LLAMA_GRETYPE_RULE_REF";
    }
    if (type == LLAMA_GRETYPE_ALT) {
        return "LLAMA_GRETYPE_ALT";
    }
    if (type == LLAMA_GRETYPE_END) {
        return "LLAMA_GRETYPE_END";
    }
    // Fallback for values outside the set handled above.
    return "?";
}

static void verify_parsing(const char *grammar_bytes, const std::vector<std::pair<std::string, uint32_t>> expected, const std::vector<llama_grammar_element> &expected_rules) {
uint32_t index = 0;
grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_bytes);

auto print_all = [&]() {
fprintf(stderr, "Code to update expectation:\n");
fprintf(stderr, " verify_parsing(R\"\"\"(%s)\"\"\", {\n", grammar_bytes);
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it) {
fprintf(stderr, " {\"%s\", %u},\n", it->first.c_str(), it->second);
}
fprintf(stderr, " }, {\n");
for (auto rule : parsed_grammar.rules) {
for (uint32_t i = 0; i < rule.size(); i++) {
fprintf(stderr, " {%s, %u},\n", type_str(rule[i].type), rule[i].value);
}
}
fprintf(stderr, " });\n");
};
for (auto it = parsed_grammar.symbol_ids.begin(); it != parsed_grammar.symbol_ids.end(); ++it)
{
std::string key = it->first;
Expand All @@ -20,9 +47,11 @@ static void verify_parsing(const char *grammar_bytes, const std::vector<std::pai
// pretty print error message before asserting
if (expected_pair.first != key || expected_pair.second != value)
{
fprintf(stderr, "index: %u\n", index);
fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
fprintf(stderr, "expected_pair != actual_pair\n");
print_all();
}

assert(expected_pair.first == key && expected_pair.second == value);
Expand All @@ -43,9 +72,11 @@ static void verify_parsing(const char *grammar_bytes, const std::vector<std::pai
if (expected_element.type != element.type || expected_element.value != element.value)
{
fprintf(stderr, "index: %u\n", index);
fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
fprintf(stderr, "expected_element: %s, %u\n", type_str(expected_element.type), expected_element.value);
fprintf(stderr, "actual_element: %s, %u\n", type_str(element.type), element.value);
fprintf(stderr, "expected_element != actual_element\n");
fprintf(stderr, "all elements:\n");
print_all();
}

assert(expected_element.type == element.type && expected_element.value == element.value);
Expand All @@ -56,19 +87,71 @@ static void verify_parsing(const char *grammar_bytes, const std::vector<std::pai

int main()
{
verify_parsing(R"""(
root ::= "a"
)""", {
{"root", 0},
}, {
{LLAMA_GRETYPE_CHAR, 97},
{LLAMA_GRETYPE_END, 0},

});

verify_parsing(R"""(
root ::= "a" | [bdx-z] | [^1-3]
)""", {
{"root", 0},
}, {
{LLAMA_GRETYPE_CHAR, 97},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_CHAR, 98},
{LLAMA_GRETYPE_CHAR_ALT, 100},
{LLAMA_GRETYPE_CHAR_ALT, 120},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_CHAR_NOT, 49},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 51},
{LLAMA_GRETYPE_END, 0},
});

verify_parsing(R"""(
root ::= "a"+
)""", {
{"root", 0},
{"root_1", 1},
{"root_2", 2},
{"root_star_3", 3},
}, {
{LLAMA_GRETYPE_RULE_REF, 2},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 97},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 1},
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 1},
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
});

verify_parsing(R"""(
root ::= (expr "=" term "\n")+
expr ::= term ([-+*/] term)*
term ::= [0-9]+
)""", {
{"expr", 2},
{"expr_5", 5},
{"expr_6", 6},
{"expr_7", 7},
{"expr_star_8", 8},
{"root", 0},
{"root_1", 1},
{"root_4", 4},
{"root_star_5", 5},
{"term", 3},
{"term_7", 7},
{"term_10", 10},
{"term_9", 9},
{"term_star_11", 11},
}, {
{LLAMA_GRETYPE_RULE_REF, 4},
{LLAMA_GRETYPE_END, 0},
Expand All @@ -78,31 +161,38 @@ int main()
{LLAMA_GRETYPE_CHAR, 10},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_RULE_REF, 6},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 7},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 10},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 1},
{LLAMA_GRETYPE_RULE_REF, 4},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_RULE_REF, 5},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 1},
{LLAMA_GRETYPE_RULE_REF, 5},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 45},
{LLAMA_GRETYPE_CHAR_ALT, 43},
{LLAMA_GRETYPE_CHAR_ALT, 42},
{LLAMA_GRETYPE_CHAR_ALT, 47},
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 5},
{LLAMA_GRETYPE_RULE_REF, 8},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 6},
{LLAMA_GRETYPE_RULE_REF, 8},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 48},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
{LLAMA_GRETYPE_RULE_REF, 7},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 9},
{LLAMA_GRETYPE_RULE_REF, 11},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 9},
{LLAMA_GRETYPE_RULE_REF, 11},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_CHAR, 48},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
{LLAMA_GRETYPE_END, 0},
});

Expand All @@ -115,18 +205,26 @@ int main()
ws ::= [ \t\n]*
)""", {
{"expr", 2},
{"expr_6", 6},
{"expr_7", 7},
{"ident", 8},
{"ident_10", 10},
{"num", 9},
{"num_11", 11},
{"expr_8", 8},
{"expr_star_9", 9},
{"ident", 10},
{"ident_12", 12},
{"ident_13", 13},
{"ident_star_14", 14},
{"num", 11},
{"num_15", 15},
{"num_16", 16},
{"num_star_17", 17},
{"root", 0},
{"root_1", 1},
{"root_5", 5},
{"root_star_6", 6},
{"term", 4},
{"ws", 3},
{"ws_12", 12},
{"ws_18", 18},
{"ws_19", 19},
{"ws_star_20", 20},
}, {
{LLAMA_GRETYPE_RULE_REF, 5},
{LLAMA_GRETYPE_END, 0},
Expand All @@ -137,13 +235,13 @@ int main()
{LLAMA_GRETYPE_CHAR, 10},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 4},
{LLAMA_GRETYPE_RULE_REF, 7},
{LLAMA_GRETYPE_RULE_REF, 8},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 12},
{LLAMA_GRETYPE_RULE_REF, 19},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 8},
{LLAMA_GRETYPE_RULE_REF, 10},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_RULE_REF, 9},
{LLAMA_GRETYPE_RULE_REF, 11},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_CHAR, 40},
{LLAMA_GRETYPE_RULE_REF, 3},
Expand All @@ -152,47 +250,62 @@ int main()
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 1},
{LLAMA_GRETYPE_RULE_REF, 5},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_RULE_REF, 6},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 1},
{LLAMA_GRETYPE_RULE_REF, 6},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 45},
{LLAMA_GRETYPE_CHAR_ALT, 43},
{LLAMA_GRETYPE_CHAR_ALT, 42},
{LLAMA_GRETYPE_CHAR_ALT, 47},
{LLAMA_GRETYPE_RULE_REF, 4},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 6},
{LLAMA_GRETYPE_RULE_REF, 9},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 7},
{LLAMA_GRETYPE_RULE_REF, 9},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 97},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
{LLAMA_GRETYPE_RULE_REF, 10},
{LLAMA_GRETYPE_RULE_REF, 13},
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 11},
{LLAMA_GRETYPE_RULE_REF, 16},
{LLAMA_GRETYPE_RULE_REF, 3},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 97},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 122},
{LLAMA_GRETYPE_CHAR_ALT, 48},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
{LLAMA_GRETYPE_CHAR_ALT, 95},
{LLAMA_GRETYPE_RULE_REF, 10},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 14},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 12},
{LLAMA_GRETYPE_RULE_REF, 14},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 48},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
{LLAMA_GRETYPE_RULE_REF, 11},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 15},
{LLAMA_GRETYPE_RULE_REF, 17},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 15},
{LLAMA_GRETYPE_RULE_REF, 17},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_CHAR, 48},
{LLAMA_GRETYPE_CHAR_RNG_UPPER, 57},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_CHAR, 32},
{LLAMA_GRETYPE_CHAR_ALT, 9},
{LLAMA_GRETYPE_CHAR_ALT, 10},
{LLAMA_GRETYPE_RULE_REF, 12},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 20},
{LLAMA_GRETYPE_END, 0},
{LLAMA_GRETYPE_RULE_REF, 18},
{LLAMA_GRETYPE_RULE_REF, 20},
{LLAMA_GRETYPE_ALT, 0},
{LLAMA_GRETYPE_END, 0},
});
Expand Down
Loading