Skip to content

Commit b204a5a

Browse files
authored
gpt-oss: implement harmony parsing (#15181)
* model : add harmony parser for gpt-oss
* gpt-oss : fix grammar trigger from causing empty stack
* gpt-oss: tweak the grammar trigger again
* gpt-oss : add support for recipient in role header
* gpt-oss : fix ungrouped tool calls in grammar
* gpt-oss : loosen function name matching during parse
* gpt-oss : clean up workarounds
* gpt-oss : add template tests
* gpt-oss : simulate thinking and tool call tags
* gpt-oss : undo think tags when reasoning_format is none
* gpt-oss : set special tokens back to user defined
* gpt-oss : update openai-gpt-oss template
* server : filter out harmony thought messages
* gpt-oss : simplify parsing
1 parent 646944c commit b204a5a

File tree

7 files changed: +672 additions, -12 deletions (lines changed)

common/chat.cpp

Lines changed: 155 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -296,6 +296,7 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
296296
}
297297
if (!msg.reasoning_content.empty()) {
298298
jmsg["reasoning_content"] = msg.reasoning_content;
299+
jmsg["thinking"] = msg.reasoning_content; // gpt-oss
299300
}
300301
if (!msg.tool_name.empty()) {
301302
jmsg["name"] = msg.tool_name;
@@ -1338,16 +1339,164 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
13381339
data.prompt = prompt;
13391340
data.format = COMMON_CHAT_FORMAT_GPT_OSS;
13401341

1341-
// TODO: support tool calls in GPT-OSS?
1342+
// These special tokens are required to parse properly, so we include them
1343+
// even if parse_tool_calls is false.
1344+
data.preserved_tokens = {
1345+
"<|channel|>",
1346+
"<|constrain|>",
1347+
"<|message|>",
1348+
"<|start|>",
1349+
"<|end|>",
1350+
};
1351+
1352+
if (inputs.tools.is_array() && !inputs.tools.empty()) {
1353+
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1354+
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
1355+
// tool calls can appear in commentary or analysis channels
1356+
auto channel = builder.add_rule("channel", "\"<|channel|>\" ( \"commentary\" | \"analysis\" )");
1357+
1358+
std::vector<std::string> tool_rules_recipient_in_role;
1359+
std::vector<std::string> tool_rules_recipient_in_channel;
1360+
foreach_function(inputs.tools, [&](const json & tool) {
1361+
const auto & function = tool.at("function");
1362+
std::string name = function.at("name");
1363+
auto parameters = function.at("parameters");
1364+
builder.resolve_refs(parameters);
1365+
1366+
tool_rules_recipient_in_role.push_back(
1367+
builder.add_rule(name + "-call",
1368+
"\"" + name + "\"" + channel + " \" <|constrain|>json\"? \"<|message|>\" " +
1369+
builder.add_schema(name + "-args", parameters)
1370+
)
1371+
);
1372+
1373+
tool_rules_recipient_in_channel.push_back(
1374+
builder.add_rule(name + "-call",
1375+
"\"" + name + "\"" + " \" <|constrain|>json\"? \"<|message|>\" " +
1376+
builder.add_schema(name + "-args", parameters)
1377+
)
1378+
);
1379+
});
1380+
1381+
auto recipient_in_role = builder.add_rule("recipient_in_role",
1382+
"\"<|start|>assistant\"? \" to=functions.\" ( " +
1383+
string_join(tool_rules_recipient_in_role, " | ") + " )"
1384+
);
1385+
1386+
auto recipient_in_channel = builder.add_rule("recipient_in_channel",
1387+
channel + " \" to=functions.\" ( " +
1388+
string_join(tool_rules_recipient_in_channel, " | ") + " )"
1389+
);
1390+
1391+
builder.add_rule("root", recipient_in_role + " | " + recipient_in_channel);
1392+
1393+
// Trigger on tool calls that appear in the commentary channel
1394+
data.grammar_triggers.push_back({
1395+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1396+
"<\\|channel\\|>(commentary|analysis) to"
1397+
});
1398+
1399+
// Trigger tool calls that appear in the role section, either at the
1400+
// start or in the middle.
1401+
data.grammar_triggers.push_back({
1402+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1403+
"^ to"
1404+
});
1405+
1406+
data.grammar_triggers.push_back({
1407+
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1408+
"<\\|start\\|>assistant to"
1409+
});
1410+
});
1411+
}
13421412

13431413
return data;
13441414
}
13451415
static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
1346-
// TODO @ngxson : this won't work with --special enabled, we should fix that
1347-
builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|start|>assistant<|channel|>final<|message|>");
1348-
if (!builder.syntax().parse_tool_calls) {
1349-
builder.add_content(builder.consume_rest());
1350-
return;
1416+
static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
1417+
static const std::string recipient("(?: to=functions\\.([^<\\s]+))");
1418+
1419+
static const common_regex start_regex("<\\|start\\|>assistant");
1420+
static const common_regex analysis_regex("<\\|channel\\|>analysis");
1421+
static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
1422+
static const common_regex preamble_regex("<\\|channel\\|>commentary");
1423+
static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
1424+
static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");
1425+
1426+
auto consume_end = [&](bool include_end = false) {
1427+
if (auto res = builder.try_find_literal("<|end|>")) {
1428+
return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
1429+
}
1430+
return builder.consume_rest();
1431+
};
1432+
1433+
auto handle_tool_call = [&](const std::string & name) {
1434+
if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
1435+
if (builder.syntax().parse_tool_calls) {
1436+
if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
1437+
throw common_chat_msg_partial_exception("incomplete tool call");
1438+
}
1439+
} else if (args->is_partial) {
1440+
throw common_chat_msg_partial_exception("incomplete tool call");
1441+
}
1442+
}
1443+
};
1444+
1445+
auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
1446+
auto match = regex.search(input, 0, true);
1447+
if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
1448+
return match;
1449+
}
1450+
return std::nullopt;
1451+
};
1452+
1453+
do {
1454+
auto header_start_pos = builder.pos();
1455+
auto content_start = builder.try_find_literal("<|message|>");
1456+
if (!content_start) {
1457+
throw common_chat_msg_partial_exception("incomplete header");
1458+
}
1459+
1460+
auto header = content_start->prelude;
1461+
1462+
if (auto match = regex_match(tool_call1_regex, header)) {
1463+
auto group = match->groups[1];
1464+
auto name = header.substr(group.begin, group.end - group.begin);
1465+
handle_tool_call(name);
1466+
continue;
1467+
}
1468+
1469+
if (auto match = regex_match(tool_call2_regex, header)) {
1470+
auto group = match->groups[2];
1471+
auto name = header.substr(group.begin, group.end - group.begin);
1472+
handle_tool_call(name);
1473+
continue;
1474+
}
1475+
1476+
if (regex_match(analysis_regex, header)) {
1477+
builder.move_to(header_start_pos);
1478+
if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
1479+
builder.add_content(consume_end(true));
1480+
} else {
1481+
builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
1482+
}
1483+
continue;
1484+
}
1485+
1486+
if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
1487+
builder.add_content(consume_end());
1488+
continue;
1489+
}
1490+
1491+
// Possibly a malformed message, attempt to recover by rolling
1492+
// back to pick up the next <|start|>
1493+
LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
1494+
builder.move_to(header_start_pos);
1495+
} while (builder.try_find_regex(start_regex, std::string::npos, false));
1496+
1497+
auto remaining = builder.consume_rest();
1498+
if (!remaining.empty()) {
1499+
LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
13511500
}
13521501
}
13531502

0 commit comments

Comments
 (0)