diff --git a/docs/source/basic_tutorials/using_guidance.md b/docs/source/basic_tutorials/using_guidance.md
index 7e33e9a2a12..dfa3f0e49b1 100644
--- a/docs/source/basic_tutorials/using_guidance.md
+++ b/docs/source/basic_tutorials/using_guidance.md
@@ -311,13 +311,11 @@ print(chat.choices[0].message.tool_calls)
 ```
 
-### OpenAI Integration
+### OpenAI integration
 
-Text Generation Inference (TGI) offers seamless integration with OpenAI's client libraries, allowing developers to interact with TGI's Messages API and Tool functions in a familiar way. This compatibility simplifies the implementation of advanced features, such as tools and grammar, within your applications using OpenAI’s client.
+TGI exposes an OpenAI-compatible API, which means you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
 
-Previously, TGI handled tool selection differently than OpenAI’s API—`tool_choice="auto"` would always pick a tool for you. However, as of the latest version, TGI now mimics OpenAI’s behavior more closely: `tool_choice="auto"` selects a tool only when the model deems it necessary, aligning with how OpenAI's API works. This enhancement ensures a smoother and more predictable integration experience.
-
-Additionally, error notifications like `notify_error`, which previously indicated that no tool was chosen, are no longer returned. Instead, TGI will proceed with generating a response as if no tool was selected, further improving consistency with OpenAI's API.
+However, there are some minor differences in the API. For example, `tool_choice="auto"` will ALWAYS choose a tool for you, whereas OpenAI's API only chooses a tool when the model considers one necessary.
 
 ```python
 from openai import OpenAI
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
index 7f7f7884431..0cd3c67f1ff 100644
--- a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_insufficient_information.json
@@ -1,26 +1,38 @@
 {
   "choices": [
     {
-      "finish_reason": "stop",
+      "finish_reason": "eos_token",
       "index": 0,
       "logprobs": null,
       "message": {
-        "content": "There is a huge storm in the ocean",
+        "content": null,
         "name": null,
         "role": "assistant",
-        "tool_calls": null
+        "tool_calls": [
+          {
+            "function": {
+              "arguments": {
+                "error": "Cannot get current weather forecast from specified location and temperature unit. Please try again with different options."
+              },
+              "description": null,
+              "name": "notify_error"
+            },
+            "id": 0,
+            "type": "function"
+          }
+        ]
       },
       "usage": null
     }
   ],
-  "created": 1727796440,
+  "created": 1712852597,
   "id": "",
-  "model": "meta-llama/Llama-3.1-8B-Instruct",
-  "object": "chat.completion",
-  "system_fingerprint": "2.3.1-dev0-native",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "1.4.5-native",
   "usage": {
-    "completion_tokens": 25,
-    "prompt_tokens": 600,
-    "total_tokens": 625
+    "completion_tokens": 39,
+    "prompt_tokens": 496,
+    "total_tokens": 535
   }
 }
diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py
index 3c222b72815..c337afa1bc0 100644
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -225,6 +225,10 @@ async def test_flash_llama_grammar_tools_insufficient_information(
         tools=tools,
         tool_choice="auto",
         messages=[
+            {
+                "role": "system",
+                "content": "STRICTLY ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
+            },
             {
                 "role": "user",
                 "content": "Tell me a story about 3 sea creatures",
@@ -233,5 +237,8 @@
         stream=False,
     )
 
-    assert responses.choices[0].message.content == "There is a huge storm in the ocean"
+    assert responses.choices[0].message.content is None
+    assert (
+        responses.choices[0].message.tool_calls[0]["function"]["name"] == "notify_error"
+    )
     assert responses == response_snapshot
diff --git a/router/src/server.rs b/router/src/server.rs
index fb06b245a1a..73b5432187d 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1246,33 +1246,17 @@ async fn chat_completions(
             if let Value::Object(ref mut props) = arguments {
                 props.remove("_name");
             }
-            match name.as_str() {
-                "notify_error" => {
-                    // parse the error message
-                    let error_message = arguments
-                        .get("error")
-                        .and_then(Value::as_str)
-                        .ok_or_else(|| {
-                            InferError::ToolError(
-                                "No error message found in generated text".to_string(),
-                            )
-                        })?
-                        .to_string();
-                    (None, Some(error_message))
-                }
-                _ => {
-                    let tool_calls = vec![ToolCall {
-                        id: "0".to_string(),
-                        r#type: "function".to_string(),
-                        function: FunctionDefinition {
-                            description: None,
-                            name,
-                            arguments,
-                        },
-                    }];
-                    (Some(tool_calls), None)
-                }
-            }
+
+            let tool_calls = vec![ToolCall {
+                id: "0".to_string(),
+                r#type: "function".to_string(),
+                function: FunctionDefinition {
+                    description: None,
+                    name,
+                    arguments,
+                },
+            }];
+            (Some(tool_calls), None)
         } else {
             (None, Some(generation.generated_text))
         };
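
As a usage illustration of the behavior this diff restores: with `tool_choice="auto"`, TGI always emits a tool call, and a request that the available tools cannot serve comes back as a `notify_error` call instead of plain text content. The sketch below shows client-side handling through OpenAI's library, in the style of the docs example above; the `base_url`, `api_key`, model name, and tool schema are illustrative assumptions, not part of this change.

```python
from openai import OpenAI

# Assumed local TGI endpoint and placeholder model name -- adjust for your deployment.
client = OpenAI(base_url="http://localhost:3000/v1", api_key="-")

# A single illustrative tool; the point is what happens when the user's
# request does not match it.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather for a location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {"type": "string", "description": "City and state"},
                    "format": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location", "format"],
            },
        },
    }
]

chat = client.chat.completions.create(
    model="tgi",
    messages=[{"role": "user", "content": "Tell me a story about 3 sea creatures"}],
    tools=tools,
    tool_choice="auto",  # with this change, TGI still forces a tool call here
)

# Because a tool call is always emitted, content is None and tool_calls is set.
call = chat.choices[0].message.tool_calls[0]
if call.function.name == "notify_error":
    # The model could not satisfy the request with the available tools; the
    # error text lives in the arguments payload. Depending on the TGI version,
    # arguments may arrive as an object rather than the JSON string the OpenAI
    # client normally expects, so inspect it before parsing.
    print("model reported an error:", call.function.arguments)
else:
    print("dispatch to:", call.function.name, call.function.arguments)
```

The design tradeoff is that callers must branch on the tool name: every response is a tool call, so `notify_error` is the in-band signal that none of the real tools fit the request.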
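
For completeness, here is a sketch of the same interaction over raw HTTP, mirroring the response envelope that the rewritten block in `router/src/server.rs` and the updated snapshot describe. The endpoint URL is again an assumed local deployment, and the serialization of the tool call `id` has varied across versions, so the sketch does not depend on it.

```python
import requests

# Assumed local TGI endpoint.
url = "http://localhost:3000/v1/chat/completions"

payload = {
    "model": "tgi",
    "messages": [
        {
            "role": "system",
            "content": "STRICTLY ONLY RESPOND IF THE USER ASKS A WEATHER RELATED QUESTION",
        },
        {"role": "user", "content": "Tell me a story about 3 sea creatures"},
    ],
    # Same illustrative weather tool as in the previous sketch, trimmed down.
    "tools": [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {"location": {"type": "string"}},
                    "required": ["location"],
                },
            },
        }
    ],
    "tool_choice": "auto",
}

body = requests.post(url, json=payload, timeout=60).json()

# Every response now carries tool_calls; notify_error replaces plain content
# when no real tool fits the request (see the snapshot in this diff).
call = body["choices"][0]["message"]["tool_calls"][0]
assert call["type"] == "function"
if call["function"]["name"] == "notify_error":
    print("error:", call["function"]["arguments"]["error"])
else:
    print("dispatch to:", call["function"]["name"], call["function"]["arguments"])
```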