
[Model] tool calling support for ibm-granite/granite-20b-functioncalling #8339

Merged 35 commits into vllm-project:main from wseaton:granite-fc on Oct 29, 2024

Changes from 18 commits

Commits
58e468d
initial commit
wseaton Sep 10, 2024
d4cc66b
remove original part of template
wseaton Sep 10, 2024
742704f
clean up debug logging
wseaton Sep 10, 2024
410ff88
update docs; raise not implemented
wseaton Sep 10, 2024
a5e9a1f
fix lints
wseaton Sep 10, 2024
3d28b6d
sort imports
wseaton Sep 10, 2024
74c8cc7
yapf fixes
wseaton Sep 10, 2024
23a4ca3
another format change
wseaton Sep 10, 2024
1659236
update example prompt to be conversational instead of single turn
wseaton Sep 10, 2024
b1e09a8
update docs for template; link paper
wseaton Sep 10, 2024
e82b2a6
Merge remote-tracking branch 'upstream/main' into granite-fc
wseaton Sep 27, 2024
6b0eebb
add granite to test config
wseaton Sep 27, 2024
346d554
fixup json
wseaton Sep 27, 2024
24e49b8
Add stream support for Granite 20b Tool Use
maxdebayser Sep 27, 2024
86dead8
fix docs
maxdebayser Sep 27, 2024
113fbb6
more robust whitespace handling
maxdebayser Sep 28, 2024
acecb6d
remove reference to defunct granite parser
wseaton Oct 2, 2024
86e8466
remove old template
wseaton Oct 2, 2024
43c8078
Update tests/tool_use/utils.py to remove dupe
wseaton Oct 7, 2024
6bf4a41
Merge remote-tracking branch 'upstream/main' into granite-fc
wseaton Oct 7, 2024
2e969c7
fix double import
wseaton Oct 7, 2024
e18219c
add completion request arg to abstract method
wseaton Oct 7, 2024
9a0321b
formatting fixes
wseaton Oct 7, 2024
078ab85
import sorts
wseaton Oct 7, 2024
0a031bf
appease yapf
wseaton Oct 7, 2024
c6a6b56
Apply suggestions from code review
wseaton Oct 16, 2024
defed52
remove redundant indents; add type hints to utils
wseaton Oct 17, 2024
5b78cea
Merge branch 'granite-fc' of github.com:wseaton/vllm into granite-fc
wseaton Oct 17, 2024
2d3b8fe
formatting churn
wseaton Oct 17, 2024
fe13b72
Merge branch 'main' into granite-fc
wseaton Oct 21, 2024
84e93bf
change to old style type aliasing
wseaton Oct 21, 2024
ae55760
Merge branch 'granite-fc' of github.com:wseaton/vllm into granite-fc
wseaton Oct 21, 2024
1277f0b
Doc reformat, add back missing line
wseaton Oct 25, 2024
738d003
Temporarily disable the granite20b-fc test task
wseaton Oct 25, 2024
a6e1bf9
Merge branch 'vllm-project:main' into granite-fc
wseaton Oct 28, 2024
15 changes: 12 additions & 3 deletions docs/source/serving/openai_compatible_server.md
@@ -157,7 +157,7 @@ vLLM will use guided decoding to ensure the response matches the tool parameter…
To enable this feature, you should set the following flags:
* `--enable-auto-tool-choice` -- **mandatory** for auto tool choice; tells vLLM that you want to enable the model to generate its own tool calls
when it deems appropriate.
-* `--tool-call-parser` -- select the tool parser to use - currently either `hermes`, `mistral` or `llama3_json`. Additional tool parsers
+* `--tool-call-parser` -- select the tool parser to use - currently either `hermes`, `mistral`, `llama3_json` or `granite-20b-fc`. Additional tool parsers
will continue to be added in the future.
* `--chat-template` -- **optional** for auto tool choice; the path to the chat template which handles `tool`-role messages and `assistant`-role messages
that contain previously generated tool calls. Hermes, Mistral and Llama models have tool-compatible chat templates in their
@@ -167,7 +167,9 @@ from HuggingFace; and you can find an example of this in a `tokenizer_config.json`

If your favorite tool-calling model is not supported, please feel free to contribute a parser & tool use chat template!
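To put the flags together, here is a minimal launch sketch. It assumes the `vllm serve` console entry point is available and uses the Granite model and template documented further down; any supported parser/template pair works the same way:

```python
# Minimal sketch: start the OpenAI-compatible server with auto tool choice.
# The flags are the ones documented above; the model and chat template are
# illustrative choices, not requirements.
import subprocess

subprocess.run([
    "vllm", "serve", "ibm-granite/granite-20b-functioncalling",
    "--enable-auto-tool-choice",
    "--tool-call-parser", "granite-20b-fc",
    "--chat-template", "examples/tool_chat_template_granite_20b_fc.jinja",
])
```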

-#### Hermes Models
+### Supported Models
+
+#### Hermes
All Nous Research Hermes-series models newer than Hermes 2 Pro should be supported.
* `NousResearch/Hermes-2-Pro-*`
* `NousResearch/Hermes-2-Theta-*`
@@ -179,7 +181,7 @@ step in their creation_.

Flags: `--tool-call-parser hermes`

-#### Mistral Models
+#### Mistral
Supported models:
* `mistralai/Mistral-7B-Instruct-v0.3` (confirmed)
* Additional mistral function-calling models are compatible as well.
@@ -198,6 +200,7 @@ when tools are provided, that results in much better reliability when working wi…

Recommended flags: `--tool-call-parser mistral --chat-template examples/tool_chat_template_mistral_parallel.jinja`


#### Llama Models
Supported models:
* `meta-llama/Meta-Llama-3.1-8B-Instruct`
@@ -218,4 +221,10 @@ it works better with vLLM.

Recommended flags: `--tool-call-parser llama3_json --chat-template examples/tool_chat_template_llama3_json.jinja`

#### IBM Granite

Supported models:
* `ibm-granite/granite-20b-functioncalling`

Flags: `--tool-call-parser granite-20b-fc`
`examples/tool_chat_template_granite_20b_fc.jinja`: a modified version of the original Huggingface chat template, which is not vLLM-compatible as published. It blends function-description elements from the Hermes template and follows the same system prompt as the "Response Generation" mode from [the paper](https://arxiv.org/abs/2407.00121). Parallel function calls are supported.
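A hedged end-to-end sketch of calling a server started with the flags above follows; the server URL, API key, and weather tool are illustrative assumptions, not part of this PR. The template serializes each assistant tool call as `<function_call> {"name": ..., "arguments": ...}`, and the `granite-20b-fc` parser converts those back into OpenAI-style `tool_calls`:

```python
# Minimal client sketch (assumptions: server at localhost:8000 started with
# --enable-auto-tool-choice --tool-call-parser granite-20b-fc and the granite
# chat template; the weather tool is purely illustrative).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",  # hypothetical example tool
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string", "description": "City name"},
            },
            "required": ["city"],
        },
    },
}]

resp = client.chat.completions.create(
    model="ibm-granite/granite-20b-functioncalling",
    messages=[{"role": "user", "content": "What's the weather in Boston?"}],
    tools=tools,
    tool_choice="auto",
)

# With auto tool choice, any generated call surfaces here as tool_calls.
print(resp.choices[0].message.tool_calls)
```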
130 changes: 130 additions & 0 deletions examples/tool_chat_template_granite_20b_fc.jinja
@@ -0,0 +1,130 @@
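{#- json_to_python_type: map a JSON Schema type spec to a Python type annotation string for the function signatures shown to the model. -#}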
{%- macro json_to_python_type(json_spec) %}
{%- set basic_type_map = {
"string": "str",
"number": "float",
"integer": "int",
"boolean": "bool"
} %}

{%- if basic_type_map[json_spec.type] is defined %}
{{- basic_type_map[json_spec.type] }}
{%- elif json_spec.type == "array" %}
{{- "list[" + json_to_python_type(json_spec|items) + "]" }}
{%- elif json_spec.type == "object" %}
{%- if json_spec.additionalProperties is defined %}
{{- "dict[str, " + json_to_python_type(json_spec.additionalProperties) + ']' }}
{%- else %}
{{- "dict" }}
{%- endif %}
{%- elif json_spec.type is iterable %}
{{- "Union[" }}
{%- for t in json_spec.type %}
{{- json_to_python_type({"type": t}) }}
{%- if not loop.last %}
{{- "," }}
{%- endif %}
{%- endfor %}
{{- "]" }}
{%- else %}
{{- "Any" }}
{%- endif %}
{%- endmacro %}

{%- if not full_function_description is defined %}
{%- set full_function_description = false %}
{%- endif %}
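{#- full_description renders a Python-style signature and docstring for a tool; simple_description emits only the plain description text. -#}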

{%- macro full_description(tool) %}
{{- tool.name + '(' }}
{%- if tool.parameters is defined %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{{- param_name + ": " + json_to_python_type(param_fields) }}
{%- if not loop.last %}
{{- ", " }}
{%- endif %}
{%- endfor %}
{%- endif %}
{{- ")" }}
{%- if tool.return is defined %}
{{- " -> " + json_to_python_type(tool.return) }}
{%- endif %}
{{- " - " + tool.description + "\n\n" }}
{%- if tool.parameters is defined %}
{%- for param_name, param_fields in tool.parameters.properties|items %}
{%- if loop.first %}
{{- " Args:\n" }}
{%- endif %}
{{- " " + param_name + "(" + json_to_python_type(param_fields) + "): " + param_fields.description|trim }}
{%- endfor %}
{%- endif %}
{%- if tool.return is defined and tool.return.description is defined %}
{{- "\n Returns:\n " + tool.return.description }}
{%- endif %}
{{- '"' }}
{%- endmacro %}

{%- macro simple_description(tool) %}
{{- tool.description + '"' }}
{%- endmacro %}

{%- macro function_description(tool) %}
{%- if full_function_description %}
{{- full_description(tool) }}
{%- else %}
{{- simple_description(tool) }}
{%- endif %}
{%- endmacro %}
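{#- Use the conversation's system prompt if one is given; otherwise fall back to the default "Response Generation" prompt from the Granite paper. -#}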

{%- if messages[0]["role"] == "system" %}
{%- set sys_prompt = messages[0]["content"] %}
{%- set loop_messages = messages[1:] %}
{%- else %}
{%- set loop_messages = messages %}
{% set sys_prompt = 'You are a helpful assistant with access to the following function calls. Your task is to understand the given conversation with function calls and responses and generate natural language response as the ASSISTANT to continue the conversation. You may use the following function calls to understand how to respond to the user query.' %}
{%- endif %}

{{ 'SYSTEM: ' + sys_prompt }}
{% if tools is iterable and tools | length > 0 %}
<|function_call_library|>
{%- for tool in tools %}
{%- if tool.function is defined %}
{%- set tool = tool.function %}
{%- endif %}
{{- '{"name": "' + tool.name + '", ' }}
{{- '"description": "' + function_description(tool) }}
{{- ', "parameters": ' }}
{%- if not tool.parameters is defined or tool.parameters.properties | length == 0 %}
{{- "{}" }}
{%- else %}
{{- tool.parameters|tojson }}
{%- endif %}
{{- "}" }}
{%- if not loop.last %}
{{- "\n" }}
{%- endif %}
{%- endfor %}
If none of the functions are relevant or the given question lacks the parameters required by the function, please output \"<function_call> {\"name\": \"no_function\", \"arguments\": {}}\".
{%- endif %}
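{#- Replay the conversation in Granite's SYSTEM/USER/ASSISTANT format; assistant tool calls are serialized as <function_call> JSON objects. -#}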



{% for message in loop_messages %}
{% if message['role'] == 'user' %}
{{- '\nUSER: ' + message['content'] }}
{% elif message['role'] == 'assistant' and message.tool_calls is defined %}
{{- '\nASSISTANT:' }}
{% for tc in message.tool_calls %}
{{- '<function_call> ' + {'name': tc.function.name, 'arguments': tc.function.arguments}|tojson }}
{% endfor %}
{{- '<|endoftext|>' }}
{% elif message['role'] == 'assistant' %}
{{- '\nASSISTANT: ' + message['content'] + ' <|endoftext|>' }}
{% elif message['role'] == 'tool' %}
{{- '<function_response> ' + message['content'] }}
{%- else %}
{{- raise_exception("Unexpected combination of role and message content") }}
{% endif %}
{% if loop.last and add_generation_prompt %}
{{- '\nASSISTANT: ' }}
{% endif %}
{% endfor %}
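To inspect what this template produces without starting a server, it can be rendered with plain Jinja. A minimal sketch, assuming the file path above and Jinja2 >= 3.1 (for the `|items` filter the template uses); `raise_exception` must be supplied because vanilla Jinja does not define it:

```python
# Sketch: render the Granite chat template standalone.
import jinja2


def raise_exception(message):
    # HF-style chat templates expect this helper to exist.
    raise jinja2.exceptions.TemplateError(message)


env = jinja2.Environment(loader=jinja2.FileSystemLoader("examples"))
env.globals["raise_exception"] = raise_exception

template = env.get_template("tool_chat_template_granite_20b_fc.jinja")
prompt = template.render(
    messages=[{"role": "user", "content": "What is the weather in Boston?"}],
    tools=[],  # pass OpenAI-style tool dicts here to see the function library
    add_generation_prompt=True,
)
print(prompt)  # SYSTEM: ... USER: ... ASSISTANT:
```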
16 changes: 16 additions & 0 deletions tests/tool_use/utils.py
@@ -73,6 +73,14 @@ def ensure_system_prompt(messages: List[Dict[str, Any]],
        "supports_parallel":
        False,
    },
    "granite": {
        "model":
        "ibm-granite/granite-20b-functioncalling",
        "arguments": [
            "--tool-call-parser", "granite", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_granite.jinja")
        ],
    },
"mistral": {
"model":
"mistralai/Mistral-7B-Instruct-v0.3",
@@ -87,6 +95,14 @@ def ensure_system_prompt(messages: List[Dict[str, Any]],
        "call the tool. Otherwise, answer the user's query directly "
        "without calling a tool. DO NOT CALL A TOOL THAT IS IRRELEVANT "
        "to the user's question - just respond to it normally."
    },
    "granite20b": {
        "model":
        "ibm-granite/granite-20b-functioncalling",
        "arguments": [
            "--tool-call-parser", "granite-20b-fc", "--chat-template",
            str(VLLM_PATH / "examples/tool_chat_template_granite_20b_fc.jinja")
        ],
    }
}

2 changes: 1 addition & 1 deletion vllm/entrypoints/openai/cli_args.py
@@ -193,7 +193,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    parser.add_argument(
        "--tool-call-parser",
        type=str,
-        choices=["mistral", "hermes", "llama3_json"],
+        choices=["mistral", "hermes", "llama3_json", "granite-20b-fc"],
        default=None,
        help=
        "Select the tool call parser depending on the model that you're using."
5 changes: 4 additions & 1 deletion vllm/entrypoints/openai/serving_chat.py
@@ -29,7 +29,8 @@
                                                    OpenAIServing,
                                                    PromptAdapterPath,
                                                    TextTokensPrompt)
-from vllm.entrypoints.openai.tool_parsers import (Hermes2ProToolParser,
+from vllm.entrypoints.openai.tool_parsers import (Granite20bFCToolParser,
+                                                  Hermes2ProToolParser,
                                                   Llama3JsonToolParser,
                                                   MistralToolParser,
                                                   ToolParser)
@@ -88,6 +89,8 @@ def __init__(self,
            self.tool_parser = Hermes2ProToolParser
        elif tool_parser == "llama3_json":
            self.tool_parser = Llama3JsonToolParser
        elif tool_parser == "granite-20b-fc":
            self.tool_parser = Granite20bFCToolParser
        else:
            raise TypeError("Error: --enable-auto-tool-choice requires "
                            "--tool-call-parser")
3 changes: 2 additions & 1 deletion vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -1,9 +1,10 @@
from .abstract_tool_parser import ToolParser
from .granite_20b_fc_tool_parser import Granite20bFCToolParser
from .hermes_tool_parser import Hermes2ProToolParser
from .llama_tool_parser import Llama3JsonToolParser
from .mistral_tool_parser import MistralToolParser

__all__ = [
"ToolParser", "Hermes2ProToolParser", "MistralToolParser",
"Llama3JsonToolParser"
"Granite20bFCToolParser", "Llama3JsonToolParser"
]