Lint and format; adjust tests to the new tool parser API (pass `request` to `extract_tool_calls` / `extract_tool_calls_streaming`)

vllm-project · DarkLight1337 · Oct 18, 2024 · Sep 26, 2024 · Sep 26, 2024 · Oct 1, 2024
commit 25d839d61eec05da46a7cc3393306c5e866c7880
diff --git a/tests/tool_use/test_jamba_tool_parser.py b/tests/tool_use/test_jamba_tool_parser.py
@@ -1,14 +1,15 @@
 import json
-from typing import Dict, List, Optional, Generator
+from typing import Generator, List, Optional
 
 import partial_json_parser
 import pytest
 from partial_json_parser.core.options import Allow
 
-from vllm.entrypoints.openai.protocol import ToolCall, FunctionCall, DeltaMessage
+from vllm.entrypoints.openai.protocol import (DeltaMessage, FunctionCall,
+                                              ToolCall)
 from vllm.entrypoints.openai.tool_parsers import JambaToolParser
 from vllm.transformers_utils.detokenizer import detokenize_incrementally
-from vllm.transformers_utils.tokenizer import get_tokenizer, AnyTokenizer
+from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
 
 MODEL = "ai21labs/Jamba-tiny-dev"
 
@@ -27,16 +28,20 @@ def assert_tool_calls(actual_tool_calls: List[ToolCall],
                       expected_tool_calls: List[ToolCall]):
     assert len(actual_tool_calls) == len(expected_tool_calls)
 
-    for actual_tool_call, expected_tool_call in zip(actual_tool_calls, expected_tool_calls):
+    for actual_tool_call, expected_tool_call in zip(actual_tool_calls,
+                                                    expected_tool_calls):
         assert isinstance(actual_tool_call.id, str)
         assert len(actual_tool_call.id) > 16
 
         assert actual_tool_call.type == "function"
         assert actual_tool_call.function == expected_tool_call.function
 
 
-def stream_delta_message_generator(jamba_tool_parser: JambaToolParser, jamba_tokenizer: AnyTokenizer, model_output: str) -> Generator[DeltaMessage, None, None]:
-    all_token_ids = jamba_tokenizer.encode(model_output, add_special_tokens=False)
+def stream_delta_message_generator(
+        jamba_tool_parser: JambaToolParser, jamba_tokenizer: AnyTokenizer,
+        model_output: str) -> Generator[DeltaMessage, None, None]:
+    all_token_ids = jamba_tokenizer.encode(model_output,
+                                           add_special_tokens=False)
 
     previous_text = ""
     previous_tokens = None
@@ -45,17 +50,18 @@ def stream_delta_message_generator(jamba_tool_parser: JambaToolParser, jamba_tok
     for i, delta_token in enumerate(all_token_ids):
         delta_token_ids = [delta_token]
         previous_token_ids = all_token_ids[:i]
-        current_token_ids = all_token_ids[:i+1]
-
-        new_tokens, delta_text, new_prefix_offset, new_read_offset = detokenize_incrementally(
-            tokenizer=jamba_tokenizer,
-            all_input_ids=current_token_ids,
-            prev_tokens=previous_tokens,
-            prefix_offset=prefix_offset,
-            read_offset=read_offset,
-            skip_special_tokens=False,
-            spaces_between_special_tokens=True,
-        )
+        current_token_ids = all_token_ids[:i + 1]
+
+        (new_tokens, delta_text, new_prefix_offset,
+         new_read_offset) = detokenize_incrementally(
+             tokenizer=jamba_tokenizer,
+             all_input_ids=current_token_ids,
+             prev_tokens=previous_tokens,
+             prefix_offset=prefix_offset,
+             read_offset=read_offset,
+             skip_special_tokens=False,
+             spaces_between_special_tokens=True,
+         )
 
         current_text = previous_text + delta_text
 
@@ -66,20 +72,23 @@ def stream_delta_message_generator(jamba_tool_parser: JambaToolParser, jamba_tok
             previous_token_ids,
             current_token_ids,
             delta_token_ids,
+            request=None,  # type: ignore[arg-type]
         )
         if delta_message:
             yield delta_message
 
         previous_text = current_text
-        previous_tokens = previous_tokens + new_tokens if previous_tokens else new_tokens
+        previous_tokens = previous_tokens + new_tokens if previous_tokens\
+            else new_tokens
         prefix_offset = new_prefix_offset
         read_offset = new_read_offset
 
 
 def test_extract_tool_calls_no_tools(jamba_tool_parser):
     model_output = "This is a test"
-    extracted_tool_calls = jamba_tool_parser.extract_tool_calls(model_output)
-    assert extracted_tool_calls.tools_called == False
+    extracted_tool_calls = jamba_tool_parser.extract_tool_calls(
+        model_output, request=None)  # type: ignore[arg-type]
+    assert not extracted_tool_calls.tools_called
     assert extracted_tool_calls.tool_calls == []
     assert extracted_tool_calls.content == model_output
 
@@ -93,26 +102,55 @@ def test_extract_tool_calls_no_tools(jamba_tool_parser):
     argnames=["model_output", "expected_tool_calls", "expected_content"],
     argvalues=[
         (
-                ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',
-                [ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Dallas", "state": "TX", "unit": "fahrenheit"})))],
-                None
-         ),
+            ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Dallas",
+                                                       "state": "TX",
+                                                       "unit": "fahrenheit"
+                                                   })))
+            ],
+            None),
         (
-                ''' Sure! let me call the tool for you.<tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',
-                [ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Dallas", "state": "TX", "unit": "fahrenheit"})))],
-                " Sure! let me call the tool for you."
-         ),
+            ''' Sure! let me call the tool for you.<tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Dallas",
+                                                       "state": "TX",
+                                                       "unit": "fahrenheit"
+                                                   })))
+            ],
+            " Sure! let me call the tool for you."),
         (
-                ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}},\n    {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}\n]</tool_calls>''',
-                [ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Dallas", "state": "TX", "unit": "fahrenheit"}))),
-                 ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Orlando", "state": "FL", "unit": "fahrenheit"})))],
-                None
-        )
+            ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}},\n    {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Dallas",
+                                                       "state": "TX",
+                                                       "unit": "fahrenheit"
+                                                   }))),
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Orlando",
+                                                       "state": "FL",
+                                                       "unit": "fahrenheit"
+                                                   })))
+            ],
+            None)
     ],
 )
-def test_extract_tool_calls(jamba_tool_parser, model_output, expected_tool_calls, expected_content):
-    extracted_tool_calls = jamba_tool_parser.extract_tool_calls(model_output)
-    assert extracted_tool_calls.tools_called == True
+def test_extract_tool_calls(jamba_tool_parser, model_output,
+                            expected_tool_calls, expected_content):
+    extracted_tool_calls = jamba_tool_parser.extract_tool_calls(
+        model_output, request=None)  # type: ignore[arg-type]
+    assert extracted_tool_calls.tools_called
 
     assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
 
@@ -128,37 +166,63 @@ def test_extract_tool_calls(jamba_tool_parser, model_output, expected_tool_calls
     ],
     argnames=["model_output", "expected_tool_calls", "expected_content"],
     argvalues=[
+        ('''This is a test''', [], '''This is a test'''),
         (
-                '''This is a test''',
-                [],
-                '''This is a test'''
-        ),
+            ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Dallas",
+                                                       "state": "TX",
+                                                       "unit": "fahrenheit"
+                                                   })))
+            ],
+            " "),
         (
-                ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',
-                [ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Dallas", "state": "TX", "unit": "fahrenheit"})))],
-                " "
-        ),
+            ''' Sure! let me call the tool for you.<tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Dallas",
+                                                       "state": "TX",
+                                                       "unit": "fahrenheit"
+                                                   })))
+            ],
+            " Sure! let me call the tool for you."),
         (
-                ''' Sure! let me call the tool for you.<tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}\n]</tool_calls>''',
-                [ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Dallas", "state": "TX", "unit": "fahrenheit"})))],
-                " Sure! let me call the tool for you."
-        ),
-        (
-                ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}},\n    {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}\n]</tool_calls>''',
-                [ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Dallas", "state": "TX", "unit": "fahrenheit"}))),
-                 ToolCall(function=FunctionCall(name="get_current_weather", arguments=json.dumps({"city": "Orlando", "state": "FL", "unit": "fahrenheit"})))],
-                " "
-        )
+            ''' <tool_calls>[\n    {"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}},\n    {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}\n]</tool_calls>''',  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Dallas",
+                                                       "state": "TX",
+                                                       "unit": "fahrenheit"
+                                                   }))),
+                ToolCall(function=FunctionCall(name="get_current_weather",
+                                               arguments=json.dumps(
+                                                   {
+                                                       "city": "Orlando",
+                                                       "state": "FL",
+                                                       "unit": "fahrenheit"
+                                                   })))
+            ],
+            " ")
     ],
 )
-def test_extract_tool_calls_streaming(jamba_tool_parser, jamba_tokenizer, model_output, expected_tool_calls, expected_content):
+def test_extract_tool_calls_streaming(jamba_tool_parser, jamba_tokenizer,
+                                      model_output, expected_tool_calls,
+                                      expected_content):
     other_content: str = ''
     function_names: List[str] = []
     function_args_strs: List[str] = []
     tool_call_idx: int = -1
     tool_call_ids: List[Optional[str]] = []
 
-    for delta_message in stream_delta_message_generator(jamba_tool_parser, jamba_tokenizer, model_output):
+    for delta_message in stream_delta_message_generator(
+            jamba_tool_parser, jamba_tokenizer, model_output):
         # role should never be streamed from tool parser
         assert not delta_message.role
 
@@ -179,9 +243,8 @@ def test_extract_tool_calls_streaming(jamba_tool_parser, jamba_tokenizer, model_
                 tool_call_ids.append(None)
 
             # if a tool call ID is streamed, make sure one hasn't been already
-            if tool_call.id:
-                if not tool_call_ids[tool_call.index]:
-                    tool_call_ids[tool_call.index] = tool_call.id
+            if tool_call.id and not tool_call_ids[tool_call.index]:
+                tool_call_ids[tool_call.index] = tool_call.id
 
             # if parts of the function start being streamed
             if tool_call.function:
@@ -200,9 +263,13 @@ def test_extract_tool_calls_streaming(jamba_tool_parser, jamba_tokenizer, model_
 
     assert other_content == expected_content
 
-    actual_tool_calls = [ToolCall(id=tool_call_id,
-                                  function=FunctionCall(
-                                      name=function_name,
-                                      arguments=partial_json_parser.ensure_json(function_args_str, Allow.OBJ | Allow.STR)))
-                         for tool_call_id, function_name, function_args_str in zip(tool_call_ids, function_names, function_args_strs)]
-    assert_tool_calls(actual_tool_calls, expected_tool_calls)
+    actual_tool_calls = [
+        ToolCall(id=tool_call_id,
+                 function=FunctionCall(
+                     name=function_name,
+                     arguments=partial_json_parser.ensure_json(
+                         function_args_str, Allow.OBJ | Allow.STR)))
+        for tool_call_id, function_name, function_args_str in zip(
+            tool_call_ids, function_names, function_args_strs)
+    ]
+    assert_tool_calls(actual_tool_calls, expected_tool_calls)
diff --git a/vllm/entrypoints/openai/tool_parsers/__init__.py b/vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -1,11 +1,12 @@
 from .abstract_tool_parser import ToolParser, ToolParserManager
 from .hermes_tool_parser import Hermes2ProToolParser
 from .internlm2_tool_parser import Internlm2ToolParser
-from .llama_tool_parser import Llama3JsonToolParser
 from .jamba_tool_parser import JambaToolParser
+from .llama_tool_parser import Llama3JsonToolParser
 from .mistral_tool_parser import MistralToolParser
 
 __all__ = [
     "ToolParser", "ToolParserManager", "Hermes2ProToolParser",
-    "MistralToolParser", "Internlm2ToolParser", "Llama3JsonToolParser", "JambaToolParser"
+    "MistralToolParser", "Internlm2ToolParser", "Llama3JsonToolParser",
+    "JambaToolParser"
 ]
diff --git a/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
@@ -5,10 +5,11 @@
 import partial_json_parser
 from partial_json_parser.core.options import Allow
 
-from vllm.entrypoints.openai.protocol import (DeltaFunctionCall, DeltaMessage,
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+                                              DeltaFunctionCall, DeltaMessage,
                                               DeltaToolCall,
                                               ExtractedToolCallInformation,
-                                              FunctionCall, ToolCall, ChatCompletionRequest)
+                                              FunctionCall, ToolCall)
 from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
 from vllm.entrypoints.openai.tool_parsers.utils import (
     extract_intermediate_diff)
@@ -68,8 +69,7 @@ def adjust_request(
         return request
 
     def extract_tool_calls(
-            self,
-            model_output: str,
+            self, model_output: str,
             request: ChatCompletionRequest) -> ExtractedToolCallInformation:
 
         # sanity check; avoid unnecessary processing
@@ -103,7 +103,8 @@ def extract_tool_calls(
                 return ExtractedToolCallInformation(
                     tools_called=True,
                     tool_calls=tool_calls,
-                    content=content if (len(content)>0 and content != " ") else None)
+                    content=content if
+                    (len(content) > 0 and content != " ") else None)
 
             except Exception as e:
                 logger.error("Error in extracting tool call from response %s",