feat(python backend): function call upgrade #368

Merged
Changes from 1 commit

Commits (23)
26077b8
add function_call.py
EliwiiKeeya Jul 28, 2024
2e6bc03
feat(backend): add wip annotation
EliwiiKeeya Jul 29, 2024
f287e32
feat(backend): Synchronized Files
EliwiiKeeya Jul 30, 2024
542a9eb
feat(Backend): detect function call request
EliwiiKeeya Jul 30, 2024
478151a
fix(Backend): detect function call request
EliwiiKeeya Jul 30, 2024
372b433
feat(Backend): Implement function call in non-stream mode for chatmode
EliwiiKeeya Jul 31, 2024
9cbef91
fix(Backend): Fix OpenAI API data format compatibility
EliwiiKeeya Jul 31, 2024
c9e9d51
feat(Backend): Implement non-streaming function call process
EliwiiKeeya Jul 31, 2024
19e9d9f
fix(Backend): Move directory structure from routes to backend-python
EliwiiKeeya Aug 1, 2024
432644d
fix(Backend): Update Prompts to Mobius example and refactor code stru…
EliwiiKeeya Aug 2, 2024
b3994a3
test(Backend): Add postprocss_response.py for tests
EliwiiKeeya Aug 2, 2024
0d06082
fix(Backend): Fix OpenAI API data format compatibility
EliwiiKeeya Aug 2, 2024
6dfc9ec
perf(Backend): Modify some attributes.
EliwiiKeeya Aug 2, 2024
ff5c44f
fix(Backend): Fix postprocess_response function.
EliwiiKeeya Aug 3, 2024
0345073
refactor(Backend): Refactor two places related to prompt word synthesis.
EliwiiKeeya Aug 3, 2024
eb89837
Fix some minor details so function_call works properly
josStorer Aug 8, 2024
8c93173
test(Backend): Add function_call_stream.py
EliwiiKeeya Aug 8, 2024
9747edf
chore(Backend): Add a function to generate streaming responses for to…
EliwiiKeeya Aug 9, 2024
4917205
fix(Backend): Adjust the output format of the streaming test file.
EliwiiKeeya Aug 9, 2024
c4b9c4e
feat(Backend): Try to implement streaming function calls
EliwiiKeeya Aug 9, 2024
6b0f597
style(Backend): Change some format
EliwiiKeeya Aug 9, 2024
3df4302
Adjust stream function call details
josStorer Aug 14, 2024
e88ee50
docs(Backend): Modify comments in the code.
EliwiiKeeya Aug 15, 2024
Adjust stream function call details
josStorer committed Aug 14, 2024
commit 3df4302b2362b337c4237333478a545d8c3107f4
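
For orientation before the diff: the streaming path in completion.py below assembles OpenAI-compatible chat.completion.chunk payloads for tool calls. The following sketch only illustrates the approximate chunk sequence a client receives; the model name, call id, and argument payload are invented placeholders, not values produced by this PR.

import json

# Hedged illustration of the streamed tool-call chunk sequence (values invented).
first_chunk = {
    "object": "chat.completion.chunk",
    "model": "rwkv",  # placeholder; the backend fills in model.name
    "choices": [{
        "index": 0,
        "delta": {"tool_calls": [{
            "index": 0,
            "id": "call_0123456789abcdefghijklmn",  # "call_" + 24 random characters
            "type": "function",
            "function": {"name": "get_current_weather", "arguments": ""},
        }]},
    }],
}

# Later chunks carry only argument fragments for the same tool call index.
argument_chunk = {
    "object": "chat.completion.chunk",
    "model": "rwkv",
    "choices": [{
        "index": 0,
        "delta": {"tool_calls": [{
            "index": 0,
            "function": {"arguments": '{"location": "Paris"}'},
        }]},
    }],
}

# The stream closes with an empty delta, finish_reason "tool_calls", then "[DONE]".
final_chunk = {
    "object": "chat.completion.chunk",
    "model": "rwkv",
    "choices": [{"index": 0, "delta": {}, "finish_reason": "tool_calls"}],
}

for chunk in (first_chunk, argument_chunk, final_chunk):
    print(json.dumps(chunk))
print("[DONE]")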
142 changes: 94 additions & 48 deletions backend-python/routes/completion.py
@@ -462,14 +462,32 @@ async def async_generator_stream_respose(
model, request, body, completion_text, body.stream, body.stop, True
) # Get an async generator handle
content: str = ""
function_id: str = "call_" + "".join(
random.sample(string.ascii_letters + string.digits, 24)
)
flag_is_function_call_confirmed = False
flag_is_common_confirmed = False

# Loop, there is only one existing endpoint.
done = False
stack_keyword_pairs = [["```", "```"], ["(", ")"], ['"', '"'], ["'", "'"]]
while True:
if done:
yield json.dumps(
{
"object": "chat.completion.chunk",
"model": model.name,
"choices": [
{"index": 0, "delta": {}, "finish_reason": "tool_calls"}
],
}
)
yield "[DONE]"

try:
response = await anext(gen) # Generate a delta response
if response == "[DONE]":
done = True
continue
except StopAsyncIteration:
# Too few inference results
@@ -486,25 +504,58 @@
response_decoded = json.loads(response) # Decode string
if response_decoded["choices"][0]["delta"] == {}:
continue
content += response_decoded["choices"][0]["delta"]["content"]
delta_content = response_decoded["choices"][0]["delta"]["content"]
content += delta_content

if flag_is_function_call_confirmed:
content = f"{{{content.strip()[1:-1]}}}"
content = content.replace("=", ":")
if "\n\n" in content:
done = True
continue

for pair in stack_keyword_pairs:
if done:
break
for keyword in pair:
if keyword in delta_content:
stack.append(keyword)
if (
pair[0] in stack
and pair[1] in stack
and stack.index(pair[0]) < stack.index(pair[1])
):
stack.remove(pair[0])
stack.remove(pair[1])
if "(" not in stack and ")" not in stack:
done = True
response_decoded["choices"][0]["delta"] = {
"tool_calls": [
{
"index": 0,
"function": {
"arguments": (
'"'
if delta_content.startswith('"')
else ""
)
+ "}",
},
}
]
}
yield json.dumps(response_decoded)
break
if done:
continue

delta_content = delta_content.replace("=", ":")
# content = content.replace(r'"', r"\"") # XXX: Check whether to keep this.
response_decoded["choices"][0]["delta"]["content"] = None
response_decoded["choices"][0]["delta"] = {
"arguments": [
"tool_calls": [
{
"index": 0,
"id": "call_"
+ "".join(
random.sample(string.ascii_letters + string.digits, 24)
),
"type": "function",
"function": {
"name": name,
"arguments": content,
"arguments": delta_content,
},
}
]
@@ -525,67 +576,50 @@
"""
# Constant
LIMIT_LINE_FEEDS = 4
LIMIT_CHARACTERS = 40
LIMIT_BLOCKS_CHARACTERS = 30
REGEX_BLOCKS = r"([\w]+)[\s]*```[\w\s]*tool_call(.*?)\n*```"
REGEX_BLOCKS_HEADERS = r"([\w]+)[\s]*```[\w\s]*(tool_call)\("
LIMIT_CHARACTERS = 60
LIMIT_FUNCTION_NAME_CHARACTERS = 44
REGEX_BLOCKS_HEADERS = r"([\w]+)[\s]*```[\w\s]*tool_call\("

# Regex
feild_function_call_block: re.Match | None = re.search(
REGEX_BLOCKS, content
)
feild_function_call_head: re.Match | None = re.search(
regex_match_function_call_head: re.Match | None = re.search(
REGEX_BLOCKS_HEADERS, content
)

# Confirm Common Response
if (
content.count("\n") > LIMIT_LINE_FEEDS
and feild_function_call_head is None
) or (len(content) > LIMIT_CHARACTERS and feild_function_call_head is None):
if regex_match_function_call_head is None and (
content.count("\n") >= LIMIT_LINE_FEEDS
or len(content) > LIMIT_CHARACTERS
or (
len(content) > LIMIT_FUNCTION_NAME_CHARACTERS
and "```" not in content
)
):
flag_is_common_confirmed = True
response_decoded["choices"][0]["delta"]["content"] = content
yield json.dumps(response_decoded)
del response_decoded
del content
continue

# Confirm Common Response
if isinstance(feild_function_call_head, re.Match):
if (
len(content[feild_function_call_head.end(2) :])
> LIMIT_BLOCKS_CHARACTERS
and feild_function_call_block is None
):
flag_is_common_confirmed = True
response_decoded["choices"][0]["delta"]["content"] = content
yield json.dumps(response_decoded)
del response_decoded
del content
continue

# Confirm Function call Response
if feild_function_call_block is not None:
if regex_match_function_call_head is not None:
flag_is_function_call_confirmed = True
stack = ["```", "("]

# Generate a blank content response
response_decoded["choices"][0]["delta"]["assistant"] = (
model.bot if body.assistant_name is None else body.assistant_name
)
response_decoded["choices"][0]["delta"]["role"] = "assistant"
response_decoded["choices"][0]["delta"]["content"] = None
yield json.dumps(response_decoded)

# Generate a function call details response
name = feild_function_call_head.group(1)
del response_decoded["choices"][0]["delta"]["assistant"]
name = regex_match_function_call_head.group(1)
del response_decoded["choices"][0]["delta"]["role"]
del response_decoded["choices"][0]["delta"]["content"]
response_decoded["choices"][0]["delta"] = {
"tool_calls": [
{
"id": "call_"
+ "".join(
random.sample(string.ascii_letters + string.digits, 24)
),
"index": 0,
"id": function_id,
"type": "function",
"function": {
"name": name,
@@ -595,9 +629,21 @@
]
}
yield json.dumps(response_decoded)
response_decoded["choices"][0]["delta"] = {
"tool_calls": [
{
"index": 0,
"function": {
"arguments": "{"
+ ('"' if delta_content.endswith('"') else ""),
},
}
]
}
yield json.dumps(response_decoded)

# Reset content buffer
content = feild_function_call_block.group(2)
# content = feild_function_call_block.group(2)
continue

# Default: Unsure Response
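A standalone sketch of the two detection mechanisms in the hunks above, using an invented sample completion; it approximates the logic rather than copying code from the PR. The header regex recognizes a fenced tool_call block and captures the function name, and the delimiter stack, seeded with ["```", "("], decides when the argument list has closed so the stream can finish with tool_calls.

import re

REGEX_BLOCKS_HEADERS = r"([\w]+)[\s]*```[\w\s]*tool_call\("

# Invented model output in the format the backend expects.
sample = 'get_current_weather\n```\ntool_call(location="Paris", unit="celsius")\n```'
head = re.search(REGEX_BLOCKS_HEADERS, sample)
print(head.group(1))  # -> get_current_weather, used as the tool call's name

# Delimiter stack: seeded once the header is confirmed; when the "("/")" pair
# cancels out, the argument list is complete.
stack = ["```", "("]
stack.append(")")  # a later delta contains the closing parenthesis
opener, closer = "(", ")"
if opener in stack and closer in stack and stack.index(opener) < stack.index(closer):
    stack.remove(opener)
    stack.remove(closer)
print("(" not in stack and ")" not in stack)  # -> True: emit finish_reason "tool_calls"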
118 changes: 103 additions & 15 deletions backend-python/tests/function_call_stream.py
@@ -1,22 +1,41 @@
# Example of an OpenAI ChatCompletion request with stream=True
# https://platform.openai.com/docs/guides/chat
import time
import json
from openai import OpenAI
from collections import defaultdict

# record the time before the request is sent
start_time = time.time()


# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
"""Get the current weather in a given location"""
if "tokyo" in location.lower():
return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
elif "san francisco" in location.lower():
return json.dumps(
{"location": "San Francisco", "temperature": "72", "unit": unit}
)
elif "paris" in location.lower():
return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
else:
return json.dumps({"location": location, "temperature": "unknown"})


client = OpenAI(
base_url="http://127.0.0.1:8000",
api_key="test",
)

messages = [
{
"role": "user",
"content": "Hello!",
}
]
{
"role": "user",
"content": "What's the weather like in Paris?",
}
]

tools = [
{
@@ -46,17 +65,86 @@
stream=True,
)

# create variables to collect the stream of chunks
collected_chunks = []
collected_messages = []

# iterate through the stream of events
# https://community.openai.com/t/has-anyone-managed-to-get-a-tool-call-working-when-stream-true/498867/11
tool_calls = []
index = 0
start = True
for chunk in response:
chunk_time = time.time() - start_time # calculate the time delay of the chunk
collected_chunks.append(chunk) # save the event response
chunk_message = chunk.choices[0].delta.content # extract the message
collected_messages.append(chunk_message) # save the message
print(chunk_message, end='')
print(chunk)
chunk_time = time.time() - start_time

delta = chunk.choices[0].delta
if not delta:
break
if not delta.function_call and not delta.tool_calls:
if start:
continue
else:
break
start = False
if delta.function_call:
if index == len(tool_calls):
tool_calls.append(defaultdict(str))
if delta.function_call.name:
tool_calls[index]["name"] = delta.function_call.name
if delta.function_call.arguments:
tool_calls[index]["arguments"] += delta.function_call.arguments
elif delta.tool_calls:
tool_call = delta.tool_calls[0]
index = tool_call.index
if index == len(tool_calls):
tool_calls.append(defaultdict(str))
if tool_call.id:
tool_calls[index]["id"] = tool_call.id
if tool_call.function:
if tool_call.function.name:
tool_calls[index]["name"] = tool_call.function.name
if tool_call.function.arguments:
tool_calls[index]["arguments"] += tool_call.function.arguments

print()
print(tool_calls)
print(f"Full response received {chunk_time:.2f} seconds after request")

if tool_calls:
# Step 3: call the function
# Note: the JSON response may not always be valid; be sure to handle errors
available_functions = {
"get_current_weather": get_current_weather,
} # only one function in this example, but you can have multiple
# Step 4: send the info for each function call and function response to the model
for tool_call in tool_calls:
function_name = tool_call["name"]
function_to_call = available_functions[function_name]
function_args = json.loads(tool_call["arguments"])
function_response = function_to_call(
location=function_args.get("location"),
unit=function_args.get("unit"),
)
messages.append(
{
"role": "assistant",
"tool_calls": [
{
"id": tool_call["id"],
"type": "function",
"function": {
"name": function_name,
"arguments": tool_call["arguments"],
},
}
],
}
) # extend conversation with assistant's reply
messages.append(
{
"tool_call_id": tool_call["id"],
"role": "tool",
"content": function_response,
}
) # extend conversation with function response
second_response = client.chat.completions.create(
model="gpt-4o",
messages=messages,
) # get a new response from the model where it can see the function response
print(second_response.choices[0].message.content)
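
One caveat in the test above: the Step 3 comment warns that the arguments JSON may not be valid, yet json.loads is called without a guard. Below is a hedged sketch of the guard a client might add; safe_call is an invented helper, and the keyword arguments match the weather example.

import json

def safe_call(tool_call, available_functions):
    """Dispatch one accumulated tool call, tolerating bad names or arguments."""
    function_to_call = available_functions.get(tool_call["name"])
    if function_to_call is None:
        return json.dumps({"error": "unknown function: " + tool_call["name"]})
    try:
        function_args = json.loads(tool_call["arguments"])
    except json.JSONDecodeError as exc:
        return json.dumps({"error": "invalid arguments: " + str(exc)})
    return function_to_call(
        location=function_args.get("location"),
        unit=function_args.get("unit"),
    )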