feat(python backend): function call upgrade #368

Merged
Changes from 1 commit

Commits (23)
26077b8
add function_call.py
EliwiiKeeya Jul 28, 2024
2e6bc03
feat(backend): add wip annotation
EliwiiKeeya Jul 29, 2024
f287e32
feat(backend): Synchronized Files
EliwiiKeeya Jul 30, 2024
542a9eb
feat(Backend): detect function call request
EliwiiKeeya Jul 30, 2024
478151a
fix(Backend): detect function call request
EliwiiKeeya Jul 30, 2024
372b433
feat(Backend): Implement function call in non-stream mode for chatmode
EliwiiKeeya Jul 31, 2024
9cbef91
fix(Backend): Fix OpenAI API data format compatibility
EliwiiKeeya Jul 31, 2024
c9e9d51
feat(Backend): Implement non-streaming function call process
EliwiiKeeya Jul 31, 2024
19e9d9f
fix(Backend): Move directory structure from routes to backend-python
EliwiiKeeya Aug 1, 2024
432644d
fix(Backend): Update Prompts to Mobius example and refactor code stru…
EliwiiKeeya Aug 2, 2024
b3994a3
test(Backend): Add postprocss_response.py for tests
EliwiiKeeya Aug 2, 2024
0d06082
fix(Backend): Fix OpenAI API data format compatibility
EliwiiKeeya Aug 2, 2024
6dfc9ec
perf(Backend): Modify some attributes.
EliwiiKeeya Aug 2, 2024
ff5c44f
fix(Backend): Fix postprocess_response function.
EliwiiKeeya Aug 3, 2024
0345073
refactor(Backend): Refactor two places related to prompt word synthesis.
EliwiiKeeya Aug 3, 2024
eb89837
Fix some minor details so function_call works properly
josStorer Aug 8, 2024
8c93173
test(Backend): Add function_call_stream.py
EliwiiKeeya Aug 8, 2024
9747edf
chore(Backend): Add a function to generate streaming responses for to…
EliwiiKeeya Aug 9, 2024
4917205
fix(Backend): Adjust the output format of the streaming test file.
EliwiiKeeya Aug 9, 2024
c4b9c4e
feat(Backend): Try to implement streaming function calls
EliwiiKeeya Aug 9, 2024
6b0f597
style(Backend): Change some format
EliwiiKeeya Aug 9, 2024
3df4302
Adjust stream function call details
josStorer Aug 14, 2024
e88ee50
docs(Backend): Modify comments in the code.
EliwiiKeeya Aug 15, 2024
Adjust stream function call details
josStorer committed Aug 14, 2024
commit 3df4302b2362b337c4237333478a545d8c3107f4
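
For orientation before the diff: the streaming path in completion.py below assembles OpenAI-compatible chat.completion.chunk payloads for tool calls. The following sketch only illustrates the approximate chunk sequence a client receives; the model name, call id, and argument payload are invented placeholders, not values produced by this PR.

import json

# Hedged illustration of the streamed tool-call chunk sequence (values invented).
first_chunk = {
    "object": "chat.completion.chunk",
    "model": "rwkv",  # placeholder; the backend fills in model.name
    "choices": [{
        "index": 0,
        "delta": {"tool_calls": [{
            "index": 0,
            "id": "call_0123456789abcdefghijklmn",  # "call_" + 24 random characters
            "type": "function",
            "function": {"name": "get_current_weather", "arguments": ""},
        }]},
    }],
}

# Later chunks carry only argument fragments for the same tool call index.
argument_chunk = {
    "object": "chat.completion.chunk",
    "model": "rwkv",
    "choices": [{
        "index": 0,
        "delta": {"tool_calls": [{
            "index": 0,
            "function": {"arguments": '{"location": "Paris"}'},
        }]},
    }],
}

# The stream closes with an empty delta, finish_reason "tool_calls", then "[DONE]".
final_chunk = {
    "object": "chat.completion.chunk",
    "model": "rwkv",
    "choices": [{"index": 0, "delta": {}, "finish_reason": "tool_calls"}],
}

for chunk in (first_chunk, argument_chunk, final_chunk):
    print(json.dumps(chunk))
print("[DONE]")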
142 changes: 94 additions & 48 deletions backend-python/routes/completion.py
@@ -462,14 +462,32 @@ async def async_generator_stream_respose(
model, request, body, completion_text, body.stream, body.stop, True
) # Get an async generator handle
content: str = ""
function_id: str = "call_" + "".join(
random.sample(string.ascii_letters + string.digits, 24)
)
flag_is_function_call_confirmed = False
flag_is_common_confirmed = False

# Loop, there is only one existing endpoint.
done = False
stack_keyword_pairs = [["```", "```"], ["(", ")"], ['"', '"'], ["'", "'"]]
while True:
if done:
yield json.dumps(
{
"object": "chat.completion.chunk",
"model": model.name,
"choices": [
{"index": 0, "delta": {}, "finish_reason": "tool_calls"}
],
}
)
yield "[DONE]"

try:
response = await anext(gen) # Generate a delta response
if response == "[DONE]":
done = True
continue
except StopAsyncIteration:
# Too few inference results
@@ -486,25 +504,58 @@
response_decoded = json.loads(response) # Decode string
if response_decoded["choices"][0]["delta"] == {}:
continue
content += response_decoded["choices"][0]["delta"]["content"]
delta_content = response_decoded["choices"][0]["delta"]["content"]
content += delta_content

if flag_is_function_call_confirmed:
content = f"{{{content.strip()[1:-1]}}}"
content = content.replace("=", ":")
if "\n\n" in content:
done = True
continue

for pair in stack_keyword_pairs:
if done:
break
for keyword in pair:
if keyword in delta_content:
stack.append(keyword)
if (
pair[0] in stack
and pair[1] in stack
and stack.index(pair[0]) < stack.index(pair[1])
):
stack.remove(pair[0])
stack.remove(pair[1])
if "(" not in stack and ")" not in stack:
done = True
response_decoded["choices"][0]["delta"] = {
"tool_calls": [
{
"index": 0,
"function": {
"arguments": (
'"'
if delta_content.startswith('"')
else ""
)
+ "}",
},
}
]
}
yield json.dumps(response_decoded)
break
if done:
continue

delta_content = delta_content.replace("=", ":")
# content = content.replace(r'"', r"\"") # XXX: Check whether to keep this.
response_decoded["choices"][0]["delta"]["content"] = None
response_decoded["choices"][0]["delta"] = {
"arguments": [
"tool_calls": [
{
"index": 0,
"id": "call_"
+ "".join(
random.sample(string.ascii_letters + string.digits, 24)
),
"type": "function",
"function": {
"name": name,
"arguments": content,
"arguments": delta_content,
},
}
]
@@ -525,67 +576,50 @@
"""
# Constant
LIMIT_LINE_FEEDS = 4
LIMIT_CHARACTERS = 40
LIMIT_BLOCKS_CHARACTERS = 30
REGEX_BLOCKS = r"([\w]+)[\s]*```[\w\s]*tool_call(.*?)\n*```"
REGEX_BLOCKS_HEADERS = r"([\w]+)[\s]*```[\w\s]*(tool_call)\("
LIMIT_CHARACTERS = 60
LIMIT_FUNCTION_NAME_CHARACTERS = 44
REGEX_BLOCKS_HEADERS = r"([\w]+)[\s]*```[\w\s]*tool_call\("

# Regex
feild_function_call_block: re.Match | None = re.search(
REGEX_BLOCKS, content
)
feild_function_call_head: re.Match | None = re.search(
regex_match_function_call_head: re.Match | None = re.search(
REGEX_BLOCKS_HEADERS, content
)

# Confirm Common Response
if (
content.count("\n") > LIMIT_LINE_FEEDS
and feild_function_call_head is None
) or (len(content) > LIMIT_CHARACTERS and feild_function_call_head is None):
if regex_match_function_call_head is None and (
content.count("\n") >= LIMIT_LINE_FEEDS
or len(content) > LIMIT_CHARACTERS
or (
len(content) > LIMIT_FUNCTION_NAME_CHARACTERS
and "```" not in content
)
):
flag_is_common_confirmed = True
response_decoded["choices"][0]["delta"]["content"] = content
yield json.dumps(response_decoded)
del response_decoded
del content
continue

# Confirm Common Response
if isinstance(feild_function_call_head, re.Match):
if (
len(content[feild_function_call_head.end(2) :])
> LIMIT_BLOCKS_CHARACTERS
and feild_function_call_block is None
):
flag_is_common_confirmed = True
response_decoded["choices"][0]["delta"]["content"] = content
yield json.dumps(response_decoded)
del response_decoded
del content
continue

# Confirm Function call Response
if feild_function_call_block is not None:
if regex_match_function_call_head is not None:
flag_is_function_call_confirmed = True
stack = ["```", "("]

# Generate a blank content response
response_decoded["choices"][0]["delta"]["assistant"] = (
model.bot if body.assistant_name is None else body.assistant_name
)
response_decoded["choices"][0]["delta"]["role"] = "assistant"
response_decoded["choices"][0]["delta"]["content"] = None
yield json.dumps(response_decoded)

# Generate a function call details response
name = feild_function_call_head.group(1)
del response_decoded["choices"][0]["delta"]["assistant"]
name = regex_match_function_call_head.group(1)
del response_decoded["choices"][0]["delta"]["role"]
del response_decoded["choices"][0]["delta"]["content"]
response_decoded["choices"][0]["delta"] = {
"tool_calls": [
{
"id": "call_"
+ "".join(
random.sample(string.ascii_letters + string.digits, 24)
),
"index": 0,
"id": function_id,
"type": "function",
"function": {
"name": name,
@@ -595,9 +629,21 @@
]
}
yield json.dumps(response_decoded)
response_decoded["choices"][0]["delta"] = {
"tool_calls": [
{
"index": 0,
"function": {
"arguments": "{"
+ ('"' if delta_content.endswith('"') else ""),
},
}
]
}
yield json.dumps(response_decoded)

# Reset content buffer
content = feild_function_call_block.group(2)
# content = feild_function_call_block.group(2)
continue

# Default: Unsure Response
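A standalone sketch of the two detection mechanisms in the hunks above, using an invented sample completion; it approximates the logic rather than copying code from the PR. The header regex recognizes a fenced tool_call block and captures the function name, and the delimiter stack, seeded with ["```", "("], decides when the argument list has closed so the stream can finish with tool_calls.

import re

REGEX_BLOCKS_HEADERS = r"([\w]+)[\s]*```[\w\s]*tool_call\("

# Invented model output in the format the backend expects.
sample = 'get_current_weather\n```\ntool_call(location="Paris", unit="celsius")\n```'
head = re.search(REGEX_BLOCKS_HEADERS, sample)
print(head.group(1))  # -> get_current_weather, used as the tool call's name

# Delimiter stack: seeded once the header is confirmed; when the "("/")" pair
# cancels out, the argument list is complete.
stack = ["```", "("]
stack.append(")")  # a later delta contains the closing parenthesis
opener, closer = "(", ")"
if opener in stack and closer in stack and stack.index(opener) < stack.index(closer):
    stack.remove(opener)
    stack.remove(closer)
print("(" not in stack and ")" not in stack)  # -> True: emit finish_reason "tool_calls"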
118 changes: 103 additions & 15 deletions backend-python/tests/function_call_stream.py
@@ -1,22 +1,41 @@
# Example of an OpenAI ChatCompletion request with stream=True
# https://platform.openai.com/docs/guides/chat
import time
import json
from openai import OpenAI
from collections import defaultdict

# record the time before the request is sent
start_time = time.time()


# Example dummy function hard coded to return the same weather
# In production, this could be your backend API or an external API
def get_current_weather(location, unit="fahrenheit"):
"""Get the current weather in a given location"""
if "tokyo" in location.lower():
return json.dumps({"location": "Tokyo", "temperature": "10", "unit": unit})
elif "san francisco" in location.lower():
return json.dumps(
{"location": "San Francisco", "temperature": "72", "unit": unit}
)
elif "paris" in location.lower():
return json.dumps({"location": "Paris", "temperature": "22", "unit": unit})
else:
return json.dumps({"location": location, "temperature": "unknown"})


client = OpenAI(
base_url="http://127.0.0.1:8000",
api_key="test",
)

messages = [
{
"role": "user",
"content": "Hello!",
}
]
{
"role": "user",
"content": "What's the weather like in Paris?",
}
]

tools = [
{
@@ -46,17 +65,86 @@
stream=True,
)

# create variables to collect the stream of chunks
collected_chunks = []
collected_messages = []

# iterate through the stream of events
# https://community.openai.com/t/has-anyone-managed-to-get-a-tool-call-working-when-stream-true/498867/11
tool_calls = []
index = 0
start = True
for chunk in response:
chunk_time = time.time() - start_time # calculate the time delay of the chunk
collected_chunks.append(chunk) # save the event response
chunk_message = chunk.choices[0].delta.content # extract the message
collected_messages.append(chunk_message) # save the message
print(chunk_message, end='')
print(chunk)
chunk_time = time.time() - start_time

delta = chunk.choices[0].delta
if not delta:
break
if not delta.function_call and not delta.tool_calls:
if start:
continue
else:
break
start = False
if delta.function_call:
if index == len(tool_calls):
tool_calls.append(defaultdict(str))
if delta.function_call.name:
tool_calls[index]["name"] = delta.function_call.name
if delta.function_call.arguments:
tool_calls[index]["arguments"] += delta.function_call.arguments
elif delta.tool_calls:
tool_call = delta.tool_calls[0]
index = tool_call.index
if index == len(tool_calls):
tool_calls.append(defaultdict(str))
if tool_call.id:
tool_calls[index]["id"] = tool_call.id
if tool_call.function:
if tool_call.function.name:
tool_calls[index]["name"] = tool_call.function.name
if tool_call.function.arguments:
tool_calls[index]["arguments"] += tool_call.function.arguments

print()
print(tool_calls)
print(f"Full response received {chunk_time:.2f} seconds after request")

if tool_calls:
# Step 3: call the function
# Note: the JSON response may not always be valid; be sure to handle errors
available_functions = {
"get_current_weather": get_current_weather,
} # only one function in this example, but you can have multiple
# Step 4: send the info for each function call and function response to the model
for tool_call in tool_calls:
function_name = tool_call["name"]
function_to_call = available_functions[function_name]
function_args = json.loads(tool_call["arguments"])
function_response = function_to_call(
location=function_args.get("location"),
unit=function_args.get("unit"),
)
messages.append(
{
"role": "assistant",
"tool_calls": [
{
"id": tool_call["id"],
"type": "function",
"function": {
"name": function_name,
"arguments": tool_call["arguments"],
},
}
],
}
) # extend conversation with assistant's reply
messages.append(
{
"tool_call_id": tool_call["id"],
"role": "tool",
"content": function_response,
}
) # extend conversation with function response
second_response = client.chat.completions.create(
model="gpt-4o",
messages=messages,
) # get a new response from the model where it can see the function response
print(second_response.choices[0].message.content)
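
One caveat in the test above: the Step 3 comment warns that the arguments JSON may not be valid, yet json.loads is called without a guard. Below is a hedged sketch of the guard a client might add; safe_call is an invented helper, and the keyword arguments match the weather example.

import json

def safe_call(tool_call, available_functions):
    """Dispatch one accumulated tool call, tolerating bad names or arguments."""
    function_to_call = available_functions.get(tool_call["name"])
    if function_to_call is None:
        return json.dumps({"error": "unknown function: " + tool_call["name"]})
    try:
        function_args = json.loads(tool_call["arguments"])
    except json.JSONDecodeError as exc:
        return json.dumps({"error": "invalid arguments: " + str(exc)})
    return function_to_call(
        location=function_args.get("location"),
        unit=function_args.get("unit"),
    )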