Skip to content

Commit

Permalink
Reduce gradio overhead (lm-sys#2325)
Browse files Browse the repository at this point in the history
  • Loading branch information
merrymercy committed Aug 28, 2023
1 parent da8d0cd commit a81a04c
Showing 1 changed file with 17 additions and 7 deletions.
24 changes: 17 additions & 7 deletions fastchat/serve/gradio_web_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@

ip_expiration_dict = defaultdict(lambda: 0)

# Information about custom OpenAI compatible API models.
# JSON file format:
# {
# "vicuna-7b": {
# "model_name": "vicuna-7b-v1.5",
# "api_base": "http://8.8.8.55:5555/v1",
# "api_key": "password"
# }
# }
openai_compatible_models_info = {}


Expand Down Expand Up @@ -394,11 +403,11 @@ def bot_response(state, temperature, top_p, max_new_tokens, request: gr.Request)
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5

try:
for data in stream_iter:
for i, data in enumerate(stream_iter):
if data["error_code"] == 0:
if i % 5 != 0: # reduce gradio's overhead
continue
output = data["text"].strip()
if "vicuna" in model_name:
output = post_process_code(output)
conv.update_last_message(output + "▌")
yield (state, state.to_gradio_chatbot()) + (disable_btn,) * 5
else:
Expand All @@ -412,6 +421,11 @@ def bot_response(state, temperature, top_p, max_new_tokens, request: gr.Request)
enable_btn,
)
return
output = data["text"].strip()
if "vicuna" in model_name:
output = post_process_code(output)
conv.update_last_message(output)
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5
except requests.exceptions.RequestException as e:
conv.update_last_message(
f"{SERVER_ERROR_MSG}\n\n"
Expand Down Expand Up @@ -439,10 +453,6 @@ def bot_response(state, temperature, top_p, max_new_tokens, request: gr.Request)
)
return

# Delete "▌"
conv.update_last_message(conv.messages[-1][-1][:-1])
yield (state, state.to_gradio_chatbot()) + (enable_btn,) * 5

finish_tstamp = time.time()
logger.info(f"{output}")

Expand Down

0 comments on commit a81a04c

Please sign in to comment.