Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add StreamingLLM support to studio2 chat #2060

Merged
merged 23 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
bugfixes
  • Loading branch information
monorimet committed Jan 10, 2024
commit a099d7611e8a94c34532145c2608a98ca173ef91
34 changes: 15 additions & 19 deletions apps/shark_studio/api/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
}

# Literal marker strings used when assembling prompt text.
B_INST, E_INST = "[INST]", "[/INST]"
# NOTE(review): despite the *_SYS names these hold "<s>" / "</s>", whereas the
# system section of the default prompt below is delimited by "<<SYS>>" and
# "<</SYS>>" -- confirm the naming is intentional.
B_SYS, E_SYS = "<s>", "</s>"

# Default prompt prefix: opens with "<s>[INST] <<SYS>>", carries the system
# instructions, and closes the system section with "<</SYS>>". The string is
# not raw, so each "\n" escape inside it becomes a real newline in addition to
# the physical line breaks of the triple-quoted literal.
DEFAULT_CHAT_SYS_PROMPT = """<s>[INST] <<SYS>>
Be concise. You are a helpful, respectful and honest assistant. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.\n <</SYS>>\n\n
"""
Expand All @@ -42,11 +42,6 @@ def append_user_prompt(history, input_prompt):
history += user_prompt
return history

def append_bot_prompt(history, input_prompt):
    """Append a bot reply, wrapped in sequence markers, to ``history``.

    The reply is formatted as ``"{B_SYS} {input_prompt}{E_SYS} {E_SYS}"``
    using the module-level ``B_SYS`` / ``E_SYS`` constants.

    Args:
        history: Accumulated prompt text.
            NOTE(review): this looks like it expects a ``str``. If a list is
            passed, ``+=`` extends that list in place with the individual
            characters of the formatted reply -- confirm callers only pass
            strings.
        input_prompt: The bot's reply text.

    Returns:
        ``history`` with the formatted reply appended.
    """
    bot_prompt = f"{B_SYS} {input_prompt}{E_SYS} {E_SYS}"
    history += bot_prompt
    return history

class LanguageModel:
def __init__(
self,
Expand Down Expand Up @@ -210,7 +205,6 @@ def sanitize_prompt(self, prompt):

def chat(self, prompt):
prompt = self.sanitize_prompt(prompt)
print(f"sanitized: {prompt}")

input_tensor = self.tokenizer(prompt, return_tensors="pt").input_ids

Expand Down Expand Up @@ -243,24 +237,26 @@ def format_out(results):
total_time = time.time() - st_time
token_len += 1


while format_out(token) != llm_model_map["llama2_7b"]["stop_token"]:
dec_time=time.time()
if self.streaming_llm and self.model["get_seq_step"]() > 600:
print("Evicting cache space!")
self.model["evict_kvcache_space"]()
token = self.model["run_forward"](token)
history.append(format_out(token))
total_time = time.time() - dec_time
self.prev_token_len = token_len + len(history)
yield self.tokenizer.decode(history), total_time
history.append(format_out(token))
while format_out(token) != llm_model_map["llama2_7b"]["stop_token"]:
dec_time=time.time()
if self.streaming_llm and self.model["get_seq_step"]() > 600:
print("Evicting cache space!")
self.model["evict_kvcache_space"]()
token = self.model["run_forward"](token)
history.append(format_out(token))
total_time = time.time() - dec_time
yield self.tokenizer.decode(history), total_time

self.prev_token_len = token_len + len(history)

if format_out(token) == llm_model_map["llama2_7b"]["stop_token"]:
break

for i in range(len(history)):
if type(history[i]) != int:
history[i] = int(history[i])
result_output = append_bot_prompt(history, self.tokenizer.decode(history))
result_output = self.tokenizer.decode(history)
self.global_iter += 1
return result_output, total_time

Expand Down
7 changes: 6 additions & 1 deletion apps/shark_studio/web/ui/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,18 @@
LanguageModel,
)

# Marker strings placed around / after bot text in the chat history.
B_SYS, E_SYS = "<s>", "</s>"

def user(message, history):
    """Queue the user's message as a new, unanswered turn in the chat history.

    Args:
        message: Text submitted by the user. An empty or missing (``None``)
            message is replaced with the greeting ``"Hello!"``.
        history: Existing conversation as a list of ``[user_text, bot_text]``
            pairs. It is not modified.

    Returns:
        A tuple ``("", new_history)``. The first element is always the empty
        string (presumably used to clear the input box -- confirm against the
        UI wiring); the second is a new list equal to ``history`` with
        ``[message, ""]`` appended, the empty string being the placeholder
        for the bot's reply.
    """
    if not message:
        message = "Hello!"
    # Concatenate rather than append so the caller's list is left untouched.
    return "", history + [[message, ""]]

def append_bot_prompt(history, input_prompt):
    """Append a bot reply, wrapped in sequence markers, to ``history``.

    The reply is formatted as ``"{B_SYS} {input_prompt}{E_SYS} {E_SYS}"``
    using the module-level ``B_SYS`` / ``E_SYS`` constants.

    Args:
        history: Accumulated prompt text.
            NOTE(review): this looks like it expects a ``str``. If a list is
            passed, ``+=`` extends that list in place with the individual
            characters of the formatted reply -- confirm callers only pass
            strings.
        input_prompt: The bot's reply text.

    Returns:
        ``history`` with the formatted reply appended.
    """
    bot_prompt = f"{B_SYS} {input_prompt}{E_SYS} {E_SYS}"
    history += bot_prompt
    return history

language_model = None

Expand Down Expand Up @@ -65,7 +70,7 @@ def chat_fn(
prefill_time = 0
is_first = True
for text, exec_time in language_model.chat(history):
history[-1][-1] = text
history[-1][-1] = f"{text}{E_SYS} {E_SYS}"
if is_first:
prefill_time = exec_time
is_first = False
Expand Down
Loading