From 3448b6f46083a4ffc43be67aa8e2578c5a7c4ad3 Mon Sep 17 00:00:00 2001
From: Letong Han <106566639+letonghan@users.noreply.github.com>
Date: Fri, 31 May 2024 14:22:49 +0800
Subject: [PATCH] update llm microservice output to binary (#123)

Signed-off-by: letonghan
---
 comps/llms/text-generation/tgi/llm.py | 23 +++--------------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py
index 445999992..caa3f20f3 100644
--- a/comps/llms/text-generation/tgi/llm.py
+++ b/comps/llms/text-generation/tgi/llm.py
@@ -21,18 +21,6 @@
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
 
 
-@traceable(run_type="tool")
-def post_process_text(text: str):
-    if text == " ":
-        return "data: @#$\n\n"
-    if text == "\n":
-        return "data: <br/>\n\n"
-    if text.isspace():
-        return None
-    new_text = text.replace("Answer: ", "").replace("Human: ", "").replace(" ", "@#$")
-    return f"data: {new_text}\n\n"
-
-
 @register_microservice(
     name="opea_service@llm_tgi",
     service_type=ServiceType.LLM,
@@ -61,14 +49,9 @@ async def stream_generator():
             chat_response = ""
             async for text in llm.astream(input.query):
                 chat_response += text
-                processed_text = post_process_text(text)
-                if text and processed_text:
-                    if "</s>" in text:
-                        res = text.split("</s>")[0]
-                        if res != "":
-                            yield res
-                        break
-                    yield processed_text
+                chunk_repr = repr(text.encode("utf-8"))
+                print(f"[llm - chat_stream] chunk:{chunk_repr}")
+                yield f"data: {chunk_repr}\n\n"
             print(f"[llm - chat_stream] stream response: {chat_response}")
             yield "data: [DONE]\n\n"
 