RAGFlow streaming output suggestions #3738 #3881

Open · wants to merge 102 commits into base: main
Changes from 7 commits

Commits (102)
b071797
Add test for document (#3497)
Feiue Nov 19, 2024
b1001bf
fix: laws.py added missing import logging (#3501)
michalmasrna1 Nov 19, 2024
f424f19
Fix bugs (#3502)
JinHai-CN Nov 20, 2024
36e75b3
fix synonym bug (#3506)
KevinHuSh Nov 20, 2024
e16b7c5
smooth term weight (#3510)
KevinHuSh Nov 20, 2024
81f92d0
feat: Add Datasets component to home page #3221 (#3508)
cike8899 Nov 20, 2024
9314b03
fix: keyerror issue (#3512)
KevinHuSh Nov 20, 2024
95dc59d
Added kb_id filter to knn. Fix #3458 (#3513)
yuzhichang Nov 20, 2024
2062d7f
Make spark model robuster to model name (#3514)
KevinHuSh Nov 20, 2024
273678d
Fix: potential risk (#3515)
KevinHuSh Nov 20, 2024
c55231b
Fix set_output type hint (#3516)
yuzhichang Nov 20, 2024
cbef6fd
Merge remote-tracking branch 'remote/main'
Nov 21, 2024
f5ef1fb
Merge remote-tracking branch 'remote/main'
Nov 21, 2024
f4a7b92
Merge remote-tracking branch 'remote/main'
Nov 21, 2024
6976db1
Merge remote-tracking branch 'remote/main'
Nov 22, 2024
b8c31d5
Merge remote-tracking branch 'remote/main'
Nov 22, 2024
e9140ae
Merge remote-tracking branch 'remote/main'
Nov 22, 2024
70359e0
Merge remote-tracking branch 'remote/main'
Nov 25, 2024
22b0ad9
Merge remote-tracking branch 'remote/main'
Nov 25, 2024
ccb4e2f
Merge remote-tracking branch 'remote/main'
Nov 25, 2024
3f3e073
Merge remote-tracking branch 'remote/main'
Nov 26, 2024
accbe5f
Merge remote-tracking branch 'remote/main'
Nov 26, 2024
facc2d6
Merge remote-tracking branch 'remote/main'
Nov 26, 2024
d030a23
Merge remote-tracking branch 'remote/main'
Nov 26, 2024
f0c7e25
Merge remote-tracking branch 'remote/main'
Nov 26, 2024
32f6517
Merge remote-tracking branch 'remote/main'
Nov 27, 2024
1211e22
Merge remote-tracking branch 'remote/main'
Nov 27, 2024
7934014
Merge remote-tracking branch 'remote/main'
Nov 28, 2024
49ad2bd
Merge remote-tracking branch 'remote/main'
Nov 28, 2024
00c1b41
Merge remote-tracking branch 'remote/main'
Nov 29, 2024
dd9fec8
Merge remote-tracking branch 'remote/main'
Nov 29, 2024
202ada4
Merge remote-tracking branch 'remote/main'
Dec 3, 2024
851ad89
Merge remote-tracking branch 'remote/main'
Dec 3, 2024
39ab46e
Merge remote-tracking branch 'remote/main'
Dec 3, 2024
c61bd86
Merge remote-tracking branch 'remote/main'
Dec 4, 2024
5dfc60d
Merge remote-tracking branch 'remote/main'
Dec 4, 2024
2160487
test: add session.py logs
Dec 4, 2024
c0dfab5
test: add dialog_service.py logs
Dec 4, 2024
742a871
test: chat_streamly stream
Dec 4, 2024
121d78a
Merge remote-tracking branch 'remote/main' into main_lz
Dec 4, 2024
762dea0
Merge remote-tracking branch 'remote/main'
Dec 4, 2024
d932105
Merge remote-tracking branch 'remote/main' into main_lz
Dec 4, 2024
c964b36
Merge remote-tracking branch 'remote/main' into main_lz
Dec 5, 2024
e51fcf8
Merge remote-tracking branch 'refs/remotes/remote/main' into main_lz
Dec 5, 2024
d090b1c
test: chat_streamly delta
Dec 5, 2024
3570ce4
Merge remote-tracking branch 'origin/main_lz'
Dec 5, 2024
30e9c29
Merge remote-tracking branch 'remote/main'
Dec 5, 2024
16cc7ec
Merge remote-tracking branch 'remote/main'
Dec 5, 2024
7c9c42f
Test: Comment log printing
Dec 5, 2024
57375be
Test: delete log printing
Dec 5, 2024
89466ed
Merge remote-tracking branch 'origin/main' into main_remote_lz
Dec 5, 2024
a6d21a1
Merge remote-tracking branch 'remote/main'
Dec 5, 2024
c94dc7f
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 6, 2024
7e4c5fe
Merge remote-tracking branch 'remote/main'
Dec 6, 2024
c26bdf7
Fix: The issue of truncation of the streaming output of the char mode…
Dec 6, 2024
06c8745
Fix: dialog_service.py The issue of truncation of the streaming outpu…
Dec 6, 2024
96dd427
Merge remote-tracking branch 'remote/main'
Dec 6, 2024
144f4e8
Merge remote-tracking branch 'refs/remotes/origin/main' into main_rem…
Dec 6, 2024
47b9c0c
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 6, 2024
df7defc
Merge remote-tracking branch 'remote/main'
Dec 6, 2024
fa86cdf
Merge remote-tracking branch 'remote_lz/main'
Dec 6, 2024
4be5f57
Merge remote-tracking branch 'remote/main'
Dec 6, 2024
67f5ad1
Merge remote-tracking branch 'refs/remotes/remote/main'
Dec 9, 2024
5cf6dc1
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 9, 2024
2c58f3c
Merge remote-tracking branch 'remote_lz/main' into main_remote_lz
Dec 9, 2024
5b0d908
Merge remote-tracking branch 'refs/remotes/origin/main' into main_rem…
Dec 9, 2024
6903acc
Fix: Delete the content of the comment
Dec 9, 2024
3a64029
Fix: Resolve conflicts
Dec 9, 2024
12175ab
Merge remote-tracking branch 'remote/main'
Dec 9, 2024
46f6d28
Merge remote-tracking branch 'remote/main'
Dec 9, 2024
ea93fb7
Merge remote-tracking branch 'remote/main'
Dec 9, 2024
a3667dc
Merge remote-tracking branch 'remote/main'
Dec 9, 2024
602f392
Merge remote-tracking branch 'remote/main'
Dec 10, 2024
ba26f10
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 10, 2024
2937810
Merge remote-tracking branch 'remote_lz/main' into main_remote_lz
Dec 10, 2024
ba1161e
Merge remote-tracking branch 'remote/main'
Dec 10, 2024
b7455a8
Merge remote-tracking branch 'remote/main'
Dec 10, 2024
273fded
Merge remote-tracking branch 'remote/main'
Dec 10, 2024
23aca28
Fix: Remove redundant references
Dec 10, 2024
2a6c252
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 10, 2024
d573164
Merge remote-tracking branch 'origin/main' into main_remote_lz
Dec 10, 2024
eaf622b
Merge remote-tracking branch 'remote/main'
Dec 10, 2024
b55f21e
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 10, 2024
d9d196d
Merge remote-tracking branch 'origin/main' into main_remote_lz
Dec 10, 2024
5d62a80
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 11, 2024
70e9e73
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 11, 2024
cfb877d
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 12, 2024
b5b7397
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 12, 2024
8e8ad6c
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 13, 2024
34a1b3e
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 13, 2024
b020cf8
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 13, 2024
69344cb
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 13, 2024
56dd4f4
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 16, 2024
3fa53d1
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 17, 2024
a0c7211
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 17, 2024
022c45e
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 27, 2024
fdf6a83
Feat: update the chat function to streaming presentation
Dec 27, 2024
c209d09
Fix: Merge code
Dec 27, 2024
d898dff
Fix: exegesis total_tokens
Dec 27, 2024
c3c0f0d
Merge remote-tracking branch 'remote/main' into main_remote_lz
Dec 27, 2024
d1f0ed3
Merge remote-tracking branch 'remote/main' into main_remote_lz
Jan 14, 2025
237a206
Merge remote-tracking branch 'remote/main' into main_remote_lz
Jan 16, 2025
3 changes: 2 additions & 1 deletion api/apps/sdk/session.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re
import json
from copy import deepcopy
@@ -189,7 +190,7 @@ def stream():
        nonlocal dia, msg, req, conv
        try:
            for ans in chat(dia, msg, **req):
                #logging.info("ans : {}".format(ans))
                fillin_conv(ans)
                yield "data:" + json.dumps({"code": 0, "data": ans}, ensure_ascii=False) + "\n\n"
            ConversationService.update_by_id(conv.id, conv.to_dict())
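For reference, a consumer of this endpoint reads the SSE-style "data:" lines emitted by stream() above. A minimal client sketch, assuming the requests library; the URL, payload, and headers are placeholders and not part of this diff:

    # Hedged sketch: reading the "data:" events produced by stream() above.
    import json
    import requests

    def consume_completion(url, payload, headers):
        # stream=True keeps the connection open so events arrive incrementally
        with requests.post(url, json=payload, headers=headers, stream=True) as resp:
            for line in resp.iter_lines(decode_unicode=True):
                if not line or not line.startswith("data:"):
                    continue
                event = json.loads(line[len("data:"):])
                if event.get("code") == 0 and isinstance(event.get("data"), dict):
                    # with this PR, "answer" may carry only the newly generated delta
                    print(event["data"].get("answer", ""), end="", flush=True)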
86 changes: 73 additions & 13 deletions api/db/services/dialog_service.py
@@ -29,6 +29,7 @@
from api.db.services.llm_service import LLMService, TenantLLMService, LLMBundle
from api import settings
from rag.app.resume import forbidden_select_fields4resume
from rag.llm import LENGTH_NOTIFICATION_CN, LENGTH_NOTIFICATION_EN
from rag.nlp.search import index_name
from rag.utils import rmSpace, num_tokens_from_string, encoder
from api.utils.file_utils import get_project_base_directory
@@ -273,21 +274,80 @@ def decorate_answer(answer):
(done_tm - retrieval_tm) * 1000)
return {"answer": answer, "reference": refs, "prompt": prompt}

# if stream:
# last_ans = ""
# answer = ""
# for ans in chat_mdl.chat_streamly(prompt, msg[1:], gen_conf):
# answer = ans
# logging.info("answer_stream : {}".format(ans))
# delta_ans = ans[len(last_ans):]
# if num_tokens_from_string(delta_ans) < 16:
# continue
# last_ans = answer
# yield {"answer": answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)}
# delta_ans = answer[len(last_ans):]
# if delta_ans:
# yield {"answer": answer, "reference": {}, "audio_binary": tts(tts_mdl, delta_ans)}
# yield decorate_answer(answer)

    if stream:
        logging.info("stream_mode : {}".format(msg[1:]))
        answer = ""
        for delta in chat_mdl.chat_streamly(prompt, msg[1:], gen_conf):
            # Check whether the delta is the total token count or a notification message
            if isinstance(delta, int):
                # chat_streamly yields the final total token count as a plain int
                total_tokens = delta
                continue
            if isinstance(delta, str):
                if delta.isdigit():
                    # Handle a total token count delivered as a digit string (if needed)
                    total_tokens = int(delta)
                    # logging.info(f"Total tokens used: {total_tokens}")
                    continue
                elif delta in [LENGTH_NOTIFICATION_CN, LENGTH_NOTIFICATION_EN]:
                    # Handle the length-limit notification
                    answer += delta
                    # logging.info(f"Length notification: {delta}")
                    audio = tts(tts_mdl, delta)
                    yield {"answer": answer, "reference": {}, "audio_binary": audio}
                    continue
                elif "\n**ERROR**:" in delta:
                    # Handle error messages
                    answer += delta
                    # logging.error(f"Error in response: {delta}")
                    yield {"answer": answer, "reference": {}, "audio_binary": b''}  # no audio on errors
                    continue

                # Process the incremental text
                delta_ans = delta
                # if num_tokens_from_string(delta_ans) < 16:
                #     continue  # adjust the threshold as needed

                # Update the accumulated answer
                answer += delta_ans

                # Generate audio for the new fragment
                audio = tts(tts_mdl, delta_ans)
                # logging.info(f"Generated audio for delta: {delta_ans}")
                yield {"answer": delta_ans, "reference": {}, "audio_binary": audio}
            elif isinstance(delta, dict):
                # In case chat_streamly still returns a dict (not recommended),
                # e.g. {"new_text": "<newly added text>", "position": 10}
                new_text = delta.get("new_text", "")
                if not new_text:
                    continue
                if num_tokens_from_string(new_text) < 16:
                    continue

                # Update the accumulated answer
                answer += new_text

                # Generate audio for the new fragment
                audio = tts(tts_mdl, new_text)
                logging.info(f"Generated audio for new_text: {new_text}")
                yield {"answer": answer, "reference": {}, "audio_binary": audio}

        # Finally, decorate the answer
        decorated_answer = decorate_answer(answer)
        logging.info(f"Final decorated answer: {decorated_answer}")
        yield decorated_answer
    else:
        answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
        logging.debug("User: {}|Assistant: {}".format(
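To make the yield protocol of the stream branch explicit: intermediate dicts carry text in "answer" with an empty "reference", and the last dict comes from decorate_answer(), which also includes a "prompt" key. A hedged consumer sketch (collect_stream is a hypothetical helper, not part of this diff):

    # Sketch of a caller unpacking the streamed payloads from chat().
    def collect_stream(chunks):
        final = None
        for chunk in chunks:
            if "prompt" in chunk:                 # decorate_answer() output arrives last
                final = chunk                     # carries the full answer plus references
            else:
                # intermediate payload; "answer" holds the latest delta (or, for
                # notifications/errors, the accumulated text so far)
                print(chunk.get("answer", ""), end="", flush=True)
        return final

    # usage (hypothetical): final = collect_stream(chat(dia, msg, **req))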
4 changes: 3 additions & 1 deletion poetry.toml
@@ -1,4 +1,6 @@
[virtualenvs]
in-project = true
create = true
prefer-active-python = true
[repositories.tuna]
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
78 changes: 66 additions & 12 deletions rag/llm/chat_model.py
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
import logging
import re

from openai.lib.azure import AzureOpenAI
@@ -57,42 +58,95 @@ def chat(self, system, history, gen_conf):
except openai.APIError as e:
return "**ERROR**: " + str(e), 0

# def chat_streamly(self, system, history, gen_conf):
# if system:
# history.insert(0, {"role": "system", "content": system})
# ans = ""
# total_tokens = 0
# try:
# response = self.client.chat.completions.create(
# model=self.model_name,
# messages=history,
# stream=True,
# **gen_conf)
# for resp in response:
# if not resp.choices: continue
# if not resp.choices[0].delta.content:
# resp.choices[0].delta.content = ""
# ans += resp.choices[0].delta.content
#
# if not hasattr(resp, "usage") or not resp.usage:
# total_tokens = (
# total_tokens
# + num_tokens_from_string(resp.choices[0].delta.content)
# )
# elif isinstance(resp.usage, dict):
# total_tokens = resp.usage.get("total_tokens", total_tokens)
# else: total_tokens = resp.usage.total_tokens
#
# if resp.choices[0].finish_reason == "length":
# if is_chinese(ans):
# ans += LENGTH_NOTIFICATION_CN
# else:
# ans += LENGTH_NOTIFICATION_EN
# yield ans
#
# except openai.APIError as e:
# yield ans + "\n**ERROR**: " + str(e)
#
# yield total_tokens

    def chat_streamly(self, system, history, gen_conf):
        logging.info("lizheng_test: chat_streamly")
        if system:
            history.insert(0, {"role": "system", "content": system})

        ans = ""
        total_tokens = 0
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=history,
                stream=True,
                **gen_conf
            )
            for resp in response:
                if not resp.choices:
                    continue
                # Get the delta content and make sure it is a string
                delta_content = resp.choices[0].delta.content or ""
                if not delta_content:
                    continue
                # Accumulate the answer
                ans += delta_content

                # Update the token count
                if not hasattr(resp, "usage") or not resp.usage:
                    total_tokens = (
                        total_tokens
                        + num_tokens_from_string(delta_content)
                    )
                elif isinstance(resp.usage, dict):
                    total_tokens = resp.usage.get("total_tokens", total_tokens)
                else:
                    total_tokens = resp.usage.total_tokens

                # Yield only the newly added part
                yield delta_content

                # Handle the finish reason
                if resp.choices[0].finish_reason == "length":
                    if is_chinese(ans):
                        notification = LENGTH_NOTIFICATION_CN
                    else:
                        notification = LENGTH_NOTIFICATION_EN
                    yield notification

        except openai.APIError as e:
            # Yield the error message
            yield ans + "\n**ERROR**: " + str(e)

        # Yield the total token count
        yield total_tokens


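Summarizing the revised contract of chat_streamly() above: it yields incremental str chunks (possibly followed by a length notification or an error suffix), and its very last yield is the total token count as an int. A minimal caller sketch under that assumption (run_stream is a hypothetical helper, not part of this diff):

    # Sketch: consuming the per-delta protocol of the revised chat_streamly().
    def run_stream(chat_mdl, system, history, gen_conf):
        answer = ""
        total_tokens = 0
        for chunk in chat_mdl.chat_streamly(system, history, gen_conf):
            if isinstance(chunk, int):
                total_tokens = chunk              # final yield: token usage
            else:
                answer += chunk                   # delta text, notification, or error suffix
        return answer, total_tokens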
66 changes: 61 additions & 5 deletions rag/llm/cv_model.py
@@ -57,12 +57,48 @@ def chat(self, system, history, gen_conf, image=""):
except Exception as e:
return "**ERROR**: " + str(e), 0

# def chat_streamly(self, system, history, gen_conf, image=""):
# if system:
# history[-1]["content"] = system + history[-1]["content"] + "user query: " + history[-1]["content"]
#
# ans = ""
# tk_count = 0
# try:
# for his in history:
# if his["role"] == "user":
# his["content"] = self.chat_prompt(his["content"], image)
#
# response = self.client.chat.completions.create(
# model=self.model_name,
# messages=history,
# max_tokens=gen_conf.get("max_tokens", 1000),
# temperature=gen_conf.get("temperature", 0.3),
# top_p=gen_conf.get("top_p", 0.7),
# stream=True
# )
# for resp in response:
# if not resp.choices[0].delta.content: continue
# delta = resp.choices[0].delta.content
# ans += delta
# if resp.choices[0].finish_reason == "length":
# ans += "...\nFor the content length reason, it stopped, continue?" if is_english(
# [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
# tk_count = resp.usage.total_tokens
# if resp.choices[0].finish_reason == "stop": tk_count = resp.usage.total_tokens
# yield ans
# except Exception as e:
# yield ans + "\n**ERROR**: " + str(e)
#
# yield tk_count

def chat_streamly(self, system, history, gen_conf, image=""):
if system:
history[-1]["content"] = system + history[-1]["content"] + "user query: " + history[-1]["content"]

ans = ""
tk_count = 0
last_sent_length = 0 # 跟踪上一次发送的内容长度

try:
for his in history:
if his["role"] == "user":
@@ -77,19 +113,39 @@ def chat_streamly(self, system, history, gen_conf, image=""):
                stream=True
            )
            for resp in response:
                if not resp.choices[0].delta.content:
                    continue
                delta = resp.choices[0].delta.content
                ans += delta

                # Compute the newly added part
                new_text = delta
                position = last_sent_length  # start position of the new part
                last_sent_length += len(new_text)

                # Build the incremental-update payload
                incremental_update = {
                    "new_text": new_text,
                    "position": position
                }

                if resp.choices[0].finish_reason == "length":
                    message = "...\nFor the content length reason, it stopped, continue?" if is_english(
                        [ans]) else "······\n由于长度的原因,回答被截断了,要继续吗?"
                    incremental_update["new_text"] = message
                    incremental_update["position"] = last_sent_length
                    tk_count = resp.usage.total_tokens

                if resp.choices[0].finish_reason == "stop":
                    tk_count = resp.usage.total_tokens

                yield incremental_update

        except Exception as e:
            yield {"error": f"**ERROR**: {str(e)}"}

        yield tk_count


def image2base64(self, image):
if isinstance(image, bytes):
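For the CV model, the revised chat_streamly() above yields dicts of the form {"new_text": ..., "position": ...} (or {"error": ...} on failure), followed by the token count as an int. A hedged sketch of rebuilding the answer from that protocol (rebuild_answer is a hypothetical helper; it assumes the "position" values are cumulative character offsets, so in-order appends suffice):

    # Sketch: reassembling the incremental-update payloads from cv_model.chat_streamly().
    def rebuild_answer(stream):
        parts = []
        tk_count = 0
        for item in stream:
            if isinstance(item, dict):
                if "error" in item:               # error payload from the except branch
                    parts.append(item["error"])
                    break
                parts.append(item["new_text"])
            elif isinstance(item, int):           # trailing total token count
                tk_count = item
        return "".join(parts), tk_count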