Commit
fix persona streaming
mdmohsin7 committed Feb 18, 2025
1 parent 2679e9a commit 6aecb84
Showing 2 changed files with 63 additions and 49 deletions.
55 changes: 6 additions & 49 deletions backend/utils/llm.py
@@ -1972,17 +1972,17 @@ def condense_facts(facts, name):
 **Requirements:**
 1. Prioritize facts based on:
-   - Relevance to the users core identity, personality, and communication style.
+   - Relevance to the user's core identity, personality, and communication style.
    - Frequency of occurrence or mention in conversations.
    - Impact on decision-making processes and behavioral patterns.
 2. Group related facts to eliminate redundancy while preserving context.
 3. Preserve nuances in communication style, humor, tone, and preferences.
 4. Retain facts essential for continuity in ongoing projects, interests, and relationships.
 5. Discard trivial details, repetitive information, and rarely mentioned facts.
-6. Maintain consistency in the users thought processes, conversational flow, and emotional responses.
+6. Maintain consistency in the user's thought processes, conversational flow, and emotional responses.
 **Output Format (No Extra Text):**
-- **Core Identity and Personality:** Brief overview encapsulating the users personality, values, and communication style.
+- **Core Identity and Personality:** Brief overview encapsulating the user's personality, values, and communication style.
 - **Prioritized Facts:** Organized into categories with only the most relevant and impactful details.
 - **Behavioral Patterns and Decision-Making:** Key patterns defining how the user approaches problems and makes decisions.
 - **Contextual Knowledge and Continuity:** Facts crucial for maintaining continuity in conversations and ongoing projects.
@@ -1996,8 +1996,10 @@ def condense_facts(facts, name):
     return response.content


-def generate_persona_description(facts):
+def generate_persona_description(facts, name):
     prompt = f"""Based on these facts about a person, create a concise, engaging description that captures their unique personality and characteristics (max 250 characters).
+They chose to be known as {name}.
 Facts:
 {facts}
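Call sites for generate_persona_description are outside this diff, but after this change they must pass the persona's chosen name as well. A hypothetical call, with both arguments assumed to come from elsewhere in the pipeline:

```python
# Hypothetical call site: `condensed` would come from condense_facts and
# `name` from the persona's app record; neither appears in this diff.
description = generate_persona_description(condensed, name)
```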
@@ -2043,48 +2045,3 @@ def condense_conversations(conversations):
     return response.content


-async def execute_persona_chat_stream(
-    uid: str, messages: List[Message], app: App, cited: Optional[bool] = False,
-    callback_data: dict = None, chat_session: Optional[str] = None
-) -> AsyncGenerator[str, None]:
-    """Handle streaming chat responses for persona-type apps"""
-
-    system_prompt = app.persona_prompt
-    formatted_messages = [SystemMessage(content=system_prompt)]
-
-    for msg in messages:
-        if msg.sender == "ai":
-            formatted_messages.append(AIMessage(content=msg.text))
-        else:
-            formatted_messages.append(HumanMessage(content=msg.text))
-
-    full_response = []
-
-    async def stream_tokens():
-
-        def get_tokens():
-            for token in llm_medium_stream.stream(formatted_messages):
-                yield token.content
-
-        for token in get_tokens():
-            yield token
-
-    try:
-        async for token in stream_tokens():
-            full_response.append(token)
-            yield f"data: {token}\n\n"
-
-        if callback_data is not None:
-            callback_data['answer'] = ''.join(full_response)
-            callback_data['memories_found'] = []
-            callback_data['ask_for_nps'] = False
-
-        yield None
-        return
-
-    except Exception as e:
-        print(f"Error in execute_persona_chat_stream: {e}")
-        if callback_data is not None:
-            callback_data['error'] = str(e)
-        yield None
-        return
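The removed version above is the bug this commit fixes: it iterated the synchronous llm_medium_stream.stream(...) generator inside an async generator, so each token read blocked the event loop and starved every other coroutine until the model finished. A minimal, self-contained sketch of that failure mode, with a dummy blocking generator standing in for the LLM stream:

```python
import asyncio
import time


def slow_sync_tokens():
    # Stand-in for a synchronous LLM stream: each token arrives only after a
    # blocking wait, like a blocking HTTP read.
    for token in ["hello", " ", "world"]:
        time.sleep(0.5)  # blocks the event-loop thread when run from a coroutine
        yield token


async def heartbeat():
    # Should tick every 100 ms, but it cannot run while the sync read blocks.
    while True:
        print("tick")
        await asyncio.sleep(0.1)


async def stream_like_old_code():
    # Same shape as the removed stream_tokens(): a sync loop inside an async generator.
    for token in slow_sync_tokens():
        yield token


async def main():
    hb = asyncio.create_task(heartbeat())
    async for token in stream_like_old_code():
        print("token:", token)
    hb.cancel()


asyncio.run(main())
# Expected output: roughly one "tick" per token instead of five, because the
# event loop sits blocked inside time.sleep() for most of each 500 ms window.
```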
57 changes: 57 additions & 0 deletions backend/utils/retrieval/graph.py
@@ -4,6 +4,7 @@
 from typing import List, Optional, Tuple, AsyncGenerator

 from langchain.callbacks.base import BaseCallbackHandler
+from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
 from langchain_openai import ChatOpenAI
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.constants import END
@@ -15,6 +16,7 @@
 from database.redis_db import get_filter_category_items
 from database.vector_db import query_vectors_by_metadata
 import database.notifications as notification_db
+from models.app import App
 from models.chat import ChatSession, Message
 from models.memory import Memory
 from models.plugin import Plugin
@@ -39,6 +41,7 @@
 from utils.plugins import get_github_docs_content

 model = ChatOpenAI(model="gpt-4o-mini")
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)


 class StructuredFilters(TypedDict):
@@ -419,3 +422,57 @@ async def execute_graph_chat_stream(

     yield None
     return
+
+
+async def execute_persona_chat_stream(
+    uid: str, messages: List[Message], app: App, cited: Optional[bool] = False,
+    callback_data: dict = None, chat_session: Optional[str] = None
+) -> AsyncGenerator[str, None]:
+    """Handle streaming chat responses for persona-type apps"""
+
+    system_prompt = app.persona_prompt
+    formatted_messages = [SystemMessage(content=system_prompt)]
+
+    for msg in messages:
+        if msg.sender == "ai":
+            formatted_messages.append(AIMessage(content=msg.text))
+        else:
+            formatted_messages.append(HumanMessage(content=msg.text))
+
+    full_response = []
+    callback = AsyncStreamingCallback()
+
+    try:
+        task = asyncio.create_task(llm_medium_stream.agenerate(
+            messages=[formatted_messages],
+            callbacks=[callback]
+        ))
+
+        while True:
+            try:
+                chunk = await callback.queue.get()
+                if chunk:
+                    token = chunk.replace("data: ", "")
+                    full_response.append(token)
+                    yield chunk
+                else:
+                    break
+            except asyncio.CancelledError:
+                break
+
+        await task
+
+        if callback_data is not None:
+            callback_data['answer'] = ''.join(full_response)
+            callback_data['memories_found'] = []
+            callback_data['ask_for_nps'] = False
+
+        yield None
+        return
+
+    except Exception as e:
+        print(f"Error in execute_persona_chat_stream: {e}")
+        if callback_data is not None:
+            callback_data['error'] = str(e)
+        yield None
+        return
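The rewrite depends on AsyncStreamingCallback, which is not defined in this diff and is presumably imported elsewhere in graph.py. A minimal sketch of the shape the consumer loop implies, with tokens queued as "data: "-prefixed chunks and a falsy sentinel on completion; the real class in the repo may differ:

```python
import asyncio

from langchain.callbacks.base import AsyncCallbackHandler


class AsyncStreamingCallback(AsyncCallbackHandler):
    """Assumed shape: pushes SSE-framed tokens onto an asyncio.Queue."""

    def __init__(self):
        self.queue = asyncio.Queue()

    async def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Frame each token the way execute_persona_chat_stream unwraps it.
        await self.queue.put(f"data: {token}")

    async def on_llm_end(self, response, **kwargs) -> None:
        # Falsy sentinel: the consumer's `while True` loop breaks on it.
        await self.queue.put(None)

    async def on_llm_error(self, error, **kwargs) -> None:
        await self.queue.put(None)
```

The design point of the fix: agenerate runs as a background task while the generator drains the queue with await, so token delivery never blocks the event loop the way the removed synchronous loop did.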

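For completeness, a hypothetical consumer of the new generator (the actual route lives in the chat router and is untouched by this commit): the chunks are already SSE-framed, and the trailing None sentinel must be filtered out before writing to the response. load_chat_history and get_app below are assumed helpers, not functions from this diff:

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

api = FastAPI()


@api.post("/v1/persona/chat")
async def persona_chat(uid: str, app_id: str):
    messages = load_chat_history(uid, app_id)  # assumed helper, not in this diff
    persona = get_app(app_id)                  # assumed helper, not in this diff

    async def sse():
        async for chunk in execute_persona_chat_stream(uid, messages, persona):
            if chunk is not None:  # drop the terminating sentinel
                yield chunk

    return StreamingResponse(sse(), media_type="text/event-stream")
```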