Commit
fix persona streaming
mdmohsin7 committed Feb 18, 2025
1 parent 2679e9a commit 6aecb84
Showing 2 changed files with 63 additions and 49 deletions.
55 changes: 6 additions & 49 deletions backend/utils/llm.py
@@ -1972,17 +1972,17 @@ def condense_facts(facts, name):
 **Requirements:**
 1. Prioritize facts based on:
-   - Relevance to the users core identity, personality, and communication style.
+   - Relevance to the user's core identity, personality, and communication style.
    - Frequency of occurrence or mention in conversations.
    - Impact on decision-making processes and behavioral patterns.
 2. Group related facts to eliminate redundancy while preserving context.
 3. Preserve nuances in communication style, humor, tone, and preferences.
 4. Retain facts essential for continuity in ongoing projects, interests, and relationships.
 5. Discard trivial details, repetitive information, and rarely mentioned facts.
-6. Maintain consistency in the users thought processes, conversational flow, and emotional responses.
+6. Maintain consistency in the user's thought processes, conversational flow, and emotional responses.
 **Output Format (No Extra Text):**
-- **Core Identity and Personality:** Brief overview encapsulating the users personality, values, and communication style.
+- **Core Identity and Personality:** Brief overview encapsulating the user's personality, values, and communication style.
 - **Prioritized Facts:** Organized into categories with only the most relevant and impactful details.
 - **Behavioral Patterns and Decision-Making:** Key patterns defining how the user approaches problems and makes decisions.
 - **Contextual Knowledge and Continuity:** Facts crucial for maintaining continuity in conversations and ongoing projects.
@@ -1996,8 +1996,10 @@ def condense_facts(facts, name):
     return response.content


-def generate_persona_description(facts):
+def generate_persona_description(facts, name):
     prompt = f"""Based on these facts about a person, create a concise, engaging description that captures their unique personality and characteristics (max 250 characters).
+They chose to be known as {name}.
 Facts:
 {facts}
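Call sites for generate_persona_description are outside this diff, but after this change they must pass the persona's chosen name as well. A hypothetical call, with both arguments assumed to come from elsewhere in the pipeline:

```python
# Hypothetical call site: `condensed` would come from condense_facts and
# `name` from the persona's app record; neither appears in this diff.
description = generate_persona_description(condensed, name)
```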
@@ -2043,48 +2045,3 @@ def condense_conversations(conversations):
     return response.content


-async def execute_persona_chat_stream(
-    uid: str, messages: List[Message], app: App, cited: Optional[bool] = False,
-    callback_data: dict = None, chat_session: Optional[str] = None
-) -> AsyncGenerator[str, None]:
-    """Handle streaming chat responses for persona-type apps"""
-
-    system_prompt = app.persona_prompt
-    formatted_messages = [SystemMessage(content=system_prompt)]
-
-    for msg in messages:
-        if msg.sender == "ai":
-            formatted_messages.append(AIMessage(content=msg.text))
-        else:
-            formatted_messages.append(HumanMessage(content=msg.text))
-
-    full_response = []
-
-    async def stream_tokens():
-
-        def get_tokens():
-            for token in llm_medium_stream.stream(formatted_messages):
-                yield token.content
-
-        for token in get_tokens():
-            yield token
-
-    try:
-        async for token in stream_tokens():
-            full_response.append(token)
-            yield f"data: {token}\n\n"
-
-        if callback_data is not None:
-            callback_data['answer'] = ''.join(full_response)
-            callback_data['memories_found'] = []
-            callback_data['ask_for_nps'] = False
-
-        yield None
-        return
-
-    except Exception as e:
-        print(f"Error in execute_persona_chat_stream: {e}")
-        if callback_data is not None:
-            callback_data['error'] = str(e)
-        yield None
-        return
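The removed version above is the bug this commit fixes: it iterated the synchronous llm_medium_stream.stream(...) generator inside an async generator, so each token read blocked the event loop and starved every other coroutine until the model finished. A minimal, self-contained sketch of that failure mode, with a dummy blocking generator standing in for the LLM stream:

```python
import asyncio
import time


def slow_sync_tokens():
    # Stand-in for a synchronous LLM stream: each token arrives only after a
    # blocking wait, like a blocking HTTP read.
    for token in ["hello", " ", "world"]:
        time.sleep(0.5)  # blocks the event-loop thread when run from a coroutine
        yield token


async def heartbeat():
    # Should tick every 100 ms, but it cannot run while the sync read blocks.
    while True:
        print("tick")
        await asyncio.sleep(0.1)


async def stream_like_old_code():
    # Same shape as the removed stream_tokens(): a sync loop inside an async generator.
    for token in slow_sync_tokens():
        yield token


async def main():
    hb = asyncio.create_task(heartbeat())
    async for token in stream_like_old_code():
        print("token:", token)
    hb.cancel()


asyncio.run(main())
# Expected output: roughly one "tick" per token instead of five, because the
# event loop sits blocked inside time.sleep() for most of each 500 ms window.
```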
57 changes: 57 additions & 0 deletions backend/utils/retrieval/graph.py
@@ -4,6 +4,7 @@
 from typing import List, Optional, Tuple, AsyncGenerator

 from langchain.callbacks.base import BaseCallbackHandler
+from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
 from langchain_openai import ChatOpenAI
 from langgraph.checkpoint.memory import MemorySaver
 from langgraph.constants import END
@@ -15,6 +16,7 @@
 from database.redis_db import get_filter_category_items
 from database.vector_db import query_vectors_by_metadata
 import database.notifications as notification_db
+from models.app import App
 from models.chat import ChatSession, Message
 from models.memory import Memory
 from models.plugin import Plugin
@@ -39,6 +41,7 @@
 from utils.plugins import get_github_docs_content

 model = ChatOpenAI(model="gpt-4o-mini")
+llm_medium_stream = ChatOpenAI(model='gpt-4o', streaming=True)


 class StructuredFilters(TypedDict):
@@ -419,3 +422,57 @@ async def execute_graph_chat_stream(

     yield None
     return
+
+
+async def execute_persona_chat_stream(
+    uid: str, messages: List[Message], app: App, cited: Optional[bool] = False,
+    callback_data: dict = None, chat_session: Optional[str] = None
+) -> AsyncGenerator[str, None]:
+    """Handle streaming chat responses for persona-type apps"""
+
+    system_prompt = app.persona_prompt
+    formatted_messages = [SystemMessage(content=system_prompt)]
+
+    for msg in messages:
+        if msg.sender == "ai":
+            formatted_messages.append(AIMessage(content=msg.text))
+        else:
+            formatted_messages.append(HumanMessage(content=msg.text))
+
+    full_response = []
+    callback = AsyncStreamingCallback()
+
+    try:
+        task = asyncio.create_task(llm_medium_stream.agenerate(
+            messages=[formatted_messages],
+            callbacks=[callback]
+        ))
+
+        while True:
+            try:
+                chunk = await callback.queue.get()
+                if chunk:
+                    token = chunk.replace("data: ", "")
+                    full_response.append(token)
+                    yield chunk
+                else:
+                    break
+            except asyncio.CancelledError:
+                break
+
+        await task
+
+        if callback_data is not None:
+            callback_data['answer'] = ''.join(full_response)
+            callback_data['memories_found'] = []
+            callback_data['ask_for_nps'] = False
+
+        yield None
+        return
+
+    except Exception as e:
+        print(f"Error in execute_persona_chat_stream: {e}")
+        if callback_data is not None:
+            callback_data['error'] = str(e)
+        yield None
+        return
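The rewrite depends on AsyncStreamingCallback, which is not defined in this diff and is presumably imported elsewhere in graph.py. A minimal sketch of the shape the consumer loop implies, with tokens queued as "data: "-prefixed chunks and a falsy sentinel on completion; the real class in the repo may differ:

```python
import asyncio

from langchain.callbacks.base import AsyncCallbackHandler


class AsyncStreamingCallback(AsyncCallbackHandler):
    """Assumed shape: pushes SSE-framed tokens onto an asyncio.Queue."""

    def __init__(self):
        self.queue = asyncio.Queue()

    async def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Frame each token the way execute_persona_chat_stream unwraps it.
        await self.queue.put(f"data: {token}")

    async def on_llm_end(self, response, **kwargs) -> None:
        # Falsy sentinel: the consumer's `while True` loop breaks on it.
        await self.queue.put(None)

    async def on_llm_error(self, error, **kwargs) -> None:
        await self.queue.put(None)
```

The design point of the fix: agenerate runs as a background task while the generator drains the queue with await, so token delivery never blocks the event loop the way the removed synchronous loop did.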

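For completeness, a hypothetical consumer of the new generator (the actual route lives in the chat router and is untouched by this commit): the chunks are already SSE-framed, and the trailing None sentinel must be filtered out before writing to the response. load_chat_history and get_app below are assumed helpers, not functions from this diff:

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

api = FastAPI()


@api.post("/v1/persona/chat")
async def persona_chat(uid: str, app_id: str):
    messages = load_chat_history(uid, app_id)  # assumed helper, not in this diff
    persona = get_app(app_id)                  # assumed helper, not in this diff

    async def sse():
        async for chunk in execute_persona_chat_stream(uid, messages, persona):
            if chunk is not None:  # drop the terminating sentinel
                yield chunk

    return StreamingResponse(sse(), media_type="text/event-stream")
```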