
added threading to chat summarization to improve chat response time #751


Merged
merged 1 commit into from Sep 20, 2024
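This PR moves chat-history summarization off the request path. Previously, process_chat_response and process_graph_response called summarize_and_log synchronously, so every chat response waited on an extra LLM round trip before returning. The diff below spawns the summarization in a background threading.Thread instead and returns the response immediately. A minimal sketch of the pattern (standalone, with stand-ins for the real retrieval and summarization calls):

    import logging
    import threading
    import time

    def summarize_and_log(history, messages):
        # Stand-in for the real summarization LLM call, the slow part.
        time.sleep(2)
        logging.info("Summarized %d messages.", len(messages))

    def respond(messages, history):
        content = "answer"  # stand-in for retrieval + generation
        # Fire-and-forget: the thread runs on while the response returns now.
        threading.Thread(target=summarize_and_log, args=(history, messages)).start()
        return content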
44 changes: 28 additions & 16 deletions backend/src/QA_integration.py
@@ -1,11 +1,14 @@
 import os
 import json
-from datetime import datetime
 import time
-from typing import Any
-import logging
 
+import threading
+from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
+from typing import Any
 from dotenv import load_dotenv
+import logging
 
+
 # LangChain imports
 from langchain_community.vectorstores.neo4j_vector import Neo4jVector
@@ -364,22 +367,26 @@ def setup_chat(model, graph, document_names, chat_mode_settings):
 
     return llm, doc_retriever, model_name
 
-def process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings):
+def process_chat_response(messages, history, question, model, graph, document_names, chat_mode_settings):
     try:
-        llm, doc_retriever, model_version = setup_chat(model, graph, document_names,chat_mode_settings)
+        llm, doc_retriever, model_version = setup_chat(model, graph, document_names, chat_mode_settings)
 
         docs = retrieve_documents(doc_retriever, messages)
         if docs:
-            content, result, total_tokens = process_documents(docs, question, messages, llm, model,chat_mode_settings)
+            content, result, total_tokens = process_documents(docs, question, messages, llm, model, chat_mode_settings)
         else:
             content = "I couldn't find any relevant documents to answer your question."
-            result = {"sources": [], "chunkdetails": [],"entities":[]}
+            result = {"sources": [], "chunkdetails": [], "entities": []}
             total_tokens = 0
 
         ai_response = AIMessage(content=content)
         messages.append(ai_response)
-        summarize_and_log(history, messages, llm)
+
+        summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, llm))
+        summarization_thread.start()
+        logging.info("Summarization thread started.")
+        # summarize_and_log(history, messages, llm)
 
         return {
             "session_id": "",
             "message": content,
@@ -390,7 +397,7 @@ def process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings):
                 "total_tokens": total_tokens,
                 "response_time": 0,
                 "mode": chat_mode_settings["mode"],
-                "entities" : result["entities"]
+                "entities": result["entities"]
             },
             "user": "chatbot"
         }
@@ -413,6 +420,7 @@ def process_chat_response(messages,history, question, model, graph, document_names,chat_mode_settings):
         }
 
 def summarize_and_log(history, stored_messages, llm):
+    logging.info("Starting summarization in a separate thread.")
     if not stored_messages:
         logging.info("No messages to summarize.")
         return False
@@ -433,18 +441,19 @@ def summarize_and_log(history, stored_messages, llm):
 
         summary_message = summarization_chain.invoke({"chat_history": stored_messages})
 
-        history.clear()
-        history.add_user_message("Our current conversation summary till now")
-        history.add_message(summary_message)
+        with threading.Lock():
+            history.clear()
+            history.add_user_message("Our current conversation summary till now")
+            history.add_message(summary_message)
 
         history_summarized_time = time.time() - start_time
         logging.info(f"Chat History summarized in {history_summarized_time:.2f} seconds")
 
         return True
 
     except Exception as e:
-        logging.error(f"An error occurred while summarizing messages: {e}")
-        return False
+        logging.error(f"An error occurred while summarizing messages: {e}", exc_info=True)
+        return False
 
 def create_graph_chain(model, graph):
     try:
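One caveat on the hunk above: with threading.Lock(): constructs a fresh lock object on every call, so no two threads ever contend for the same lock and the block adds no mutual exclusion. For the history mutation to actually be serialized across summarization threads, the lock has to be shared, e.g. at module level (a hedged sketch, not part of this diff):

    # Module level: a single lock shared by every summarization thread.
    _history_lock = threading.Lock()

    def summarize_and_log(history, stored_messages, llm):
        ...
        with _history_lock:  # all threads acquire the same lock object
            history.clear()
            history.add_user_message("Our current conversation summary till now")
            history.add_message(summary_message)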
@@ -501,7 +510,10 @@ def process_graph_response(model, graph, question, messages, history):
         ai_response = AIMessage(content=ai_response_content)
 
         messages.append(ai_response)
-        summarize_and_log(history, messages, qa_llm)
+        # summarize_and_log(history, messages, qa_llm)
+        summarization_thread = threading.Thread(target=summarize_and_log, args=(history, messages, qa_llm))
+        summarization_thread.start()
+        logging.info("Summarization thread started.")
 
         result = {
             "session_id": "",