Commit

Merge pull request SomeOddCodeGuy#16 from SomeOddCodeGuy/updates/Logging_Bug_Fixes_And_Readme_Updates

Updates/logging bug fixes, settings and memory updates and readme updates
SomeOddCodeGuy authored Nov 10, 2024
2 parents 48a34a2 + ab85620 commit ca3383c
Showing 10 changed files with 305 additions and 60 deletions.
6 changes: 3 additions & 3 deletions Middleware/services/llm_service.py
@@ -29,12 +29,12 @@ def initialize_llm_handler(self, config_data, preset, endpoint, stream, truncate
api_type_config = get_api_type_config(config_data["apiTypeConfigFileName"])
llm_type = api_type_config["type"]
if llm_type == "openAIV1Completion":
logger.info('Loading v1 Completions endpoint: ' + endpoint)
logger.info('Loading v1 Completions endpoint: %s', endpoint)
llm = OpenAiLlmCompletionsApiService(endpoint=endpoint, presetname=preset,
stream=stream, api_type_config=api_type_config,
max_tokens=max_tokens)
elif llm_type == "openAIChatCompletion":
logger.info('Loading chat Completions endpoint: ' + endpoint)
logger.info('Loading chat Completions endpoint: %s', endpoint)
llm = OpenAiLlmChatCompletionsApiService(endpoint=endpoint, presetname=preset,
stream=stream, api_type_config=api_type_config,
max_tokens=max_tokens)
@@ -59,7 +59,7 @@ def initialize_llm_handler(self, config_data, preset, endpoint, stream, truncate
def load_model_from_config(self, config_name, preset, stream=False, truncate_length=4096, max_tokens=400,
addGenerationPrompt=None):
try:
logger.info("Loading model from: " + config_name)
logger.info("Loading model from: %s", config_name)
config_file = get_endpoint_config(config_name)
return self.initialize_llm_handler(config_file, preset, config_name, stream, truncate_length, max_tokens,
addGenerationPrompt)
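The three changes above swap eager string concatenation for lazy %s-style logging. A minimal sketch of the difference, using illustrative names rather than the repo's real values:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

endpoint = "OpenAI-Endpoint"  # placeholder value

# Old pattern: the message string is always built, even if INFO logging is disabled.
logger.info('Loading v1 Completions endpoint: ' + endpoint)

# New pattern: interpolation is deferred until a handler actually emits the record.
logger.info('Loading v1 Completions endpoint: %s', endpoint)

# Related bug fixed later in this commit: passing a value with no %s placeholder
# makes the logging module report "--- Logging error ---" instead of the value.
logger.info('Search result chunks: ', 'some chunk text')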
5 changes: 2 additions & 3 deletions Middleware/utilities/prompt_utils.py
@@ -114,7 +114,7 @@ def hash_single_message(message: Dict[str, str]) -> str:

def find_last_matching_hash_message(messagesOriginal: List[Dict[str, str]],
hashed_chunks_original: List[Tuple[str, str]],
skip_system: bool = False) -> int:
skip_system: bool = False, turns_to_skip_looking_back=4) -> int:
"""
Find the number of messages since the last matching hash, starting from the third-to-last message.
@@ -134,8 +134,7 @@ def find_last_matching_hash_message(messagesOriginal: List[Dict[str, str]],

current_message_hashes = [hash_single_message(message) for message in filtered_messages]

# We want to start from the third-to-last message (hence len(current_message_hashes) - 3)
start_index = len(current_message_hashes) - 4
start_index = len(current_message_hashes) - turns_to_skip_looking_back

# Iterate from the third-to-last message backwards
for i in range(start_index, -1, -1):
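The new turns_to_skip_looking_back parameter replaces the hard-coded offset of 4, letting callers decide how many of the newest turns are ignored before the backwards hash search begins. A simplified illustration of the index math only (the real function also filters system messages and compares hashes against stored chunks):

# Ten hashed messages stand in for a real conversation history.
current_message_hashes = [f"hash-{i}" for i in range(10)]

turns_to_skip_looking_back = 4  # default in the new signature
start_index = len(current_message_hashes) - turns_to_skip_looking_back
print(start_index)  # 6: the scan starts four messages back from the end

for i in range(start_index, -1, -1):
    # The real implementation compares current_message_hashes[i] against the
    # stored chunk hashes here and returns how many messages came after the match.
    pass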
48 changes: 34 additions & 14 deletions Middleware/workflows/tools/slow_but_quality_rag_tool.py
@@ -1,8 +1,9 @@
from copy import deepcopy
import logging
from copy import deepcopy
from typing import List, Dict

from Middleware.services.llm_service import LlmHandlerService
from Middleware.utilities import memory_utils
from Middleware.utilities.config_utils import get_discussion_memory_file_path, load_config, \
get_discussion_id_workflow_path, get_endpoint_config
from Middleware.utilities.file_utils import read_chunks_with_hashes, \
@@ -21,6 +22,7 @@

logger = logging.getLogger(__name__)


class SlowButQualityRAGTool:
"""
A very slow but more thorough RAG tool that utilizes LLMs to parse through
@@ -90,7 +92,7 @@ def perform_conversation_search(self, keywords: str, messagesOriginal, llm_handl
filtered_chunks = [s for s in search_result_chunks if s]

logger.info("******** BEGIN SEARCH RESULT CHUNKS ************")
logger.info("Search result chunks: ", '\n\n'.join(filtered_chunks))
logger.info("Search result chunks: %s", '\n\n'.join(filtered_chunks))
logger.info("******** END SEARCH RESULT CHUNKS ************")

return '--ChunkBreak--'.join(filtered_chunks)
Expand Down Expand Up @@ -128,7 +130,7 @@ def perform_memory_file_keyword_search(self, keywords: str, messagesOriginal, ll
filtered_chunks = [s for s in search_result_chunks if s]

logger.info("******** BEGIN SEARCH RESULT CHUNKS ************")
logger.info("Search result chunks: ", '\n\n'.join(filtered_chunks))
logger.info("Search result chunks: %s", '\n\n'.join(filtered_chunks))
logger.info("******** END SEARCH RESULT CHUNKS ************")

return '\n\n'.join(filtered_chunks)
@@ -142,14 +144,14 @@ def process_new_memory_chunks(self, chunks, hash_chunks, rag_system_prompt, rag_
chunks.reverse()

all_chunks = "--ChunkBreak--".join(chunks)
logger.info("Processing chunks: ", all_chunks)
logger.info("Processing chunks: %s", all_chunks)

result = rag_tool.perform_rag_on_memory_chunk(rag_system_prompt, rag_prompt, all_chunks, workflow, messages,
discussionId, "--rag_break--", chunks_per_memory)
results = result.split("--rag_break--")
results.reverse()
logger.info("Total results: " + str(len(results)))
logger.info("Total chunks: " + str(len(hash_chunks)))
logger.info("Total results: %s", len(results))
logger.info("Total chunks: %s", len(hash_chunks))
hash_chunks.reverse()

replaced = [(summary, hash_code) for summary, (_, hash_code) in zip(results, hash_chunks)]
@@ -186,6 +188,11 @@ def handle_discussion_id_flow(self, discussionId: str, messagesOriginal: List[Di

rag_system_prompt = discussion_id_workflow_config['systemPrompt']
rag_prompt = discussion_id_workflow_config['prompt']
messages_from_most_recent_to_skip = discussion_id_workflow_config['lookbackStartTurn']
if not messages_from_most_recent_to_skip or messages_from_most_recent_to_skip < 1:
messages_from_most_recent_to_skip = 3
logger.info("Skipping most recent messages. Number of most recent messages to skip: " + str(
messages_from_most_recent_to_skip))

chunk_size = discussion_id_workflow_config.get('chunkEstimatedTokenSize', 1000)
max_messages_between_chunks = discussion_id_workflow_config.get('maxMessagesBetweenChunks', 0)
@@ -198,17 +205,19 @@
self.process_full_discussion_flow(messages_copy, rag_system_prompt, rag_prompt,
discussion_id_workflow_config, discussionId)
else:
number_of_messages_to_pull = find_last_matching_hash_message(messages_copy, discussion_chunks)
if (number_of_messages_to_pull > 3):
number_of_messages_to_pull = number_of_messages_to_pull - 3
number_of_messages_to_pull = find_last_matching_hash_message(messages_copy, discussion_chunks,
turns_to_skip_looking_back=messages_from_most_recent_to_skip)
if (number_of_messages_to_pull > messages_from_most_recent_to_skip):
number_of_messages_to_pull = number_of_messages_to_pull - messages_from_most_recent_to_skip
else:
number_of_messages_to_pull = 0

logger.info("Number of messages since last memory chunk update: ", number_of_messages_to_pull)
logger.info("Number of messages since last memory chunk update: %s", number_of_messages_to_pull)

messages_to_process = messages_copy[:-3] if len(messages_copy) > 3 else messages_copy
logger.info("Messages to process: ", messages_to_process)
if (len(messages_to_process) == 0):
messages_to_process = messages_copy[:-messages_from_most_recent_to_skip] if len(
messages_copy) > messages_from_most_recent_to_skip else messages_copy
logger.info("Messages to process: %s", messages_to_process)
if len(messages_to_process) == 0:
return

if (rough_estimate_token_length(
@@ -313,6 +322,7 @@ def perform_rag_on_memory_chunk(rag_system_prompt: str, rag_prompt: str, text_ch

discussion_chunks = read_chunks_with_hashes(get_discussion_memory_file_path(discussionId))
memory_chunks = extract_text_blocks_from_hashed_chunks(discussion_chunks)
chat_summary = memory_utils.handle_get_current_summary_from_file(discussionId)

endpoint_data = get_endpoint_config(config['endpointName'])
llm_handler_service = LlmHandlerService()
Expand All @@ -330,10 +340,20 @@ def perform_rag_on_memory_chunk(rag_system_prompt: str, rag_prompt: str, text_ch
if current_memories is None:
current_memories = ""

logger.info("Processing memory chunk. Current memories is: [[" + current_memories.strip() + "]]")
full_memories = '\n--------------\n'.join(memory_chunks)
if full_memories is None:
full_memories = ""

logger.info("Processing memory chunk")
system_prompt = rag_system_prompt.replace('[Memory_file]', current_memories.strip())
prompt = rag_prompt.replace('[Memory_file]', current_memories.strip())

system_prompt = system_prompt.replace('[Full_Memory_file]', full_memories.strip())
prompt = prompt.replace('[Full_Memory_file]', full_memories.strip())

system_prompt = system_prompt.replace('[Chat_Summary]', chat_summary.strip())
prompt = prompt.replace('[Chat_Summary]', chat_summary.strip())

result_chunk = SlowButQualityRAGTool.process_single_chunk(chunk, llm_handler, prompt, system_prompt,
messages, config)
result_chunks.append(result_chunk)
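Beyond the logging fixes, perform_rag_on_memory_chunk now pulls the full memory file and the current chat summary and substitutes them into the prompts alongside the existing [Memory_file] placeholder. A standalone sketch of just that substitution step, with made-up prompt text and memory contents:

memory_chunks = ["First stored memory.", "Second stored memory."]
current_memories = "Second stored memory."
chat_summary = "Running summary of the conversation so far."

rag_system_prompt = ("Recent memories: [Memory_file]\n"
                     "All memories: [Full_Memory_file]\n"
                     "Summary: [Chat_Summary]")
rag_prompt = "Summarize the next chunk, keeping [Chat_Summary] in mind."

full_memories = '\n--------------\n'.join(memory_chunks)

system_prompt = rag_system_prompt.replace('[Memory_file]', current_memories.strip())
system_prompt = system_prompt.replace('[Full_Memory_file]', full_memories.strip())
system_prompt = system_prompt.replace('[Chat_Summary]', chat_summary.strip())

prompt = rag_prompt.replace('[Chat_Summary]', chat_summary.strip())
print(system_prompt)
print(prompt)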
8 changes: 5 additions & 3 deletions Public/Configs/PromptTemplates/llama3.json
@@ -1,9 +1,11 @@
{
"promptTemplateSystemPrefix": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n",
"promptTemplateSystemSuffix": "<|eot_id|>",
"promptTemplateAssistantPrefix": "<|start_header_id|>assistant<|end_header_id|>\n\n",
"promptTemplateAssistantSuffix": "<|eot_id|>",
"promptTemplateEndToken": "",
"promptTemplateSystemPrefix": "<|start_header_id|>system<|end_header_id|>\n\n",
"promptTemplateSystemSuffix": "<|eot_id|>",
"promptTemplateUserPrefix": "<|start_header_id|>user<|end_header_id|>\n\n",
"promptTemplateUserSuffix": "<|eot_id|>"
"promptTemplateUserSuffix": "<|eot_id|>",
"promptTemplateSystemmesPrefix": "<|start_header_id|>system<|end_header_id|>\n\n",
"promptTemplateSystemmesSuffix": "<|eot_id|>"
}
16 changes: 8 additions & 8 deletions Public/Configs/PromptTemplates/mistral.json
@@ -1,11 +1,11 @@
{
"promptTemplateSystemPrefix": "",
"promptTemplateSystemSuffix": "[/INST]",
"promptTemplateUserPrefix": "[INST]",
"promptTemplateUserSuffix": "[/INST]",
"promptTemplateAssistantPrefix": " ",
"promptTemplateAssistantSuffix": "",
"promptTemplateSystemPrefix": "<s>\n[INST]\n|SYSTEM PROMPT|\n",
"promptTemplateSystemSuffix": "\n|END OF SYSTEM PROMPT|",
"promptTemplateUserPrefix": "\n[INST]",
"promptTemplateUserSuffix": "",
"promptTemplateAssistantPrefix": "\n[/INST]",
"promptTemplateAssistantSuffix": "\n</s>",
"promptTemplateEndToken": "",
"promptTemplateSystemmesPrefix": "[INST] ",
"promptTemplateSystemmesSuffix": "[/INST]"
"promptTemplateSystemmesPrefix": "\n[INST]",
"promptTemplateSystemmesSuffix": ""
}
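Both template files share the same shape: per-role prefix and suffix strings that the middleware wraps around each turn. A rough sketch of how a template shaped like the updated mistral.json could be applied; the wrap helper is hypothetical, not the project's actual templating code:

# Hypothetical assembly of a prompt from a template shaped like mistral.json.
template = {
    "promptTemplateSystemPrefix": "<s>\n[INST]\n|SYSTEM PROMPT|\n",
    "promptTemplateSystemSuffix": "\n|END OF SYSTEM PROMPT|",
    "promptTemplateUserPrefix": "\n[INST]",
    "promptTemplateUserSuffix": "",
    "promptTemplateAssistantPrefix": "\n[/INST]",
    "promptTemplateAssistantSuffix": "\n</s>",
}

def wrap(role: str, text: str) -> str:
    # Wrap one turn in the prefix/suffix pair configured for its role.
    prefix = template.get(f"promptTemplate{role}Prefix", "")
    suffix = template.get(f"promptTemplate{role}Suffix", "")
    return f"{prefix}{text}{suffix}"

prompt = (wrap("System", "You are a helpful assistant.")
          + wrap("User", "Hello!")
          + wrap("Assistant", ""))
print(prompt)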
@@ -5,5 +5,7 @@
"endpointName": "Assistant-Multi-Model-MemoryChatSummary-Endpoint",
"preset": "_Assistant_Multi_Model_MemoryChatSummary_Preset",
"maxResponseSizeInTokens": 250,
"chunkEstimatedTokenSize": 2500
"chunkEstimatedTokenSize": 2500,
"maxMessagesBetweenChunks": 20,
"lookbackStartTurn": 7
}
@@ -5,5 +5,7 @@
"endpointName": "Assistant-Single-Model-Endpoint",
"preset": "_Assistant_Single_Model_MemoryChatSummary_Preset",
"maxResponseSizeInTokens": 250,
"chunkEstimatedTokenSize": 2500
"chunkEstimatedTokenSize": 2500,
"maxMessagesBetweenChunks": 20,
"lookbackStartTurn": 7
}
@@ -5,5 +5,7 @@
"endpointName": "Convo-Roleplay-Dual-Model-WorkerEndpoint",
"preset": "_Convo_Roleplay_Dual_Model_Summarization_Preset",
"maxResponseSizeInTokens": 250,
"chunkEstimatedTokenSize": 2500
"chunkEstimatedTokenSize": 2500,
"maxMessagesBetweenChunks": 20,
"lookbackStartTurn": 7
}
@@ -5,5 +5,7 @@
"endpointName": "Convo-Roleplay-Single-Model-Endpoint",
"preset": "_Convo_Roleplay_Single_Model_MemoryChatSummary_Preset",
"maxResponseSizeInTokens": 250,
"chunkEstimatedTokenSize": 2500
"chunkEstimatedTokenSize": 2500,
"maxMessagesBetweenChunks": 20,
"lookbackStartTurn": 7
}
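The workflow configs above gain two keys, maxMessagesBetweenChunks and lookbackStartTurn, which handle_discussion_id_flow reads as shown earlier in the diff. A compact sketch of the same defaulting behavior with made-up config data (the diff indexes lookbackStartTurn directly; .get() is used here so the sketch also runs when the key is missing):

discussion_id_workflow_config = {
    "chunkEstimatedTokenSize": 2500,
    "maxMessagesBetweenChunks": 20,
    "lookbackStartTurn": 7,
}

chunk_size = discussion_id_workflow_config.get('chunkEstimatedTokenSize', 1000)
max_messages_between_chunks = discussion_id_workflow_config.get('maxMessagesBetweenChunks', 0)

messages_from_most_recent_to_skip = discussion_id_workflow_config.get('lookbackStartTurn')
if not messages_from_most_recent_to_skip or messages_from_most_recent_to_skip < 1:
    messages_from_most_recent_to_skip = 3  # fallback used by the handler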