Skip to content

Commit

Permalink
Updates to the readme, added support for the new offline wikipedia AP…
Browse files Browse the repository at this point in the history
…I endpoint top_article, and added a new workflow node for pulling custom text files from the file system.
  • Loading branch information
SomeOddCodeGuy committed Nov 17, 2024
1 parent ca3383c commit 5bdf445
Show file tree
Hide file tree
Showing 8 changed files with 379 additions and 32 deletions.
27 changes: 27 additions & 0 deletions Middleware/utilities/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
from pathlib import Path
from typing import Dict


Expand Down Expand Up @@ -106,3 +107,29 @@ def save_timestamp_file(filepath: str, timestamps: Dict[str, str]):
"""Save the timestamp data to the appropriate file."""
with open(filepath, 'w') as file:
json.dump(timestamps, file, indent=4)


def load_custom_file(filepath: str, delimiter: str | None, custom_delimiter: str | None):
"""
Load a custom file that contains simple text.
Args:
filepath (str): The path to the file to load
delimiter (str): The delimiter to use when reading the file.
custom_delimiter (str): The delimiter to replace the file delimiter with when returning the contents of the file
Returns:
The contents of the file, separated by the custom_delimiter if applicable.
"""
path = Path(filepath)
if path.exists():
with path.open('r') as f:
content = f.read()
if not content:
return "No additional information added"
if delimiter is not None and custom_delimiter is not None:
content = content.replace(delimiter, custom_delimiter)
print("Returning content")
return content
else:
return "Custom instruction file did not exist"
39 changes: 33 additions & 6 deletions Middleware/workflows/managers/workflow_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
import logging
import time
import traceback
import uuid
from copy import deepcopy
from typing import Dict, List
Expand All @@ -14,7 +13,7 @@
get_active_recent_memory_tool_name, get_file_memory_tool_name, \
get_chat_template_name, get_discussion_chat_summary_file_path, get_discussion_memory_file_path, get_workflow_path, \
get_chat_summary_tool_workflow_name
from Middleware.utilities.file_utils import read_chunks_with_hashes
from Middleware.utilities.file_utils import read_chunks_with_hashes, load_custom_file
from Middleware.utilities.instance_utils import INSTANCE_ID
from Middleware.utilities.memory_utils import gather_chat_summary_memories, \
handle_get_current_summary_from_file, gather_recent_memories
Expand All @@ -28,6 +27,7 @@

logger = logging.getLogger(__name__)


class WorkflowManager:
"""
Manages the execution of workflows for various types of LLM-based tasks.
Expand Down Expand Up @@ -144,7 +144,7 @@ def gen():
try:
for idx, config in enumerate(configs):
logger.info(f'------Workflow {self.workflowConfigName}; ' +
f'step {idx}; node type: {config.get("type", "Standard")}')
f'step {idx}; node type: {config.get("type", "Standard")}')

if "systemPrompt" in config or "prompt" in config:
if self.override_first_available_prompts:
Expand Down Expand Up @@ -305,16 +305,17 @@ def _process_section(self, config: Dict, request_id, workflow_id, discussion_id:
agent_outputs)
if config["type"] == "RecentMemorySummarizerTool":
logger.info("Recent memory summarization tool")
prompt_processor_service.handle_memory_file(discussion_id, messages)
memories = gather_recent_memories(messages,
discussion_id,
config["maxTurnsToPull"],
config["maxSummaryChunksFromFile"])
custom_delimiter = config.get("customDelimiter", None)
if custom_delimiter is not None:
if custom_delimiter is not None and memories is not None:
return memories.replace("--ChunkBreak--", custom_delimiter)
else:
elif memories is not None:
return memories
else:
return "There are not yet any memories"
if config["type"] == "ChatSummaryMemoryGatheringTool":
logger.info("Chat summary memory gathering tool")
return gather_chat_summary_memories(messages,
Expand Down Expand Up @@ -345,8 +346,13 @@ def _process_section(self, config: Dict, request_id, workflow_id, discussion_id:
logger.info("Python Module")
return self.handle_python_module(config, prompt_processor_service, messages, agent_outputs)
if config["type"] == "OfflineWikiApiFullArticle":
# DEPRECATED. REMOVING SOON
logger.info("Offline Wikipedia Api Full Article")
return prompt_processor_service.handle_offline_wiki_node(messages, config["promptToSearch"], agent_outputs)
if config["type"] == "OfflineWikiApiBestFullArticle":
logger.info("Offline Wikipedia Api Best Full Article")
return prompt_processor_service.handle_offline_wiki_node(messages, config["promptToSearch"], agent_outputs,
use_new_best_article_endpoint=True)
if config["type"] == "OfflineWikiApiPartialArticle":
logger.info("Offline Wikipedia Api Summary Only")
return prompt_processor_service.handle_offline_wiki_node(messages, config["promptToSearch"], agent_outputs,
Expand Down Expand Up @@ -374,6 +380,27 @@ def _process_section(self, config: Dict, request_id, workflow_id, discussion_id:

if config["type"] == "CustomWorkflow":
return self.handle_custom_workflow(config, messages, agent_outputs, stream, request_id, discussion_id)
if config["type"] == "GetCustomFile":
logger.info("Get custom file")
delimiter = config.get("delimiter")
custom_return_delimiter = config.get("customReturnDelimiter")
filepath = config.get("filepath")

if filepath is None:
return "No filepath specified"

if delimiter is None:
if custom_return_delimiter is None:
custom_return_delimiter = "\n"
delimiter = custom_return_delimiter
else:
delimiter = custom_return_delimiter
elif custom_return_delimiter is None:
custom_return_delimiter = delimiter

file = load_custom_file(filepath=filepath, delimiter=delimiter, custom_delimiter=custom_return_delimiter)
logger.info("Custom file result: %s", file)
return file

def handle_custom_workflow(self, config, messages, agent_outputs, stream, request_id, discussion_id):
print("Custom Workflow initiated")
Expand Down
8 changes: 6 additions & 2 deletions Middleware/workflows/processors/prompt_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,8 @@ def handle_python_module(self, config: Dict, messages: List[Dict[str, str]], mod
return run_dynamic_module(module_path, *new_args, **kwargs)

def handle_offline_wiki_node(self, messages: List[Dict[str, str]], prompt,
agent_outputs: [Dict], get_full_article: bool = True) -> Any:
agent_outputs: [Dict], get_full_article: bool = True,
use_new_best_article_endpoint: bool = False) -> Any:

message_copy = deepcopy(messages)

Expand All @@ -456,7 +457,10 @@ def handle_offline_wiki_node(self, messages: List[Dict[str, str]], prompt,
)

offline_wiki_api_client = OfflineWikiApiClient()
if get_full_article:
if get_full_article and use_new_best_article_endpoint:
result = offline_wiki_api_client.get_top_full_wiki_article_by_prompt(variabled_prompt)
return result
elif get_full_article:
results = offline_wiki_api_client.get_full_wiki_article_by_prompt(variabled_prompt)
else:
results = offline_wiki_api_client.get_wiki_summary_by_prompt(variabled_prompt)
Expand Down
34 changes: 34 additions & 0 deletions Middleware/workflows/tools/offline_wikipedia_api_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def get_wiki_summary_by_prompt(self, prompt, percentile=0.5, num_results=1):
else:
raise Exception(f"Error: {response.status_code}, {response.text}")

# DEPRECATED. REMOVING SOON
def get_full_wiki_article_by_prompt(self, prompt, percentile=0.5, num_results=1):
"""
Get full text of Wikipedia articles based on a prompt.
Expand Down Expand Up @@ -111,3 +112,36 @@ def get_full_wiki_article_by_prompt(self, prompt, percentile=0.5, num_results=1)
return [result.get('text', "No text element found") for result in results]
else:
raise Exception(f"Error: {response.status_code}, {response.text}")

def get_top_full_wiki_article_by_prompt(self, prompt, percentile=0.5, num_results=10):
    """
    Query the offline Wikipedia API's ``/top_article`` endpoint for a prompt.

    Args:
        prompt (str): The prompt sent to the API as the search text.
        percentile (float): Forwarded as the ``percentile`` query parameter.
            Default is 0.5.
        num_results (int): Forwarded as the ``num_results`` query parameter.
            Default is 10.

    Returns:
        list: A single-element list holding the ``text`` field of the JSON
        response (or "No text element found" when that field is missing).
        When the offline wiki API is disabled, a single-element list with a
        placeholder message is returned and no request is made.

    Raises:
        Exception: If the API responds with a status code other than 200.
    """
    # Short-circuit before any network access when the feature is turned off.
    if not self.use_offline_wiki_api:
        return ["No additional information provided"]

    endpoint = f"{self.base_url}/top_article"
    query = dict(prompt=prompt, percentile=percentile, num_results=num_results)
    reply = requests.get(endpoint, params=query)

    print(f"Response Status Code: {reply.status_code}")
    print(f"Response Text: {reply.text}")

    if reply.status_code != 200:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")

    article = reply.json()
    # The endpoint yields one article; wrap its text in a list so the return
    # shape is a list of article texts.
    return [article.get('text', "No text element found")]
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"title": "Querying the offline wikipedia api",
"agentName": "Wikipedia Search Api Agent Three",
"promptToSearch": "{agent2Output}",
"type": "OfflineWikiApiFullArticle"
"type": "OfflineWikiApiBestFullArticle"
},
{
"title": "Answering user's question",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"title": "Querying the offline wikipedia api",
"agentName": "Wikipedia Search Api Agent Three",
"promptToSearch": "{agent2Output}",
"type": "OfflineWikiApiFullArticle"
"type": "OfflineWikiApiBestFullArticle"
},
{
"title": "Answering user's question",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
"title": "Querying the offline wikipedia api",
"agentName": "Wikipedia Search Api Agent Three",
"promptToSearch": "{agent2Output}",
"type": "OfflineWikiApiFullArticle"
"type": "OfflineWikiApiBestFullArticle"
},
{
"title": "Answering user's question",
Expand Down
Loading

0 comments on commit 5bdf445

Please sign in to comment.