Updates to the readme, added support for the new offline Wikipedia API endpoint top_article, and added a new workflow node for pulling custom text files from the file system.
mrober01 · Nov 17, 2024 · 5bdf445 · 5bdf445
1 parent ca3383c
commit 5bdf445
Show file tree

Hide file tree

Showing 8 changed files with 379 additions and 32 deletions.
diff --git a/Middleware/utilities/file_utils.py b/Middleware/utilities/file_utils.py
@@ -1,5 +1,6 @@
 import json
 import os
+from pathlib import Path
 from typing import Dict
 
 
@@ -106,3 +107,29 @@ def save_timestamp_file(filepath: str, timestamps: Dict[str, str]):
     """Save the timestamp data to the appropriate file."""
     with open(filepath, 'w') as file:
         json.dump(timestamps, file, indent=4)
+
+
+def load_custom_file(filepath: str, delimiter: str | None, custom_delimiter: str | None):
+    """
+    Load a custom file that contains simple text.
+
+    Args:
+    filepath (str): The path to the file to load
+    delimiter (str): The delimiter to use when reading the file.
+    custom_delimiter (str): The delimiter to replace the file delimiter with when returning the contents of the file
+
+    Returns:
+    The contents of the file, separated by the custom_delimiter if applicable.
+    """
+    path = Path(filepath)
+    if path.exists():  # NOTE(review): also true for a directory; open() would then raise
+        with path.open('r') as f:  # text mode; no encoding given, so the platform default applies
+            content = f.read()
+        if not content:  # empty file: a placeholder message is returned instead of ""
+            return "No additional information added"
+        if delimiter is not None and custom_delimiter is not None:  # swap only when both are given
+            content = content.replace(delimiter, custom_delimiter)
+        print("Returning content")  # NOTE(review): debug output goes to stdout, not a logger
+        return content
+    else:  # a missing path is reported as a message string, not as an exception
+        return "Custom instruction file did not exist"
diff --git a/Middleware/workflows/managers/workflow_manager.py b/Middleware/workflows/managers/workflow_manager.py
@@ -1,7 +1,6 @@
 import json
 import logging
 import time
-import traceback
 import uuid
 from copy import deepcopy
 from typing import Dict, List
@@ -14,7 +13,7 @@
     get_active_recent_memory_tool_name, get_file_memory_tool_name, \
     get_chat_template_name, get_discussion_chat_summary_file_path, get_discussion_memory_file_path, get_workflow_path, \
     get_chat_summary_tool_workflow_name
-from Middleware.utilities.file_utils import read_chunks_with_hashes
+from Middleware.utilities.file_utils import read_chunks_with_hashes, load_custom_file
 from Middleware.utilities.instance_utils import INSTANCE_ID
 from Middleware.utilities.memory_utils import gather_chat_summary_memories, \
     handle_get_current_summary_from_file, gather_recent_memories
@@ -28,6 +27,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 class WorkflowManager:
     """
     Manages the execution of workflows for various types of LLM-based tasks.
@@ -144,7 +144,7 @@ def gen():
                 try:
                     for idx, config in enumerate(configs):
                         logger.info(f'------Workflow {self.workflowConfigName}; ' +
-                              f'step {idx}; node type: {config.get("type", "Standard")}')
+                                    f'step {idx}; node type: {config.get("type", "Standard")}')
 
                         if "systemPrompt" in config or "prompt" in config:
                             if self.override_first_available_prompts:
@@ -305,16 +305,17 @@ def _process_section(self, config: Dict, request_id, workflow_id, discussion_id:
                                                                    agent_outputs)
         if config["type"] == "RecentMemorySummarizerTool":
             logger.info("Recent memory summarization tool")
-            prompt_processor_service.handle_memory_file(discussion_id, messages)
             memories = gather_recent_memories(messages,
                                               discussion_id,
                                               config["maxTurnsToPull"],
                                               config["maxSummaryChunksFromFile"])
             custom_delimiter = config.get("customDelimiter", None)
-            if custom_delimiter is not None:
+            if custom_delimiter is not None and memories is not None:
                 return memories.replace("--ChunkBreak--", custom_delimiter)
-            else:
+            elif memories is not None:
                 return memories
+            else:
+                return "There are not yet any memories"
         if config["type"] == "ChatSummaryMemoryGatheringTool":
             logger.info("Chat summary memory gathering tool")
             return gather_chat_summary_memories(messages,
@@ -345,8 +346,13 @@ def _process_section(self, config: Dict, request_id, workflow_id, discussion_id:
             logger.info("Python Module")
             return self.handle_python_module(config, prompt_processor_service, messages, agent_outputs)
         if config["type"] == "OfflineWikiApiFullArticle":
+            # DEPRECATED. REMOVING SOON
             logger.info("Offline Wikipedia Api Full Article")
             return prompt_processor_service.handle_offline_wiki_node(messages, config["promptToSearch"], agent_outputs)
+        if config["type"] == "OfflineWikiApiBestFullArticle":
+            logger.info("Offline Wikipedia Api Best Full Article")
+            return prompt_processor_service.handle_offline_wiki_node(messages, config["promptToSearch"], agent_outputs,
+                                                                     use_new_best_article_endpoint=True)
         if config["type"] == "OfflineWikiApiPartialArticle":
             logger.info("Offline Wikipedia Api Summary Only")
             return prompt_processor_service.handle_offline_wiki_node(messages, config["promptToSearch"], agent_outputs,
@@ -374,6 +380,27 @@ def _process_section(self, config: Dict, request_id, workflow_id, discussion_id:
 
         if config["type"] == "CustomWorkflow":
             return self.handle_custom_workflow(config, messages, agent_outputs, stream, request_id, discussion_id)
+        if config["type"] == "GetCustomFile":
+            logger.info("Get custom file")
+            delimiter = config.get("delimiter")
+            custom_return_delimiter = config.get("customReturnDelimiter")
+            filepath = config.get("filepath")
+
+            if filepath is None:
+                return "No filepath specified"
+
+            if delimiter is None:
+                if custom_return_delimiter is None:
+                    custom_return_delimiter = "\n"
+                    delimiter = custom_return_delimiter
+                else:
+                    delimiter = custom_return_delimiter
+            elif custom_return_delimiter is None:
+                custom_return_delimiter = delimiter
+
+            file = load_custom_file(filepath=filepath, delimiter=delimiter, custom_delimiter=custom_return_delimiter)
+            logger.info("Custom file result: %s", file)
+            return file
 
     def handle_custom_workflow(self, config, messages, agent_outputs, stream, request_id, discussion_id):
         print("Custom Workflow initiated")

diff --git a/Middleware/workflows/processors/prompt_processor.py b/Middleware/workflows/processors/prompt_processor.py
@@ -444,7 +444,8 @@ def handle_python_module(self, config: Dict, messages: List[Dict[str, str]], mod
         return run_dynamic_module(module_path, *new_args, **kwargs)
 
     def handle_offline_wiki_node(self, messages: List[Dict[str, str]], prompt,
-                                 agent_outputs: [Dict], get_full_article: bool = True) -> Any:
+                                 agent_outputs: [Dict], get_full_article: bool = True,
+                                 use_new_best_article_endpoint: bool = False) -> Any:
 
         message_copy = deepcopy(messages)
 
@@ -456,7 +457,10 @@ def handle_offline_wiki_node(self, messages: List[Dict[str, str]], prompt,
         )
 
         offline_wiki_api_client = OfflineWikiApiClient()
-        if get_full_article:
+        if get_full_article and use_new_best_article_endpoint:
+            result = offline_wiki_api_client.get_top_full_wiki_article_by_prompt(variabled_prompt)
+            return result
+        elif get_full_article:
             results = offline_wiki_api_client.get_full_wiki_article_by_prompt(variabled_prompt)
         else:
             results = offline_wiki_api_client.get_wiki_summary_by_prompt(variabled_prompt)

diff --git a/Middleware/workflows/tools/offline_wikipedia_api_tool.py b/Middleware/workflows/tools/offline_wikipedia_api_tool.py
@@ -79,6 +79,7 @@ def get_wiki_summary_by_prompt(self, prompt, percentile=0.5, num_results=1):
         else:
             raise Exception(f"Error: {response.status_code}, {response.text}")
 
+    # DEPRECATED. REMOVING SOON
     def get_full_wiki_article_by_prompt(self, prompt, percentile=0.5, num_results=1):
         """
         Get full text of Wikipedia articles based on a prompt.
@@ -111,3 +112,36 @@ def get_full_wiki_article_by_prompt(self, prompt, percentile=0.5, num_results=1)
             return [result.get('text', "No text element found") for result in results]
         else:
             raise Exception(f"Error: {response.status_code}, {response.text}")
+
+    def get_top_full_wiki_article_by_prompt(self, prompt, percentile=0.5, num_results=10):
+        """
+        Get full text of Wikipedia articles based on a prompt.
+
+        Args:
+            prompt (str): The prompt to generate the articles.
+            percentile (float): The relevance percentile to match articles. Default is 0.5.
+            num_results (int): The number of results to return. Default is 10.
+
+        Returns:
+            list: A list containing the article text.
+
+        Raises:
+            Exception: If the API request fails.
+        """
+        if not self.use_offline_wiki_api:  # API switched off: no HTTP call, return a placeholder list
+            return ["No additional information provided"]
+
+        url = f"{self.base_url}/top_article"  # "top_article" endpoint under the configured base URL
+        params = {
+            'prompt': prompt,
+            'percentile': percentile,
+            'num_results': num_results
+        }
+        response = requests.get(url, params=params)  # NOTE(review): no timeout is passed
+        print(f"Response Status Code: {response.status_code}")  # NOTE(review): debug output to stdout
+        print(f"Response Text: {response.text}")  # prints the entire response body
+        if response.status_code == 200:
+            result = response.json()  # the .get() below implies a JSON object is expected - TODO confirm
+            return [result.get('text', "No text element found")]  # Wrap the single text in a list
+        else:  # any non-200 status is raised as a generic Exception with status and body
+            raise Exception(f"Error: {response.status_code}, {response.text}")
diff --git a/Public/Configs/Workflows/assistant-multi-model/Factual-Wiki-Workflow.json b/Public/Configs/Workflows/assistant-multi-model/Factual-Wiki-Workflow.json
@@ -25,7 +25,7 @@
     "title": "Querying the offline wikipedia api",
     "agentName": "Wikipedia Search Api Agent Three",
     "promptToSearch": "{agent2Output}",
-    "type": "OfflineWikiApiFullArticle"
+    "type": "OfflineWikiApiBestFullArticle"
   },
   {
     "title": "Answering user's question",

diff --git a/Public/Configs/Workflows/assistant-single-model/Factual-Wiki-Workflow.json b/Public/Configs/Workflows/assistant-single-model/Factual-Wiki-Workflow.json
@@ -25,7 +25,7 @@
     "title": "Querying the offline wikipedia api",
     "agentName": "Wikipedia Search Api Agent Three",
     "promptToSearch": "{agent2Output}",
-    "type": "OfflineWikiApiFullArticle"
+    "type": "OfflineWikiApiBestFullArticle"
   },
   {
     "title": "Answering user's question",

diff --git a/Public/Configs/Workflows/group-chat-example/DataFinder-Factual-Workflow.json b/Public/Configs/Workflows/group-chat-example/DataFinder-Factual-Workflow.json
@@ -25,7 +25,7 @@
     "title": "Querying the offline wikipedia api",
     "agentName": "Wikipedia Search Api Agent Three",
     "promptToSearch": "{agent2Output}",
-    "type": "OfflineWikiApiFullArticle"
+    "type": "OfflineWikiApiBestFullArticle"
   },
   {
     "title": "Answering user's question",