Merge pull request #40 from jhakulin/jhakulin/multi-task-fix

Jhakulin/multi task fix
Azure-Samples · Jun 22, 2024 · af1a0af · af1a0af
2 parents 665a2d9 + 75856e4
commit af1a0af
Show file tree

Hide file tree

Showing 6 changed files with 258 additions and 93 deletions.
diff --git a/samples/MultiAgentCodeOrchestration/config/TaskExecutionAgent_assistant_config.yaml b/samples/MultiAgentCodeOrchestration/config/TaskExecutionAgent_assistant_config.yaml
@@ -0,0 +1,77 @@
+name: TaskExecutionAgent
+instructions: |-
+  Your task is to take the execution plan provided by the TaskPlannerAgent in the conversation and return it in the following format:
+
+  ```json
+  [
+      {
+          "assistant": "assistant_name",
+          "task": "Description of the task"
+      },
+      {
+          "assistant": "assistant_name",
+          "task": "Description of the task"
+      },
+      {
+          "assistant": "assistant_name",
+          "task": "Description of the task"
+      }
+  ]
+  ```
+  The above is a correct example of the format, where all the steps are inside a single json code block.
+
+  The following is an incorrect example of the format, which will not be accepted because the steps are split across several json code blocks:
+
+  ### Step 1: Description of the task 1
+
+  ```json
+  [
+      {
+          "assistant": "assistant_name",
+          "task": "Description of the task"
+      }
+  ]
+  ```
+
+  ### Step 2: Description of the task 2
+
+  ```json
+  [
+      {
+          "assistant": assistant_name,
+          "task": "Description of the task"
+      }
+  ]
+  ```
+
+  ### Step 3: Description of the task 3
+
+  ```json
+  [
+      {
+          "assistant": assistant_name,
+          "task": "Description of the task"
+      }
+  ]
+  ```
+model: gpt-4o
+assistant_id: 
+file_references: null
+tool_resources: null
+functions: []
+file_search: false
+code_interpreter: false
+output_folder_path: output
+ai_client_type: OPEN_AI
+assistant_type: chat_assistant
+completion_settings:
+  frequency_penalty: 0.0
+  max_tokens: 4096
+  presence_penalty: 0.0
+  response_format: text
+  temperature: 0.17
+  top_p: 0.1
+  seed: null
+  max_text_messages: null
+assistant_role: user_interaction
+config_folder: null
diff --git a/samples/MultiAgentCodeOrchestration/config/TaskPlannerAgent_assistant_config.yaml b/samples/MultiAgentCodeOrchestration/config/TaskPlannerAgent_assistant_config.yaml
@@ -45,46 +45,13 @@ instructions: |-
     - Example response: "It seems your request isn't related to software development. I'm here to help with a wide range of questions and tasks, any specific area you'd like assistance with?"
     - Always include a question in your answer in this case.
   - User Confirmation: Before proceeding with any plan, always seek confirmation from the user with a question mark. You can say something like, "Here's the plan based on your request. Would you like me to go ahead with this?"
-  - Executing the Plan: Once the user has confirmed, clearly present the plan in json format (including the json code block) to the user for execution. This ensures the user is fully informed and agrees with the proposed actions.
-model: gpt-3.5-turbo-0125
+model: gpt-4o
 assistant_id:
 file_references:
 - C:/Git/azureai-assistant-tool/samples/MultiAgentCodeOrchestration/config/CodeProgrammerAgent_assistant_config.yaml
 - C:/Git/azureai-assistant-tool/samples/MultiAgentCodeOrchestration/config/CodeInspectionAgent_assistant_config.yaml
 tool_resources: null
-functions:
-- type: function
-  function:
-    name: find_files_by_name_in_directory
-    module: azure.ai.assistant.functions.file_functions
-    description: Searches for files matching specific criteria by name in a directory
-      and its sub-directories (case-insensitive).
-    parameters:
-      type: object
-      properties:
-        directory:
-          type: string
-          description: The directory to search in.
-        file_name_contains:
-          type: string
-          description: A partial or full file name to search for.
-      required:
-      - directory
-      - file_name_contains
-- type: function
-  function:
-    name: find_all_folders_by_name_from_current_directory
-    module: azure.ai.assistant.functions.file_functions
-    description: Searches for matching folders with a given name in the current directory
-      and its subdirectories. The search is case-sensitive and uses fuzzy matching.
-    parameters:
-      type: object
-      properties:
-        folder_name:
-          type: string
-          description: The name of the folder to search for.
-      required:
-      - folder_name
+functions: []
 file_search: false
 code_interpreter: false
 output_folder_path: output
@@ -100,3 +67,4 @@ completion_settings:
   seed: null
   max_text_messages: null
 assistant_role: user_interaction
+config_folder: null
diff --git a/samples/MultiAgentCodeOrchestration/config/UserAgent_assistant_config.yaml b/samples/MultiAgentCodeOrchestration/config/UserAgent_assistant_config.yaml
@@ -0,0 +1,89 @@
+name: UserAgent
+instructions: |-
+  Your task is to help the user make decisions and guide them effectively:
+  ### Objectives:
+  - Decide whether a plan needs to be created or improved by TaskPlannerAgent.
+  - Confirm with the user to proceed with the execution of the plan using TaskExecutionAgent.
+  - Guide the user if their request is not related to planning or execution.
+
+  ### Expected Actions:
+  - If the user request pertains to planning, request TaskPlannerAgent to create or improve a plan.
+  - Always seek user confirmation before proceeding to execution of the plan.
+  - If the user confirms, initiate the execution of the plan using TaskExecutionAgent.
+  - If the user request is not relevant for planning or execution, guide the user with a suggestion.
+
+  ### Response Format:
+  Return the response in the following JSON format to indicate the required action:
+  ```json
+  {
+      "action": "<action_type>",
+      "details": "<additional_details>"
+  }
+  ```
+  Where `<action_type>` can be:
+  - "create_plan" for creating a new plan,
+  - "improve_plan" for improving an existing plan,
+  - "execute_plan" for confirming execution of the current plan,
+  - "not_relevant" for requests unrelated to planning or execution, with guidance in details.
+
+  ### Examples:
+  - **Relevant Request for Planning:**
+    User: "Please create a Python class that reads the input file and writes the output to a new file."
+    Response:
+    ```json
+    {
+        "action": "create_plan",
+        "details": "Creating a plan for a Python class to read the input file and write to a new file."
+    }
+    ```
+
+  - **Relevant Request for Improving a Plan:**
+    User: "Can you enhance the current plan to include unit tests?"
+    Response:
+    ```json
+    {
+        "action": "improve_plan",
+        "details": "Enhancing the plan to include unit tests."
+    }
+    ```
+
+  - **Request to Execute Plan:**
+    User: "Please execute the current plan."
+    Response:
+    ```json
+    {
+        "action": "execute_plan",
+        "details": "Proceeding to execute the current plan."
+    }
+    ```
+
+  - **Irrelevant Request Example:**
+    User: "What is the capital of France?"
+    Response:
+    ```json
+    {
+        "action": "not_relevant",
+        "details": "It seems your request isn't related to software development. I'm here to help with a wide range of questions and tasks, any specific area you'd like assistance with?"
+    }
+    ```
+model: gpt-4o
+assistant_id: 
+file_references: null
+tool_resources: null
+functions: []
+file_search: false
+code_interpreter: false
+output_folder_path: output
+ai_client_type: OPEN_AI
+assistant_type: chat_assistant
+completion_settings:
+  frequency_penalty: 0.0
+  max_tokens: 4096
+  presence_penalty: 0.0
+  response_format: json_object
+  temperature: 0.17
+  top_p: 0.1
+  seed: null
+  max_text_messages: null
+assistant_role: user_interaction
+config_folder: null
diff --git a/samples/MultiAgentCodeOrchestration/main.py b/samples/MultiAgentCodeOrchestration/main.py
@@ -1,6 +1,11 @@
 # Copyright (c) Microsoft. All rights reserved.
 # Licensed under the MIT license. See LICENSE.md file in the project root for full license information.
 
+import asyncio
+import json
+import re
+from typing import Dict, List
+
 from azure.ai.assistant.management.async_assistant_client import AsyncAssistantClient
 from azure.ai.assistant.management.async_chat_assistant_client import AsyncChatAssistantClient
 from azure.ai.assistant.management.async_assistant_client_callbacks import AsyncAssistantClientCallbacks
@@ -10,10 +15,6 @@
 from azure.ai.assistant.management.async_task_manager_callbacks import AsyncTaskManagerCallbacks
 from azure.ai.assistant.management.assistant_config_manager import AssistantConfigManager
 
-from typing import Dict, List
-import json, re
-import asyncio
-
 
 class MultiAgentOrchestrator(AsyncTaskManagerCallbacks, AsyncAssistantClientCallbacks):
     """
@@ -80,7 +81,7 @@ async def on_run_end(self, assistant_name, run_identifier, run_end_time, thread_
                 # Extract the JSON code block from the response by using the FileCreatorAgent
                 await self._assistants["FileCreatorAgent"].process_messages(user_request=message.content)
 
-    async def on_function_call_processed(self, assistant_name, run_identifier, function_name, arguments, response = None):
+    async def on_function_call_processed(self, assistant_name, run_identifier, function_name, arguments, response=None):
         if "error" in response:
             print(f"\n{assistant_name}: Function call {function_name} with arguments {arguments}, result failed with: {response}")
         else:
@@ -122,10 +123,12 @@ async def initialize_assistants(assistant_names: List[str], orchestrator: MultiA
     for assistant_name in assistant_names:
         config = load_assistant_config(assistant_name)
         if config:
-            if assistant_name == "TaskPlannerAgent" or assistant_name == "FileCreatorAgent":
+            if assistant_name in {"TaskPlannerAgent", "TaskExecutionAgent", "FileCreatorAgent", "UserAgent"}:
                 assistants[assistant_name] = await AsyncChatAssistantClient.from_yaml(config, callbacks=orchestrator)
             else:
                 assistants[assistant_name] = await AsyncAssistantClient.from_yaml(config, callbacks=orchestrator)
+        else:
+            print(f"Configuration for {assistant_name} not found.")
     orchestrator.assistants = assistants
     return assistants
 
@@ -140,23 +143,10 @@ def extract_json_code_block(text):
     return match.group(1) if match else text
 
 
-def requires_user_confirmation(assistant_response: str):
-    """
-    Checks if the response requires user confirmation.
-
-    NOTE: This is a very simple implementation and may not cover all cases.
-    Could be improved e.g. by using a ML model to detect the intent from the response and context.
-    """
-    # Remove text under json code block
-    assistant_response = re.sub(r"```json\n([\s\S]*?)\n```", "", assistant_response)
-    # if text contains question mark, return True
-    return "?" in assistant_response
-
-
 async def main():
     # Use the AssistantConfigManager to save the assistant configurations at the end of the session
     assistant_config_manager = AssistantConfigManager.get_instance('config')
-    assistant_names = ["CodeProgrammerAgent", "CodeInspectionAgent", "TaskPlannerAgent", "FileCreatorAgent"]
+    assistant_names = ["CodeProgrammerAgent", "CodeInspectionAgent", "TaskPlannerAgent", "TaskExecutionAgent", "FileCreatorAgent", "UserAgent"]
     orchestrator = MultiAgentOrchestrator()
     assistants = await initialize_assistants(assistant_names, orchestrator)
     task_manager = AsyncTaskManager(orchestrator)
@@ -171,24 +161,35 @@ async def main():
             break
         if not user_request:
             continue
+
         await conversation_thread_client.create_conversation_thread_message(user_request, planner_thread)
-        await assistants["TaskPlannerAgent"].process_messages(thread_name=planner_thread)
+        await assistants["UserAgent"].process_messages(thread_name=planner_thread)
+        conversation = await conversation_thread_client.retrieve_conversation(planner_thread)
+        response = conversation.get_last_text_message("UserAgent")
+
         try:
-            # Extract the JSON code block from the response for task scheduling
-            conversation = await conversation_thread_client.retrieve_conversation(planner_thread)
-            response = conversation.get_last_text_message("TaskPlannerAgent")
-            if requires_user_confirmation(response.content):
-                continue
-            tasks = json.loads(extract_json_code_block(response.content))
+            decision = json.loads(response.content)
+
+            if decision["action"] in {"create_plan", "improve_plan"}:
+                await assistants["TaskPlannerAgent"].process_messages(thread_name=planner_thread)
+
+            elif decision["action"] == "execute_plan":
+                await assistants["TaskExecutionAgent"].process_messages(thread_name=planner_thread)
+                conversation = await conversation_thread_client.retrieve_conversation(planner_thread)
+                response = conversation.get_last_text_message("TaskExecutionAgent")
+                tasks = json.loads(extract_json_code_block(response.content))
+                multi_task = AsyncMultiTask(tasks)
+                await task_manager.schedule_task(multi_task)
+                await orchestrator.wait_for_all_tasks()
+            elif decision["action"] == "not_relevant":
+                print(decision["details"])
+                continue  # Continuously prompt the user for relevant tasks
         except json.JSONDecodeError:
+            print("Invalid JSON response. Please try again.")
             continue
-        multi_task = AsyncMultiTask(tasks)
-        await task_manager.schedule_task(multi_task)
-        await orchestrator.wait_for_all_tasks()
 
     assistant_config_manager.save_configs()
-
     await conversation_thread_client.close()
 
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
diff --git a/sdk/azure-ai-assistant/azure/ai/assistant/management/async_task.py b/sdk/azure-ai-assistant/azure/ai/assistant/management/async_task.py
@@ -80,18 +80,41 @@ class AsyncMultiTask(AsyncTask):
     This class represents a multi task.
 
     :param requests: A list of requests, each request is a dict with 'assistant' and 'task' keys.
-    :type requests: list
+                     A single dict is also accepted and will be converted to a list.
+    :type requests: list or dict
     """
     def __init__(self, requests):
         super().__init__()
-        self.requests = requests
+        self.requests = self._validate_and_convert_requests(requests)
+
+    def _validate_and_convert_requests(self, requests):
+        """
+        Validates and converts the requests to a list of dictionaries if necessary.
+
+        :param requests: A list of requests or a single request dictionary.
+        :type requests: list or dict
+        :return: A list of request dictionaries.
+        :rtype: list
+        """
+        if isinstance(requests, dict):
+            return [requests]
+        elif isinstance(requests, list):
+            # Check if all items in the list are dictionaries
+            if not all(isinstance(request, dict) for request in requests):
+                raise ValueError("All items in the requests list must be dictionaries.")
+            return requests
+        else:
+            raise TypeError("Requests should be a dictionary or a list of dictionaries.")
 
     async def execute(self, callback=None):
         """
         Executes the multi task.
 
         :param callback: The callback function to call when the task is complete.
-        :type
+        :type callback: callable or None
         """
-        if callback:
-            await callback()
+        try:
+            if callback:
+                await callback()
+        except Exception as e:
+            print(f"Error during task execution: {e}")