From 2482a9343d5377a6828da664e8823afddbbbe535 Mon Sep 17 00:00:00 2001 From: DeepakAkkil Date: Mon, 17 Jun 2024 12:39:49 +0300 Subject: [PATCH] Minor modification to make planner more generic for dev --- ae/core/agents/browser_nav_agent.py | 24 ++++++++-------------- ae/core/agents/high_level_planner_agent.py | 4 ++-- ae/core/prompts.py | 2 +- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/ae/core/agents/browser_nav_agent.py b/ae/core/agents/browser_nav_agent.py index d5bb880..653e0e3 100644 --- a/ae/core/agents/browser_nav_agent.py +++ b/ae/core/agents/browser_nav_agent.py @@ -9,8 +9,9 @@ from ae.core.post_process_responses import final_reply_callback_user_proxy as print_message_from_browser_agent # type: ignore from ae.core.prompts import LLM_PROMPTS from ae.core.skills.click_using_selector import click as click_element -#from ae.core.skills.enter_text_and_click import enter_text_and_click -#from ae.core.skills.enter_text_using_selector import bulk_enter_text +from ae.core.skills.enter_text_using_selector import bulk_enter_text +from ae.core.skills.enter_text_and_click import enter_text_and_click +from ae.core.skills.pdf_text_extractor import extract_text_from_pdf from ae.core.skills.enter_text_using_selector import entertext from ae.core.skills.get_dom_with_content_type import get_dom_with_content_type from ae.core.skills.get_url import geturl @@ -67,12 +68,12 @@ def __register_skills(self): self.agent.register_for_llm(description=LLM_PROMPTS["OPEN_URL_PROMPT"])(openurl) # Register openurl skill for execution by user_proxy_agent self.browser_nav_executor.register_for_execution()(openurl) - ''' + # Register enter_text_and_click skill for LLM by assistant agent self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click) # Register enter_text_and_click skill for execution by user_proxy_agent self.browser_nav_executor.register_for_execution()(enter_text_and_click) - ''' + # Register get_dom_with_content_type skill for LLM by assistant agent self.agent.register_for_llm(description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"])(get_dom_with_content_type) # Register get_dom_with_content_type skill for execution by user_proxy_agent @@ -89,9 +90,9 @@ def __register_skills(self): self.browser_nav_executor.register_for_execution()(geturl) # Register bulk_enter_text skill for LLM by assistant agent - #self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text) + self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text) # Register bulk_enter_text skill for execution by user_proxy_agent - #self.browser_nav_executor.register_for_execution()(bulk_enter_text) + self.browser_nav_executor.register_for_execution()(bulk_enter_text) # Register entertext skill for LLM by assistant agent self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])(entertext) @@ -103,17 +104,10 @@ def __register_skills(self): # Register entertext skill for execution by user_proxy_agent self.browser_nav_executor.register_for_execution()(press_key_combination) - #commented out PDF extraction skill since it was looping for some reason with the planner - #self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf) - #self.browser_nav_executor.register_for_execution()(extract_text_from_pdf) - - - ''' - # Register entertext skill for execution by user_proxy_agent - self.user_proxy_agent.register_for_execution()(extract_text_from_pdf) - # Register entertext skill for LLM by assistant agent self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf) + self.browser_nav_executor.register_for_execution()(extract_text_from_pdf) + ''' # Register reply function for printing messages self.browser_nav_executor.register_reply( # type: ignore [autogen.Agent, None], diff --git a/ae/core/agents/high_level_planner_agent.py b/ae/core/agents/high_level_planner_agent.py index 5714552..809cede 100644 --- a/ae/core/agents/high_level_planner_agent.py +++ b/ae/core/agents/high_level_planner_agent.py @@ -35,12 +35,12 @@ def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: igno "temperature": 0.0 }, ) - ''' + # Register get_user_input skill for LLM by assistant agent self.agent.register_for_llm(description=LLM_PROMPTS["GET_USER_INPUT_PROMPT"])(get_user_input) # Register get_user_input skill for execution by user_proxy_agent user_proxy_agent.register_for_execution()(get_user_input) - ''' + self.agent.register_reply( # type: ignore [autogen.AssistantAgent, None], reply_func=print_message_as_planner, diff --git a/ae/core/prompts.py b/ae/core/prompts.py index aceb051..eb8d77e 100644 --- a/ae/core/prompts.py +++ b/ae/core/prompts.py @@ -19,7 +19,7 @@ 4. Very Important: Helper cannot go back to previous pages. If you need the helper to return to a previous page, you must explicitly add the URL of the previous page in the step (e.g. return to the search result page by navigating to the url https://www.google.com/search?q=Finland") Guidelines: -1. If the starting url is related to the task, you will perform the task strictly on the website. +1. If you know a URL , you can provide it to the helper to navigate to a new page. 2. Do not assume any capability exists on the webpage. Ask questions to the helper to confirm the presence of features (e.g. is there a sort by price feature available on the page?). This will help you revise the plan as needed and also establish common ground with the helper. 3. Do not combine multiple steps into one. A step should be strictly as simple as interacting with a single element or navigating to a page. If you need to interact with multiple elements or perform multiple actions, you will break it down into multiple steps. 4. Important: You will NOT ask for any URLs of hyperlinks in the page from the helper, instead you will simply ask the helper to click on specific links with text. URL of the current page will be automatically provided to you with each helper response.