Minor modification to make planner more generic for dev

EmergenceAI · Jun 17, 2024 · 2482a93 · 2482a93
1 parent 8b050af
commit 2482a93
Show file tree

Hide file tree

Showing 3 changed files with 12 additions and 18 deletions.
diff --git a/ae/core/agents/browser_nav_agent.py b/ae/core/agents/browser_nav_agent.py
@@ -9,8 +9,9 @@
 from ae.core.post_process_responses import final_reply_callback_user_proxy as print_message_from_browser_agent  # type: ignore
 from ae.core.prompts import LLM_PROMPTS
 from ae.core.skills.click_using_selector import click as click_element
-#from ae.core.skills.enter_text_and_click import enter_text_and_click
-#from ae.core.skills.enter_text_using_selector import bulk_enter_text
+from ae.core.skills.enter_text_using_selector import bulk_enter_text
+from ae.core.skills.enter_text_and_click import enter_text_and_click
+from ae.core.skills.pdf_text_extractor import extract_text_from_pdf
 from ae.core.skills.enter_text_using_selector import entertext
 from ae.core.skills.get_dom_with_content_type import get_dom_with_content_type
 from ae.core.skills.get_url import geturl
@@ -67,12 +68,12 @@ def __register_skills(self):
         self.agent.register_for_llm(description=LLM_PROMPTS["OPEN_URL_PROMPT"])(openurl)
         # Register openurl skill for execution by user_proxy_agent
         self.browser_nav_executor.register_for_execution()(openurl)
-        ''' 
+
         # Register enter_text_and_click skill for LLM by assistant agent
         self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click)
         # Register enter_text_and_click skill for execution by user_proxy_agent
         self.browser_nav_executor.register_for_execution()(enter_text_and_click)
-        '''
+
         # Register get_dom_with_content_type skill for LLM by assistant agent
         self.agent.register_for_llm(description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"])(get_dom_with_content_type)
         # Register get_dom_with_content_type skill for execution by user_proxy_agent
@@ -89,9 +90,9 @@ def __register_skills(self):
         self.browser_nav_executor.register_for_execution()(geturl)
 
         # Register bulk_enter_text skill for LLM by assistant agent
-        #self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text)
+        self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text)
         # Register bulk_enter_text skill for execution by user_proxy_agent
-        #self.browser_nav_executor.register_for_execution()(bulk_enter_text)
+        self.browser_nav_executor.register_for_execution()(bulk_enter_text)
 
         # Register entertext skill for LLM by assistant agent
         self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])(entertext)
@@ -103,17 +104,10 @@ def __register_skills(self):
         # Register press_key_combination skill for execution by user_proxy_agent
         self.browser_nav_executor.register_for_execution()(press_key_combination)
 
-        #commented out PDF extraction skill since it was looping for some reason with the planner
-        #self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf)
-        #self.browser_nav_executor.register_for_execution()(extract_text_from_pdf)
-
-
-        '''
-        # Register entertext skill for execution by user_proxy_agent
-        self.user_proxy_agent.register_for_execution()(extract_text_from_pdf)
-        # Register entertext skill for LLM by assistant agent
         self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf)
+        self.browser_nav_executor.register_for_execution()(extract_text_from_pdf)
 
+        '''
         # Register reply function for printing messages
         self.browser_nav_executor.register_reply( # type: ignore
             [autogen.Agent, None],

diff --git a/ae/core/agents/high_level_planner_agent.py b/ae/core/agents/high_level_planner_agent.py
@@ -35,12 +35,12 @@ def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: igno
                 "temperature": 0.0
             },
         )
-        '''
+
         # Register get_user_input skill for LLM by assistant agent
         self.agent.register_for_llm(description=LLM_PROMPTS["GET_USER_INPUT_PROMPT"])(get_user_input)
         # Register get_user_input skill for execution by user_proxy_agent
         user_proxy_agent.register_for_execution()(get_user_input)
-        '''
+
         self.agent.register_reply( # type: ignore
             [autogen.AssistantAgent, None],
             reply_func=print_message_as_planner,

diff --git a/ae/core/prompts.py b/ae/core/prompts.py
@@ -19,7 +19,7 @@
 4. Very Important: Helper cannot go back to previous pages. If you need the helper to return to a previous page, you must explicitly add the URL of the previous page in the step (e.g. return to the search result page by navigating to the url https://www.google.com/search?q=Finland")
 
 Guidelines:
-1. If the starting url is related to the task, you will perform the task strictly on the website.
+1. If you know a URL, you can provide it to the helper to navigate to a new page.
 2. Do not assume any capability exists on the webpage. Ask questions to the helper to confirm the presence of features (e.g. is there a sort by price feature available on the page?). This will help you revise the plan as needed and also establish common ground with the helper.
 3. Do not combine multiple steps into one. A step should be strictly as simple as interacting with a single element or navigating to a page. If you need to interact with multiple elements or perform multiple actions, you will break it down into multiple steps.
 4. Important: You will NOT ask for any URLs of hyperlinks in the page from the helper, instead you will simply ask the helper to click on specific links with text. URL of the current page will be automatically provided to you with each helper response.