Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into pr/63
Browse files Browse the repository at this point in the history
  • Loading branch information
deepak-akkil committed Jul 4, 2024
2 parents e8f0855 + 896d23c commit df8da54
Show file tree
Hide file tree
Showing 46 changed files with 4,325 additions and 718 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,7 @@ cython_debug/
ae/log_files/*
ae/temp/*
test/logs/*
test/results/*
test/results/*
Pipfile.lock
requirements.txt
Pipfile
35 changes: 34 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ This provides a natural language way to interacting with a web browser:
- Manage and automate tasks on project management platforms (like JIRA) by filtering issues, easing the workflow for users.
- Provide personal shopping assistance, suggesting products based on the user's needs, such as storage options for game cards.

While Agent-E is growing, it is already equipped to handle a versatile range of tasks, but the best task is the one that you come up with. So, take it for a spin and tell us what you were able to do with it. For more information see our [blog article](https://blog.emergence.ai/2024/03/28/distilling-the-web-agent.html).
While Agent-E is growing, it is already equipped to handle a versatile range of tasks, but the best task is the one that you come up with. So, take it for a spin and tell us what you were able to do with it. For more information see our [blog article](https://www.emergence.ai/blog/distilling-the-web-for-multi-agent-automation).


## Quick Start
Expand Down Expand Up @@ -156,6 +156,39 @@ html_theme = 'sphinx_rtd_theme'
7. Build the documentation, from `docs` directory, run: `sphinx-build -b html . _build`


## Open-source models

Using open-source models is possible through LiteLLM with Ollama. Ollama allows users to run language models locally on their machines, and LiteLLM translates OpenAI-format inputs to local models' endpoints. To use open-source models as the Agent-E backbone, follow the steps below:

1. Install LiteLLM
```bash
pip install 'litellm[proxy]'
```
2. Install Ollama
* For Mac and Windows, download [Ollama](https://ollama.com/download).
* For Linux:
```bash
curl -fsSL https://ollama.com/install.sh | sh
```
3. Pull Ollama models
Before you can use a model, you need to download it from the library. The list of available models is [here](https://ollama.com/library). Here, we use Mistral v0.3:
```bash
ollama pull mistral:v0.3
```
4. Run LiteLLM
To run the downloaded model with LiteLLM as a proxy, run:
```bash
litellm --model ollama_chat/mistral:v0.3
```
5. Configure model in Autogen
Configure the `.env` file as follows. Note that the model name and API key are not needed, because LiteLLM proxies all requests to the locally running model.
```bash
AUTOGEN_MODEL_NAME=NotRequired
AUTOGEN_MODEL_API_KEY=NotRequired
AUTOGEN_MODEL_BASE_URL=http://0.0.0.0:4000
```


## TODO

- Action verification - Responding from every skill with changes that took place in the DOM (Mutation Observers) so that the LLM can judge whether the skill did execute properly or not
Expand Down
2 changes: 1 addition & 1 deletion ae/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ae import core
from ae import core # type: ignore # noqa: F401
2 changes: 1 addition & 1 deletion ae/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@

if not os.path.exists(PROJECT_TEMP_PATH):
os.makedirs(PROJECT_TEMP_PATH)
print(f"Created temp folder at: {PROJECT_TEMP_PATH}")
print(f"Created temp folder at: {PROJECT_TEMP_PATH}")
2 changes: 0 additions & 2 deletions ae/core/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from ae.core import agents
from ae.core import memory
from ae.core import skills

from ae.core.autogen_wrapper import AutogenWrapper
from ae.core.playwright_manager import PlaywrightManager
from ae.core.post_process_responses import final_reply_callback_browser_agent
from ae.core.post_process_responses import final_reply_callback_user_proxy
from ae.core.prompts import LLM_PROMPTS
from ae.core.system_orchestrator import SystemOrchestrator
Expand Down
3 changes: 1 addition & 2 deletions ae/core/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
from ae.core.agents.browser_nav_agent import BrowserNavAgent
from ae.core.agents.browser_nav_agent_no_skills import BrowserNavAgentNoSkills
from ae.core.agents.browser_nav_agent import BrowserNavAgent
74 changes: 40 additions & 34 deletions ae/core/agents/browser_nav_agent.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
from datetime import datetime
from string import Template

import autogen # type: ignore

from ae.core.memory.static_ltm import get_user_ltm
from ae.core.post_process_responses import final_reply_callback_browser_agent as print_message_from_user_proxy # type: ignore
from ae.core.post_process_responses import final_reply_callback_user_proxy as print_message_from_browser_agent # type: ignore
from ae.core.prompts import LLM_PROMPTS
from ae.core.skills.click_using_selector import click as click_element
from ae.core.skills.enter_text_and_click import enter_text_and_click

# from ae.core.skills.enter_text_and_click import enter_text_and_click
from ae.core.skills.enter_text_using_selector import bulk_enter_text
from ae.core.skills.enter_text_using_selector import entertext
from ae.core.skills.get_dom_with_content_type import get_dom_with_content_type
from ae.core.skills.get_url import geturl
from ae.core.skills.get_user_input import get_user_input
from ae.core.skills.open_url import openurl
from ae.core.skills.pdf_text_extractor import extract_text_from_pdf

#from ae.core.skills.pdf_text_extractor import extract_text_from_pdf
from ae.core.skills.press_key_combination import press_key_combination


class BrowserNavAgent:
def __init__(self, config_list, user_proxy_agent: autogen.UserProxyAgent): # type: ignore
def __init__(self, config_list, browser_nav_executor: autogen.UserProxyAgent): # type: ignore
"""
Initialize the BrowserNavAgent and store the AssistantAgent instance
as an instance attribute for external access.
Expand All @@ -27,21 +29,23 @@ def __init__(self, config_list, user_proxy_agent: autogen.UserProxyAgent): # typ
- config_list: A list of configuration parameters required for AssistantAgent.
- user_proxy_agent: An instance of the UserProxyAgent class.
"""
self.user_proxy_agent = user_proxy_agent
self.browser_nav_executor = browser_nav_executor
user_ltm = self.__get_ltm()
system_message = LLM_PROMPTS["BROWSER_AGENT_PROMPT"]

system_message = system_message + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"
if user_ltm: #add the user LTM to the system prompt if it exists
user_ltm = "\n" + user_ltm
system_message = Template(system_message).substitute(basic_user_information=user_ltm)

self.agent = autogen.AssistantAgent(
self.agent = autogen.ConversableAgent(
name="browser_navigation_agent",
system_message=system_message,
llm_config={
"config_list": config_list,
"cache_seed": 2,
"temperature": 0.0
"cache_seed": None,
"temperature": 0.0,
"top_p": 0.001,
"seed":12345
},
)
self.__register_skills()
Expand All @@ -59,54 +63,53 @@ def __register_skills(self):
"""
Register all the skills that the agent can perform.
"""
# Register get_user_input skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(get_user_input) # type: ignore
# Register get_user_input skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["GET_USER_INPUT_PROMPT"])(get_user_input) # type: ignore

# Register openurl skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(openurl) # type: ignore
# Register openurl skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["OPEN_URL_PROMPT"])(openurl) # type: ignore
self.agent.register_for_llm(description=LLM_PROMPTS["OPEN_URL_PROMPT"])(openurl)
# Register openurl skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(openurl)

# Register enter_text_and_click skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(enter_text_and_click)
# Register enter_text_and_click skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click)
# self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_AND_CLICK_PROMPT"])(enter_text_and_click)
# Register enter_text_and_click skill for execution by user_proxy_agent
# self.browser_nav_executor.register_for_execution()(enter_text_and_click)

# Register get_dom_with_content_type skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(get_dom_with_content_type)
# Register get_dom_with_content_type skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["GET_DOM_WITH_CONTENT_TYPE_PROMPT"])(get_dom_with_content_type)
# Register get_dom_with_content_type skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(get_dom_with_content_type)

# Register click_element skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(click_element)
# Register click_element skill for LLM by assistant agent
#self.agent.register_for_llm(description=LLM_PROMPTS["CLICK_PROMPT_ACCESSIBILITY"])(click_element)
self.agent.register_for_llm(description=LLM_PROMPTS["CLICK_PROMPT"])(click_element)
# Register click_element skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(click_element)

# Register geturl skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(geturl)
# Register geturl skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["GET_URL_PROMPT"])(geturl)
# Register geturl skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(geturl)

# Register bulk_enter_text skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(bulk_enter_text)
# Register bulk_enter_text skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["BULK_ENTER_TEXT_PROMPT"])(bulk_enter_text)
# Register bulk_enter_text skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(bulk_enter_text)

# Register entertext skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(entertext)
# Register entertext skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["ENTER_TEXT_PROMPT"])(entertext)

# Register entertext skill for execution by user_proxy_agent
self.user_proxy_agent.register_for_execution()(extract_text_from_pdf)
self.browser_nav_executor.register_for_execution()(entertext)

# Register entertext skill for LLM by assistant agent
self.agent.register_for_llm(description=LLM_PROMPTS["PRESS_KEY_COMBINATION_PROMPT"])(press_key_combination)
# Register entertext skill for execution by user_proxy_agent
self.browser_nav_executor.register_for_execution()(press_key_combination)

self.agent.register_for_llm(description=LLM_PROMPTS["EXTRACT_TEXT_FROM_PDF_PROMPT"])(extract_text_from_pdf)
self.browser_nav_executor.register_for_execution()(extract_text_from_pdf)

'''
# Register reply function for printing messages
self.user_proxy_agent.register_reply( # type: ignore
self.browser_nav_executor.register_reply( # type: ignore
[autogen.Agent, None],
reply_func=print_message_from_user_proxy,
config={"callback": None},
Expand All @@ -116,3 +119,6 @@ def __register_skills(self):
reply_func=print_message_from_browser_agent,
config={"callback": None},
)
'''
# print(f">>> Function map: {self.browser_nav_executor.function_map}") # type: ignore
# print(">>> Registered skills for BrowserNavAgent and BrowserNavExecutorAgent")
41 changes: 0 additions & 41 deletions ae/core/agents/browser_nav_agent_no_skills.py

This file was deleted.

61 changes: 61 additions & 0 deletions ae/core/agents/high_level_planner_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
from datetime import datetime
from string import Template

import autogen # type: ignore
from autogen import ConversableAgent # type: ignore

from ae.core.memory.static_ltm import get_user_ltm
from ae.core.post_process_responses import final_reply_callback_planner_agent as print_message_as_planner # type: ignore
from ae.core.prompts import LLM_PROMPTS
from ae.core.skills.get_user_input import get_user_input


class PlannerAgent:
    """Wraps the autogen AssistantAgent that produces high-level task plans.

    The configured agent is exposed through the ``agent`` attribute so the
    surrounding orchestration code can wire it into a conversation.
    """

    def __init__(self, config_list, user_proxy_agent:ConversableAgent): # type: ignore
        """Build the planner's AssistantAgent and register its skills.

        Parameters:
        - config_list: LLM configuration entries passed through to autogen.
        - user_proxy_agent: Agent that executes the get_user_input skill on
          behalf of the planner.
        """

        prompt = LLM_PROMPTS["PLANNER_AGENT_PROMPT"]

        # Fold the user's long-term memory into the prompt when one exists.
        ltm = self.__get_ltm()
        if ltm:
            prompt = Template(prompt).substitute(basic_user_information="\n" + ltm)
        prompt = prompt + "\n" + f"Today's date is {datetime.now().strftime('%d %B %Y')}"

        # Deterministic sampling settings; caching is disabled.
        llm_settings = {
            "config_list": config_list,
            "cache_seed": None,
            "temperature": 0.0,
            "top_p": 0.001,
            "seed": 12345,
        }
        self.agent = autogen.AssistantAgent(
            name="planner_agent",
            system_message=prompt,
            llm_config=llm_settings,
        )

        # The planner may ask the user for input: advertise the skill to the
        # LLM here, and let the user proxy actually execute it.
        self.agent.register_for_llm(description=LLM_PROMPTS["GET_USER_INPUT_PROMPT"])(get_user_input)
        user_proxy_agent.register_for_execution()(get_user_input)

        # Echo planner replies through the dedicated print callback.
        self.agent.register_reply( # type: ignore
            [autogen.AssistantAgent, None],
            reply_func=print_message_as_planner,
            config={"callback": None},
            ignore_async_in_sync_chat=True,
        )

    def __get_ltm(self):
        """Return the user's long-term memory, or None if none is stored."""
        return get_user_ltm()

Loading

0 comments on commit df8da54

Please sign in to comment.