Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce SeleniumBrowser #1733

Closed
wants to merge 39 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
23ee145
Update test_web_surfer.py
signalprime Feb 19, 2024
2daec15
Update browser_utils.py
signalprime Feb 19, 2024
9efb297
Update web_surfer.py
signalprime Feb 19, 2024
217ed91
ContentAgent: Custom LLM agent for collecting online content.
signalprime Feb 19, 2024
72a165a
Update content_agent.py
signalprime Feb 19, 2024
46b2424
Update browser_utils.py
signalprime Feb 20, 2024
d34ae1b
Update content_agent.py
signalprime Feb 20, 2024
1ba9e05
Update content_agent.py
signalprime Feb 20, 2024
84fa1b8
Unit Tests for the ContentAgent
signalprime Feb 20, 2024
67f95bf
Update browser_utils.py
signalprime Feb 20, 2024
08f8ff9
Update web_surfer.py
signalprime Feb 20, 2024
3954412
Update content_agent.py
signalprime Feb 20, 2024
749a556
Update content_agent.py
signalprime Feb 20, 2024
818a010
Update browser_utils.py
signalprime Feb 20, 2024
643bad0
Update content_agent.py
signalprime Feb 20, 2024
20cd2a6
Update browser_utils.py
signalprime Feb 20, 2024
0389387
Update test_web_surfer.py
signalprime Feb 20, 2024
be89b9b
Updates to include selenium in websurfer extras, webdrivers in the py…
signalprime Feb 20, 2024
0a40763
Added the websurfer with desktop browser demo notebook
signalprime Feb 22, 2024
25e15e0
Merge branch 'main' into main
signalprime Feb 22, 2024
5602958
Restored to original form in official main branch. Added for clari…
signalprime Feb 22, 2024
8954fef
Further cleaned the two test files and confirmed they passed using th…
signalprime Feb 22, 2024
0c2202c
Update after feedback from GitHub built error, with my apologies for …
signalprime Feb 22, 2024
13ba006
Update contrib-tests.yml for Selenium
signalprime Feb 22, 2024
e1e81f6
Update contrib-openai.yml
signalprime Feb 22, 2024
0b5e733
Update contrib-tests.yml
signalprime Feb 22, 2024
9099b57
Update contrib-openai.yml
signalprime Feb 22, 2024
7443458
Update setup.py
signalprime Feb 22, 2024
1b87acd
Update test_content_agent.py
signalprime Feb 22, 2024
11b00e5
pre-commit fix on setup.py for readability (websurfer extras)
signalprime Feb 22, 2024
66ac7bd
Final cleanup of unnecessary comments within the PR.
signalprime Feb 22, 2024
6fbe0b8
Restored the original copies of the two unrelated notebooks altered b…
signalprime Feb 22, 2024
451405b
Merge branch 'main' into main
sonichi Feb 25, 2024
c06f6fd
Provided a more descriptive name for the agent responsible for collec…
signalprime Feb 25, 2024
ef7586e
Update web_surfer.py
signalprime Mar 26, 2024
2be44bc
Update browser_utils.py
signalprime Mar 26, 2024
e64ae32
Update browser_utils.py
signalprime Mar 26, 2024
3e7cf18
Update contrib-openai.yml
signalprime Mar 26, 2024
841ed31
Merge branch 'main' into main
signalprime Mar 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Unit Tests for the ContentAgent
We cover a small sample of websites, asserting expectations against a number of measurements performed on the collected content.  

The assertions include, but are not limited to: 
- the expected variables contain values
- the presence of the expected output files
- that the expected output files are not empty

Further improvements can include:
- evaluation against all choices of WebDriver to confirm functionality
- evaluation against a larger sample of websites
  • Loading branch information
signalprime authored Feb 20, 2024
commit 84fa1b8b41232b9ba24c772fb38b396ed3dce1f3
129 changes: 129 additions & 0 deletions test/agentchat/contrib/test_content_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
import os
import sys
import re
import tempfile
import pytest
from autogen.agentchat import UserProxyAgent
from autogen.agentchat.contrib.content_agent import ContentAgent
from autogen.oai.openai_utils import filter_config, config_list_from_json
from autogen.cache import Cache

sys.path.append(os.path.join(os.path.dirname(__file__), "../.."))
from conftest import MOCK_OPEN_AI_API_KEY, skip_openai # noqa: E402

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402

# Decide whether the OpenAI-backed test can run: the `openai` package must be
# importable, and the shared `skip_openai` flag from conftest must not be set.
try:
    from openai import OpenAI  # noqa: F401

    skip_oai = skip_openai
except ImportError:
    skip_oai = True

# The LLM configuration is only loaded when the test is actually going to run.
if not skip_oai:
    config_list = config_list_from_json(
        env_or_file=OAI_CONFIG_LIST,
        file_location=KEY_LOC,
    )


@pytest.mark.skipif(
    skip_oai,
    reason="do not run if oai is not installed",
)
def test_content_agent(browser: str) -> None:
    """Collect a few web pages with the ContentAgent and verify what was stored.

    Each test link is sent to the agent through a ``UserProxyAgent`` chat. The
    test then asserts that ``content_agent.process_history`` has an entry for
    the link and that every expected output file exists on disk and is
    non-empty.

    Args:
        browser: Value forwarded to the agent as
            ``browser_kwargs={"browser": browser}``.
            NOTE(review): under pytest this argument must be supplied by a
            fixture or parametrization named ``browser``; none is visible in
            this file -- confirm one exists.
    """
    llm_config = {"config_list": config_list, "timeout": 180, "cache_seed": 42}
    assert len(llm_config["config_list"]) > 0  # type: ignore[arg-type]

    # Define the temporary storage location
    temporary_content_storage = os.path.join(tempfile.gettempdir(), "test_content_agent_storage")
    print(f"Storing temporary test files in {temporary_content_storage}")

    # Define the system message for the ContentAgent
    content_agent_system_msg = "You are data collection agent specializing in content on the web."

    # Instantiate the ContentAgent
    content_agent = ContentAgent(
        name="ContentAgent",
        system_message=content_agent_system_msg,
        llm_config=llm_config,
        max_consecutive_auto_reply=0,
        silent=False,
        # Below are the arguments specific to the ContentAgent
        storage_path=temporary_content_storage,
        browser_kwargs={"browser": browser},
        max_depth=0,  # presumably limits how deep links are followed -- TODO confirm
    )

    # Instantiate the User Proxy Agent
    user_proxy = UserProxyAgent(
        "user_proxy",
        human_input_mode="NEVER",
        code_execution_config=False,
        default_auto_reply="",
        # Treat every message as a termination message.
        is_termination_msg=lambda x: True,
    )

    # Register the collection process as the default reply to the user
    content_agent.register_reply(user_proxy, content_agent.collect_content)

    # Define the links used during the testing process
    links = [
        "https://microsoft.github.io/autogen/docs/Examples",
        "https://microsoft.github.io/autogen/docs/Getting-Started",
        "https://www.microsoft.com/en-us/research/blog/graphrag-unlocking-llm-discovery-on-narrative-private-data/",
    ]

    # Files expected inside the storage directory of each collected link
    expected_files = (
        "content.txt",
        "metadata.txt",
        "index.html",
        "screenshot.png",
        "links.txt",
    )

    # NOTE(review): the cache object is not passed to initiate_chat; confirm
    # that entering the context alone is what is intended here.
    with Cache.disk():
        for link in links:
            # Collect the content from the requested link
            user_proxy.initiate_chat(content_agent, message=link)

            record = content_agent.process_history[link]
            assert record["url"] == link, "The reported URL does not match the requested link"

            local_path = record["local_path"]
            assert os.path.exists(local_path), "The content storage path was not found"
            assert len(record["content"]) > 0, "No content was identified or stored"

            # Every expected output file must exist and contain data
            for filename in expected_files:
                file_path = os.path.join(local_path, filename)
                assert os.path.exists(file_path), f"The file path for {filename} was not found"
                assert os.path.getsize(file_path) > 0, f"The file size of {filename} was zero"

    print()
    print(f"All done, feel free to browse the collected content at: {temporary_content_storage}")

# Runs this file's tests from the command line (outside of pytest), using
# Firefox as the browser, unless the OpenAI-backed tests are disabled.
if __name__ == "__main__" and not skip_oai:
    test_content_agent(browser="firefox")