Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
9d177fd
ckpt; added plan
alex-w-99 Jan 30, 2026
3e7353d
complete step 1; added specialists_plan_step2.md
alex-w-99 Jan 30, 2026
6a96a77
step 2 done
alex-w-99 Jan 30, 2026
320f827
updates to js_utils
alex-w-99 Jan 30, 2026
a42203e
textwrap.dedent
alex-w-99 Jan 30, 2026
2c6c7a2
time for phase 3, CLI specialists
alex-w-99 Jan 30, 2026
0861ad2
Specialist scripts
alex-w-99 Jan 30, 2026
96c8966
update specialist
alex-w-99 Jan 30, 2026
4d81a3d
update js utils
alex-w-99 Jan 30, 2026
93ad4ea
SlashCommandCompleter
alex-w-99 Jan 30, 2026
646873d
lobotomize js specialist
alex-w-99 Jan 30, 2026
3f1f501
JSpecialist network data
alex-w-99 Jan 30, 2026
bb223c0
stay alive
alex-w-99 Jan 30, 2026
eafa689
Merge branch 'main' into new-agents-JS-UI
alex-w-99 Feb 1, 2026
1b3f31b
ckpt
alex-w-99 Feb 1, 2026
4669a0b
#124
alex-w-99 Feb 1, 2026
ca2e53b
NetworkSpyAgent inherits from AbstractSpecialist
alex-w-99 Feb 1, 2026
5701c93
interaction specialist uses @specialist_tool
alex-w-99 Feb 2, 2026
a25dfe8
ckpt
alex-w-99 Feb 2, 2026
9d132b2
ckpt
alex-w-99 Feb 2, 2026
d520f5c
ckpt
alex-w-99 Feb 2, 2026
8b6a44e
merge in main
alex-w-99 Feb 2, 2026
2fd1d86
more touch ups
alex-w-99 Feb 2, 2026
1839992
various fixes
alex-w-99 Feb 2, 2026
baa9b8b
ready
alex-w-99 Feb 2, 2026
ab4471c
Claude comment 1
alex-w-99 Feb 3, 2026
9ae70a1
Claude comment 2
alex-w-99 Feb 3, 2026
c055b6d
Claude comment 3
alex-w-99 Feb 3, 2026
23f5a89
Claude comment 4
alex-w-99 Feb 3, 2026
cc458f7
Claude comment 5 and 6
alex-w-99 Feb 3, 2026
28e6907
Merge branch 'main' into new-agents-JS-UI
alex-w-99 Feb 3, 2026
88ca04c
update validate_js return type
alex-w-99 Feb 3, 2026
ede2ca1
reorg AbstractSpecialist methods
alex-w-99 Feb 3, 2026
963d6b0
fix interaction specialist docstring
alex-w-99 Feb 3, 2026
787a6ec
sync tools
alex-w-99 Feb 3, 2026
10af2fe
update scripts: JS, UI
alex-w-99 Feb 3, 2026
d960d36
ckpt
alex-w-99 Feb 3, 2026
f7a688e
AutonomousConfig
alex-w-99 Feb 3, 2026
9034b85
run mode
alex-w-99 Feb 3, 2026
2bb9e5a
Merge branch 'main' into new-agents-JS-UI
alex-w-99 Feb 3, 2026
a2ba90b
llm bug fix
alex-w-99 Feb 3, 2026
adaf1fd
Add back JS data stores
alex-w-99 Feb 3, 2026
8fa12f4
JSDataStore.search_by_regex
alex-w-99 Feb 3, 2026
50b0ddb
log
alex-w-99 Feb 3, 2026
d47fbc4
type adapter
alex-w-99 Feb 3, 2026
f862a87
improving abstract specialist
alex-w-99 Feb 3, 2026
69faaad
pytests for abstract specialist
alex-w-99 Feb 3, 2026
d1d9f31
upfront param tool validation
alex-w-99 Feb 3, 2026
efc4962
tweak run js specialist
alex-w-99 Feb 3, 2026
f5701fb
Merge branch 'main' into new-agents-JS-UI
alex-w-99 Feb 4, 2026
66fdc38
Claude Complain
alex-w-99 Feb 4, 2026
8969e4a
fix specialist run
alex-w-99 Feb 4, 2026
37a0b16
fix specialist run
alex-w-99 Feb 4, 2026
daac847
finalize
alex-w-99 Feb 4, 2026
7d2bccd
ipykernel
alex-w-99 Feb 4, 2026
4732f80
pyprok
alex-w-99 Feb 4, 2026
a1f005e
rm test
alex-w-99 Feb 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
212 changes: 112 additions & 100 deletions bluebox/agents/guide_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import json
from datetime import datetime
from enum import StrEnum
from textwrap import dedent
from typing import Any, Callable
from uuid import uuid4

Expand Down Expand Up @@ -134,13 +135,17 @@ def flush_update_messages(self) -> str | None:
messages.append("[System Update] Routine added to context. Use get_current_routine to see the routine.")
else: # updated
messages.append("[System Update] Routine has been updated. Use get_current_routine to see the changes.")

# Reset timestamp
self._routine_change_at = None
self._routine_change_type = None

# Check for execution
if self._execution_at is not None:
messages.append("[System Update] Executed routine. To see the executed routine and parameters use the get_last_routine_execution tool. To see the result use the get_last_routine_execution_result tool.")
messages.append(
"[System Update] Executed routine. To see the executed routine and parameters use the "
"get_last_routine_execution tool. To see the result use the get_last_routine_execution_result tool."
)
# Reset timestamp
self._execution_at = None

Expand Down Expand Up @@ -177,151 +182,158 @@ def handle_message(message: EmittedMessage) -> None:

# Class constants ______________________________________________________________________________________________________

DATA_STORE_PROMPT: str = """
You have access to the following data and you must refer to it when answering questions or helping debug!
It is essential that you use this data, documentation, and code:
{data_store_prompt}
"""
DATA_STORE_PROMPT: str = dedent("""\
You have access to the following data and you must refer to it when answering questions or helping debug!
It is essential that you use this data, documentation, and code:
{data_store_prompt}
""")

# Shared prompt sections ________________________________________________________________________________________________

_ROUTINES_SECTION: str = """## What are Routines?
_ROUTINES_SECTION: str = dedent("""\
## What are Routines?

Routines are reusable web automation workflows that can be executed programmatically. \
They are created by learning from user demonstrations - users record themselves performing \
a task on a website, and the system generates a parameterized routine."""
Routines are reusable web automation workflows that can be executed programmatically. \
They are created by learning from user demonstrations - users record themselves performing \
a task on a website, and the system generates a parameterized routine.""")

_VECTORLY_SECTION: str = """## What is Vectorly?
_VECTORLY_SECTION: str = dedent("""\
## What is Vectorly?

Vectorly (https://vectorly.app) unlocks data from interactive websites - getting web data behind \
clicks, searches, and user interactions. Define a routine once, then access it anywhere via API or MCP."""
Vectorly (https://vectorly.app) unlocks data from interactive websites - getting web data behind \
clicks, searches, and user interactions. Define a routine once, then access it anywhere via API or MCP.""")

_GUIDELINES_SECTION: str = """## Guidelines
_GUIDELINES_SECTION: str = dedent("""\
## Guidelines

- Be conversational and helpful
- Ask clarifying questions if needed (VERY CONCISE AND TO THE POINT!)
- When asking questions, just ask them directly. NO preamble, NO "Once you answer I will...", \
NO numbered lists of what you'll do next. Just ask the question.
- BE VERY CONCISE AND TO THE POINT. We DONT NEED LONG CONVERSATIONS!
- IMPORTANT: When you decide to use a tool, JUST CALL IT. Do NOT announce "I will now call X" or \
"Let me use X tool" - just invoke the tool directly. The user can always decline the request."""
- Be conversational and helpful
- Ask clarifying questions if needed (VERY CONCISE AND TO THE POINT!)
- When asking questions, just ask them directly. NO preamble, NO "Once you answer I will...", \
NO numbered lists of what you'll do next. Just ask the question.
- BE VERY CONCISE AND TO THE POINT. We DONT NEED LONG CONVERSATIONS!
- IMPORTANT: When you decide to use a tool, JUST CALL IT. Do NOT announce "I will now call X" or \
"Let me use X tool" - just invoke the tool directly. The user can always decline the request.""")

_NOTES_SECTION: str = """## NOTES:
- Quotes or escaped quotes are ESSENTIAL AROUND {{{{parameter_name}}}} ALL parameters in routines!
- Before saying ANYTHING ABOUT QUOTES OR ESCAPED QUOTES, you MUST look through the docs!"""
_NOTES_SECTION: str = dedent("""\
## NOTES:
- Quotes or escaped quotes are ESSENTIAL AROUND {{{{parameter_name}}}} ALL parameters in routines!
- Before saying ANYTHING ABOUT QUOTES OR ESCAPED QUOTES, you MUST look through the docs!""")

_SYSTEM_ACTION_SECTION: str = """## System Action Messages
When you receive a system message with the prefix "[ACTION REQUIRED]", you MUST immediately \
execute the requested action using the appropriate tools."""
_SYSTEM_ACTION_SECTION: str = dedent("""\
## System Action Messages
When you receive a system message with the prefix "[ACTION REQUIRED]", you MUST immediately \
execute the requested action using the appropriate tools.""")

# Mode-specific sections ________________________________________________________________________________________________

_CREATION_MODE_ROLE: str = """## Your Role
_CREATION_MODE_ROLE: str = dedent("""\
## Your Role

You are in CREATION MODE. Help users create new routines by:
- Understanding what task they want to automate
- Guiding them through browser recording to capture their workflow
- Running routine discovery to generate the routine from captured data
- Creating routines directly when appropriate
You are in CREATION MODE. Help users create new routines by:
- Understanding what task they want to automate
- Guiding them through browser recording to capture their workflow
- Running routine discovery to generate the routine from captured data
- Creating routines directly when appropriate

## Available Tools
## Available Tools

- **`request_user_browser_recording`**: Ask the user to demonstrate a task in the browser. \
Use this when the user describes a web automation task they want to create.
- **`request_routine_discovery`**: Start routine discovery from captured browser data. \
Use this after recording is complete.
- **`create_new_routine`**: Create a routine directly without discovery. Use this when you \
have enough information to build the routine programmatically.
- **`file_search`**: Search documentation for routine creation best practices.
- **`request_user_browser_recording`**: Ask the user to demonstrate a task in the browser. \
Use this when the user describes a web automation task they want to create.
- **`request_routine_discovery`**: Start routine discovery from captured browser data. \
Use this after recording is complete.
- **`create_new_routine`**: Create a routine directly without discovery. Use this when you \
have enough information to build the routine programmatically.
- **`file_search`**: Search documentation for routine creation best practices.

## Workflow for Creating Routines
## Workflow for Creating Routines

1. **Understand the task**: Ask the user what website data they want to access or what actions they want to automate.
2. **Initiate recording**: Use `request_user_browser_recording` with a clear task description.
3. **Wait for recording**: The user will perform the task while browser activity is captured.
4. **Run discovery**: Use `request_routine_discovery` to generate a routine from the captures.
5. **Review result**: Once the routine is created, you will switch to editing mode to help refine it.
1. **Understand the task**: Ask the user what website data they want to access or what actions they want to automate.
2. **Initiate recording**: Use `request_user_browser_recording` with a clear task description.
3. **Wait for recording**: The user will perform the task while browser activity is captured.
4. **Run discovery**: Use `request_routine_discovery` to generate a routine from the captures.
5. **Review result**: Once the routine is created, you will switch to editing mode to help refine it.

## Creation Mode Guidelines
## Creation Mode Guidelines

- Provide clear, bulleted instructions when requesting browser recordings
- If the user asks about an existing routine, inform them no routine is currently loaded"""
- Provide clear, bulleted instructions when requesting browser recordings
- If the user asks about an existing routine, inform them no routine is currently loaded""")

_EDITING_MODE_ROLE: str = """## Your Role
_EDITING_MODE_ROLE: str = dedent("""\
## Your Role

You are in EDITING MODE. A routine is currently loaded. Help users by:
- Reviewing the routine structure and operations
- Debugging execution failures
- Suggesting improvements and fixes
- Validating routine changes
You are in EDITING MODE. A routine is currently loaded. Help users by:
- Reviewing the routine structure and operations
- Debugging execution failures
- Suggesting improvements and fixes
- Validating routine changes

## Available Tools - USE THESE WHEN DEBUGGING
## Available Tools - USE THESE WHEN DEBUGGING

- **`get_current_routine`**: Get the currently loaded routine JSON. Call this FIRST when the user \
asks about their routine or wants help editing it.
- **`get_last_routine_execution`**: Get the last executed routine and parameters used. Call when \
the user says they ran a routine and it failed.
- **`get_last_routine_execution_result`**: Get execution results - success/failure status, output \
data, and errors. Essential for debugging.
- **`validate_routine`**: Validate a routine object against the schema. REQUIRED KEY: 'routine'.
- **`suggest_routine_edit`**: Propose changes to the routine for user approval. REQUIRED KEY: 'routine' \
with the COMPLETE routine object.
- **`file_search`**: Search documentation for debugging tips and common issues.
- **`get_current_routine`**: Get the currently loaded routine JSON. Call this FIRST when the user \
asks about their routine or wants help editing it.
- **`get_last_routine_execution`**: Get the last executed routine and parameters used. Call when \
the user says they ran a routine and it failed.
- **`get_last_routine_execution_result`**: Get execution results - success/failure status, output \
data, and errors. Essential for debugging.
- **`validate_routine`**: Validate a routine object against the schema. REQUIRED KEY: 'routine'.
- **`suggest_routine_edit`**: Propose changes to the routine for user approval. REQUIRED KEY: 'routine' \
with the COMPLETE routine object.
- **`file_search`**: Search documentation for debugging tips and common issues.

## Debugging Workflow
## Debugging Workflow

1. User says "my routine failed" or "help me debug" -> call `get_last_routine_execution` and \
`get_last_routine_execution_result`
2. User says "review my routine" or "what's wrong" -> call `get_current_routine`
3. Analyze the results and cross-reference with documentation via file_search
4. Suggest specific fixes using `suggest_routine_edit`
1. User says "my routine failed" or "help me debug" -> call `get_last_routine_execution` and \
`get_last_routine_execution_result`
2. User says "review my routine" or "what's wrong" -> call `get_current_routine`
3. Analyze the results and cross-reference with documentation via file_search
4. Suggest specific fixes using `suggest_routine_edit`

## Suggesting Routine Edits
## Suggesting Routine Edits

When proposing changes, use the `suggest_routine_edit` tool:
- **REQUIRED KEY: `routine`** - Pass the COMPLETE routine object under this key
- Example: {{"routine": {{"name": "...", "description": "...", "parameters": [...], "operations": [...]}}}}
- The tool validates automatically - you do NOT need to call `validate_routine` first
- If validation fails, read the error, fix the routine, and try again (make at least 3 attempts)
When proposing changes, use the `suggest_routine_edit` tool:
- **REQUIRED KEY: `routine`** - Pass the COMPLETE routine object under this key
- Example: {{"routine": {{"name": "...", "description": "...", "parameters": [...], "operations": [...]}}}}
- The tool validates automatically - you do NOT need to call `validate_routine` first
- If validation fails, read the error, fix the routine, and try again (make at least 3 attempts)

## Editing Mode Guidelines
## Editing Mode Guidelines

- When debugging, analyze the specific error and suggest concrete fixes
- Use file_search to reference documentation for complex issues"""
- When debugging, analyze the specific error and suggest concrete fixes
- Use file_search to reference documentation for complex issues""")

# Composed system prompts _______________________________________________________________________________________________

CREATION_MODE_SYSTEM_PROMPT: str = f"""You are a helpful assistant that helps users create \
web automation routines.
CREATION_MODE_SYSTEM_PROMPT: str = dedent(f"""\
You are a helpful assistant that helps users create web automation routines.

{_ROUTINES_SECTION}
{_ROUTINES_SECTION}

{_VECTORLY_SECTION}
{_VECTORLY_SECTION}

{_CREATION_MODE_ROLE}
{_CREATION_MODE_ROLE}

{_GUIDELINES_SECTION}
{_GUIDELINES_SECTION}

{_NOTES_SECTION}
{_NOTES_SECTION}

{_SYSTEM_ACTION_SECTION}
"""
{_SYSTEM_ACTION_SECTION}
""")

EDITING_MODE_SYSTEM_PROMPT: str = f"""You are a helpful assistant that helps users debug \
and improve web automation routines.
EDITING_MODE_SYSTEM_PROMPT: str = dedent(f"""\
You are a helpful assistant that helps users debug and improve web automation routines.

{_ROUTINES_SECTION}
{_ROUTINES_SECTION}

{_VECTORLY_SECTION}
{_VECTORLY_SECTION}

{_EDITING_MODE_ROLE}
{_EDITING_MODE_ROLE}

{_GUIDELINES_SECTION}
{_GUIDELINES_SECTION}

{_NOTES_SECTION}
{_NOTES_SECTION}

{_SYSTEM_ACTION_SECTION}
"""
{_SYSTEM_ACTION_SECTION}
""")

# Magic methods ________________________________________________________________________________________________________

Expand Down
11 changes: 3 additions & 8 deletions bluebox/agents/network_spy_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
"""

import json
import textwrap
from datetime import datetime
from typing import Any, Callable
from urllib.parse import urlparse, parse_qs
from textwrap import dedent

from pydantic import BaseModel, Field

Expand All @@ -40,13 +40,11 @@
from bluebox.utils.llm_utils import token_optimized
from bluebox.utils.logger import get_logger


logger = get_logger(name=__name__)


class DiscoveredEndpoint(BaseModel):
"""A single discovered API endpoint."""

request_ids: list[str] = Field(
description="HAR entry request_ids for this endpoint"
)
Expand All @@ -68,7 +66,6 @@ class EndpointDiscoveryResult(BaseModel):
Contains one or more discovered endpoints needed to complete the user's task.
Multiple endpoints may be needed for multi-step flows (e.g., auth -> search -> details).
"""

endpoints: list[DiscoveredEndpoint] = Field(
description="List of discovered endpoints needed for the task"
)
Expand All @@ -80,7 +77,6 @@ class DiscoveryFailureResult(BaseModel):

Returned when the agent cannot find the appropriate endpoints after exhaustive search.
"""

reason: str = Field(
description="Explanation of why the endpoint could not be found"
)
Expand All @@ -102,7 +98,7 @@ class NetworkSpyAgent:
to search and analyze network traffic.
"""

SYSTEM_PROMPT: str = textwrap.dedent("""
SYSTEM_PROMPT: str = dedent("""
You are a network traffic analyst specializing in HAR (HTTP Archive) file analysis.

## Your Role
Expand Down Expand Up @@ -146,7 +142,7 @@ class NetworkSpyAgent:
- Always use search_har_by_terms first when looking for specific data
""").strip()

AUTONOMOUS_SYSTEM_PROMPT: str = textwrap.dedent("""
AUTONOMOUS_SYSTEM_PROMPT: str = dedent("""
You are a network traffic analyst that autonomously identifies API endpoints.

## Your Mission
Expand Down Expand Up @@ -245,7 +241,6 @@ def __init__(
self._discovery_result: EndpointDiscoveryResult | None = None
self._discovery_failure: DiscoveryFailureResult | None = None
self._finalize_tool_registered: bool = False

logger.debug(
"Instantiated NetworkSpyAgent with model: %s, chat_thread_id: %s, entries: %d",
llm_model,
Expand Down
Empty file.
Loading