browserbase
diff --git a/examples/agent_example.py b/examples/agent_example.py
Lines changed: 7 additions & 18 deletions
diff --git a/stagehand/agent.py b/stagehand/agent.py
Lines changed: 0 additions & 110 deletions
diff --git a/stagehand/agent/agent.py b/stagehand/agent/agent.py
Lines changed: 110 additions & 49 deletions
@@ -38,8 +38,7 @@
         "[yellow]Logging Levels:[/]\n"
         "[white]- Set [bold]verbose=0[/] for errors (ERROR)[/]\n"
         "[white]- Set [bold]verbose=1[/] for minimal logs (INFO)[/]\n"
-        "[white]- Set [bold]verbose=2[/] for medium logs (WARNING)[/]\n"
-        "[white]- Set [bold]verbose=3[/] for detailed logs (DEBUG)[/]",
+        "[white]- Set [bold]verbose=2[/] for detailed logs (DEBUG)[/]",
         title="Verbosity Options",
         border_style="blue",
     )
@@ -51,21 +50,15 @@ async def main():
         env="BROWSERBASE",
         api_key=os.getenv("BROWSERBASE_API_KEY"),
         project_id=os.getenv("BROWSERBASE_PROJECT_ID"),
-        headless=False,
-        dom_settle_timeout_ms=3000,
         model_name="gpt-4o",
         self_heal=True,
-        wait_for_captcha_solves=True,
         system_prompt="You are a browser automation assistant that helps users navigate websites effectively.",
         model_client_options={"apiKey": os.getenv("MODEL_API_KEY")},
         verbose=2,
     )
 
     # Create a Stagehand client using the configuration object.
-    stagehand = Stagehand(
-        config=config, 
-        api_url=os.getenv("STAGEHAND_API_URL"),
-    )
+    stagehand = Stagehand(config)
 
     # Initialize - this creates a new session automatically.
     console.print("\n🚀 [info]Initializing Stagehand...[/]")
@@ -75,14 +68,6 @@ async def main():
         f"🌐 [white]View your live browser:[/] [url]https://www.browserbase.com/sessions/{stagehand.session_id}[/]"
     )
 
-    # Configure the agent
-    agent_config = AgentConfig(
-        provider=AgentProvider.OPENAI,
-        model="computer-use-preview",
-        instructions="You are a helpful web navigation assistant that helps users find information. You are currently on the following page: google.com. Do not ask follow up questions, the user will trust your judgement.",
-        options={"apiKey": os.getenv("MODEL_API_KEY")}
-    )
-    
     # Define the task for the agent
     execute_options = AgentExecuteOptions(
         instruction="Play a game of 2048",
@@ -95,7 +80,11 @@ async def main():
     console.print("✅ [success]Navigated to Google[/]")
 
     console.print("\n▶️ [highlight] Using Agent to perform a task[/]: playing a game of 2048")
-    agent = stagehand.agent(agent_config)
+    agent = stagehand.agent(
+        model="computer-use-preview",
+        instructions="You are a helpful web navigation assistant that helps users find information. You are currently on the following page: google.com. Do not ask follow up questions, the user will trust your judgement.",
+        options={"apiKey": os.getenv("MODEL_API_KEY")}
+    )
     agent_result = await agent.execute(execute_options)
 
     console.print("📊 [info]Agent execution result:[/]")
 
@@ -1,6 +1,10 @@
 from typing import Optional, Union
 
 from ..handlers.cua_handler import CUAHandler
+from ..schemas import (
+    AgentExecuteResult,
+    AgentProvider,
+)
 from ..types.agent import (
     AgentConfig,
     AgentExecuteOptions,
@@ -16,6 +20,12 @@
     "claude-3-5-sonnet-latest": AnthropicCUAClient,
     "claude-3-7-sonnet-latest": AnthropicCUAClient,
 }
+# Maps a model name to the provider attached to the agent config.
+# Every model listed in the model-to-client mapping directly above needs an
+# entry here as well; a model missing from this map makes the agent log
+# "Could not infer provider" and fall back to provider=None.
+MODEL_TO_PROVIDER_MAP: dict[str, AgentProvider] = {
+    "computer-use-preview": AgentProvider.OPENAI,
+    # "-latest" aliases, matching the keys of the client mapping above.
+    "claude-3-5-sonnet-latest": AgentProvider.ANTHROPIC,
+    "claude-3-7-sonnet-latest": AgentProvider.ANTHROPIC,
+    # Dated model IDs.
+    "claude-3-5-sonnet-20240620": AgentProvider.ANTHROPIC,
+    "claude-3-7-sonnet-20250219": AgentProvider.ANTHROPIC,
+    # Add more mappings as needed
+}
 
 AGENT_METRIC_FUNCTION_NAME = "AGENT_EXECUTE_TASK"
 
@@ -26,6 +36,13 @@ def __init__(self, stagehand_client, **kwargs):
         self.stagehand = stagehand_client
         self.config = AgentConfig(**kwargs) if kwargs else AgentConfig()
         self.logger = self.stagehand.logger
+        if self.config.model in MODEL_TO_PROVIDER_MAP:
+            self.provider = MODEL_TO_PROVIDER_MAP[self.config.model]
+        else:
+            self.provider = None
+            self.logger.error(
+                f"Could not infer provider for model: {self.config.model}"
+            )
 
         if not hasattr(self.stagehand, "page") or not hasattr(
             self.stagehand.page, "_page"
@@ -69,7 +86,6 @@ def _get_client(self) -> AgentClient:
     async def execute(
         self, options_or_instruction: Union[AgentExecuteOptions, str]
     ) -> AgentResult:
-
         options: Optional[AgentExecuteOptions] = None
         instruction: str
 
@@ -83,56 +99,101 @@ async def execute(
             options = options_or_instruction
             instruction = options.instruction
 
-        if not instruction:
-            self.logger.error("No instruction provided for agent execution.")
-            return AgentResult(
-                message="No instruction provided.", completed=True, actions=[], usage={}
+        if self.stagehand.env == "LOCAL":
+            if not instruction:
+                self.logger.error("No instruction provided for agent execution.")
+                return AgentResult(
+                    message="No instruction provided.",
+                    completed=True,
+                    actions=[],
+                    usage={},
+                )
+
+            self.logger.info(
+                f"Agent starting execution for instruction: '{instruction}'",
+                category="agent",
             )
 
-        self.logger.info(
-            f"Agent starting execution for instruction: '{instruction}'",
-            category="agent",
-        )
-
-        try:
-            agent_result = await self.client.run_task(
-                instruction=instruction,
-                max_steps=self.config.max_steps,
-                options=options,
-            )
-        except Exception as e:
-            self.logger.error(
-                f"Exception during client.run_task: {e}", category="agent"
+            try:
+                agent_result = await self.client.run_task(
+                    instruction=instruction,
+                    max_steps=self.config.max_steps,
+                    options=options,
+                )
+            except Exception as e:
+                self.logger.error(
+                    f"Exception during client.run_task: {e}", category="agent"
+                )
+                empty_usage = AgentUsage(
+                    input_tokens=0, output_tokens=0, inference_time_ms=0
+                )
+                return AgentResult(
+                    message=f"Error: {str(e)}",
+                    completed=True,
+                    actions=[],
+                    usage=empty_usage,
+                )
+
+            # Update metrics if usage data is available in the result
+            if agent_result.usage:
+                # self.stagehand.update_metrics(
+                #     AGENT_METRIC_FUNCTION_NAME,
+                #     agent_result.usage.get("input_tokens", 0),
+                #     agent_result.usage.get("output_tokens", 0),
+                #     agent_result.usage.get("inference_time_ms", 0),
+                # )
+                pass  # Placeholder if metrics are to be handled differently or not at all
+
+            self.logger.info(
+                f"Agent execution finished. Success: {agent_result.completed}. Message: {agent_result.message}",
+                category="agent",
             )
-            empty_usage = AgentUsage(
-                input_tokens=0, output_tokens=0, inference_time_ms=0
+            # To clean up pydantic model output
+            actions_repr = [action.root for action in agent_result.actions]
+            self.logger.debug(
+                f"Agent actions: {actions_repr}",
+                category="agent",
             )
-            return AgentResult(
-                message=f"Error: {str(e)}",
-                completed=True,
-                actions=[],
-                usage=empty_usage,
+            agent_result.actions = actions_repr
+            return agent_result
+        else:
+            agent_config_payload = self.config.model_dump(
+                exclude_none=True, by_alias=True
             )
-
-        # Update metrics if usage data is available in the result
-        if agent_result.usage:
-            # self.stagehand.update_metrics(
-            #     AGENT_METRIC_FUNCTION_NAME,
-            #     agent_result.usage.get("input_tokens", 0),
-            #     agent_result.usage.get("output_tokens", 0),
-            #     agent_result.usage.get("inference_time_ms", 0),
-            # )
-            pass  # Placeholder if metrics are to be handled differently or not at all
-
-        self.logger.info(
-            f"Agent execution finished. Success: {agent_result.completed}. Message: {agent_result.message}",
-            category="agent",
-        )
-        # To clean up pydantic model output
-        actions_repr = [action.root for action in agent_result.actions]
-        self.logger.debug(
-            f"Agent actions: {actions_repr}",
-            category="agent",
-        )
-        agent_result.actions = actions_repr
-        return agent_result
+            agent_config_payload["provider"] = self.provider
+            payload = {
+                # Use the stored config
+                "agentConfig": agent_config_payload,
+                "executeOptions": options.model_dump(exclude_none=True, by_alias=True),
+            }
+
+            lock = self.stagehand._get_lock_for_session()
+            async with lock:
+                result = await self.stagehand._execute("agentExecute", payload)
+
+            if isinstance(result, dict):
+                # "success" is mandatory: fail loudly instead of guessing a value.
+                # The optional fields below are back-filled with defaults.
+                if "success" not in result:
+                    raise ValueError("Response missing required field 'success'")
+
+                # Ensure completed is set with default if not present
+                if "completed" not in result:
+                    result["completed"] = False
+
+                # Add default for message if missing
+                if "message" not in result:
+                    result["message"] = None
+
+                return AgentExecuteResult(**result)
+            elif result is None:
+                # Handle cases where the server might return None or an empty response
+                # Return a default failure result or raise an error
+                return AgentExecuteResult(
+                    success=False,
+                    completed=False,
+                    message="No result received from server",
+                )
+            else:
+                # If the result is not a dict and not None, it's unexpected
+                raise TypeError(f"Unexpected result type from server: {type(result)}")