Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,4 @@ data/

# For Config Files (Current Settings)
.config.pkl
*.pdf
10 changes: 6 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
browser-use==0.1.40
browser-use==0.1.41
pyperclip==1.9.0
gradio==5.23.1
gradio==5.27.0
json-repair
langchain-mistralai==0.2.4
langchain-google-genai==2.0.8
MainContentExtractor==0.0.4
langchain-ibm==0.3.10
langchain-ibm==0.3.10
langchain_mcp_adapters==0.0.9
langgraph==0.3.34
langchain-community
178 changes: 178 additions & 0 deletions src/agent/browser_use/browser_use_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
from __future__ import annotations

import asyncio
import gc
import inspect
import json
import logging
import os
import re
import time
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, Generic, List, Optional, TypeVar, Union

from dotenv import load_dotenv
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import (
BaseMessage,
HumanMessage,
SystemMessage,
)

# from lmnr.sdk.decorators import observe
from pydantic import BaseModel, ValidationError

from browser_use.agent.gif import create_history_gif
from browser_use.agent.memory.service import Memory, MemorySettings
from browser_use.agent.message_manager.service import MessageManager, MessageManagerSettings
from browser_use.agent.message_manager.utils import convert_input_messages, extract_json_from_model_output, save_conversation
from browser_use.agent.prompts import AgentMessagePrompt, PlannerPrompt, SystemPrompt
from browser_use.agent.views import (
REQUIRED_LLM_API_ENV_VARS,
ActionResult,
AgentError,
AgentHistory,
AgentHistoryList,
AgentOutput,
AgentSettings,
AgentState,
AgentStepInfo,
StepMetadata,
ToolCallingMethod,
)
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext
from browser_use.browser.views import BrowserState, BrowserStateHistory
from browser_use.controller.registry.views import ActionModel
from browser_use.controller.service import Controller
from browser_use.dom.history_tree_processor.service import (
DOMHistoryElement,
HistoryTreeProcessor,
)
from browser_use.exceptions import LLMException
from browser_use.telemetry.service import ProductTelemetry
from browser_use.telemetry.views import (
AgentEndTelemetryEvent,
AgentRunTelemetryEvent,
AgentStepTelemetryEvent,
)
from browser_use.utils import check_env_variables, time_execution_async, time_execution_sync
from browser_use.agent.service import Agent, AgentHookFunc

load_dotenv()
logger = logging.getLogger(__name__)

SKIP_LLM_API_KEY_VERIFICATION = os.environ.get('SKIP_LLM_API_KEY_VERIFICATION', 'false').lower()[0] in 'ty1'


class BrowserUseAgent(Agent):
@time_execution_async('--run (agent)')
async def run(
self, max_steps: int = 100, on_step_start: AgentHookFunc | None = None,
on_step_end: AgentHookFunc | None = None
) -> AgentHistoryList:
"""Execute the task with maximum number of steps"""

loop = asyncio.get_event_loop()

# Set up the Ctrl+C signal handler with callbacks specific to this agent
from browser_use.utils import SignalHandler

signal_handler = SignalHandler(
loop=loop,
pause_callback=self.pause,
resume_callback=self.resume,
custom_exit_callback=None, # No special cleanup needed on forced exit
exit_on_second_int=True,
)
signal_handler.register()

# Wait for verification task to complete if it exists
if hasattr(self, '_verification_task') and not self._verification_task.done():
try:
await self._verification_task
except Exception:
# Error already logged in the task
pass

try:
self._log_agent_run()

# Execute initial actions if provided
if self.initial_actions:
result = await self.multi_act(self.initial_actions, check_for_new_elements=False)
self.state.last_result = result

for step in range(max_steps):
# Check if waiting for user input after Ctrl+C
while self.state.paused:
await asyncio.sleep(0.5)
if self.state.stopped:
break

# Check if we should stop due to too many failures
if self.state.consecutive_failures >= self.settings.max_failures:
logger.error(f'❌ Stopping due to {self.settings.max_failures} consecutive failures')
break

# Check control flags before each step
if self.state.stopped:
logger.info('Agent stopped')
break

while self.state.paused:
await asyncio.sleep(0.2) # Small delay to prevent CPU spinning
if self.state.stopped: # Allow stopping while paused
break

if on_step_start is not None:
await on_step_start(self)

step_info = AgentStepInfo(step_number=step, max_steps=max_steps)
await self.step(step_info)

if on_step_end is not None:
await on_step_end(self)

if self.state.history.is_done():
if self.settings.validate_output and step < max_steps - 1:
if not await self._validate_output():
continue

await self.log_completion()
break
else:
logger.info('❌ Failed to complete task in maximum steps')

return self.state.history

except KeyboardInterrupt:
# Already handled by our signal handler, but catch any direct KeyboardInterrupt as well
logger.info('Got KeyboardInterrupt during execution, returning current history')
return self.state.history

finally:
# Unregister signal handlers before cleanup
signal_handler.unregister()

self.telemetry.capture(
AgentEndTelemetryEvent(
agent_id=self.state.agent_id,
is_done=self.state.history.is_done(),
success=self.state.history.is_successful(),
steps=self.state.n_steps,
max_steps_reached=self.state.n_steps >= max_steps,
errors=self.state.history.errors(),
total_input_tokens=self.state.history.total_input_tokens(),
total_duration_seconds=self.state.history.total_duration_seconds(),
)
)

await self.close()

if self.settings.generate_gif:
output_path: str = 'agent_history.gif'
if isinstance(self.settings.generate_gif, str):
output_path = self.settings.generate_gif

create_history_gif(task=self.task, history=self.state.history, output_path=output_path)
Loading