Skip to content
This repository was archived by the owner on Nov 10, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crewai_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
SnowflakeSearchTool,
SpiderTool,
StagehandTool,
SteelScrapeWebsiteTool,
TavilyExtractorTool,
TavilySearchTool,
TXTSearchTool,
Expand Down
1 change: 1 addition & 0 deletions crewai_tools/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
)
from .spider_tool.spider_tool import SpiderTool
from .stagehand_tool.stagehand_tool import StagehandTool
from .steel_scrape_website_tool.steel_scrape_website_tool import SteelScrapeWebsiteTool
from .tavily_extractor_tool.tavily_extractor_tool import TavilyExtractorTool
from .tavily_search_tool.tavily_search_tool import TavilySearchTool
from .txt_search_tool.txt_search_tool import TXTSearchTool
Expand Down
30 changes: 30 additions & 0 deletions crewai_tools/tools/steel_scrape_website_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SteelScrapeWebsiteTool

## Description

[Steel](https://steel.dev) is an open-source browser API that makes it easy to build AI apps and agents that interact with the web. Instead of building automation infrastructure from scratch, you can focus on your AI application while Steel handles the complexity.

## Installation

- Get an API key from [steel.dev](https://app.steel.dev) and set it in environment variables (`STEEL_API_KEY`).
- Install the [Steel SDK](https://github.com/steel-dev/steel-python) along with `crewai[tools]`:

```bash
pip install steel-sdk 'crewai[tools]'
```

## Example

Use the SteelScrapeWebsiteTool as follows to allow your agent to load websites:

```python
from crewai_tools import SteelScrapeWebsiteTool

tool = SteelScrapeWebsiteTool(formats=["markdown"], proxy=True)
```

## Arguments

- `api_key` Optional. Steel API key. Default is `STEEL_API_KEY` env variable.
- `formats` Optional[List[str]]. Content formats to return. Default: `["markdown"]`.
- `proxy` Optional. Enable/Disable proxies.
3 changes: 3 additions & 0 deletions crewai_tools/tools/steel_scrape_website_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .steel_scrape_website_tool import SteelScrapeWebsiteTool

__all__ = ["SteelScrapeWebsiteTool"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os
from typing import TYPE_CHECKING, List, Optional, Type

from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr

if TYPE_CHECKING:
from steel import Steel

# Probe for the optional `steel` package once at import time and record the
# outcome in STEEL_AVAILABLE.
try:
    from steel import Steel
except ImportError:
    STEEL_AVAILABLE = False
else:
    STEEL_AVAILABLE = True

class SteelScrapeWebsiteToolSchema(BaseModel):
    # Run-time argument schema wired into SteelScrapeWebsiteTool.args_schema;
    # it declares the single `url` argument that the tool's `_run` accepts.
    # NOTE: kept as comments rather than a docstring — pydantic is documented to
    # copy a model docstring into the generated JSON schema's "description".
    url: str = Field(description="Website URL")


class SteelScrapeWebsiteTool(BaseTool):
    # CrewAI tool that scrapes one URL through a Steel client and returns
    # whatever `Steel.scrape` returns.
    # NOTE: documented with comments instead of a class docstring on purpose —
    # pydantic is documented to copy a model docstring into the generated JSON
    # schema ("description"), which would alter the published tool spec.
    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True, frozen=False)
    name: str = "Steel web scrape tool"
    description: str = "Scrape webpages using Steel and return the contents"
    args_schema: Type[BaseModel] = SteelScrapeWebsiteToolSchema
    # Steel API key; filled from the constructor argument or STEEL_API_KEY.
    api_key: Optional[str] = None
    # Content formats requested from Steel; the constructor defaults this to ["markdown"].
    formats: Optional[List[str]] = None
    # Forwarded to Steel as `use_proxy`; None means "no preference expressed".
    proxy: Optional[bool] = None

    # Steel client instance, created in __init__.
    _steel: Optional["Steel"] = PrivateAttr(None)
    package_dependencies: List[str] = ["steel-sdk"]
    env_vars: List[EnvVar] = [
        EnvVar(name="STEEL_API_KEY", description="API key for Steel services", required=True),
    ]

    def __init__(
        self,
        api_key: Optional[str] = None,
        formats: Optional[List[str]] = None,
        proxy: Optional[bool] = None,
        **kwargs
    ):
        """Resolve the API key, make sure `steel-sdk` is importable, and build the client.

        Args:
            api_key: Steel API key. Falls back to the ``STEEL_API_KEY``
                environment variable when omitted or empty.
            formats: Content formats to request. Defaults to ``["markdown"]``
                when omitted or empty.
            proxy: Value forwarded to Steel as ``use_proxy``; ``None`` leaves
                the option unset.
            **kwargs: Passed unchanged to ``BaseTool.__init__``.

        Raises:
            EnvironmentError: If no API key is given and ``STEEL_API_KEY`` is unset.
            ImportError: If ``steel-sdk`` is missing and the user declines to install it.
            subprocess.CalledProcessError: If the ``uv add steel-sdk`` install fails.
        """
        super().__init__(**kwargs)
        self.api_key = api_key or os.getenv("STEEL_API_KEY")
        if not self.api_key:
            raise EnvironmentError("STEEL_API_KEY environment variable or api_key is required")

        try:
            from steel import Steel  # type: ignore
        except ImportError:
            import click

            # Offer to install the missing SDK interactively before giving up.
            if click.confirm(
                "You are missing the 'steel-sdk' package. Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "steel-sdk"], check=True)
                from steel import Steel  # type: ignore
            else:
                raise ImportError(
                    "`steel-sdk` package not found, please run `uv add steel-sdk`"
                )

        self._steel = Steel(steel_api_key=self.api_key)
        self.formats = formats or ["markdown"]
        self.proxy = proxy

    def _run(self, url: str):
        """Scrape ``url`` with the Steel client and return the SDK's response.

        Args:
            url: Address of the page to scrape.

        Returns:
            Whatever ``Steel.scrape`` returns for the request.

        Raises:
            RuntimeError: If the Steel client has not been initialized.
        """
        if not self._steel:
            raise RuntimeError("Steel not properly initialized")

        # Only forward options that carry a real value. `proxy` is None unless
        # the caller set it (and `formats` can be reassigned to None), and an
        # explicit None is not the same as omitting the argument.
        # NOTE(review): the Steel SDK appears to type these as
        # optional-by-omission — confirm against the SDK reference.
        candidate_options = {"format": self.formats, "use_proxy": self.proxy}
        scrape_options = {
            option: value
            for option, value in candidate_options.items()
            if value is not None
        }
        return self._steel.scrape(url=url, **scrape_options)

# When the optional `steel` package is importable, rebuild the pydantic model
# for SteelScrapeWebsiteTool once and flag the class so the rebuild is skipped
# if this code runs again.
# NOTE(review): presumably this lets pydantic resolve the "Steel" forward
# reference used in the `_steel` annotation — confirm.
try:
    from steel import Steel

    if not hasattr(SteelScrapeWebsiteTool, "_model_rebuilt"):
        SteelScrapeWebsiteTool.model_rebuild()
        SteelScrapeWebsiteTool._model_rebuilt = True
except ImportError:
    # `steel` is not installed; nothing to rebuild here. The tool's constructor
    # performs its own import check when an instance is created.
    pass
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ dependencies = [
"tiktoken>=0.8.0",
"stagehand>=0.4.1",
"portalocker==2.7.0",
"steel-sdk>=0.9.2",
]

[project.urls]
Expand All @@ -34,6 +35,9 @@ Documentation = "https://docs.crewai.com"
[project.scripts]

[project.optional-dependencies]
steel-sdk = [
"steel-sdk>=0.9.2",
]
scrapfly-sdk = [
"scrapfly-sdk>=0.8.19",
]
Expand Down
66 changes: 66 additions & 0 deletions tests/tools/test_steel_scrape_website_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import sys
from unittest.mock import MagicMock, patch

import pytest

from crewai_tools.tools.steel_scrape_website_tool.steel_scrape_website_tool import (
SteelScrapeWebsiteTool,
)


@pytest.fixture(autouse=True)
def mock_env_api_key():
    """Give every test a dummy STEEL_API_KEY; the patch is undone on teardown."""
    fake_environment = {"STEEL_API_KEY": "test_key"}
    with patch.dict(os.environ, fake_environment):
        yield

@pytest.fixture(autouse=True)
def mock_steel_module():
    """Swap a fake `steel` module into sys.modules and yield its mocked `Steel` class.

    Whatever was registered under "steel" before the test (if anything) is put
    back afterwards; otherwise the fake entry is removed.
    """
    previous_module = sys.modules.get("steel")

    fake_module = MagicMock()
    fake_module.Steel = MagicMock()
    sys.modules["steel"] = fake_module

    try:
        yield fake_module.Steel
    finally:
        if previous_module is None:
            del sys.modules["steel"]
        else:
            sys.modules["steel"] = previous_module

def test_init_prefers_arg_over_env():
    """An explicit api_key argument takes precedence over STEEL_API_KEY."""
    explicit_key = "arg_key"
    assert SteelScrapeWebsiteTool(api_key=explicit_key).api_key == explicit_key


def test_init_raises_without_api_key(monkeypatch):
    """Construction fails when neither the argument nor the env var supplies a key."""
    # Remove the key that the autouse environment fixture installed.
    monkeypatch.delenv("STEEL_API_KEY", raising=False)

    expect_env_error = pytest.raises(EnvironmentError)
    with expect_env_error:
        SteelScrapeWebsiteTool()


def test_run_success(mock_steel_module):
    """_run returns the client's scrape result and forwards url, proxy and formats."""
    target_url = "https://example.com"
    scraped_payload = {"markdown": "Hello"}

    steel_client = mock_steel_module.return_value
    steel_client.scrape.return_value = scraped_payload

    tool = SteelScrapeWebsiteTool(api_key="k", formats=["markdown"], proxy=True)

    assert tool._run(target_url) == scraped_payload
    steel_client.scrape.assert_called_once_with(
        url=target_url, use_proxy=True, format=["markdown"]
    )


def test_run_raises_when_not_initialized():
    """_run refuses to work when the Steel client has been cleared."""
    tool = SteelScrapeWebsiteTool(api_key="k")
    # Simulate a tool whose client was never created.
    tool._steel = None
    # Pin the specific guard message so an unrelated RuntimeError cannot
    # satisfy this test by accident.
    with pytest.raises(RuntimeError, match="Steel not properly initialized"):
        tool._run("https://example.com")


def test_defaults_and_env():
    """With no arguments the tool takes the env key, markdown format and no proxy setting."""
    default_tool = SteelScrapeWebsiteTool()

    assert default_tool.proxy is None
    assert default_tool.formats == ["markdown"]
    assert default_tool.api_key == "test_key"
112 changes: 112 additions & 0 deletions tool.specs.json
Original file line number Diff line number Diff line change
Expand Up @@ -8429,6 +8429,118 @@
"type": "object"
}
},
{
"description": "Scrape webpages using Steel and return the contents",
"env_vars": [
{
"default": null,
"description": "API key for Steel services",
"name": "STEEL_API_KEY",
"required": true
}
],
"humanized_name": "Steel web scrape tool",
"init_params_schema": {
"$defs": {
"EnvVar": {
"properties": {
"default": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Default"
},
"description": {
"title": "Description",
"type": "string"
},
"name": {
"title": "Name",
"type": "string"
},
"required": {
"default": true,
"title": "Required",
"type": "boolean"
}
},
"required": [
"name",
"description"
],
"title": "EnvVar",
"type": "object"
}
},
"properties": {
"api_key": {
"anyOf": [
{
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"title": "Api Key"
},
"formats": {
"anyOf": [
{
"items": {
"type": "string"
},
"type": "array"
},
{
"type": "null"
}
],
"default": null,
"title": "Formats"
},
"proxy": {
"anyOf": [
{
"type": "boolean"
},
{
"type": "null"
}
],
"default": null,
"title": "Proxy"
}
},
"title": "SteelScrapeWebsiteTool",
"type": "object"
},
"name": "SteelScrapeWebsiteTool",
"package_dependencies": [
"steel-sdk"
],
"run_params_schema": {
"properties": {
"url": {
"description": "Website URL",
"title": "Url",
"type": "string"
}
},
"required": [
"url"
],
"title": "SteelScrapeWebsiteToolSchema",
"type": "object"
}
},
{
"description": "A tool that can be used to semantic search a query from a txt's content.",
"env_vars": [],
Expand Down