
Commit eafff89

Merge pull request #297 from AgentOps-AI/firecrawl-tool
Firecrawl tool
2 parents a996ba3 + 25e8da8

3 files changed: +134 −22

agentstack/_tools/agentql/__init__.py

Lines changed: 20 additions & 20 deletions
In the rendered diff the removed and added lines read almost identically; the only visible content change is the un-escaped underscores in `social_media_links`, so the rest of the rewrite appears to be a whitespace-level cleanup of the docstring.

````diff
@@ -17,32 +17,32 @@ def query_data(url: str, query: Optional[str], prompt: Optional[str]) -> dict:
 
     AgentQL query to scrape the url.
 
-    Here is a guide on AgentQL query syntax:
+    Here is a guide on AgentQL query syntax:
 
-    Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social\_media\_links" is wrongly enclosed within parenthesis `()`.
+    Enclose all AgentQL query terms within curly braces `{}`. The following query structure isn't valid because the term "social_media_links" is wrongly enclosed within parenthesis `()`.
 
-    ```
-    ( # Should be {
-        social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
-    ) # Should be }
-    ```
+    ```
+    ( # Should be {
+        social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
+    ) # Should be }
+    ```
 
-    The following query is also invalid since its missing the curly braces `{}`
+    The following query is also invalid since its missing the curly braces `{}`
 
-    ```
-    # should include {
-    social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
-    # should include }
-    ```
+    ```
+    # should include {
+    social_media_links(The icons that lead to Facebook, Snapchat, etc.)[]
+    # should include }
+    ```
 
-    You can't include new lines in your semantic context. The following query structure isn't valid because the semantic context isn't contained within one line.
+    You can't include new lines in your semantic context. The following query structure isn't valid because the semantic context isn't contained within one line.
 
-    ```
-    {
-        social_media_links(The icons that lead
-        to Facebook, Snapchat, etc.)[]
-    }
-    ```
+    ```
+    {
+        social_media_links(The icons that lead
+        to Facebook, Snapchat, etc.)[]
+    }
+    ```
     """
     payload = {
         "url": url,
````

agentstack/_tools/firecrawl/__init__.py

Lines changed: 104 additions & 1 deletion
```diff
@@ -1,6 +1,6 @@
 import os
 from firecrawl import FirecrawlApp
-
+from typing import List, Dict, Any, Optional
 app = FirecrawlApp(api_key=os.getenv('FIRECRAWL_API_KEY'))
 
 
@@ -38,3 +38,106 @@ def retrieve_web_crawl(crawl_id: str):
     will tell you if the crawl is finished. If it is not, wait some more time then try again.
     """
     return app.check_crawl_status(crawl_id)
+
+
+def batch_scrape(urls: List[str], formats: List[str] = ['markdown', 'html']):
+    """
+    Batch scrape multiple URLs simultaneously.
+
+    Args:
+        urls: List of URLs to scrape
+        formats: List of desired output formats (e.g., ['markdown', 'html'])
+
+    Returns:
+        Dictionary containing the batch scrape results
+    """
+    batch_result = app.batch_scrape_urls(urls, {'formats': formats})
+    return batch_result
+
+
+def async_batch_scrape(urls: List[str], formats: List[str] = ['markdown', 'html']):
+    """
+    Asynchronously batch scrape multiple URLs.
+
+    Args:
+        urls: List of URLs to scrape
+        formats: List of desired output formats (e.g., ['markdown', 'html'])
+
+    Returns:
+        Dictionary containing the job ID and status URL
+    """
+    batch_job = app.async_batch_scrape_urls(urls, {'formats': formats})
+    return batch_job
+
+
+def check_batch_status(job_id: str):
+    """
+    Check the status of an asynchronous batch scrape job.
+
+    Args:
+        job_id: The ID of the batch scrape job
+
+    Returns:
+        Dictionary containing the current status and results if completed
+    """
+    return app.check_batch_scrape_status(job_id)
+
+
+def extract_data(urls: List[str], schema: Optional[Dict[str, Any]] = None, prompt: Optional[str] = None) -> Dict[
+    str, Any]:
+    """
+    Extract structured data from URLs using LLMs.
+
+    Args:
+        urls: List of URLs to extract data from
+        schema: Optional JSON schema defining the structure of data to extract
+        prompt: Optional natural language prompt describing the data to extract
+
+    Returns:
+        Dictionary containing the extracted structured data
+    """
+    params: Dict[str, Any] = {}
+
+    if prompt is not None:
+        params['prompt'] = prompt
+    elif schema is not None:
+        params['schema'] = schema
+
+    data = app.extract(urls, params)
+    return data
+
+
+def map_website(url: str, search: Optional[str] = None):
+    """
+    Map a website to get all URLs, with optional search functionality.
+
+    Args:
+        url: The base URL to map
+        search: Optional search term to filter URLs
+
+    Returns:
+        Dictionary containing the list of discovered URLs
+    """
+    params = {'search': search} if search else {}
+    map_result = app.map_url(url, params)
+    return map_result
+
+
+def batch_extract(urls: List[str], extract_params: Dict[str, Any]):
+    """
+    Batch extract structured data from multiple URLs.
+
+    Args:
+        urls: List of URLs to extract data from
+        extract_params: Dictionary containing extraction parameters including prompt or schema
+
+    Returns:
+        Dictionary containing the extracted data from all URLs
+    """
+    params = {
+        'formats': ['extract'],
+        'extract': extract_params
+    }
+
+    batch_result = app.batch_scrape_urls(urls, params)
+    return batch_result
```
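
Taken together, the new functions support an asynchronous batch workflow plus structured extraction and site mapping. The sketch below shows how they might be called; it assumes `FIRECRAWL_API_KEY` is set in the environment (as the module requires), and the URLs, schema, and the `'id'` key on the returned job dict are placeholder assumptions, not something this diff guarantees.

```python
# Minimal usage sketch for the newly added Firecrawl tools (illustrative only).
from agentstack._tools.firecrawl import (
    async_batch_scrape,
    check_batch_status,
    extract_data,
    map_website,
)

urls = ["https://example.com/blog", "https://example.com/docs"]  # placeholders

# Start an asynchronous batch scrape, then poll it by job ID.
job = async_batch_scrape(urls, formats=['markdown'])
status = check_batch_status(job['id'])  # assumes the returned dict exposes the job ID under 'id'

# Extract structured data; per extract_data, a prompt takes precedence over a schema.
schema = {"type": "object", "properties": {"title": {"type": "string"}}}
extracted = extract_data(urls, schema=schema)

# Map a site and filter the discovered URLs by a search term.
links = map_website("https://example.com", search="pricing")
```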

agentstack/_tools/firecrawl/config.json

Lines changed: 10 additions & 1 deletion
```diff
@@ -8,6 +8,15 @@
   "dependencies": [
     "firecrawl-py>=1.6.4"
   ],
-  "tools": ["web_scrape", "web_crawl", "retrieve_web_crawl"],
+  "tools": [
+    "web_scrape",
+    "web_crawl",
+    "retrieve_web_crawl",
+    "batch_scrape",
+    "check_batch_status",
+    "extract_data",
+    "map_website",
+    "batch_extract"
+  ],
   "cta": "Create an API key at https://www.firecrawl.dev/"
 }
```
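
`batch_extract`, the last entry added to the tools list above, simply wraps `batch_scrape_urls` with the `'extract'` format. A minimal, illustrative call (the prompt and URLs are placeholders) might look like:

```python
# Illustrative only: batch_extract forwards extract_params to Firecrawl's 'extract' format.
from agentstack._tools.firecrawl import batch_extract

result = batch_extract(
    urls=["https://example.com/team", "https://example.com/about"],  # placeholders
    extract_params={"prompt": "List the names and roles of the people mentioned on each page."},
)
```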
