Commit 2e04e5a

feat: searchscraper
1 parent 9149ce8 commit 2e04e5a

12 files changed: +603 / -30 lines changed
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@

"""
Example of using the async searchscraper functionality to search for information concurrently.
"""

import asyncio

from scrapegraph_py import AsyncClient
from scrapegraph_py.logger import sgai_logger

sgai_logger.set_logging(level="INFO")


async def main():
    # Initialize async client
    sgai_client = AsyncClient(api_key="your-api-key-here")

    # List of search queries
    queries = [
        "What is the latest version of Python and what are its main features?",
        "What are the key differences between Python 2 and Python 3?",
        "What is Python's GIL and how does it work?",
    ]

    # Create tasks for concurrent execution
    tasks = [sgai_client.searchscraper(user_prompt=query) for query in queries]

    # Execute requests concurrently; return_exceptions=True collects failures
    # instead of cancelling the whole batch
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    # Process results
    for i, response in enumerate(responses):
        if isinstance(response, Exception):
            print(f"\nError for query {i+1}: {response}")
        else:
            print(f"\nSearch {i+1}:")
            print(f"Query: {queries[i]}")
            print(f"Result: {response['result']}")
            print("Reference URLs:")
            for url in response["reference_urls"]:
                print(f"- {url}")

    await sgai_client.close()


if __name__ == "__main__":
    asyncio.run(main())
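
With asyncio.gather every request starts at once. For longer query lists it can help to cap the number of in-flight requests; a minimal sketch using asyncio.Semaphore (the limit of 5 and the helper names are illustrative assumptions, not SDK defaults):

import asyncio

from scrapegraph_py import AsyncClient


async def bounded_search(client: AsyncClient, sem: asyncio.Semaphore, query: str):
    # Hold one semaphore slot per in-flight request
    async with sem:
        return await client.searchscraper(user_prompt=query)


async def run_bounded(queries: list[str], limit: int = 5):
    sem = asyncio.Semaphore(limit)  # illustrative concurrency cap
    client = AsyncClient(api_key="your-api-key-here")
    try:
        tasks = [bounded_search(client, sem, q) for q in queries]
        return await asyncio.gather(*tasks, return_exceptions=True)
    finally:
        await client.close()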
Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@

"""
Example of using the async searchscraper functionality with output schemas for extraction.
"""

import asyncio
from typing import List

from pydantic import BaseModel

from scrapegraph_py import AsyncClient
from scrapegraph_py.logger import sgai_logger

sgai_logger.set_logging(level="INFO")


# Define schemas for extracting structured data
class PythonVersionInfo(BaseModel):
    version: str
    release_date: str
    major_features: List[str]


class PythonComparison(BaseModel):
    key_differences: List[str]
    backward_compatible: bool
    migration_difficulty: str


class GILInfo(BaseModel):
    definition: str
    purpose: str
    limitations: List[str]
    workarounds: List[str]


async def main():
    # Initialize async client
    sgai_client = AsyncClient(api_key="your-api-key-here")

    # Define search queries with their corresponding schemas
    searches = [
        {
            "prompt": "What is the latest version of Python? Include the release date and main features.",
            "schema": PythonVersionInfo,
        },
        {
            "prompt": "Compare Python 2 and Python 3, including backward compatibility and migration difficulty.",
            "schema": PythonComparison,
        },
        {
            "prompt": "Explain Python's GIL, its purpose, limitations, and possible workarounds.",
            "schema": GILInfo,
        },
    ]

    # Create tasks for concurrent execution
    tasks = [
        sgai_client.searchscraper(
            user_prompt=search["prompt"],
            output_schema=search["schema"],
        )
        for search in searches
    ]

    # Execute requests concurrently
    responses = await asyncio.gather(*tasks, return_exceptions=True)

    # Process results
    for i, response in enumerate(responses):
        if isinstance(response, Exception):
            print(f"\nError for search {i+1}: {response}")
        else:
            print(f"\nSearch {i+1}:")
            print(f"Query: {searches[i]['prompt']}")
            # print(f"Raw Result: {response['result']}")

            try:
                # Try to extract structured data using the schema
                result = searches[i]["schema"].model_validate(response["result"])

                # Print extracted structured data
                if isinstance(result, PythonVersionInfo):
                    print("\nExtracted Data:")
                    print(f"Python Version: {result.version}")
                    print(f"Release Date: {result.release_date}")
                    print("Major Features:")
                    for feature in result.major_features:
                        print(f"- {feature}")

                elif isinstance(result, PythonComparison):
                    print("\nExtracted Data:")
                    print("Key Differences:")
                    for diff in result.key_differences:
                        print(f"- {diff}")
                    print(f"Backward Compatible: {result.backward_compatible}")
                    print(f"Migration Difficulty: {result.migration_difficulty}")

                elif isinstance(result, GILInfo):
                    print("\nExtracted Data:")
                    print(f"Definition: {result.definition}")
                    print(f"Purpose: {result.purpose}")
                    print("Limitations:")
                    for limit in result.limitations:
                        print(f"- {limit}")
                    print("Workarounds:")
                    for workaround in result.workarounds:
                        print(f"- {workaround}")
            except Exception as e:
                print(f"\nCould not extract structured data: {e}")

            print("\nReference URLs:")
            for url in response["reference_urls"]:
                print(f"- {url}")

    await sgai_client.close()


if __name__ == "__main__":
    asyncio.run(main())
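
The isinstance chain above grows with every new schema. A small, hedged alternative is a generic helper that validates any result against its schema and pretty-prints it; model_validate and model_dump_json are standard pydantic v2 calls, while the helper itself is illustrative and not part of the SDK:

from pydantic import BaseModel, ValidationError


def print_structured(result: dict, schema: type[BaseModel]) -> None:
    # Validate the raw result dict against the schema, then dump it as JSON
    try:
        model = schema.model_validate(result)
        print(model.model_dump_json(indent=2))
    except ValidationError as e:
        print(f"Could not extract structured data: {e}")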
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@

"""
Example of using the searchscraper functionality to search for information.
"""

from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

sgai_logger.set_logging(level="INFO")

# Initialize the client
client = Client(api_key="your-api-key-here")

# Send a searchscraper request
response = client.searchscraper(
    user_prompt="What is the latest version of Python and what are its main features?"
)

# Print the results
print("\nResults:")
print(f"Answer: {response['result']}")
print("\nReference URLs:")
for url in response["reference_urls"]:
    print(f"- {url}")

# Close the client
client.close()
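
searchscraper responses also carry a request_id (the schema example below prints one). A minimal sketch of fetching a result again later by ID, assuming the sync Client exposes a get_searchscraper that mirrors the async method added further down in this commit:

from scrapegraph_py import Client

client = Client(api_key="your-api-key-here")

response = client.searchscraper(user_prompt="What is the latest version of Python?")
request_id = response["request_id"]

# Retrieve the same result later by ID (assumed sync counterpart of
# AsyncClient.get_searchscraper shown later in this diff)
previous = client.get_searchscraper(request_id=request_id)
print(previous["result"])

client.close()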
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@

"""
Example of using the searchscraper functionality with a custom output schema.
"""

from typing import List

from pydantic import BaseModel

from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

sgai_logger.set_logging(level="INFO")


# Define a custom schema for the output
class PythonVersionInfo(BaseModel):
    version: str
    release_date: str
    major_features: List[str]
    is_latest: bool


# Initialize the client
client = Client(api_key="your-api-key-here")

# Send a searchscraper request with schema
response = client.searchscraper(
    user_prompt="What is the latest version of Python? Include the release date and main features.",
    output_schema=PythonVersionInfo,
)

# The result will be structured according to our schema
print(f"Request ID: {response['request_id']}")
print(f"Result: {response['result']}")

print("\nReference URLs:")
for url in response["reference_urls"]:
    print(f"- {url}")

# Close the client
client.close()
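
Since the request passed output_schema, response["result"] should conform to PythonVersionInfo; to work with typed attributes rather than a raw dict, the same model_validate pattern from the async schema example applies. A short continuation of the example above:

from pydantic import ValidationError

try:
    info = PythonVersionInfo.model_validate(response["result"])
    print(f"Version: {info.version} (latest: {info.is_latest})")
    for feature in info.major_features:
        print(f"- {feature}")
except ValidationError as e:
    print(f"Result did not match the schema: {e}")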
@@ -1,28 +1,28 @@ (both sides of this hunk are textually identical; likely a whitespace or line-ending rewrite)

from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger

sgai_logger.set_logging(level="INFO")

# Initialize the client
sgai_client = Client(api_key="your-api-key-here")

# Example request_id (replace with an actual request_id from a previous request)
request_id = "your-request-id-here"

# Check remaining credits
credits = sgai_client.get_credits()
print(f"Credits Info: {credits}")

# Submit feedback for a previous request
feedback_response = sgai_client.submit_feedback(
    request_id=request_id,
    rating=5,  # Rating from 1-5
    feedback_text="The extraction was accurate and exactly what I needed!",
)
print(f"\nFeedback Response: {feedback_response}")

# Get previous results using get_smartscraper
previous_result = sgai_client.get_smartscraper(request_id=request_id)
print(f"\nRetrieved Previous Result: {previous_result}")

sgai_client.close()

scrapegraph-py/pyproject.toml

Lines changed: 2 additions & 2 deletions
@@ -83,7 +83,7 @@ line-length = 88

 [tool.ruff.lint]
 select = ["F", "E", "W", "C"]
-ignore = ["E203", "E501"]  # Ignore conflicts with Black
+ignore = ["E203", "E501", "C901"]  # Ignore conflicts with Black and function complexity

 [tool.mypy]
 python_version = "3.10"

@@ -97,4 +97,4 @@ build-backend = "hatchling.build"

 [tool.poe.tasks]
 pylint-local = "pylint scrapegraph_py/**/*.py"
-pylint-ci = "pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraph_py/**/*.py"
+pylint-ci = "pylint --disable=C0114,C0115,C0116,C901 --exit-zero scrapegraph_py/**/*.py"

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 43 additions & 0 deletions
@@ -10,6 +10,10 @@
 from scrapegraph_py.logger import sgai_logger as logger
 from scrapegraph_py.models.feedback import FeedbackRequest
 from scrapegraph_py.models.markdownify import GetMarkdownifyRequest, MarkdownifyRequest
+from scrapegraph_py.models.searchscraper import (
+    GetSearchScraperRequest,
+    SearchScraperRequest,
+)
 from scrapegraph_py.models.smartscraper import (
     GetSmartScraperRequest,
     SmartScraperRequest,

@@ -241,6 +245,45 @@ async def get_credits(self):
         )
         return result

+    async def searchscraper(
+        self,
+        user_prompt: str,
+        headers: Optional[dict[str, str]] = None,
+        output_schema: Optional[BaseModel] = None,
+    ):
+        """Send a searchscraper request"""
+        logger.info("🔍 Starting searchscraper request")
+        logger.debug(f"📝 Prompt: {user_prompt}")
+        if headers:
+            logger.debug("🔧 Using custom headers")
+
+        request = SearchScraperRequest(
+            user_prompt=user_prompt,
+            headers=headers,
+            output_schema=output_schema,
+        )
+        logger.debug("✅ Request validation passed")
+
+        result = await self._make_request(
+            "POST", f"{API_BASE_URL}/searchscraper", json=request.model_dump()
+        )
+        logger.info("✨ Searchscraper request completed successfully")
+        return result
+
+    async def get_searchscraper(self, request_id: str):
+        """Get the result of a previous searchscraper request"""
+        logger.info(f"🔍 Fetching searchscraper result for request {request_id}")
+
+        # Validate input using Pydantic model
+        GetSearchScraperRequest(request_id=request_id)
+        logger.debug("✅ Request ID validation passed")
+
+        result = await self._make_request(
+            "GET", f"{API_BASE_URL}/searchscraper/{request_id}"
+        )
+        logger.info(f"✨ Successfully retrieved result for request {request_id}")
+        return result
+
     async def close(self):
         """Close the session to free up resources"""
         logger.info("🔒 Closing AsyncClient session")