Skip to content

Commit 5cbc551

Browse files
committed
feat: add client integration
1 parent 8ded3d8 commit 5cbc551

16 files changed

+242
-131
lines changed

scrapegraph-py/README.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,51 @@ The SDK provides four main functionalities:
3434

3535
## Usage
3636

37+
### Basic Web Scraping
38+
39+
```python
40+
import os
from scrapegraph_py import ScrapeGraphClient, scrape
41+
from dotenv import load_dotenv
42+
43+
load_dotenv()
44+
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
45+
client = ScrapeGraphClient(api_key)
46+
47+
url = "https://scrapegraphai.com/"
48+
prompt = "What does the company do?"
49+
50+
result = scrape(client, url, prompt)
51+
print(result)
52+
```
53+
54+
### Local HTML Scraping
55+
56+
You can also scrape content from local HTML files:
57+
58+
```python
59+
from scrapegraph_py import ScrapeGraphClient, scrape_text
60+
from bs4 import BeautifulSoup
61+
62+
def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
63+
with open(file_path, 'r', encoding='utf-8') as file:
64+
html_content = file.read()
65+
66+
# Use BeautifulSoup to extract text content
67+
soup = BeautifulSoup(html_content, 'html.parser')
68+
text_content = soup.get_text(separator='\n', strip=True)
69+
70+
# Use ScrapeGraph AI to analyze the text
71+
return scrape_text(client, text_content, prompt)
72+
73+
# Usage
74+
api_key = os.getenv("SCRAPEGRAPH_API_KEY")  # requires `import os` and load_dotenv() as in the basic example
client = ScrapeGraphClient(api_key)
75+
result = scrape_local_html(
76+
client,
77+
'sample.html',
78+
"Extract main content and important information"
79+
)
80+
print("Extracted Data:", result)
81+
```
3782

3883
### Structured Data Extraction
3984

scrapegraph-py/examples/credits_example.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@
55

66
import os
77
from dotenv import load_dotenv
8-
from scrapegraph_py import credits
8+
from scrapegraph_py import ScrapeGraphClient, credits
99

1010
# Load environment variables from a .env file
1111
load_dotenv()
1212

1313
def main():
1414
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
15+
client = ScrapeGraphClient(api_key)
1516

16-
response = credits(api_key)
17+
response = credits(client)
1718
print("Response from the API:")
1819
print(response)
1920

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
import os
22
from dotenv import load_dotenv
3-
from scrapegraph_py import status
4-
from scrapegraph_py import feedback
3+
from scrapegraph_py import ScrapeGraphClient, feedback, status
54

65
# Load environment variables from .env file
76
load_dotenv()
87

98
def main():
109
# Get API key from environment variables
1110
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
11+
client = ScrapeGraphClient(api_key)
1212

1313
# Check API status
1414
try:
15-
result = status(api_key)
15+
result = status(client)
1616
print(f"API Status: {result}")
1717
except Exception as e:
1818
print(f"Error occurred: {e}")
@@ -21,8 +21,8 @@ def main():
2121
request_id = "3fa85f64-5717-4562-b3fc-2c963f66afa6"
2222
rating = 5
2323
feedback_message = "This is a test feedback message."
24-
feedback_response = feedback(api_key, request_id, rating, feedback_message) # Call the feedback function
25-
print(f"Feedback Response: {feedback_response}") # Print the response
24+
feedback_response = feedback(client, request_id, rating, feedback_message)
25+
print(f"Feedback Response: {feedback_response}")
2626

2727
if __name__ == "__main__":
2828
main()

scrapegraph-py/examples/local_scraper_example.py

Lines changed: 21 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,45 @@
11
from bs4 import BeautifulSoup
22
import os
3+
from scrapegraph_py import ScrapeGraphClient, scrape_text
4+
from dotenv import load_dotenv
35

4-
def scrape_local_html(file_path):
6+
def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
57
"""
6-
Scrape content from a local HTML file.
8+
Scrape content from a local HTML file using ScrapeGraph AI.
79
810
Args:
11+
client (ScrapeGraphClient): Initialized ScrapeGraph client
912
file_path (str): Path to the local HTML file
13+
prompt (str): Natural language prompt describing what to extract
1014
1115
Returns:
12-
dict: Extracted data from the HTML file
16+
str: Extracted data in JSON format
1317
"""
14-
# Check if file exists
1518
if not os.path.exists(file_path):
1619
raise FileNotFoundError(f"HTML file not found at: {file_path}")
1720

18-
# Read the HTML file
1921
with open(file_path, 'r', encoding='utf-8') as file:
2022
html_content = file.read()
2123

22-
# Parse HTML with BeautifulSoup
24+
# Use BeautifulSoup to extract text content
2325
soup = BeautifulSoup(html_content, 'html.parser')
26+
text_content = soup.get_text(separator='\n', strip=True)
2427

25-
# Example extraction - modify based on your HTML structure
26-
data = {
27-
'title': soup.title.string if soup.title else None,
28-
'paragraphs': [p.text for p in soup.find_all('p')],
29-
'links': [{'text': a.text, 'href': a.get('href')} for a in soup.find_all('a')],
30-
'headers': [h.text for h in soup.find_all(['h1', 'h2', 'h3'])]
31-
}
32-
33-
return data
28+
# Use ScrapeGraph AI to analyze the text
29+
return scrape_text(client, text_content, prompt)
3430

3531
def main():
36-
# Example usage
32+
load_dotenv()
33+
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
34+
client = ScrapeGraphClient(api_key)
35+
3736
try:
38-
# Assuming you have a sample.html file in the same directory
39-
result = scrape_local_html('sample.html')
40-
41-
# Print extracted data
42-
print("Title:", result['title'])
43-
print("\nParagraphs:")
44-
for p in result['paragraphs']:
45-
print(f"- {p}")
46-
47-
print("\nLinks:")
48-
for link in result['links']:
49-
print(f"- {link['text']}: {link['href']}")
50-
51-
print("\nHeaders:")
52-
for header in result['headers']:
53-
print(f"- {header}")
37+
result = scrape_local_html(
38+
client,
39+
'sample.html',
40+
"Extract main content and important information"
41+
)
42+
print("Extracted Data:", result)
5443

5544
except FileNotFoundError as e:
5645
print(f"Error: {e}")
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import os
2-
from scrapegraph_py import scrape
2+
from scrapegraph_py import ScrapeGraphClient, scrape
33
from dotenv import load_dotenv
44

5-
65
load_dotenv()
76
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
7+
client = ScrapeGraphClient(api_key)
8+
89
url = "https://scrapegraphai.com/"
910
prompt = "What does the company do?"
1011

11-
result = scrape(api_key, url, prompt)
12+
result = scrape(client, url, prompt)
1213
print(result)
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import os
22
from pydantic import BaseModel, Field
3-
from scrapegraph_py import scrape
3+
from scrapegraph_py import ScrapeGraphClient, scrape
44
from dotenv import load_dotenv
55

66
load_dotenv()
@@ -11,12 +11,13 @@ class CompanyInfoSchema(BaseModel):
1111
description: str = Field(description="A description of the company")
1212
main_products: list[str] = Field(description="The main products of the company")
1313

14-
# Example usage
14+
# Initialize client
1515
api_key = os.getenv("SCRAPEGRAPH_API_KEY")
16+
client = ScrapeGraphClient(api_key)
17+
1618
url = "https://scrapegraphai.com/"
1719
prompt = "What does the company do?"
1820

1921
# Call the scrape function with the schema
20-
result = scrape(api_key=api_key, url=url, prompt=prompt, schema=CompanyInfoSchema)
21-
22+
result = scrape(client=client, url=url, prompt=prompt, schema=CompanyInfoSchema)
2223
print(result)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from .client import ScrapeGraphClient
12
from .scrape import scrape
23
from .credits import credits
34
from .feedback import feedback
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
2+
class ScrapeGraphClient:
3+
"""Client for interacting with the ScrapeGraph AI API."""
4+
5+
def __init__(self, api_key: str, base_url: str = "https://api.scrapegraphai.com/v1"):
6+
"""Initialize the ScrapeGraph client.
7+
8+
Args:
9+
api_key (str): Your ScrapeGraph AI API key
10+
base_url (str): Base URL for the API (optional)
11+
"""
12+
self.api_key = api_key
13+
self.base_url = base_url.rstrip('/')
14+
15+
def get_headers(self, include_content_type: bool = True) -> dict:
16+
"""Get the headers for API requests.
17+
18+
Args:
19+
include_content_type (bool): Whether to include Content-Type header
20+
21+
Returns:
22+
dict: Headers for the API request
23+
"""
24+
headers = {
25+
"accept": "application/json",
26+
"SGAI-API-KEY": self.api_key
27+
}
28+
29+
if include_content_type:
30+
headers["Content-Type"] = "application/json"
31+
32+
return headers
33+
34+
def get_endpoint(self, path: str) -> str:
35+
"""Get the full endpoint URL.
36+
37+
Args:
38+
path (str): API endpoint path
39+
40+
Returns:
41+
str: Full endpoint URL
42+
"""
43+
return f"{self.base_url}/api/v1/{path}"

scrapegraph-py/scrapegraph_py/credits.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,24 @@
66

77
import requests
88
import json
9+
from .client import ScrapeGraphClient
10+
from .exceptions import raise_for_status_code
911

10-
def credits(api_key: str) -> str:
12+
def credits(client: ScrapeGraphClient) -> str:
1113
"""Retrieve credits from the API.
1214
1315
Args:
14-
api_key (str): Your ScrapeGraph AI API key.
16+
client (ScrapeGraphClient): Initialized ScrapeGraph client
1517
1618
Returns:
1719
str: Response from the API in JSON format.
1820
"""
19-
endpoint = "https://sgai-api.onrender.com/api/v1/credits"
20-
headers = {
21-
"accept": "application/json",
22-
"SGAI-API-KEY": api_key
23-
}
21+
endpoint = client.get_endpoint("credits")
22+
headers = client.get_headers(include_content_type=False)
2423

2524
try:
2625
response = requests.get(endpoint, headers=headers)
27-
response.raise_for_status()
28-
except requests.exceptions.HTTPError as http_err:
29-
return json.dumps({"error": "HTTP error occurred", "message": str(http_err), "status_code": response.status_code})
26+
raise_for_status_code(response.status_code, response)
27+
return response.text
3028
except requests.exceptions.RequestException as e:
31-
return json.dumps({"error": "An error occurred", "message": str(e)})
32-
33-
return response.text
29+
raise APIError(f"Request failed: {str(e)}", response=None)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
class APIError(Exception):
2+
"""Base class for API exceptions."""
3+
def __init__(self, message=None, response=None):
4+
self.message = message or self.__doc__
5+
self.response = response
6+
super().__init__(self.message)
7+
8+
class AuthenticationError(APIError):
9+
"""Raised when API key is invalid or missing."""
10+
11+
class RateLimitError(APIError):
12+
"""Raised when rate limits are exceeded."""
13+
def __init__(self, message=None, reset_time=None, response=None):
14+
super().__init__(message, response)
15+
self.reset_time = reset_time
16+
17+
class BadRequestError(APIError):
18+
"""Raised when a 400 Bad Request error occurs."""
19+
20+
class InternalServerError(APIError):
21+
"""Raised when a 500 Internal Server Error occurs."""

0 commit comments

Comments
 (0)