
Pre/beta #11


Merged: 4 commits, Dec 3, 2024
12 changes: 12 additions & 0 deletions scrapegraph-py/CHANGELOG.md
@@ -1,3 +1,15 @@
## [1.4.3-beta.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.2...v1.4.3-beta.1) (2024-12-03)


### Bug Fixes

* updated comment ([8250818](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/825081883940bc1caa37f4f13e10f710770aeb9c))


### chore

* improved url validation ([83eac53](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/83eac530269a767e5469c4aded1656fe00a2cdc0))

## [1.4.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.1...v1.4.2) (2024-12-02)


57 changes: 29 additions & 28 deletions scrapegraph-py/scrapegraph_py/async_client.py
@@ -18,6 +18,35 @@


class AsyncClient:
    @classmethod
    def from_env(
        cls,
        verify_ssl: bool = True,
        timeout: float = 120,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """Initialize AsyncClient using API key from environment variable.

        Args:
            verify_ssl: Whether to verify SSL certificates
            timeout: Request timeout in seconds
            max_retries: Maximum number of retry attempts
            retry_delay: Delay between retries in seconds
        """
        from os import getenv

        api_key = getenv("SGAI_API_KEY")
        if not api_key:
            raise ValueError("SGAI_API_KEY environment variable not set")
        return cls(
            api_key=api_key,
            verify_ssl=verify_ssl,
            timeout=timeout,
            max_retries=max_retries,
            retry_delay=retry_delay,
        )

    def __init__(
        self,
        api_key: str,
@@ -54,34 +83,6 @@ def __init__(

        logger.info("✅ AsyncClient initialized successfully")

    @classmethod
    def from_env(
        cls,
        verify_ssl: bool = True,
        timeout: float = 120,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ):
        """Initialize AsyncClient using API key from environment variable.

        Args:
            verify_ssl: Whether to verify SSL certificates
            timeout: Request timeout in seconds
            max_retries: Maximum number of retry attempts
            retry_delay: Delay between retries in seconds
        """
        from os import getenv
        api_key = getenv("SGAI_API_KEY")
        if not api_key:
            raise ValueError("SGAI_API_KEY environment variable not set")
        return cls(
            api_key=api_key,
            verify_ssl=verify_ssl,
            timeout=timeout,
            max_retries=max_retries,
            retry_delay=retry_delay,
        )

    async def _make_request(self, method: str, url: str, **kwargs) -> Any:
        """Make HTTP request with retry logic."""
        for attempt in range(self.max_retries):
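The hunks above only move `from_env` ahead of `__init__`; the method body itself is unchanged apart from a blank line after the import. For orientation, a minimal usage sketch based solely on what the diff shows (the signature, the `SGAI_API_KEY` lookup, and the `ValueError` on a missing key; any further calls on the client are omitted):

```python
import asyncio

from scrapegraph_py.async_client import AsyncClient


async def main():
    # from_env reads SGAI_API_KEY from the environment and raises
    # ValueError("SGAI_API_KEY environment variable not set") if absent.
    client = AsyncClient.from_env(timeout=60, max_retries=5)
    print("client ready:", client)


asyncio.run(main())
```

The `_make_request` body is truncated in this diff; a loop matching its docstring and the `max_retries`/`retry_delay` parameters would look roughly like the following (a generic sketch only, not the SDK's actual implementation, whose HTTP layer and error handling are not visible here):

```python
import asyncio


async def request_with_retries(send, max_retries: int = 3, retry_delay: float = 1.0):
    """Generic retry wrapper: `send` is any zero-argument async callable."""
    for attempt in range(max_retries):
        try:
            return await send()
        except Exception:
            if attempt == max_retries - 1:
                raise  # out of attempts; surface the last error
            await asyncio.sleep(retry_delay)
```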
12 changes: 8 additions & 4 deletions scrapegraph-py/scrapegraph_py/models/smartscraper.py
@@ -3,7 +3,6 @@
from typing import Optional, Type
from uuid import UUID

import validators
from pydantic import BaseModel, Field, model_validator


@@ -25,9 +24,13 @@ def validate_user_prompt(self) -> "SmartScraperRequest":

    @model_validator(mode="after")
    def validate_url(self) -> "SmartScraperRequest":
        url = self.website_url
        if not validators.url(url):
            raise ValueError(f"Invalid URL: {url}")
        if self.website_url is None or not self.website_url.strip():
            raise ValueError("Website URL cannot be empty")
        if not (
            self.website_url.startswith("http://")
            or self.website_url.startswith("https://")
        ):
            raise ValueError("Invalid URL")
        return self

    def model_dump(self, *args, **kwargs) -> dict:
@@ -46,6 +49,7 @@ class GetSmartScraperRequest(BaseModel):
    @model_validator(mode="after")
    def validate_request_id(self) -> "GetSmartScraperRequest":
        try:
            # Validate the request_id is a valid UUID
            UUID(self.request_id)
        except ValueError:
            raise ValueError("request_id must be a valid UUID")
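The validator rewrite in `smartscraper.py` drops the `validators` dependency in favor of a plain scheme check: any non-blank string starting with `http://` or `https://` now passes, which is looser than full URL validation (the bare string `https://` would be accepted, for example). A standalone sketch of the new rule follows; `is_valid_url` is an illustrative helper, not part of the SDK:

```python
def is_valid_url(website_url: str | None) -> bool:
    # Mirrors SmartScraperRequest.validate_url above: reject None or
    # blank values, then require an http:// or https:// prefix.
    if website_url is None or not website_url.strip():
        return False
    return website_url.startswith(("http://", "https://"))


assert is_valid_url("https://example.com")
assert not is_valid_url("ftp://example.com")  # non-http scheme rejected
assert not is_valid_url("   ")  # blank input rejected
```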