Skip to content

Commit 18f1277

Browse files
authored
Merge branch 'main' into js-eslint-prettier-configuration
2 parents 025b118 + 14274a6 commit 18f1277

15 files changed

+173
-35
lines changed

package.json

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
},
1010
"author": "ScrapeGraphAI",
1111
"license": "MIT",
12-
"workspaces": [
13-
"scrapegraph-js"
14-
],
1512
"scripts": {
1613
"semantic-release": "semantic-release"
1714
},

scrapegraph-js/examples/.env.example

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
# ScrapegraphAI API Key
2-
SGAI-APIKEY="your ScrapegraphAI API Key"
2+
SGAI_APIKEY="your ScrapegraphAI API Key"

scrapegraph-js/examples/getCredits_example.js

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
import { getCredits } from 'scrapegraph-sdk';
1+
import { getCredits } from 'scrapegraph-js';
22
import 'dotenv/config';
33

4-
try {
5-
const apiKey = process.env.SGAI_APIKEY;
4+
const apiKey = process.env.SGAI_APIKEY;
65

6+
try {
77
const myCredit = await getCredits(apiKey);
8-
98
console.log(myCredit)
109
} catch (error) {
1110
console.error(error)

scrapegraph-js/examples/getSmartScraperRequest_example.js

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
import { getSmartScraperRequest } from 'scrapegraph-sdk';
1+
import { getSmartScraperRequest } from 'scrapegraph-js';
22
import 'dotenv/config';
33

4-
try {
5-
const apiKey = process.env.SGAI_APIKEY;
6-
const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6'
4+
const apiKey = process.env.SGAI_APIKEY;
5+
const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6'
76

7+
try {
88
const requestInfo = await getSmartScraperRequest(apiKey, requestId);
9-
109
console.log(requestInfo);
1110
} catch (error) {
1211
console.error(error);
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import { smartScraper } from 'scrapegraph-js';
2+
import { z } from 'zod';
3+
import 'dotenv/config';
4+
5+
const apiKey = process.env.SGAI_APIKEY;
6+
const url = 'https://scrapegraphai.com/';
7+
const prompt = 'What does the company do? and ';
8+
9+
const schema = 2;
10+
11+
try {
12+
const response = await smartScraper(apiKey, url, prompt, schema);
13+
console.log(response.result);
14+
} catch (error) {
15+
console.error(error);
16+
}

scrapegraph-js/examples/sendFeedback_example.js

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
import { sendFeedback } from 'scrapegraph-sdk';
1+
import { sendFeedback } from 'scrapegraph-js';
22
import 'dotenv/config';
33

4-
try {
5-
const apiKey = process.env.SGAI_APIKEY;
6-
const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b';
7-
const rating = 5;
8-
const feedbackMessage = 'This is a test feedback message.';
4+
const apiKey = process.env.SGAI_APIKEY;
5+
const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b';
6+
const rating = 5;
7+
const feedbackMessage = 'This is a test feedback message.';
98

9+
try {
1010
const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage);
1111
console.log(feedback_response);
1212
} catch (error) {

scrapegraph-js/examples/smartScraper_example.js

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
import { smartScraper } from 'scrapegraph-sdk';
1+
import { smartScraper } from 'scrapegraph-js';
22
import 'dotenv/config';
33

4-
try {
5-
const apiKey = process.env.SGAI_APIKEY;
6-
const url = 'https://scrapegraphai.com';
7-
const prompt = 'What does the company do?';
4+
const apiKey = process.env.SGAI_APIKEY;
5+
const url = 'https://scrapegraphai.com';
6+
const prompt = 'What does the company do?';
87

8+
try {
99
const response = await smartScraper(apiKey, url, prompt);
10-
1110
console.log(response);
1211
} catch (error) {
1312
console.error(error);

scrapegraph-js/package-lock.json

Lines changed: 11 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scrapegraph-js/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@
2828
"type": "module",
2929
"dependencies": {
3030
"axios": "^1.6.0",
31-
"zod": "^3.23.8"
31+
"zod": "^3.23.8",
32+
"zod-to-json-schema": "^3.23.5"
3233
},
3334
"devDependencies": {
3435
"@eslint/js": "^9.16.0",

scrapegraph-js/readme.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,34 @@ const prompt = 'Extract the main heading and description.';
7777
```
7878

7979
#### Scraping with Custom Output Schema
80+
> [!NOTE]
81+
> This feature requires the [Zod](https://www.npmjs.com/package/zod) package, which is used to define the output schema.
82+
83+
Here is a real-world example:
8084

8185
```javascript
82-
//TODO
86+
import { smartScraper } from 'scrapegraph-js';
87+
import { z } from 'zod';
88+
import 'dotenv/config';
89+
90+
const apiKey = 'your-api-key';
91+
const url = 'https://scrapegraphai.com/';
92+
const prompt = 'What does the company do? and ';
93+
94+
const schema = z.object({
95+
title: z.string().describe('The title of the webpage'),
96+
description: z.string().describe('The description of the webpage'),
97+
summary: z.string().describe('A brief summary of the webpage')
98+
});
99+
100+
(async () => {
101+
try {
102+
const response = await smartScraper(apiKey, url, prompt, schema);
103+
console.log(response.result);
104+
} catch (error) {
105+
console.error('Error:', error);
106+
}
107+
})();
83108
```
84109

85110
### Checking API Credits

scrapegraph-js/src/smartScraper.js

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import axios from 'axios';
2-
import handleError from './utils/handleError.js'
2+
import handleError from './utils/handleError.js';
3+
import { ZodType } from 'zod';
4+
import { zodToJsonSchema } from 'zod-to-json-schema';
35

46
/**
57
* Scrape and extract structured data from a webpage using ScrapeGraph AI.
@@ -25,12 +27,11 @@ export async function smartScraper(apiKey, url, prompt, schema = null) {
2527
};
2628

2729
if (schema) {
28-
payload.output_schema = {
29-
description: schema.title || 'Schema',
30-
name: schema.title || 'Schema',
31-
properties: schema.properties || {},
32-
required: schema.required || []
33-
};
30+
if (schema instanceof ZodType) {
31+
payload.output_schema = zodToJsonSchema(schema);
32+
} else {
33+
throw new Error('The schema must be an instance of a valid Zod schema');
34+
}
3435
}
3536

3637
try {

scrapegraph-py/CHANGELOG.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,38 @@
1+
## [1.4.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.3.0...v1.4.0) (2024-11-30)
2+
3+
4+
### Features
5+
6+
* added example of the smartScraper function using a schema ([baf933b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/baf933b0826b63d4ecf61c8593676357619a1c73))
7+
* implemented support for requests with schema ([10a1a5a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/10a1a5a477a6659aabf3afebfffdbefc14d12d3e))
8+
9+
10+
### Bug Fixes
11+
12+
* the "workspaces" key has been removed because it conflicted with the package.json file in the scrapegraph-js folder. ([1299173](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/129917377b6a685d769a480b717bf980d3199833))
13+
14+
15+
### chore
16+
17+
* added Zod package dependency ([ee5738b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/ee5738bd737cd07a553d148403a4bbb5e80e5be3))
18+
19+
20+
### Docs
21+
22+
* added an example of the smartScraper functionality using a schema ([cf2f28f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/cf2f28fa029df0acb7058fde8239046d77ef0a8a))
23+
24+
25+
### Refactor
26+
27+
* code refactoring ([a2b57c7](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/a2b57c7e482dfb5c7c1a125d1684e0367088c83b))
28+
29+
## [1.3.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.2...v1.3.0) (2024-11-30)
30+
31+
32+
### Features
33+
34+
* add integration for env variables ([6a351f3](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/6a351f3ef70a1f00b5f5de5aaba2f408b6bf07dd))
35+
136
## [1.2.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.1...v1.2.2) (2024-11-29)
237

338

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,34 @@ def __init__(
5454

5555
logger.info("✅ AsyncClient initialized successfully")
5656

57+
@classmethod
58+
def from_env(
59+
cls,
60+
verify_ssl: bool = True,
61+
timeout: float = 120,
62+
max_retries: int = 3,
63+
retry_delay: float = 1.0,
64+
):
65+
"""Initialize AsyncClient using API key from environment variable.
66+
67+
Args:
68+
verify_ssl: Whether to verify SSL certificates
69+
timeout: Request timeout in seconds
70+
max_retries: Maximum number of retry attempts
71+
retry_delay: Delay between retries in seconds
72+
"""
73+
from os import getenv
74+
api_key = getenv("SGAI_API_KEY")
75+
if not api_key:
76+
raise ValueError("SGAI_API_KEY environment variable not set")
77+
return cls(
78+
api_key=api_key,
79+
verify_ssl=verify_ssl,
80+
timeout=timeout,
81+
max_retries=max_retries,
82+
retry_delay=retry_delay,
83+
)
84+
5785
async def _make_request(self, method: str, url: str, **kwargs) -> Any:
5886
"""Make HTTP request with retry logic."""
5987
for attempt in range(self.max_retries):

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,34 @@
1818

1919

2020
class SyncClient:
21+
@classmethod
22+
def from_env(
23+
cls,
24+
verify_ssl: bool = True,
25+
timeout: float = 30,
26+
max_retries: int = 3,
27+
retry_delay: float = 1.0,
28+
):
29+
"""Initialize SyncClient using API key from environment variable.
30+
31+
Args:
32+
verify_ssl: Whether to verify SSL certificates
33+
timeout: Request timeout in seconds
34+
max_retries: Maximum number of retry attempts
35+
retry_delay: Delay between retries in seconds
36+
"""
37+
from os import getenv
38+
api_key = getenv("SGAI_API_KEY")
39+
if not api_key:
40+
raise ValueError("SGAI_API_KEY environment variable not set")
41+
return cls(
42+
api_key=api_key,
43+
verify_ssl=verify_ssl,
44+
timeout=timeout,
45+
max_retries=max_retries,
46+
retry_delay=retry_delay,
47+
)
48+
2149
def __init__(
2250
self,
2351
api_key: str,

0 commit comments

Comments
 (0)