Skip to content

Commit a28be9d

Browse files
authored
Merge pull request #39 from ScrapeGraphAI/infinite-scroll-js
feat: add infinite scrolling
2 parents 763e52b + 3166542 commit a28be9d

File tree

3 files changed

+48
-1
lines changed

3 files changed

+48
-1
lines changed

scrapegraph-js/README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,30 @@ const schema = z.object({
107107
})();
108108
```
109109

110+
#### Scraping with Infinite Scrolling
111+
112+
For websites that load content dynamically through infinite scrolling (like social media feeds), you can use the `numberOfScrolls` parameter:
113+
114+
```javascript
115+
import { smartScraper } from 'scrapegraph-js';
116+
117+
const apiKey = 'your-api-key';
118+
const url = 'https://example.com/infinite-scroll-page';
119+
const prompt = 'Extract all the posts from the feed';
120+
const numberOfScrolls = 10; // Will scroll 10 times to load more content
121+
122+
(async () => {
123+
try {
124+
const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls);
125+
console.log('Extracted data from scrolled page:', response);
126+
} catch (error) {
127+
console.error('Error:', error);
128+
}
129+
})();
130+
```
131+
132+
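The `numberOfScrolls` parameter accepts an integer between 0 and 100 (inclusive), allowing you to control how many times the page should be scrolled before extraction. Non-integer values or values outside this range cause `smartScraper` to throw an error.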
133+
110134
### Search Scraping
111135

112136
Search and extract information from multiple web sources using AI.
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { smartScraper } from 'scrapegraph-js';
2+
import 'dotenv/config';
3+
4+
const apiKey = process.env.SGAI_APIKEY;
5+
// Example URL that requires scrolling (e.g., a social media feed or infinite scroll page)
6+
const url = 'https://example.com/infinite-scroll-page';
7+
const prompt = 'Extract all the posts from the feed';
8+
const numberOfScrolls = 10; // Will scroll 10 times to load more content
9+
10+
try {
11+
const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls);
12+
console.log('Extracted data from scrolled page:', response);
13+
} catch (error) {
14+
console.error('Error:', error);
15+
}

scrapegraph-js/src/smartScraper.js

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,11 @@ import { zodToJsonSchema } from 'zod-to-json-schema';
1010
* @param {string} url - The URL of the webpage to scrape
1111
* @param {string} prompt - Natural language prompt describing what data to extract
1212
* @param {Object} [schema] - Optional schema object defining the output structure
13+
* @param {number} [numberOfScrolls] - Optional number of times to scroll the page (0-100). If not provided, no scrolling will be performed.
1314
* @returns {Promise<string>} Extracted data in JSON format matching the provided schema
1415
* @throws - Will throw an error in case of an HTTP failure.
1516
*/
16-
export async function smartScraper(apiKey, url, prompt, schema = null) {
17+
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null) {
1718
const endpoint = 'https://api.scrapegraphai.com/v1/smartscraper';
1819
const headers = {
1920
'accept': 'application/json',
@@ -34,6 +35,13 @@ export async function smartScraper(apiKey, url, prompt, schema = null) {
3435
}
3536
}
3637

38+
if (numberOfScrolls !== null) {
39+
if (!Number.isInteger(numberOfScrolls) || numberOfScrolls < 0 || numberOfScrolls > 100) {
40+
throw new Error('numberOfScrolls must be an integer between 0 and 100');
41+
}
42+
payload.number_of_scrolls = numberOfScrolls;
43+
}
44+
3745
try {
3846
const response = await axios.post(endpoint, payload, { headers });
3947
return response.data;

0 commit comments

Comments
 (0)