@@ -4,39 +4,211 @@ description: API parameters, returns, examples.
44sidebar_position: 4
55---
66
7- ### ` functionName(param1, param2) `
7+ ## WebScraper Class
88
9- Description of function.
9+ ### ` new WebScraper(options?: ScraperOptions) `
10+
11+ Creates a new instance of the ` WebScraper ` class.
12+
13+ ** Parameters:**
14+
15+ - ` options ` (optional) - ` ScraperOptions ` - An object containing configuration
16+ options for the scraper.
17+
18+ - ` usePuppeteer ` - ` boolean ` (optional) - Whether to use Puppeteer for
19+ JavaScript-rendered pages. Default: ` true ` .
20+ - ` throttle ` - ` number ` (optional) - Delay in milliseconds between requests.
21+ Default: ` 1000 ` .
22+ - ` rules ` - ` Record<string, string> ` - CSS selectors defining data extraction
23+ rules.
24+
25+ ** Returns:**
26+
27+ - A new instance of ` WebScraper ` .
28+
29+ ** Example:**
30+
31+ ``` typescript
32+ import { WebScraper } from ' simple-web-scraper' ;
33+
34+ const scraper = new WebScraper ({
35+ usePuppeteer: true ,
36+ rules: { title: ' h1' , content: ' p' },
37+ });
38+ ```
39+
40+ ---
41+
42+ ## Methods
43+
44+ ### ` scrape(url: string): Promise<Record<string, any>> `
45+
46+ Scrapes the given URL based on the configured options.
47+
48+ ** Parameters:**
49+
50+ - ` url ` - ` string ` - The webpage URL to scrape.
51+
52+ ** Returns:**
53+
54+ - ` Promise<Record<string, any>> ` - The extracted data as an object.
55+
56+ ** Example:**
57+
58+ ``` typescript
59+ const data = await scraper .scrape (' https://example.com' );
60+ console .log (data );
61+ ```
62+
63+ ---
64+
65+ ### ` exportToJSON(data: any, filePath: string): void `
66+
67+ Exports the given data to a JSON file.
68+
69+ ** Parameters:**
70+
71+ - ` data ` - ` any ` - The data to be exported.
72+ - ` filePath ` - ` string ` - The path where the JSON file should be saved.
73+
74+ ** Returns:**
75+
76+ - ` void `
77+
78+ ** Example:**
79+
80+ ``` typescript
81+ import { exportToJSON } from ' simple-web-scraper' ;
82+
83+ const data = { name: ' Example' , value: 42 };
84+ exportToJSON (data , ' output.json' );
85+ ```
86+
87+ ---
88+
89+ ### ` exportToCSV(data: any | any[], filePath: string): void `
90+
91+ Exports the given data to a CSV file.
1092
1193** Parameters:**
1294
13- - ` param1 ` - Type - Description
14- - ` param2 ` - Type - Description
95+ - ` data ` - ` any | any[] ` - The data to be exported.
96+ - ` filePath ` - ` string ` - The path where the CSV file should be saved.
1597
1698** Returns:**
1799
18- - Return Type - Description
100+ - ` void `
19101
20- ** Examples :**
102+ ** Example:**
21103
22- ``` js
23- functionName (arg1, arg2);
104+ ``` typescript
105+ import { exportToCSV } from ' simple-web-scraper' ;
106+
107+ const data = [
108+ { name: ' Example 1' , value: 42 },
109+ { name: ' Example 2' , value: 99 },
110+ ];
111+ exportToCSV (data , ' output.csv' );
24112```
25113
26- ``` js
27- const { greet } = require (' ./packageName' );
114+ ---
115+
116+ ### ` readCSV(filePath: string): Promise<any[]> `
117+
118+ Reads a CSV file and converts it to JSON.
119+
120+ ** Parameters:**
28121
29- console .log (greet (' Charlie' )); // Output: Hello, Charlie!
122+ - ` filePath ` - ` string ` - The path to the CSV file.
123+
124+ ** Returns:**
125+
126+ - ` Promise<any[]> ` - The parsed CSV data as an array of objects.
127+
128+ ** Example:**
129+
130+ ``` typescript
131+ import { readCSV } from ' simple-web-scraper' ;
132+
133+ (async () => {
134+ try {
135+ const data = await readCSV (' output.csv' );
136+ console .log (data );
137+ } catch (error ) {
138+ console .error (' Error reading CSV:' , error );
139+ }
140+ })();
30141```
31142
32- ``` js
33- import React from ' react ' ;
34- import { greet } from ' ./packageName ' ;
143+ ---
144+
145+ ## Backend Example
35146
36- const Greeting: React .FC <{ name: string }> = ({ name }) => {
37- return < h1> {greet (name)}< / h1> ;
38- };
147+ This example demonstrates how to use ` simple-web-scraper ` in a Node.js backend:
39148
40- export default Greeting ;
149+ ``` typescript
150+ import express from ' express' ;
151+ import { WebScraper , exportToJSON , exportToCSV , readCSV } from ' simple-web-scraper' ;
41152
153+ const app = express ();
154+ const scraper = new WebScraper ({
155+ usePuppeteer: true ,
156+ rules: {
157+ fullHTML: ' html' , // Entire page HTML
158+ title: ' head > title' , // Page title
159+ description: ' meta[name="description"]' , // Meta description
160+ keywords: ' meta[name="keywords"]' , // Meta keywords
161+ favicon: ' link[rel="icon"]' , // Favicon URL
162+ mainHeading: ' h1' , // First H1 heading
163+ allHeadings: ' h1, h2, h3, h4, h5, h6' , // All headings on the page
164+ firstParagraph: ' p' , // First paragraph (same ' p' selector as allParagraphs — presumably the scraper returns the first match for this rule; verify)
165+ allParagraphs: ' p' , // All paragraphs on the page
166+ links: ' a' , // All links on the page
167+ images: ' img' , // All image URLs
168+ imageAlts: ' img' , // Alternative text for images (same ' img' selector as images — presumably the scraper extracts the alt attribute for this rule; verify)
169+ videos: ' video, iframe[src*="youtube.com"], iframe[src*="vimeo.com"]' , // Video sources
170+ tables: ' table' , // Capture table elements
171+ tableData: ' td' , // Capture table cells
172+ lists: ' ul, ol' , // Capture all lists
173+ listItems: ' li' , // Capture all list items
174+ scripts: ' script' , // JavaScript file sources
175+ stylesheets: ' link[rel="stylesheet"]' , // External CSS files
176+ structuredData: ' script[type="application/ld+json"]' , // JSON-LD structured data
177+ socialLinks:
178+ ' a[href*="facebook.com"], a[href*="twitter.com"], a[href*="linkedin.com"], a[href*="instagram.com"]' , // Social media links
179+ author: ' meta[name="author"]' , // Author meta tag
180+ publishDate: ' meta[property="article:published_time"], time' , // Publish date
181+ modifiedDate: ' meta[property="article:modified_time"]' , // Last modified date
182+ canonicalURL: ' link[rel="canonical"]' , // Canonical URL
183+ openGraphTitle: ' meta[property="og:title"]' , // OpenGraph title
184+ openGraphDescription: ' meta[property="og:description"]' , // OpenGraph description
185+ openGraphImage: ' meta[property="og:image"]' , // OpenGraph image
186+ twitterCard: ' meta[name="twitter:card"]' , // Twitter card type
187+ twitterTitle: ' meta[name="twitter:title"]' , // Twitter title
188+ twitterDescription: ' meta[name="twitter:description"]' , // Twitter description
189+ twitterImage: ' meta[name="twitter:image"]' , // Twitter image
190+ },
191+ });
192+
193+ app .get (' /scrape-example' , async (req , res ) => {
194+ try {
195+ const url = ' https://github.com/The-Node-Forge' ;
196+ const data = await scraper .scrape (url );
197+
198+ exportToJSON (data , ' output.json' ); // export JSON
199+ exportToCSV (data , ' output.csv' ); // export CSV
200+
201+ const readData = await readCSV (' output.csv' ); // Read back the CSV exported above (readCSV takes a file path, not the data object)
202+
203+ res .status (200 ).json ({ success: true , readData });
204+ } catch (error ) {
205+ res .status (500 ).json ({ success: false , error: error .message });
206+ }
207+ });
42208```
209+
210+ ---
211+
212+ ## Contributing
213+
214+ Contributions are welcome! Please submit issues or pull requests on GitHub.
0 commit comments