
Commit 2242a74

resolve conflict
2 parents: 5999a62 + f5e6648

25 files changed: +1467 −760 lines

.eslintrc.json

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@
         "es6": true,
         "node": true
     },
-    "ignorePatterns": ["**/*.d.ts"],
+    "ignorePatterns": ["**/*.d.ts", "examples/**/*"],
     "parser": "@typescript-eslint/parser",
     "parserOptions": {
         "project": "tsconfig.json",

.github/workflows/publish.yaml

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+name: Publish to NPM
+
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - 'v*' # This will run the workflow when you push a tag with a version format, like v1.0.0
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: volta-cli/action@v1
+      - run: npm ci --no-audit
+      - run: npm run lint --if-present
+      - run: npm test
+      - run: npm run build --if-present
+        env:
+          CI: true
+      - name: Setup .npmrc file to publish to npm
+        run: |
+          echo "//registry.npmjs.org/:_authToken=$NPM_TOKEN" > .npmrc
+        env:
+          NPM_TOKEN: ${{secrets.NPM_AUTOMATION_TOKEN}}
+      - name: Publish to NPM
+        run: npm publish

.gitleaks.toml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+[allowlist]
+description = "global allow lists"
+paths = [
+    '''.devcontainer''',
+]

.pre-commit-config.yaml

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+repos:
+  - repo: https://github.com/zricethezav/gitleaks
+    rev: v8.17.0
+    hooks:
+      - id: gitleaks

.prettierignore

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+/.history/
+/examples/.history/
+/.github/

LICENSE

Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+--------------------
+
+Copyright (c) 2013-2016, Scrapfly <https://scrapfly.io> and
+individual contributors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.md

Lines changed: 58 additions & 22 deletions
@@ -2,35 +2,71 @@
 
 `npm install scrapfly-sdk`
 
-Quick use:
+Typescript/NodeJS SDK for [Scrapfly.io](https://scrapfly.io/) web scraping API which allows to:
+
+- Scrape the web without being blocked.
+- Use headless browsers to access Javascript-powered page data.
+- Scale up web scraping.
+- ... and [much more](https://scrapfly.io/docs/scrape-api/getting-started)!
+
+For web scraping guides see [our blog](https://scrapfly.io/blog/) and [#scrapeguide](https://scrapfly.io/blog/tag/scrapeguide/) tag for how to scrape specific targets.
+
+## Quick Intro
+
+1. Register a [Scrapfly account for free](https://scrapfly.io/register)
+2. Get your API Key on [scrapfly.io/dashboard](https://scrapfly.io/dashboard)
+3. Start scraping: 🚀
 
 ```javascript
-import { ScrapflyClient, ScrapeConfig } from "scrapfly-sdk";
-
-const client = new ScrapflyClient({key: "YOUR SCRAPFLY KEY"});
-const result = await client.scrape(new ScrapeConfig({
-    url: "https://httpbin.dev/html",
-    // optional:
-    aps: true, // enable anti-scraping protection bypass
-    render_js: true, // enable headless browsers for javascript rendering
-    country: "us", // use a US proxy
-    method: "GET", // use GET, POST or other type of requests
-    data: {}, // what data to send if POST is used
-    ...
-}))
-console.log(result.result.content) // html content
+import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
+
+const key = 'YOUR SCRAPFLY KEY';
+const client = new ScrapflyClient({ key });
+const apiResponse = await client.scrape(
+    new ScrapeConfig({
+        url: 'https://web-scraping.dev/product/1',
+        // optional parameters:
+        // enable javascript rendering
+        render_js: true,
+        // set proxy country
+        country: 'us',
+        // enable anti-scraping protection bypass
+        asp: true,
+        // set residential proxies
+        proxy_pool: 'public_residential_pool',
+        // etc.
+    }),
+);
+console.log(apiResponse.result.content); // html content
+// Parse HTML directly with SDK (through cheerio)
+console.log(apiResponse.result.selector('h3').text());
 ```
 
-See [/examples](./examples/) for more.
+For more see [/examples](/examples/) directory.
+For more on Scrapfly API see our [getting started documentation](https://scrapfly.io/docs/scrape-api/getting-started)
+For Python see [Scrapfly Python SDK](https://github.com/scrapfly/python-scrapfly)
+
+## Debugging
 
-## Get Your API Key
+To enable debug logs set Scrapfly's log level to `"DEBUG"`:
 
-You can create a free account on [Scrapfly](https://scrapfly.io/register) to get your API Key.
+```javascript
+import { log } from 'scrapfly-sdk';
+
+log.setLevel('DEBUG');
+```
 
-- [Usage](https://scrapfly.io/docs/sdk/python)
-- [Python API](https://scrapfly.github.io/python-scrapfly/scrapfly)
-- [Open API 3 Spec](https://scrapfly.io/docs/openapi#get-/scrape)
-- [Scrapy Integration](https://scrapfly.io/docs/sdk/scrapy)
+Additionally, set `debug=true` in `ScrapeConfig` to access debug information in [Scrapfly web dashboard](https://scrapfly.io/dashboard):
+
+```typescript
+import { ScrapflyClient } from 'scrapfly-sdk';
+
+new ScrapeConfig({
+    url: 'https://web-scraping.dev/product/1',
+    debug: true,
+    // ^ enable debug information - this will show extra details on web dashboard
+});
+```
 
 ## Development
 
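Editor's note: the updated README example covers only the successful-scrape path. As a hedged illustration of how the error classes exercised in __tests__/client.test.ts (below) could be handled by a caller, here is a minimal TypeScript sketch; it assumes the package re-exports its errors module from the root entry point, which this diff does not show:

```typescript
import { ScrapflyClient, ScrapeConfig, errors } from 'scrapfly-sdk';
// ^ assumption: `errors` (src/errors.js in the test diff below) is re-exported
//   by the package root; adjust the import if your version does not expose it.

const client = new ScrapflyClient({ key: 'YOUR SCRAPFLY KEY' });

try {
    const apiResponse = await client.scrape(
        new ScrapeConfig({ url: 'https://web-scraping.dev/product/1' }),
    );
    console.log(apiResponse.result.content);
} catch (error) {
    if (error instanceof errors.ScrapflyError) {
        // base class of the errors raised in the tests below
        // (e.g. ScrapflyAspError, ScrapflyProxyError, TooManyRequests)
        console.error('scrape failed:', error.message);
    } else {
        throw error;
    }
}
```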

__tests__/client.test.ts

Lines changed: 21 additions & 21 deletions
@@ -1,7 +1,9 @@
 import axios from 'axios';
+import { AxiosRequestConfig } from 'axios';
 import { ScrapflyClient } from '../src/client.js';
 import * as errors from '../src/errors.js';
 import { ScrapeConfig } from '../src/scrapeconfig.js';
+import { describe, it, expect, jest, beforeEach } from '@jest/globals';
 
 jest.mock('axios');
 
@@ -37,7 +39,7 @@ describe('concurrent scrape', () => {
         // mock axios to return /account data and 2 types of results:
         // - success for /success endpoints
         // - ASP failure for /failure endpoints
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             if (config.url.includes('/account')) {
                 return {
                     status: 200,
@@ -123,7 +125,7 @@ describe('scrape', () => {
 
     it('GET success', async () => {
         const url = 'https://httpbin.dev/json';
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
            // Ensure the URL matches the pattern
            expect(config.url).toMatch(client.HOST + '/scrape');
            expect(config.method).toEqual('GET');
@@ -147,7 +149,7 @@ describe('scrape', () => {
 
     it('GET complex urls', async () => {
        const url = 'https://httpbin.dev/anything/?website=https://httpbin.dev/anything';
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
            // Ensure the URL matches the pattern
            expect(config.url).toMatch(client.HOST + '/scrape');
            expect(config.method).toEqual('GET');
@@ -169,11 +171,9 @@ describe('scrape', () => {
         expect(mockedAxios.request).toHaveBeenCalledTimes(1);
     });
 
-
-
     it('POST success', async () => {
         const url = 'https://httpbin.dev/json';
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
            // Ensure the URL matches the pattern
            expect(config.url).toMatch(client.HOST + '/scrape');
            expect(config.method).toEqual('POST');
@@ -235,7 +235,7 @@ describe('client errors', () => {
 
     it('raises ApiHttpServerError on 500 and success', async () => {
         const url = 'https://httpbin.dev/json';
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status_code: 500,
@@ -249,7 +249,7 @@ describe('client errors', () => {
 
     it('raises BadApiKeyError on 401', async () => {
         const url = 'https://httpbin.dev/json';
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status_code: 401,
@@ -262,7 +262,7 @@ describe('client errors', () => {
     });
     it('raises TooManyRequests on 429 and success', async () => {
         const url = 'https://httpbin.dev/json';
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status_code: 429,
@@ -273,7 +273,7 @@ describe('client errors', () => {
         await expect(client.scrape(new ScrapeConfig({ url }))).rejects.toThrow(errors.TooManyRequests);
     });
     it('raises ScrapflyScrapeError on ::SCRAPE:: resource and success', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::SCRAPE::BAD_PROTOCOL',
@@ -286,7 +286,7 @@ describe('client errors', () => {
     });
 
     it('raises ScrapflyWebhookError on ::WEBHOOK:: resource and success', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::WEBHOOK::DISABLED ',
@@ -298,7 +298,7 @@ describe('client errors', () => {
         );
     });
     it('raises ScrapflyProxyError on ERR::PROXY::POOL_NOT_FOUND resource and success', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::PROXY::POOL_NOT_FOUND ',
@@ -311,7 +311,7 @@ describe('client errors', () => {
     });
 
     it('raises ScrapflyScheduleError on ERR::SCHEDULE::DISABLED resource and success', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::SCHEDULE::DISABLED',
@@ -324,7 +324,7 @@ describe('client errors', () => {
     });
 
     it('raises ScrapflyAspError on ERR::ASP::SHIELD_ERROR resource and success', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::ASP::SHIELD_ERROR',
@@ -337,7 +337,7 @@ describe('client errors', () => {
     });
 
     it('raises ScrapflySessionError on ERR::SESSION::CONCURRENT_ACCESS resource and success', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::SESSION::CONCURRENT_ACCESS',
@@ -350,7 +350,7 @@ describe('client errors', () => {
     });
 
     it('raises ApiHttpClientError on success and unknown status', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 status: 'ERR::NEW',
@@ -362,7 +362,7 @@ describe('client errors', () => {
         );
     });
     it('raises UpstreamHttpServerError on failure, ERR::SCRAPE::BAD_UPSTREAM_RESPONSE and >=500', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 success: false,
@@ -375,7 +375,7 @@ describe('client errors', () => {
         );
     });
     it('raises UpstreamHttpClientError on failure, ERR::SCRAPE::BAD_UPSTREAM_RESPONSE and 4xx status', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 success: false,
@@ -398,7 +398,7 @@ describe('client errors', () => {
             SESSION: errors.ScrapflySessionError,
         };
         for (const [resource, err] of Object.entries(resourceErrMap)) {
-            mockedAxios.request.mockImplementation(async (config) => {
+            mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
                 return resultFactory({
                     url: config.url,
                     success: false,
@@ -410,7 +410,7 @@ describe('client errors', () => {
         });
 
     it('raises ScrapflyError on unhandled failure', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 success: false,
@@ -423,7 +423,7 @@ describe('client errors', () => {
         );
     });
     it('raises on unhandled failure', async () => {
-        mockedAxios.request.mockImplementation(async (config) => {
+        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
             return resultFactory({
                 url: config.url,
                 success: false,
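Editor's note: the recurring change in this file is typing the previously untyped `(config) =>` mock callbacks. The following is a minimal, self-contained sketch of that typed-mock pattern, not the repository's actual test code; the canned response object and the `jest.mocked()` call are assumptions, since the diff does not show how `mockedAxios` is created:

```typescript
import axios from 'axios';
import { AxiosRequestConfig } from 'axios';
import { describe, expect, it, jest } from '@jest/globals';

jest.mock('axios');
// assumption: the real test file obtains its typed mock in some equivalent way
const mockedAxios = jest.mocked(axios);

describe('typed axios mock', () => {
    it('returns a canned response', async () => {
        // explicitly typing `config` follows the pattern adopted throughout the diff above
        mockedAxios.request.mockImplementation(async (config: AxiosRequestConfig): Promise<any> => {
            return { status: 200, data: { url: config.url } }; // hypothetical payload
        });
        const response = await axios.request({ url: 'https://httpbin.dev/json' });
        expect(response.status).toBe(200);
    });
});
```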
