-
-
Notifications
You must be signed in to change notification settings - Fork 22.7k
feat: added 3 Bright Data web scraping tools #4700
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
9982824
feat: Add Bright Data tools for FlowiseAI
Idanvilenski 8f0e649
Merge branch 'main' into feature/brightdata-tools
Idanvilenski d61524a
Fix structured data tool patterns and descriptions
Idanvilenski 7b0073f
Merge branch 'main' into feature/brightdata-tools
Idanvilenski 002094b
Fix structured data tool patterns and descriptions
Idanvilenski 091823b
Merge branch 'feature/brightdata-tools' of https://github.com/brightd…
Idanvilenski 8c6c985
Fix linting issues: remove console statements, fix regex escapes, rem…
Idanvilenski 5e9e6e6
Merge branch 'main' into feature/brightdata-tools
Idanvilenski 32386e0
Merge branch 'main' into feature/brightdata-tools
Idanvilenski b53f125
Remove redundant shared folder as requested by maintainer
Idanvilenski f53d769
Merge branch 'feature/brightdata-tools' of https://github.com/brightd…
Idanvilenski 88c6d64
Merge branch 'main' into feature/brightdata-tools
Idanvilenski 7a7bb74
Merge branch 'main' into feature/brightdata-tools
Idanvilenski 80d9ae5
Fixed Amazon Data Set issue
Idanvilenski 75897f9
Merge branch 'main' into feature/brightdata-tools
Idanvilenski File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
import { INodeParams, INodeCredential } from '../src/Interface' | ||
|
||
/**
 * Credential definition for the Bright Data API.
 *
 * Registers a credential named `brightDataApi` exposing a single
 * password-type input, `brightDataApiToken`, which holds the API token.
 */
class BrightDataApiCredential implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        this.label = 'Bright Data API'
        // Nodes refer to this credential through `credentialNames: ['brightDataApi']`.
        this.name = 'brightDataApi'
        this.version = 1.0
        this.description = 'Bright Data API credentials for web scraping and data extraction'
        this.inputs = [
            {
                label: 'Bright Data API Token',
                name: 'brightDataApiToken',
                // 'password' type so the token is not entered as a plain 'string' input.
                type: 'password',
                description: 'Your Bright Data API token from the user settings page'
            }
        ]
    }
}
|
||
// Export the credential class under the `credClass` key.
module.exports = { credClass: BrightDataApiCredential }
241 changes: 241 additions & 0 deletions
241
packages/components/nodes/tools/BrightData/BrightDataSearchEngine/BrightDataSearchEngine.ts
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,241 @@ | ||
import { Tool } from '@langchain/core/tools' | ||
import { ICommonObject, INode, INodeData, INodeParams } from '../../../../src/Interface' | ||
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../../src/utils' | ||
import axios from 'axios' | ||
|
||
/**
 * LangChain tool that fetches search-engine result pages through the
 * Bright Data `/request` endpoint and returns them as a markdown document.
 *
 * One request is issued per page of 10 results, sequentially, with a
 * one-second pause after each successful page that is not the last.
 * Failures are reported in the returned string (prefixed with `Error:` when
 * nothing could be retrieved) rather than thrown.
 */
class BrightDataSearchEngineTool extends Tool {
    name = 'brightdata_search_engine'
    description =
        'Search the web using Bright Data Web Unlocker with support for Google, Bing, and Yandex search engines. Returns SERP results in markdown format with URLs, titles, and descriptions. Supports pagination.'

    /**
     * @param apiToken Bright Data API token, sent as a Bearer token.
     * @param searchEngine 'google', 'bing' or 'yandex'; any other value falls back to Google.
     * @param maxResults Requested number of results; determines how many pages of 10 are fetched.
     * @param timeoutMs Per-request timeout in milliseconds.
     * @param zone Bright Data zone name sent with every request.
     */
    constructor(
        private apiToken: string,
        private searchEngine: string = 'google',
        private maxResults: number = 10,
        private timeoutMs: number = 60000,
        private zone: string = 'mcp_unlocker'
    ) {
        super()
    }

    /** Builds the HTTP headers sent with every Bright Data request. */
    private getApiHeaders() {
        return {
            authorization: `Bearer ${this.apiToken}`,
            'user-agent': 'flowise-brightdata/1.0.0',
            'Content-Type': 'application/json'
        }
    }

    /**
     * Builds the search URL for the given engine and page.
     *
     * @param query Raw search query; URL-encoded here.
     * @param engine Search engine name, matched case-insensitively.
     * @param cursor Zero-based page index as a string; missing or invalid values mean page 0.
     * @returns The search page URL to be fetched.
     */
    private buildSearchUrl(query: string, engine: string, cursor?: string): string {
        const encodedQuery = encodeURIComponent(query)
        const parsedPage = cursor ? parseInt(cursor, 10) : 0
        // Guard against NaN/negative cursors so the URL never contains "NaN".
        const page = Number.isFinite(parsedPage) && parsedPage > 0 ? parsedPage : 0
        const start = page * 10

        switch (engine.toLowerCase()) {
            case 'yandex':
                return `https://yandex.com/search/?text=${encodedQuery}&p=${page}`
            case 'bing':
                // `first` is one more than the zero-based offset (1, 11, 21, ...).
                return `https://www.bing.com/search?q=${encodedQuery}&first=${start + 1}`
            case 'google':
            default:
                return `https://www.google.com/search?q=${encodedQuery}&start=${start}`
        }
    }

    /**
     * Converts a requested result count into a number of pages (10 results each).
     * Non-numeric or non-positive counts yield a single page instead of zero/NaN pages.
     *
     * @returns `pages` to fetch and, when more than one, the last page index as `cursor`.
     */
    private calculatePagination(maxResults: number): { pages: number; cursor?: string } {
        const resultsPerPage = 10
        const requested = Number(maxResults)
        const pages = Number.isFinite(requested) && requested > 0 ? Math.ceil(requested / resultsPerPage) : 1
        return { pages, cursor: pages > 1 ? String(pages - 1) : undefined }
    }

    /**
     * Turns a thrown value into a human-readable message.
     * Recognises timeouts and errors carrying an HTTP `response`; anything
     * else is reported by its message, prefixed with `fallbackPrefix`.
     */
    private describeError(error: any, fallbackPrefix: string = ''): string {
        const message = typeof error?.message === 'string' ? error.message : ''

        if (error?.code === 'ECONNABORTED' || message.includes('timeout')) {
            return `Search timeout after ${this.timeoutMs / 1000} seconds.`
        }

        if (error?.response) {
            const statusCode = error.response.status
            const statusText = error.response.statusText
            const rawData = error.response.data
            // Avoid "[object Object]" if the error body is not a string.
            const errorData = typeof rawData === 'string' ? rawData : rawData ? JSON.stringify(rawData) : ''
            return `HTTP Error ${statusCode}: ${statusText}. ${errorData}`.trim()
        }

        return `${fallbackPrefix}${message || 'Unknown error occurred'}`
    }

    /**
     * Runs the search and returns the combined markdown.
     *
     * @param input The search query.
     * @returns A markdown document with one section per page, or a string
     *          starting with `Error:` when the query is empty or no page
     *          could be retrieved.
     */
    async _call(input: string): Promise<string> {
        try {
            const query = (input ?? '').trim()
            if (!query) {
                const errorMsg = 'No search query provided. Please specify a search term.'
                return `Error: ${errorMsg}`
            }

            const { pages } = this.calculatePagination(this.maxResults)
            const sections: string[] = []
            // Counted separately so failed pages are not reported as retrieved.
            let pagesRetrieved = 0

            for (let page = 0; page < pages; page++) {
                try {
                    const cursor = page > 0 ? String(page) : undefined
                    const searchUrl = this.buildSearchUrl(query, this.searchEngine, cursor)

                    const requestData = {
                        url: searchUrl,
                        zone: this.zone,
                        format: 'raw',
                        data_format: 'markdown'
                    }

                    const response = await axios({
                        url: 'https://api.brightdata.com/request',
                        method: 'POST',
                        data: requestData,
                        headers: this.getApiHeaders(),
                        responseType: 'text',
                        timeout: this.timeoutMs
                    })

                    if (response.data && typeof response.data === 'string') {
                        sections.push(`## Page ${page + 1} Results\n\n${response.data}`)
                        pagesRetrieved++
                    }

                    // Pause between consecutive page requests (not after the last one).
                    if (page < pages - 1) {
                        await new Promise((resolve) => setTimeout(resolve, 1000))
                    }
                } catch (pageError: any) {
                    // A failed page does not abort the remaining pages.
                    const errorMsg = `Page ${page + 1} failed: ${this.describeError(pageError)}`
                    sections.push(`## Page ${page + 1} Error\n\n${errorMsg}`)
                }
            }

            if (pagesRetrieved === 0) {
                const errorMsg = 'No search results could be retrieved.'
                const details = sections.length > 0 ? `\n\n${sections.join('\n')}` : ''
                return `Error: ${errorMsg}${details}`
            }

            const combinedResults = [
                `# Search Results for "${query}"`,
                `**Search Engine:** ${this.searchEngine}`,
                `**Pages Retrieved:** ${pagesRetrieved}/${pages}`,
                `**Requested Results:** ${this.maxResults}`,
                '',
                ...sections
            ].join('\n')

            return combinedResults
        } catch (error: any) {
            // Safety net for anything thrown outside the per-page handling.
            return `Error: ${this.describeError(error, 'Search failed: ')}`
        }
    }
}
|
||
/**
 * Node definition that exposes `BrightDataSearchEngineTool` in the 'Tools'
 * category. It declares the node metadata, the required `brightDataApi`
 * credential and the configurable inputs, and builds the tool in `init`.
 */
class BrightDataSearchEngine_Tools implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    credential: INodeParams
    inputs: INodeParams[]

    constructor() {
        this.label = 'Bright Data Search Engine'
        this.name = 'brightDataSearchEngine'
        this.version = 1.0
        this.type = 'BrightDataSearchEngine'
        this.icon = 'brightdata-search.svg'
        this.category = 'Tools'
        this.description = 'Search the web using Bright Data Web Unlocker with support for multiple search engines and bot detection bypass'
        this.baseClasses = [this.type, ...getBaseClasses(BrightDataSearchEngineTool)]

        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['brightDataApi']
        }

        this.inputs = [
            {
                label: 'Search Engine',
                name: 'searchEngine',
                type: 'options',
                options: [
                    { label: 'Google', name: 'google' },
                    { label: 'Bing', name: 'bing' },
                    { label: 'Yandex', name: 'yandex' }
                ],
                default: 'google',
                description: 'Search engine to use for web searches.',
                optional: true
            },
            {
                label: 'Max Results',
                name: 'maxResults',
                type: 'number',
                description: 'Maximum number of search results to retrieve (10 results per page).',
                placeholder: '10',
                default: 10,
                optional: true
            },
            {
                label: 'Timeout (seconds)',
                name: 'timeoutS',
                type: 'number',
                description: 'Maximum time in seconds to wait for each search request.',
                placeholder: '60',
                default: 60,
                optional: true,
                additionalParams: true
            },
            {
                label: 'Zone Name',
                name: 'zone',
                type: 'string',
                description: 'Bright Data zone name to use. Leave empty to use default zone.',
                placeholder: 'mcp_unlocker',
                optional: true,
                additionalParams: true
            },
            {
                label: 'Tool Description',
                name: 'description',
                type: 'string',
                description: 'Custom description of what the tool does. This helps the LLM understand when to use this tool.',
                rows: 3,
                additionalParams: true,
                optional: true,
                placeholder:
                    'Search the web using Bright Data Web Unlocker with support for multiple search engines and bot detection bypass.'
            }
        ]
    }

    /**
     * Builds a configured `BrightDataSearchEngineTool` from the node's inputs.
     *
     * @param nodeData Node instance data; `credential` and `inputs` are read.
     * @param _ Unused.
     * @param options Passed through to `getCredentialData`.
     * @returns The configured tool instance.
     * @throws Error when no Bright Data API token is found in the credential.
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const apiToken = getCredentialParam('brightDataApiToken', credentialData, nodeData)

        if (!apiToken) {
            throw new Error('Bright Data API token is required but not found in credentials')
        }

        // Input values are not guaranteed to be real numbers at runtime (they may
        // arrive as strings), so coerce explicitly and fall back to the default
        // for anything that is missing, non-numeric or not positive.
        const toPositiveNumber = (value: unknown, fallback: number): number => {
            const parsed = typeof value === 'string' ? parseFloat(value) : Number(value)
            return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback
        }

        const rawZone = nodeData.inputs?.zone
        const rawDescription = nodeData.inputs?.description

        const searchEngine = (nodeData.inputs?.searchEngine as string) || 'google'
        const maxResults = toPositiveNumber(nodeData.inputs?.maxResults, 10)
        const timeoutS = toPositiveNumber(nodeData.inputs?.timeoutS, 60)
        // A whitespace-only zone counts as empty and uses the default zone.
        const zone = (typeof rawZone === 'string' ? rawZone.trim() : '') || 'mcp_unlocker'
        const customDescription = typeof rawDescription === 'string' ? rawDescription : ''

        // The tool expects its timeout in milliseconds.
        const tool = new BrightDataSearchEngineTool(apiToken, searchEngine, maxResults, timeoutS * 1000, zone)

        if (customDescription) {
            tool.description = customDescription
        }

        return tool
    }
}
|
||
// Export the node class under the `nodeClass` key.
module.exports = { nodeClass: BrightDataSearchEngine_Tools }
1 change: 1 addition & 0 deletions
1
.../components/nodes/tools/BrightData/BrightDataSearchEngine/brightdata-search.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.