diff --git a/packages/components/nodes/documentloaders/S3File/S3File.ts b/packages/components/nodes/documentloaders/S3File/S3File.ts
new file mode 100644
index 00000000000..07295abaa88
--- /dev/null
+++ b/packages/components/nodes/documentloaders/S3File/S3File.ts
@@ -0,0 +1,241 @@
+import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
+import { S3Loader } from 'langchain/document_loaders/web/s3'
+import { UnstructuredLoader } from 'langchain/document_loaders/fs/unstructured'
+import { getCredentialData, getCredentialParam } from '../../../src/utils'
+import { S3Client, GetObjectCommand, S3ClientConfig } from '@aws-sdk/client-s3'
+import { Readable } from 'node:stream'
+import * as fsDefault from 'node:fs'
+import * as path from 'node:path'
+import * as os from 'node:os'
+
+type S3Config = S3ClientConfig & {
+ /** @deprecated Use the credentials object instead */
+ accessKeyId?: string
+ /** @deprecated Use the credentials object instead */
+ secretAccessKey?: string
+}
+
+class S3_DocumentLoaders implements INode {
+ label: string
+ name: string
+ version: number
+ description: string
+ type: string
+ icon: string
+ category: string
+ baseClasses: string[]
+ credential: INodeParams
+ inputs?: INodeParams[]
+
+ constructor() {
+ this.label = 'S3'
+ this.name = 'S3'
+ this.version = 1.0
+ this.type = 'Document'
+ this.icon = 's3.svg'
+ this.category = 'Document Loaders'
+ this.description = 'Load Data from S3 Buckets'
+ this.baseClasses = [this.type]
+ this.credential = {
+ label: 'AWS Credential',
+ name: 'credential',
+ type: 'credential',
+ credentialNames: ['awsApi']
+ }
+ this.inputs = [
+ {
+ label: 'Bucket',
+ name: 'bucketName',
+ type: 'string'
+ },
+ {
+ label: 'Object Key',
+ name: 'keyName',
+ type: 'string',
+ description: 'The object key (or key name) that uniquely identifies object in an Amazon S3 bucket',
+ placeholder: 'AI-Paper.pdf'
+ },
+ {
+ label: 'Region',
+ name: 'region',
+ type: 'options',
+ options: [
+ { label: 'af-south-1', name: 'af-south-1' },
+ { label: 'ap-east-1', name: 'ap-east-1' },
+ { label: 'ap-northeast-1', name: 'ap-northeast-1' },
+ { label: 'ap-northeast-2', name: 'ap-northeast-2' },
+ { label: 'ap-northeast-3', name: 'ap-northeast-3' },
+ { label: 'ap-south-1', name: 'ap-south-1' },
+ { label: 'ap-south-2', name: 'ap-south-2' },
+ { label: 'ap-southeast-1', name: 'ap-southeast-1' },
+ { label: 'ap-southeast-2', name: 'ap-southeast-2' },
+ { label: 'ap-southeast-3', name: 'ap-southeast-3' },
+ { label: 'ap-southeast-4', name: 'ap-southeast-4' },
+ { label: 'ap-southeast-5', name: 'ap-southeast-5' },
+ { label: 'ap-southeast-6', name: 'ap-southeast-6' },
+ { label: 'ca-central-1', name: 'ca-central-1' },
+ { label: 'ca-west-1', name: 'ca-west-1' },
+ { label: 'cn-north-1', name: 'cn-north-1' },
+ { label: 'cn-northwest-1', name: 'cn-northwest-1' },
+ { label: 'eu-central-1', name: 'eu-central-1' },
+ { label: 'eu-central-2', name: 'eu-central-2' },
+ { label: 'eu-north-1', name: 'eu-north-1' },
+ { label: 'eu-south-1', name: 'eu-south-1' },
+ { label: 'eu-south-2', name: 'eu-south-2' },
+ { label: 'eu-west-1', name: 'eu-west-1' },
+ { label: 'eu-west-2', name: 'eu-west-2' },
+ { label: 'eu-west-3', name: 'eu-west-3' },
+ { label: 'il-central-1', name: 'il-central-1' },
+ { label: 'me-central-1', name: 'me-central-1' },
+ { label: 'me-south-1', name: 'me-south-1' },
+ { label: 'sa-east-1', name: 'sa-east-1' },
+ { label: 'us-east-1', name: 'us-east-1' },
+ { label: 'us-east-2', name: 'us-east-2' },
+ { label: 'us-gov-east-1', name: 'us-gov-east-1' },
+ { label: 'us-gov-west-1', name: 'us-gov-west-1' },
+ { label: 'us-west-1', name: 'us-west-1' },
+ { label: 'us-west-2', name: 'us-west-2' }
+ ],
+ default: 'us-east-1'
+ },
+ {
+ label: 'Unstructured API URL',
+ name: 'unstructuredAPIUrl',
+ description:
+ 'Your Unstructured.io URL. Read more on how to get started',
+ type: 'string',
+ default: 'http://localhost:8000/general/v0/general'
+ },
+ {
+ label: 'Unstructured API KEY',
+ name: 'unstructuredAPIKey',
+ type: 'password',
+ optional: true
+ },
+ {
+ label: 'NarrativeText Only',
+ name: 'narrativeTextOnly',
+ description:
+ 'Only load documents with NarrativeText metadata from Unstructured. See how Unstructured partition data here',
+ default: true,
+ type: 'boolean',
+ optional: true,
+ additionalParams: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
+ }
+ ]
+ }
+ async init(nodeData: INodeData, _: string, options: ICommonObject): Promise {
+ const bucketName = nodeData.inputs?.bucketName as string
+ const keyName = nodeData.inputs?.keyName as string
+ const region = nodeData.inputs?.region as string
+ const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
+ const unstructuredAPIKey = nodeData.inputs?.unstructuredAPIKey as string
+ const metadata = nodeData.inputs?.metadata
+ const narrativeTextOnly = nodeData.inputs?.narrativeTextOnly as boolean
+
+ const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+ const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
+ const secretAccessKey = getCredentialParam('awsSecret', credentialData, nodeData)
+
+ const loader = new S3Loader({
+ bucket: bucketName,
+ key: keyName,
+ s3Config: {
+ region,
+ credentials: {
+ accessKeyId,
+ secretAccessKey
+ }
+ },
+ unstructuredAPIURL: unstructuredAPIUrl,
+ unstructuredAPIKey: unstructuredAPIKey
+ })
+
+ const s3Config: S3Config & {
+ accessKeyId?: string
+ secretAccessKey?: string
+ } = {
+ accessKeyId,
+ secretAccessKey
+ }
+
+ loader.load = async () => {
+ const tempDir = fsDefault.mkdtempSync(path.join(os.tmpdir(), 's3fileloader-'))
+
+ const filePath = path.join(tempDir, keyName)
+
+ try {
+ const s3Client = new S3Client(s3Config)
+
+ const getObjectCommand = new GetObjectCommand({
+ Bucket: bucketName,
+ Key: keyName
+ })
+
+ const response = await s3Client.send(getObjectCommand)
+
+ const objectData = await new Promise((resolve, reject) => {
+ const chunks: Buffer[] = []
+
+ if (response.Body instanceof Readable) {
+ response.Body.on('data', (chunk: Buffer) => chunks.push(chunk))
+ response.Body.on('end', () => resolve(Buffer.concat(chunks)))
+ response.Body.on('error', reject)
+ } else {
+ reject(new Error('Response body is not a readable stream.'))
+ }
+ })
+
+ fsDefault.mkdirSync(path.dirname(filePath), { recursive: true })
+
+ fsDefault.writeFileSync(filePath, objectData)
+ } catch (e: any) {
+ throw new Error(`Failed to download file ${keyName} from S3 bucket ${bucketName}: ${e.message}`)
+ }
+
+ try {
+ const options = {
+ apiUrl: unstructuredAPIUrl,
+ apiKey: unstructuredAPIKey
+ }
+
+ const unstructuredLoader = new UnstructuredLoader(filePath, options)
+
+ const docs = await unstructuredLoader.load()
+
+ fsDefault.rmdirSync(path.dirname(filePath), { recursive: true })
+
+ return docs
+ } catch {
+ fsDefault.rmdirSync(path.dirname(filePath), { recursive: true })
+ throw new Error(`Failed to load file ${filePath} using unstructured loader.`)
+ }
+ }
+
+ const docs = await loader.load()
+
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ const finaldocs = docs.map((doc) => {
+ return {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ })
+ return narrativeTextOnly ? finaldocs.filter((doc) => doc.metadata.category === 'NarrativeText') : finaldocs
+ }
+
+ return narrativeTextOnly ? docs.filter((doc) => doc.metadata.category === 'NarrativeText') : docs
+ }
+}
+module.exports = { nodeClass: S3_DocumentLoaders }
diff --git a/packages/components/nodes/documentloaders/S3File/s3.svg b/packages/components/nodes/documentloaders/S3File/s3.svg
new file mode 100644
index 00000000000..cd203eaad6e
--- /dev/null
+++ b/packages/components/nodes/documentloaders/S3File/s3.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/packages/components/package.json b/packages/components/package.json
index f6d5f516949..830e0381603 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -18,6 +18,7 @@
"dependencies": {
"@aws-sdk/client-bedrock-runtime": "3.422.0",
"@aws-sdk/client-dynamodb": "^3.360.0",
+ "@aws-sdk/client-s3": "^3.427.0",
"@dqbd/tiktoken": "^1.0.7",
"@getzep/zep-js": "^0.6.3",
"@gomomento/sdk": "^1.40.2",