Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion apps/sim/app/api/knowledge/[id]/documents/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ describe('Knowledge Base Documents API Route', () => {
],
processingOptions: {
chunkSize: 50, // Invalid: too small
minCharactersPerChunk: 10, // Invalid: too small
minCharactersPerChunk: 0, // Invalid: too small
recipe: 'default',
lang: 'en',
chunkOverlap: 1000, // Invalid: too large
Expand Down
2 changes: 1 addition & 1 deletion apps/sim/app/api/knowledge/[id]/documents/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ const BulkCreateDocumentsSchema = z.object({
documents: z.array(CreateDocumentSchema),
processingOptions: z.object({
chunkSize: z.number().min(100).max(4000),
minCharactersPerChunk: z.number().min(50).max(2000),
minCharactersPerChunk: z.number().min(1).max(2000),
recipe: z.string(),
lang: z.string(),
chunkOverlap: z.number().min(0).max(500),
Expand Down
2 changes: 1 addition & 1 deletion apps/sim/app/api/knowledge/route.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ describe('Knowledge Base API Route', () => {
expect(data.data.embeddingDimension).toBe(1536)
expect(data.data.chunkingConfig).toEqual({
maxSize: 1024,
minSize: 100,
minSize: 1,
overlap: 200,
})
})
Expand Down
4 changes: 2 additions & 2 deletions apps/sim/app/api/knowledge/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ const CreateKnowledgeBaseSchema = z.object({
chunkingConfig: z
.object({
maxSize: z.number().min(100).max(4000).default(1024),
minSize: z.number().min(50).max(2000).default(100),
minSize: z.number().min(1).max(2000).default(1),
overlap: z.number().min(0).max(500).default(200),
})
.default({
maxSize: 1024,
minSize: 100,
minSize: 1,
overlap: 200,
})
.refine((data) => data.minSize < data.maxSize, {
Expand Down
3 changes: 2 additions & 1 deletion apps/sim/app/api/knowledge/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,8 @@ export async function processDocumentAsync(
docData.filename,
docData.mimeType,
processingOptions.chunkSize || 1000,
processingOptions.chunkOverlap || 200
processingOptions.chunkOverlap || 200,
processingOptions.minCharactersPerChunk || 1
)

const now = new Date()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ export function UploadModal({
try {
await uploadFiles(files, knowledgeBaseId, {
chunkSize: chunkingConfig?.maxSize || 1024,
minCharactersPerChunk: chunkingConfig?.minSize || 100,
minCharactersPerChunk: chunkingConfig?.minSize || 1,
chunkOverlap: chunkingConfig?.overlap || 200,
recipe: 'default',
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ const FormSchema = z
description: z.string().max(500, 'Description must be less than 500 characters').optional(),
minChunkSize: z
.number()
.min(50, 'Min chunk size must be at least 50')
.min(1, 'Min chunk size must be at least 1')
.max(2000, 'Min chunk size must be less than 2000'),
maxChunkSize: z
.number()
Expand Down Expand Up @@ -115,7 +115,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
defaultValues: {
name: '',
description: '',
minChunkSize: 100,
minChunkSize: 1,
maxChunkSize: 1024,
overlapSize: 200,
},
Expand Down Expand Up @@ -299,7 +299,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
reset({
name: '',
description: '',
minChunkSize: 100,
minChunkSize: 1,
maxChunkSize: 1024,
overlapSize: 200,
})
Expand Down Expand Up @@ -423,7 +423,7 @@ export function CreateModal({ open, onOpenChange, onKnowledgeBaseCreated }: Crea
<Input
id='minChunkSize'
type='number'
placeholder='100'
placeholder='1'
{...register('minChunkSize', { valueAsNumber: true })}
className={errors.minChunkSize ? 'border-red-500' : ''}
autoComplete='off'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ export function useKnowledgeUpload(options: UseKnowledgeUploadOptions = {}) {
})),
processingOptions: {
chunkSize: processingOptions.chunkSize || 1024,
minCharactersPerChunk: processingOptions.minCharactersPerChunk || 100,
minCharactersPerChunk: processingOptions.minCharactersPerChunk || 1,
chunkOverlap: processingOptions.chunkOverlap || 200,
recipe: processingOptions.recipe || 'default',
lang: 'en',
Expand Down
2 changes: 1 addition & 1 deletion apps/sim/lib/documents/chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ export class TextChunker {

constructor(options: ChunkerOptions = {}) {
this.chunkSize = options.chunkSize ?? 512
this.minChunkSize = options.minChunkSize ?? 50
this.minChunkSize = options.minChunkSize ?? 1
this.overlap = options.overlap ?? 0
}

Expand Down
2 changes: 1 addition & 1 deletion apps/sim/lib/documents/docs-chunker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ export class DocsChunker {
// Use the existing TextChunker for chunking logic
this.textChunker = new TextChunker({
chunkSize: options.chunkSize ?? 300, // Max 300 tokens per chunk
minChunkSize: options.minChunkSize ?? 100,
minChunkSize: options.minChunkSize ?? 1,
overlap: options.overlap ?? 50,
})
// Use localhost docs in development, production docs otherwise
Expand Down
4 changes: 3 additions & 1 deletion apps/sim/lib/documents/document-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ export async function processDocument(
filename: string,
mimeType: string,
chunkSize = 1000,
chunkOverlap = 200
chunkOverlap = 200,
minChunkSize = 1
): Promise<{
chunks: Chunk[]
metadata: {
Expand All @@ -85,6 +86,7 @@ export async function processDocument(
const chunker = new TextChunker({
chunkSize,
overlap: chunkOverlap,
minChunkSize,
})

const chunks = await chunker.chunk(content)
Expand Down
6 changes: 3 additions & 3 deletions apps/sim/tools/knowledge/create_document.ts
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
if (/[<>:"/\\|?*]/.test(documentName)) {
throw new Error('Document name contains invalid characters. Avoid: < > : " / \\ | ? *')
}
if (!textContent || textContent.length < 10) {
throw new Error('Document content must be at least 10 characters long')
if (!textContent || textContent.length < 1) {
throw new Error('Document content cannot be empty')
}
if (textContent.length > 1000000) {
throw new Error('Document content exceeds maximum size of 1MB')
Expand Down Expand Up @@ -157,7 +157,7 @@ export const knowledgeCreateDocumentTool: ToolConfig<any, KnowledgeCreateDocumen
documents: documents,
processingOptions: {
chunkSize: 1024,
minCharactersPerChunk: 100,
minCharactersPerChunk: 1,
chunkOverlap: 200,
recipe: 'default',
lang: 'en',
Expand Down