Commit 51c5ae8

attachment works properly now
1 parent 66adba5 commit 51c5ae8

11 files changed (+248 -150 lines)

extensions/llamacpp-extension/src/index.ts

Lines changed: 42 additions & 19 deletions
@@ -35,7 +35,12 @@ import {
   mapOldBackendToNew,
 } from './backend'
 import { invoke } from '@tauri-apps/api/core'
-import { getProxyConfig } from './util'
+import {
+  getProxyConfig,
+  buildEmbedBatches,
+  mergeEmbedResponses,
+  type EmbedBatchResult,
+} from './util'
 import { basename } from '@tauri-apps/api/path'
 import {
   loadLlamaModel,
@@ -2331,14 +2336,20 @@ export default class llamacpp_extension extends AIEngine {
       sInfo = await this.load('sentence-transformer-mini', undefined, true)
     }

-    const attemptRequest = async (session: SessionInfo) => {
+    const ubatchSize =
+      (this.config?.ubatch_size && this.config.ubatch_size > 0
+        ? this.config.ubatch_size
+        : 512) || 512
+    const batches = buildEmbedBatches(text, ubatchSize)
+
+    const attemptRequest = async (session: SessionInfo, batchInput: string[]) => {
       const baseUrl = `http://localhost:${session.port}/v1/embeddings`
       const headers = {
         'Content-Type': 'application/json',
         'Authorization': `Bearer ${session.api_key}`,
       }
       const body = JSON.stringify({
-        input: text,
+        input: batchInput,
         model: session.model_id,
         encoding_format: 'float',
       })
@@ -2350,26 +2361,38 @@ export default class llamacpp_extension extends AIEngine {
       return response
     }

-    // First try with the existing session (may have been started without --embedding previously)
-    let response = await attemptRequest(sInfo)
+    const sendBatch = async (batchInput: string[]) => {
+      let response = await attemptRequest(sInfo as SessionInfo, batchInput)

-    // If embeddings endpoint is not available (501), reload with embedding mode and retry once
-    if (response.status === 501) {
-      try {
-        await this.unload('sentence-transformer-mini')
-      } catch {}
-      sInfo = await this.load('sentence-transformer-mini', undefined, true)
-      response = await attemptRequest(sInfo)
+      // If embeddings endpoint is not available (501), reload with embedding mode and retry once
+      if (response.status === 501) {
+        try {
+          await this.unload('sentence-transformer-mini')
+        } catch {}
+        sInfo = await this.load('sentence-transformer-mini', undefined, true)
+        response = await attemptRequest(sInfo as SessionInfo, batchInput)
+      }
+
+      if (!response.ok) {
+        const errorData = await response.json().catch(() => null)
+        throw new Error(
+          `API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
+        )
+      }
+      const responseData = (await response.json()) as EmbedBatchResult
+      return responseData
     }

-    if (!response.ok) {
-      const errorData = await response.json().catch(() => null)
-      throw new Error(
-        `API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
-      )
+    const batchResults: Array<{ result: EmbedBatchResult; offset: number }> = []
+    for (const { batch, offset } of batches) {
+      const result = await sendBatch(batch)
+      batchResults.push({ result, offset })
     }
-    const responseData = await response.json()
-    return responseData as EmbeddingResponse
+
+    return mergeEmbedResponses(
+      (sInfo as SessionInfo).model_id,
+      batchResults
+    ) as EmbeddingResponse
   }

   /**
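
Taken together, the new flow is: estimate tokens per input, pack inputs into ubatch-sized groups, send one /v1/embeddings request per group (with a single 501 reload-and-retry), then merge the per-batch responses back into one OpenAI-style response. A minimal sketch of that loop in isolation; requestEmbeddings is a hypothetical stand-in for the attemptRequest/sendBatch HTTP call, and the model name is illustrative:

import { buildEmbedBatches, mergeEmbedResponses, type EmbedBatchResult } from './util'

// Hypothetical stand-in for the HTTP call: POSTs one batch to
// /v1/embeddings and returns the parsed response body.
declare function requestEmbeddings(batch: string[]): Promise<EmbedBatchResult>

async function embedAll(texts: string[], ubatchSize = 512) {
  const batches = buildEmbedBatches(texts, ubatchSize)
  const batchResults: Array<{ result: EmbedBatchResult; offset: number }> = []
  for (const { batch, offset } of batches) {
    // Batches are sent sequentially; offset records where each batch's
    // inputs sit in the original `texts` array.
    batchResults.push({ result: await requestEmbeddings(batch), offset })
  }
  // Re-indexes each item by its batch offset so data[i] matches texts[i].
  return mergeEmbedResponses('sentence-transformer-mini', batchResults)
}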

extensions/llamacpp-extension/src/util.ts

Lines changed: 76 additions & 0 deletions
@@ -106,3 +106,79 @@ export function getProxyConfig(): Record<
     throw error
   }
 }
+
+// --- Embedding batching helpers ---
+
+export type EmbedBatch = { batch: string[]; offset: number }
+export type EmbedUsage = { prompt_tokens?: number; total_tokens?: number }
+export type EmbedData = { embedding: number[]; index: number }
+
+export type EmbedBatchResult = {
+  data: EmbedData[]
+  usage?: EmbedUsage
+}
+
+export function estimateTokensFromText(text: string, charsPerToken = 3): number {
+  return Math.max(1, Math.ceil(text.length / Math.max(charsPerToken, 1)))
+}
+
+export function buildEmbedBatches(
+  inputs: string[],
+  ubatchSize: number,
+  charsPerToken = 3
+): EmbedBatch[] {
+  const batches: EmbedBatch[] = []
+  let current: string[] = []
+  let currentTokens = 0
+  let offset = 0
+
+  const push = () => {
+    if (current.length) {
+      batches.push({ batch: current, offset })
+      offset += current.length
+      current = []
+      currentTokens = 0
+    }
+  }
+
+  for (const text of inputs) {
+    const estTokens = estimateTokensFromText(text, charsPerToken)
+    if (!current.length && estTokens > ubatchSize) {
+      batches.push({ batch: [text], offset })
+      offset += 1
+      continue
+    }
+
+    if (currentTokens + estTokens > ubatchSize && current.length) {
+      push()
+    }
+
+    current.push(text)
+    currentTokens += estTokens
+  }
+
+  push()
+  return batches
+}
+
+export function mergeEmbedResponses(
+  model: string,
+  batchResults: Array<{ result: EmbedBatchResult; offset: number }>
+) {
+  const aggregated = {
+    model,
+    object: 'list',
+    usage: { prompt_tokens: 0, total_tokens: 0 },
+    data: [] as EmbedData[],
+  }
+
+  for (const { result, offset } of batchResults) {
+    aggregated.usage.prompt_tokens += result.usage?.prompt_tokens ?? 0
+    aggregated.usage.total_tokens += result.usage?.total_tokens ?? 0
+    for (const item of result.data || []) {
+      aggregated.data.push({ ...item, index: item.index + offset })
+    }
+  }
+
+  return aggregated
+}
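
A quick worked example of these helpers, assuming the default estimate of 3 characters per token: inputs of 900, 900, and 300 characters estimate to 300, 300, and 100 tokens, so with a 512-token budget the first two inputs cannot share a batch. mergeEmbedResponses then shifts each batch's local indices by its offset so the merged data follows the original input order. The fake() results below are stand-ins for real server responses:

import { buildEmbedBatches, mergeEmbedResponses, type EmbedBatchResult } from './util'

const texts = ['a'.repeat(900), 'b'.repeat(900), 'c'.repeat(300)]
const batches = buildEmbedBatches(texts, 512)
// => [ { batch: [texts[0]], offset: 0 }, { batch: [texts[1], texts[2]], offset: 1 } ]

// Fake per-batch server responses; each batch indexes its items from 0.
const fake = (n: number): EmbedBatchResult => ({
  data: Array.from({ length: n }, (_, i) => ({ embedding: [0], index: i })),
  usage: { prompt_tokens: n, total_tokens: n },
})

const merged = mergeEmbedResponses('sentence-transformer-mini', [
  { result: fake(1), offset: 0 },
  { result: fake(2), offset: 1 },
])
console.log(merged.data.map((d) => d.index)) // [0, 1, 2]: global input order restored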

extensions/rag-extension/settings.json

Lines changed: 2 additions & 2 deletions
@@ -50,14 +50,14 @@
     "controllerProps": { "value": 0.3, "type": "number", "min": 0, "max": 1, "step": 0.01, "textAlign": "right" }
   },
   {
-    "key": "chunk_size_tokens",
+    "key": "chunk_size_chars",
     "titleKey": "settings:attachments.chunkSize",
     "descriptionKey": "settings:attachments.chunkSizeDesc",
     "controllerType": "input",
     "controllerProps": { "value": 512, "type": "number", "min": 64, "max": 8192, "step": 64, "textAlign": "right" }
   },
   {
-    "key": "overlap_tokens",
+    "key": "overlap_chars",
     "titleKey": "settings:attachments.chunkOverlap",
     "descriptionKey": "settings:attachments.chunkOverlapDesc",
     "controllerType": "input",

extensions/rag-extension/src/index.ts

Lines changed: 23 additions & 28 deletions
@@ -8,8 +8,8 @@ export default class RagExtension extends RAGExtension {
     enabled: true,
     retrievalLimit: 3,
     retrievalThreshold: 0.3,
-    chunkSizeTokens: 512,
-    overlapTokens: 64,
+    chunkSizeChars: 512,
+    overlapChars: 64,
     searchMode: 'auto' as 'auto' | 'ann' | 'linear',
     maxFileSizeMB: 20,
     parseMode: 'auto' as 'auto' | 'inline' | 'embeddings' | 'prompt',
@@ -23,8 +23,13 @@ export default class RagExtension extends RAGExtension {
     this.config.maxFileSizeMB = await this.getSetting('max_file_size_mb', this.config.maxFileSizeMB)
     this.config.retrievalLimit = await this.getSetting('retrieval_limit', this.config.retrievalLimit)
     this.config.retrievalThreshold = await this.getSetting('retrieval_threshold', this.config.retrievalThreshold)
-    this.config.chunkSizeTokens = await this.getSetting('chunk_size_tokens', this.config.chunkSizeTokens)
-    this.config.overlapTokens = await this.getSetting('overlap_tokens', this.config.overlapTokens)
+    // Prefer char-based keys; fall back to legacy token keys for backward compatibility
+    this.config.chunkSizeChars =
+      (await this.getSetting('chunk_size_chars', this.config.chunkSizeChars)) ||
+      (await this.getSetting('chunk_size_tokens', this.config.chunkSizeChars))
+    this.config.overlapChars =
+      (await this.getSetting('overlap_chars', this.config.overlapChars)) ||
+      (await this.getSetting('overlap_tokens', this.config.overlapChars))
     this.config.searchMode = await this.getSetting('search_mode', this.config.searchMode)
     this.config.parseMode = await this.getSetting('parse_mode', this.config.parseMode)
     this.config.autoInlineContextRatio = await this.getSetting(
@@ -242,8 +247,8 @@ export default class RagExtension extends RAGExtension {
     // Load settings
     const s = this.config
     const maxSize = (s?.enabled === false ? 0 : s?.maxFileSizeMB) || undefined
-    const chunkSize = s?.chunkSizeTokens as number | undefined
-    const chunkOverlap = s?.overlapTokens as number | undefined
+    const chunkSize = s?.chunkSizeChars as number | undefined
+    const chunkOverlap = s?.overlapChars as number | undefined

     let totalChunks = 0
     const processedFiles: AttachmentFileInfo[] = []
@@ -291,11 +296,11 @@ export default class RagExtension extends RAGExtension {
       case 'retrieval_threshold':
         this.config.retrievalThreshold = Number(value)
         break
-      case 'chunk_size_tokens':
-        this.config.chunkSizeTokens = Number(value)
+      case 'chunk_size_chars':
+        this.config.chunkSizeChars = Number(value)
         break
-      case 'overlap_tokens':
-        this.config.overlapTokens = Number(value)
+      case 'overlap_chars':
+        this.config.overlapChars = Number(value)
         break
       case 'search_mode':
         this.config.searchMode = String(value) as 'auto' | 'ann' | 'linear'
@@ -311,27 +316,17 @@ export default class RagExtension extends RAGExtension {
   }

   // Locally implement embedding logic (previously in embeddings-extension)
-  private async embedTexts(texts: string[], batchSize: number = 128): Promise<number[][]> {
-    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> }
+  private async embedTexts(texts: string[]): Promise<number[][]> {
+    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & {
+      embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>
+    }
     if (!llm?.embed) throw new Error('llamacpp extension not available')
+    const res = await llm.embed(texts)
+    const data: Array<{ embedding: number[]; index: number }> = res?.data || []
     const out: number[][] = new Array(texts.length)
-    for (let i = 0; i < texts.length; i += batchSize) {
-      const batch = texts.slice(i, i + batchSize)
-      const batchStartIndex = i
-      try {
-        const res = await llm.embed(batch)
-        const data: Array<{ embedding: number[]; index: number }> = res?.data || []
-
-        // Map batch results to correct positions in output array
-        for (const item of data) {
-          const globalIndex = batchStartIndex + item.index
-          out[globalIndex] = item.embedding
-        }
-      } catch (error) {
-        console.error(`Failed to embed batch starting at index ${i}:`, error)
-        throw new Error(`Embedding failed at batch starting index ${i}: ${error}`)
+    for (const item of data) {
+      out[item.index] = item.embedding
     }
-    }
     return out
   }
 }

extensions/vector-db-extension/src/index.ts

Lines changed: 11 additions & 21 deletions
@@ -49,29 +49,19 @@ export default class VectorDBExt extends VectorDBExtension {
     return await vecdb.chunkText(text, chunkSize, chunkOverlap)
   }

-  private async embedTexts(texts: string[], batchSize: number = 128): Promise<number[][]> {
-    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> }
+  private async embedTexts(texts: string[]): Promise<number[][]> {
+    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & {
+      embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>
+    }
     if (!llm?.embed) throw new Error('llamacpp extension not available')
+
+    const res = await llm.embed(texts)
+    const data: Array<{ embedding: number[]; index: number }> = res?.data || []
     const out: number[][] = new Array(texts.length)
-    for (let i = 0; i < texts.length; i += batchSize) {
-      const batch = texts.slice(i, i + batchSize)
-      const batchStartIndex = i
-      try {
-        const res = await llm.embed(batch)
-        const data: Array<{ embedding: number[]; index: number }> = res?.data || []
-
-        // Map batch results to correct positions in output array
-        for (const item of data) {
-          const globalIndex = batchStartIndex + item.index
-          out[globalIndex] = item.embedding
-        }
-      } catch (error) {
-        console.error(`Failed to embed batch starting at index ${i}:`, error)
-        throw new Error(`Embedding failed at batch starting index ${i}: ${error}`)
-      }
-    }
-
-    return out
+    for (const item of data) {
+      out[item.index] = item.embedding
+    }
+    return out
   }

   async ingestFile(threadId: string, file: VectorDBFileInput, opts: VectorDBIngestOptions): Promise<AttachmentFileInfo> {
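
With batching now handled inside the llamacpp extension's embed(), both the RAG and vector-db copies of embedTexts reduce to the same thin wrapper. A minimal sketch of the contract they rely on, with assumed names: embed() returns one item per input, and each item's index is the input's position in the original array, regardless of how the request was split into server batches internally:

// Sketch only: EmbedFn mirrors the embed() surface these extensions consume.
type EmbedFn = (
  texts: string[]
) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>

async function embedTexts(embed: EmbedFn, texts: string[]): Promise<number[][]> {
  const { data } = await embed(texts)
  const out: number[][] = new Array(texts.length)
  // item.index is already global, so no per-batch offset math is needed here.
  for (const item of data) out[item.index] = item.embedding
  return out
}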
