Commit 51c5ae8

attachment works properly now
1 parent 66adba5 commit 51c5ae8

11 files changed (+248 -150 lines)

extensions/llamacpp-extension/src/index.ts

Lines changed: 42 additions & 19 deletions
@@ -35,7 +35,12 @@ import {
   mapOldBackendToNew,
 } from './backend'
 import { invoke } from '@tauri-apps/api/core'
-import { getProxyConfig } from './util'
+import {
+  getProxyConfig,
+  buildEmbedBatches,
+  mergeEmbedResponses,
+  type EmbedBatchResult,
+} from './util'
 import { basename } from '@tauri-apps/api/path'
 import {
   loadLlamaModel,
@@ -2331,14 +2336,20 @@ export default class llamacpp_extension extends AIEngine {
       sInfo = await this.load('sentence-transformer-mini', undefined, true)
     }

-    const attemptRequest = async (session: SessionInfo) => {
+    const ubatchSize =
+      (this.config?.ubatch_size && this.config.ubatch_size > 0
+        ? this.config.ubatch_size
+        : 512) || 512
+    const batches = buildEmbedBatches(text, ubatchSize)
+
+    const attemptRequest = async (session: SessionInfo, batchInput: string[]) => {
       const baseUrl = `http://localhost:${session.port}/v1/embeddings`
       const headers = {
         'Content-Type': 'application/json',
         'Authorization': `Bearer ${session.api_key}`,
       }
       const body = JSON.stringify({
-        input: text,
+        input: batchInput,
         model: session.model_id,
         encoding_format: 'float',
       })
@@ -2350,26 +2361,38 @@ export default class llamacpp_extension extends AIEngine {
       return response
     }

-    // First try with the existing session (may have been started without --embedding previously)
-    let response = await attemptRequest(sInfo)
+    const sendBatch = async (batchInput: string[]) => {
+      let response = await attemptRequest(sInfo as SessionInfo, batchInput)

-    // If embeddings endpoint is not available (501), reload with embedding mode and retry once
-    if (response.status === 501) {
-      try {
-        await this.unload('sentence-transformer-mini')
-      } catch {}
-      sInfo = await this.load('sentence-transformer-mini', undefined, true)
-      response = await attemptRequest(sInfo)
+      // If embeddings endpoint is not available (501), reload with embedding mode and retry once
+      if (response.status === 501) {
+        try {
+          await this.unload('sentence-transformer-mini')
+        } catch {}
+        sInfo = await this.load('sentence-transformer-mini', undefined, true)
+        response = await attemptRequest(sInfo as SessionInfo, batchInput)
+      }
+
+      if (!response.ok) {
+        const errorData = await response.json().catch(() => null)
+        throw new Error(
+          `API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
+        )
+      }
+      const responseData = (await response.json()) as EmbedBatchResult
+      return responseData
     }

-    if (!response.ok) {
-      const errorData = await response.json().catch(() => null)
-      throw new Error(
-        `API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
-      )
+    const batchResults: Array<{ result: EmbedBatchResult; offset: number }> = []
+    for (const { batch, offset } of batches) {
+      const result = await sendBatch(batch)
+      batchResults.push({ result, offset })
     }
-    const responseData = await response.json()
-    return responseData as EmbeddingResponse
+
+    return mergeEmbedResponses(
+      (sInfo as SessionInfo).model_id,
+      batchResults
+    ) as EmbeddingResponse
   }

   /**
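
Taken together, the new flow is: estimate tokens per input, pack inputs into ubatch-sized groups, send one /v1/embeddings request per group (with a single 501 reload-and-retry), then merge the per-batch responses back into one OpenAI-style response. A minimal sketch of that loop in isolation; requestEmbeddings is a hypothetical stand-in for the attemptRequest/sendBatch HTTP call, and the model name is illustrative:

import { buildEmbedBatches, mergeEmbedResponses, type EmbedBatchResult } from './util'

// Hypothetical stand-in for the HTTP call: POSTs one batch to
// /v1/embeddings and returns the parsed response body.
declare function requestEmbeddings(batch: string[]): Promise<EmbedBatchResult>

async function embedAll(texts: string[], ubatchSize = 512) {
  const batches = buildEmbedBatches(texts, ubatchSize)
  const batchResults: Array<{ result: EmbedBatchResult; offset: number }> = []
  for (const { batch, offset } of batches) {
    // Batches are sent sequentially; offset records where each batch's
    // inputs sit in the original `texts` array.
    batchResults.push({ result: await requestEmbeddings(batch), offset })
  }
  // Re-indexes each item by its batch offset so data[i] matches texts[i].
  return mergeEmbedResponses('sentence-transformer-mini', batchResults)
}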

extensions/llamacpp-extension/src/util.ts

Lines changed: 76 additions & 0 deletions
@@ -106,3 +106,79 @@ export function getProxyConfig(): Record<
     throw error
   }
 }
+
+// --- Embedding batching helpers ---
+
+export type EmbedBatch = { batch: string[]; offset: number }
+export type EmbedUsage = { prompt_tokens?: number; total_tokens?: number }
+export type EmbedData = { embedding: number[]; index: number }
+
+export type EmbedBatchResult = {
+  data: EmbedData[]
+  usage?: EmbedUsage
+}
+
+export function estimateTokensFromText(text: string, charsPerToken = 3): number {
+  return Math.max(1, Math.ceil(text.length / Math.max(charsPerToken, 1)))
+}
+
+export function buildEmbedBatches(
+  inputs: string[],
+  ubatchSize: number,
+  charsPerToken = 3
+): EmbedBatch[] {
+  const batches: EmbedBatch[] = []
+  let current: string[] = []
+  let currentTokens = 0
+  let offset = 0
+
+  const push = () => {
+    if (current.length) {
+      batches.push({ batch: current, offset })
+      offset += current.length
+      current = []
+      currentTokens = 0
+    }
+  }
+
+  for (const text of inputs) {
+    const estTokens = estimateTokensFromText(text, charsPerToken)
+    if (!current.length && estTokens > ubatchSize) {
+      batches.push({ batch: [text], offset })
+      offset += 1
+      continue
+    }
+
+    if (currentTokens + estTokens > ubatchSize && current.length) {
+      push()
+    }
+
+    current.push(text)
+    currentTokens += estTokens
+  }
+
+  push()
+  return batches
+}
+
+export function mergeEmbedResponses(
+  model: string,
+  batchResults: Array<{ result: EmbedBatchResult; offset: number }>
+) {
+  const aggregated = {
+    model,
+    object: 'list',
+    usage: { prompt_tokens: 0, total_tokens: 0 },
+    data: [] as EmbedData[],
+  }
+
+  for (const { result, offset } of batchResults) {
+    aggregated.usage.prompt_tokens += result.usage?.prompt_tokens ?? 0
+    aggregated.usage.total_tokens += result.usage?.total_tokens ?? 0
+    for (const item of result.data || []) {
+      aggregated.data.push({ ...item, index: item.index + offset })
+    }
+  }
+
+  return aggregated
+}
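
A quick worked example of these helpers, assuming the default estimate of 3 characters per token: inputs of 900, 900, and 300 characters estimate to 300, 300, and 100 tokens, so with a 512-token budget the first two inputs cannot share a batch. mergeEmbedResponses then shifts each batch's local indices by its offset so the merged data follows the original input order. The fake() results below are stand-ins for real server responses:

import { buildEmbedBatches, mergeEmbedResponses, type EmbedBatchResult } from './util'

const texts = ['a'.repeat(900), 'b'.repeat(900), 'c'.repeat(300)]
const batches = buildEmbedBatches(texts, 512)
// => [ { batch: [texts[0]], offset: 0 }, { batch: [texts[1], texts[2]], offset: 1 } ]

// Fake per-batch server responses; each batch indexes its items from 0.
const fake = (n: number): EmbedBatchResult => ({
  data: Array.from({ length: n }, (_, i) => ({ embedding: [0], index: i })),
  usage: { prompt_tokens: n, total_tokens: n },
})

const merged = mergeEmbedResponses('sentence-transformer-mini', [
  { result: fake(1), offset: 0 },
  { result: fake(2), offset: 1 },
])
console.log(merged.data.map((d) => d.index)) // [0, 1, 2]: global input order restored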

extensions/rag-extension/settings.json

Lines changed: 2 additions & 2 deletions
@@ -50,14 +50,14 @@
     "controllerProps": { "value": 0.3, "type": "number", "min": 0, "max": 1, "step": 0.01, "textAlign": "right" }
   },
   {
-    "key": "chunk_size_tokens",
+    "key": "chunk_size_chars",
     "titleKey": "settings:attachments.chunkSize",
     "descriptionKey": "settings:attachments.chunkSizeDesc",
     "controllerType": "input",
     "controllerProps": { "value": 512, "type": "number", "min": 64, "max": 8192, "step": 64, "textAlign": "right" }
   },
   {
-    "key": "overlap_tokens",
+    "key": "overlap_chars",
     "titleKey": "settings:attachments.chunkOverlap",
     "descriptionKey": "settings:attachments.chunkOverlapDesc",
     "controllerType": "input",

extensions/rag-extension/src/index.ts

Lines changed: 23 additions & 28 deletions
@@ -8,8 +8,8 @@ export default class RagExtension extends RAGExtension {
     enabled: true,
     retrievalLimit: 3,
     retrievalThreshold: 0.3,
-    chunkSizeTokens: 512,
-    overlapTokens: 64,
+    chunkSizeChars: 512,
+    overlapChars: 64,
     searchMode: 'auto' as 'auto' | 'ann' | 'linear',
     maxFileSizeMB: 20,
     parseMode: 'auto' as 'auto' | 'inline' | 'embeddings' | 'prompt',
@@ -23,8 +23,13 @@ export default class RagExtension extends RAGExtension {
     this.config.maxFileSizeMB = await this.getSetting('max_file_size_mb', this.config.maxFileSizeMB)
     this.config.retrievalLimit = await this.getSetting('retrieval_limit', this.config.retrievalLimit)
     this.config.retrievalThreshold = await this.getSetting('retrieval_threshold', this.config.retrievalThreshold)
-    this.config.chunkSizeTokens = await this.getSetting('chunk_size_tokens', this.config.chunkSizeTokens)
-    this.config.overlapTokens = await this.getSetting('overlap_tokens', this.config.overlapTokens)
+    // Prefer char-based keys; fall back to legacy token keys for backward compatibility
+    this.config.chunkSizeChars =
+      (await this.getSetting('chunk_size_chars', this.config.chunkSizeChars)) ||
+      (await this.getSetting('chunk_size_tokens', this.config.chunkSizeChars))
+    this.config.overlapChars =
+      (await this.getSetting('overlap_chars', this.config.overlapChars)) ||
+      (await this.getSetting('overlap_tokens', this.config.overlapChars))
     this.config.searchMode = await this.getSetting('search_mode', this.config.searchMode)
     this.config.parseMode = await this.getSetting('parse_mode', this.config.parseMode)
     this.config.autoInlineContextRatio = await this.getSetting(
@@ -242,8 +247,8 @@ export default class RagExtension extends RAGExtension {
     // Load settings
     const s = this.config
     const maxSize = (s?.enabled === false ? 0 : s?.maxFileSizeMB) || undefined
-    const chunkSize = s?.chunkSizeTokens as number | undefined
-    const chunkOverlap = s?.overlapTokens as number | undefined
+    const chunkSize = s?.chunkSizeChars as number | undefined
+    const chunkOverlap = s?.overlapChars as number | undefined

     let totalChunks = 0
     const processedFiles: AttachmentFileInfo[] = []
@@ -291,11 +296,11 @@ export default class RagExtension extends RAGExtension {
       case 'retrieval_threshold':
         this.config.retrievalThreshold = Number(value)
         break
-      case 'chunk_size_tokens':
-        this.config.chunkSizeTokens = Number(value)
+      case 'chunk_size_chars':
+        this.config.chunkSizeChars = Number(value)
         break
-      case 'overlap_tokens':
-        this.config.overlapTokens = Number(value)
+      case 'overlap_chars':
+        this.config.overlapChars = Number(value)
         break
       case 'search_mode':
         this.config.searchMode = String(value) as 'auto' | 'ann' | 'linear'
@@ -311,27 +316,17 @@ export default class RagExtension extends RAGExtension {
   }

   // Locally implement embedding logic (previously in embeddings-extension)
-  private async embedTexts(texts: string[], batchSize: number = 128): Promise<number[][]> {
-    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> }
+  private async embedTexts(texts: string[]): Promise<number[][]> {
+    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & {
+      embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>
+    }
     if (!llm?.embed) throw new Error('llamacpp extension not available')
+    const res = await llm.embed(texts)
+    const data: Array<{ embedding: number[]; index: number }> = res?.data || []
     const out: number[][] = new Array(texts.length)
-    for (let i = 0; i < texts.length; i += batchSize) {
-      const batch = texts.slice(i, i + batchSize)
-      const batchStartIndex = i
-      try {
-        const res = await llm.embed(batch)
-        const data: Array<{ embedding: number[]; index: number }> = res?.data || []
-
-        // Map batch results to correct positions in output array
-        for (const item of data) {
-          const globalIndex = batchStartIndex + item.index
-          out[globalIndex] = item.embedding
-        }
-      } catch (error) {
-        console.error(`Failed to embed batch starting at index ${i}:`, error)
-        throw new Error(`Embedding failed at batch starting index ${i}: ${error}`)
+    for (const item of data) {
+      out[item.index] = item.embedding
     }
-    }
     return out
   }
 }

extensions/vector-db-extension/src/index.ts

Lines changed: 11 additions & 21 deletions
@@ -49,29 +49,19 @@ export default class VectorDBExt extends VectorDBExtension {
     return await vecdb.chunkText(text, chunkSize, chunkOverlap)
   }

-  private async embedTexts(texts: string[], batchSize: number = 128): Promise<number[][]> {
-    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & { embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }> }
+  private async embedTexts(texts: string[]): Promise<number[][]> {
+    const llm = window.core?.extensionManager.getByName('@janhq/llamacpp-extension') as AIEngine & {
+      embed?: (texts: string[]) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>
+    }
     if (!llm?.embed) throw new Error('llamacpp extension not available')
+
+    const res = await llm.embed(texts)
+    const data: Array<{ embedding: number[]; index: number }> = res?.data || []
     const out: number[][] = new Array(texts.length)
-    for (let i = 0; i < texts.length; i += batchSize) {
-      const batch = texts.slice(i, i + batchSize)
-      const batchStartIndex = i
-      try {
-        const res = await llm.embed(batch)
-        const data: Array<{ embedding: number[]; index: number }> = res?.data || []
-
-        // Map batch results to correct positions in output array
-        for (const item of data) {
-          const globalIndex = batchStartIndex + item.index
-          out[globalIndex] = item.embedding
-        }
-      } catch (error) {
-        console.error(`Failed to embed batch starting at index ${i}:`, error)
-        throw new Error(`Embedding failed at batch starting index ${i}: ${error}`)
-      }
-    }
-
-    return out
+    for (const item of data) {
+      out[item.index] = item.embedding
+    }
+    return out
   }

   async ingestFile(threadId: string, file: VectorDBFileInput, opts: VectorDBIngestOptions): Promise<AttachmentFileInfo> {
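
With batching now handled inside the llamacpp extension's embed(), both the RAG and vector-db copies of embedTexts reduce to the same thin wrapper. A minimal sketch of the contract they rely on, with assumed names: embed() returns one item per input, and each item's index is the input's position in the original array, regardless of how the request was split into server batches internally:

// Sketch only: EmbedFn mirrors the embed() surface these extensions consume.
type EmbedFn = (
  texts: string[]
) => Promise<{ data: Array<{ embedding: number[]; index: number }> }>

async function embedTexts(embed: EmbedFn, texts: string[]): Promise<number[][]> {
  const { data } = await embed(texts)
  const out: number[][] = new Array(texts.length)
  // item.index is already global, so no per-batch offset math is needed here.
  for (const item of data) out[item.index] = item.embedding
  return out
}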
