Skip to content

Commit 56a356f

Browse files
Handle attachment parsing error (exit gracefully)
1 parent c74bbbd commit 56a356f

File tree

5 files changed

+107
-30
lines changed

5 files changed

+107
-30
lines changed

extensions/llamacpp-extension/src/util.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,14 @@ export type EmbedBatchResult = {
118118
usage?: EmbedUsage
119119
}
120120

121-
export function estimateTokensFromText(text: string, charsPerToken = 3): number {
121+
export function estimateTokensFromText(text: string, charsPerToken = 2): number {
122122
return Math.max(1, Math.ceil(text.length / Math.max(charsPerToken, 1)))
123123
}
124124

125125
export function buildEmbedBatches(
126126
inputs: string[],
127127
ubatchSize: number,
128-
charsPerToken = 3
128+
charsPerToken = 2
129129
): EmbedBatch[] {
130130
const batches: EmbedBatch[] = []
131131
let current: string[] = []

extensions/yarn.lock

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -342,73 +342,73 @@ __metadata:
342342

343343
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fassistant-extension%40workspace%3Aassistant-extension":
344344
version: 0.1.10
345-
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=16006d&locator=%40janhq%2Fassistant-extension%40workspace%3Aassistant-extension"
345+
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=a97af4&locator=%40janhq%2Fassistant-extension%40workspace%3Aassistant-extension"
346346
dependencies:
347347
rxjs: "npm:^7.8.1"
348348
ulidx: "npm:^2.3.0"
349349
peerDependencies:
350350
react: 19.0.0
351-
checksum: 10c0/58a966e7f4aabfe7ee6c34958ca4b90c0de1d1210be9397e8f3fa77dbb4d744c4bc9814647652abd747cdbce630a0035be5e1eb748e38c354199110a5f3dc42f
351+
checksum: 10c0/350e658f44d7ace8f829a548eed13d34291f50096537abeb36f6ffdc738781e26cf4feb839ce2bc3f43c18d82742b4118c1e960969b38abaeaca1f6c428386ed
352352
languageName: node
353353
linkType: hard
354354

355355
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fconversational-extension%40workspace%3Aconversational-extension":
356356
version: 0.1.10
357-
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=16006d&locator=%40janhq%2Fconversational-extension%40workspace%3Aconversational-extension"
357+
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=a97af4&locator=%40janhq%2Fconversational-extension%40workspace%3Aconversational-extension"
358358
dependencies:
359359
rxjs: "npm:^7.8.1"
360360
ulidx: "npm:^2.3.0"
361361
peerDependencies:
362362
react: 19.0.0
363-
checksum: 10c0/58a966e7f4aabfe7ee6c34958ca4b90c0de1d1210be9397e8f3fa77dbb4d744c4bc9814647652abd747cdbce630a0035be5e1eb748e38c354199110a5f3dc42f
363+
checksum: 10c0/350e658f44d7ace8f829a548eed13d34291f50096537abeb36f6ffdc738781e26cf4feb839ce2bc3f43c18d82742b4118c1e960969b38abaeaca1f6c428386ed
364364
languageName: node
365365
linkType: hard
366366

367367
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fdownload-extension%40workspace%3Adownload-extension":
368368
version: 0.1.10
369-
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=16006d&locator=%40janhq%2Fdownload-extension%40workspace%3Adownload-extension"
369+
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=a97af4&locator=%40janhq%2Fdownload-extension%40workspace%3Adownload-extension"
370370
dependencies:
371371
rxjs: "npm:^7.8.1"
372372
ulidx: "npm:^2.3.0"
373373
peerDependencies:
374374
react: 19.0.0
375-
checksum: 10c0/58a966e7f4aabfe7ee6c34958ca4b90c0de1d1210be9397e8f3fa77dbb4d744c4bc9814647652abd747cdbce630a0035be5e1eb748e38c354199110a5f3dc42f
375+
checksum: 10c0/350e658f44d7ace8f829a548eed13d34291f50096537abeb36f6ffdc738781e26cf4feb839ce2bc3f43c18d82742b4118c1e960969b38abaeaca1f6c428386ed
376376
languageName: node
377377
linkType: hard
378378

379379
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension":
380380
version: 0.1.10
381-
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=16006d&locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension"
381+
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=a97af4&locator=%40janhq%2Fllamacpp-extension%40workspace%3Allamacpp-extension"
382382
dependencies:
383383
rxjs: "npm:^7.8.1"
384384
ulidx: "npm:^2.3.0"
385385
peerDependencies:
386386
react: 19.0.0
387-
checksum: 10c0/58a966e7f4aabfe7ee6c34958ca4b90c0de1d1210be9397e8f3fa77dbb4d744c4bc9814647652abd747cdbce630a0035be5e1eb748e38c354199110a5f3dc42f
387+
checksum: 10c0/350e658f44d7ace8f829a548eed13d34291f50096537abeb36f6ffdc738781e26cf4feb839ce2bc3f43c18d82742b4118c1e960969b38abaeaca1f6c428386ed
388388
languageName: node
389389
linkType: hard
390390

391391
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Frag-extension%40workspace%3Arag-extension":
392392
version: 0.1.10
393-
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=16006d&locator=%40janhq%2Frag-extension%40workspace%3Arag-extension"
393+
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=a97af4&locator=%40janhq%2Frag-extension%40workspace%3Arag-extension"
394394
dependencies:
395395
rxjs: "npm:^7.8.1"
396396
ulidx: "npm:^2.3.0"
397397
peerDependencies:
398398
react: 19.0.0
399-
checksum: 10c0/58a966e7f4aabfe7ee6c34958ca4b90c0de1d1210be9397e8f3fa77dbb4d744c4bc9814647652abd747cdbce630a0035be5e1eb748e38c354199110a5f3dc42f
399+
checksum: 10c0/350e658f44d7ace8f829a548eed13d34291f50096537abeb36f6ffdc738781e26cf4feb839ce2bc3f43c18d82742b4118c1e960969b38abaeaca1f6c428386ed
400400
languageName: node
401401
linkType: hard
402402

403403
"@janhq/core@file:../../core/package.tgz::locator=%40janhq%2Fvector-db-extension%40workspace%3Avector-db-extension":
404404
version: 0.1.10
405-
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=16006d&locator=%40janhq%2Fvector-db-extension%40workspace%3Avector-db-extension"
405+
resolution: "@janhq/core@file:../../core/package.tgz#../../core/package.tgz::hash=a97af4&locator=%40janhq%2Fvector-db-extension%40workspace%3Avector-db-extension"
406406
dependencies:
407407
rxjs: "npm:^7.8.1"
408408
ulidx: "npm:^2.3.0"
409409
peerDependencies:
410410
react: 19.0.0
411-
checksum: 10c0/58a966e7f4aabfe7ee6c34958ca4b90c0de1d1210be9397e8f3fa77dbb4d744c4bc9814647652abd747cdbce630a0035be5e1eb748e38c354199110a5f3dc42f
411+
checksum: 10c0/350e658f44d7ace8f829a548eed13d34291f50096537abeb36f6ffdc738781e26cf4feb839ce2bc3f43c18d82742b4118c1e960969b38abaeaca1f6c428386ed
412412
languageName: node
413413
linkType: hard
414414

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
use crate::{RagError, parser};
1+
use crate::{parser, RagError};
2+
use std::panic::{catch_unwind, AssertUnwindSafe};
23

34
#[tauri::command]
45
pub async fn parse_document<R: tauri::Runtime>(
@@ -7,6 +8,22 @@ pub async fn parse_document<R: tauri::Runtime>(
78
file_type: String,
89
) -> Result<String, RagError> {
910
log::info!("Parsing document: {} (type: {})", file_path, file_type);
10-
let res = parser::parse_document(&file_path, &file_type);
11-
res
11+
let res = catch_unwind(AssertUnwindSafe(|| parser::parse_document(&file_path, &file_type)));
12+
match res {
13+
Ok(result) => result,
14+
Err(payload) => {
15+
let reason = if let Some(s) = payload.downcast_ref::<&str>() {
16+
*s
17+
} else if let Some(s) = payload.downcast_ref::<String>() {
18+
s.as_str()
19+
} else {
20+
"unknown panic"
21+
};
22+
log::error!("Document parsing panicked: {}", reason);
23+
Err(RagError::ParseError(format!(
24+
"Document parsing failed unexpectedly: {}",
25+
reason
26+
)))
27+
}
28+
}
1229
}

src-tauri/plugins/tauri-plugin-rag/src/parser.rs

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,37 @@
11
use crate::RagError;
2+
use std::borrow::Cow;
23
use std::fs;
3-
use std::io::{Read, Cursor};
4-
use zip::read::ZipArchive;
5-
use quick_xml::events::Event;
6-
use quick_xml::Reader;
4+
use std::io::{Cursor, Read};
5+
use std::panic::{catch_unwind, AssertUnwindSafe};
6+
use calamine::{open_workbook_auto, DataType, Reader as _};
7+
use chardetng::EncodingDetector;
78
use csv as csv_crate;
8-
use calamine::{Reader as _, open_workbook_auto, DataType};
99
use html2text;
10-
use chardetng::EncodingDetector;
1110
use infer;
12-
use std::borrow::Cow;
11+
use quick_xml::events::Event;
12+
use quick_xml::Reader;
13+
use zip::read::ZipArchive;
1314

1415
pub fn parse_pdf(file_path: &str) -> Result<String, RagError> {
1516
let bytes = fs::read(file_path)?;
16-
let text = pdf_extract::extract_text_from_mem(&bytes)
17-
.map_err(|e| RagError::ParseError(format!("PDF parse error: {}", e)))?;
17+
// pdf-extract can panic on some malformed PDFs; guard to avoid crashing the app
18+
let text = match catch_unwind(AssertUnwindSafe(|| pdf_extract::extract_text_from_mem(&bytes))) {
19+
Ok(Ok(t)) => t,
20+
Ok(Err(e)) => return Err(RagError::ParseError(format!("PDF parse error: {}", e))),
21+
Err(payload) => {
22+
let reason = if let Some(s) = payload.downcast_ref::<&str>() {
23+
*s
24+
} else if let Some(s) = payload.downcast_ref::<String>() {
25+
s.as_str()
26+
} else {
27+
"unknown parser panic"
28+
};
29+
return Err(RagError::ParseError(format!(
30+
"PDF parsing failed unexpectedly: {}",
31+
reason
32+
)));
33+
}
34+
};
1835

1936
// Validate that the PDF has extractable text (not image-based/scanned)
2037
// Count meaningful characters (excluding whitespace)

web-app/src/lib/attachmentProcessing.ts

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,49 @@ export type AttachmentProcessingResult = {
2929
hasEmbeddedDocuments: boolean
3030
}
3131

32+
/**
 * Convert an arbitrary thrown or rejected value into a human-readable message.
 *
 * Resolution order:
 *  1. Falsy values -> `'Unknown error'`.
 *  2. `Error` instances -> `message`, or `toString()` when the message is empty.
 *  3. Strings -> returned unchanged.
 *  4. Arrays -> each element formatted, de-duplicated, joined with `'; '`.
 *  5. Plain objects -> first non-blank string among `message`, `reason`,
 *     `detail`; then a usable message from `error` / `cause`; then a string
 *     `code`; then the JSON serialization of the object.
 *  6. Anything else -> `String(err)`.
 *
 * @param err - The value caught from a failed attachment operation.
 * @param ancestors - Internal recursion state: the containers currently being
 *   formatted higher up the call stack. Callers should omit it.
 * @returns A message string. The empty string is only ever produced for a
 *   cyclic back-reference during recursion, never for a top-level call that
 *   omits `ancestors`.
 */
const formatAttachmentError = (
  err: unknown,
  ancestors: WeakSet<object> = new WeakSet<object>()
): string => {
  if (!err) return 'Unknown error'
  if (err instanceof Error) return err.message || err.toString()
  if (typeof err === 'string') return err
  if (typeof err !== 'object' || err === null) return String(err)

  // Cycle guard: a payload that (directly or indirectly) contains itself would
  // otherwise recurse until the stack overflows, masking the real failure.
  if (ancestors.has(err)) return ''
  ancestors.add(err)

  try {
    if (Array.isArray(err)) {
      const parts = err
        .map((item) => formatAttachmentError(item, ancestors))
        .filter(Boolean)
      return parts.length
        ? Array.from(new Set(parts)).join('; ')
        : 'Unknown error'
    }

    const obj = err as Record<string, unknown>

    // Direct message-like fields take priority over nested errors.
    for (const val of [obj.message, obj.reason, obj.detail]) {
      if (typeof val === 'string' && val.trim().length > 0) {
        return val
      }
    }

    // Wrapped errors: use the first nested source that yields a real message.
    for (const nested of [obj.error, obj.cause]) {
      if (nested && typeof nested === 'object') {
        const nestedMsg = formatAttachmentError(nested, ancestors)
        if (nestedMsg && nestedMsg !== 'Unknown error') {
          return nestedMsg
        }
      } else if (typeof nested === 'string' && nested.trim().length > 0) {
        return nested
      }
    }

    if (typeof obj.code === 'string' && obj.code.trim().length > 0) {
      return obj.code
    }

    try {
      // JSON.stringify yields undefined when a toJSON() hook returns undefined.
      const json: string | undefined = JSON.stringify(obj)
      if (json !== undefined) return json
    } catch {
      // Circular structure or non-serializable value (e.g. BigInt): fall through.
    }

    // The default object stringification carries no information for the user.
    const text = String(err)
    return text === '[object Object]' ? 'Unknown error' : text
  } finally {
    // Track only the current path so the same object may legitimately appear
    // more than once in a non-cyclic payload (e.g. `[e, e]`).
    ancestors.delete(err)
  }
}
74+
3275
export const processAttachmentsForSend = async (
3376
options: AttachmentProcessingOptions
3477
): Promise<AttachmentProcessingResult> => {
@@ -83,9 +126,9 @@ export const processAttachmentsForSend = async (
83126
if (updateAttachmentProcessing) {
84127
updateAttachmentProcessing(img.name, 'error')
85128
}
86-
const desc = err instanceof Error ? err.message : String(err)
129+
const desc = formatAttachmentError(err)
87130
toast.error('Failed to ingest image attachment', { description: desc })
88-
throw err
131+
throw err instanceof Error ? err : new Error(desc)
89132
}
90133
}
91134
}
@@ -204,9 +247,9 @@ export const processAttachmentsForSend = async (
204247
if (updateAttachmentProcessing) {
205248
updateAttachmentProcessing(doc.name, 'error')
206249
}
207-
const desc = err instanceof Error ? err.message : String(err)
250+
const desc = formatAttachmentError(err)
208251
toast.error('Failed to index attachments', { description: desc })
209-
throw err
252+
throw err instanceof Error ? err : new Error(desc)
210253
}
211254
}
212255
}

0 commit comments

Comments
 (0)