Skip to content

Commit ed5a111

Browse files
authored
fix(component,ai,gemini): correct text-based documents logic (#1103)
Because

- The previous implementation extracted text content using `doc.Text()` for text-based documents, but Gemini actually supports plain text, making `doc.Text()` unnecessary.

This commit

- Updates text-based document processing to use base64 encoding like PDFs for consistent handling
- Removes the `doc.Text()` extraction step that could cause processing failures
- Ensures both PDFs and text-based documents (TXT, Markdown, HTML, XML, etc.) are handled uniformly by passing base64 data directly to the Gemini API
- Maintains proper MIME type detection by using the actual `contentType` instead of hardcoded values
1 parent a82d155 commit ed5a111

File tree

1 file changed

+5
-6
lines changed

1 file changed

+5
-6
lines changed

pkg/component/ai/gemini/v0/task_chat.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -541,14 +541,13 @@ func buildReqParts(in TaskChatInput) ([]genai.Part, error) {
541541
}
542542
} else if isTextBasedDocument(contentType) {
543543
// Text-based documents (TXT, Markdown, HTML, XML, etc.)
544-
// These are processed as pure text content - visual formatting is lost
545-
// The model won't see HTML tags, Markdown formatting, etc.
546-
textContent, err := doc.Text()
544+
// Pass as base64 like PDFs for consistent handling
545+
docBase64, err := doc.Base64()
547546
if err != nil {
548-
return nil, fmt.Errorf("failed to extract text from document: %w", err)
547+
return nil, err
549548
}
550-
if textContent.String() != "" {
551-
parts = append(parts, genai.Part{Text: textContent.String()})
549+
if p := newURIOrDataPart(docBase64.String(), detectMIMEFromPath(docBase64.String(), contentType)); p != nil {
550+
parts = append(parts, *p)
552551
}
553552
} else if isConvertibleToPDF(contentType) {
554553
// Office documents (DOC, DOCX, PPT, PPTX, XLS, XLSX)

0 commit comments

Comments
 (0)