fix(component,ai,gemini): correct text-based documents logic (#1103)

pinglin · web-flow · commit ed5a11167f6b · 2025-09-18T18:23:01.000+01:00
Because

- The previous implementation extracted text content using `doc.Text()`
for text-based documents, but Gemini accepts plain-text documents
directly, making the `doc.Text()` extraction step unnecessary.

This commit

- Updates text-based document processing to pass base64-encoded data,
as the PDF path already does, for consistent handling
- Removes the `doc.Text()` extraction step that could cause processing
failures
- Ensures both PDFs and text-based documents (TXT, Markdown, HTML, XML,
etc.) are handled uniformly by passing base64 data directly to the
Gemini API
- Maintains proper MIME type detection by using the actual `contentType`
instead of hardcoded values
diff --git a/pkg/component/ai/gemini/v0/task_chat.go b/pkg/component/ai/gemini/v0/task_chat.go
@@ -541,14 +541,13 @@ func buildReqParts(in TaskChatInput) ([]genai.Part, error) {
 			}
 		} else if isTextBasedDocument(contentType) {
 			// Text-based documents (TXT, Markdown, HTML, XML, etc.)
-			// These are processed as pure text content - visual formatting is lost
-			// The model won't see HTML tags, Markdown formatting, etc.
-			textContent, err := doc.Text()
+			// Pass as base64 like PDFs for consistent handling
+			docBase64, err := doc.Base64()
 			if err != nil {
-				return nil, fmt.Errorf("failed to extract text from document: %w", err)
+				return nil, err
 			}
-			if textContent.String() != "" {
-				parts = append(parts, genai.Part{Text: textContent.String()})
+			if p := newURIOrDataPart(docBase64.String(), detectMIMEFromPath(docBase64.String(), contentType)); p != nil {
+				parts = append(parts, *p)
 			}
 		} else if isConvertibleToPDF(contentType) {
 			// Office documents (DOC, DOCX, PPT, PPTX, XLS, XLSX)