rootflo · vizsatiz · Sep 18, 2025 · Sep 17, 2025 · Sep 17, 2025
diff --git a/flo_ai/examples/document_processing_example.py b/flo_ai/examples/document_processing_example.py
diff --git a/flo_ai/flo_ai/__init__.py b/flo_ai/flo_ai/__init__.py
@@ -3,7 +3,15 @@
 """
 
 # Models package - Agent framework components
-from .models import Agent, AgentError, BaseAgent, AgentType, ReasoningPattern
+from .models import (
+    Agent,
+    AgentError,
+    BaseAgent,
+    AgentType,
+    ReasoningPattern,
+    DocumentType,
+    DocumentMessage,
+)
 
 from .builder.agent_builder import AgentBuilder
 
@@ -50,6 +58,8 @@
     'OpenAIVLLM',
     # LLM DataClass
     'ImageMessage',
+    'DocumentType',
+    'DocumentMessage',
     # Tools
     'Tool',
     'ToolExecutionError',

diff --git a/flo_ai/flo_ai/arium/arium.py b/flo_ai/flo_ai/arium/arium.py
@@ -1,6 +1,7 @@
 from flo_ai.arium.base import BaseArium
 from flo_ai.arium.memory import MessageMemory, BaseMemory
 from flo_ai.llm.base_llm import ImageMessage
+from flo_ai.models.document import DocumentMessage
 from typing import List, Dict, Any, Optional, Callable
 from flo_ai.models.agent import Agent
 from flo_ai.tool.base_tool import Tool
@@ -29,7 +30,7 @@ def compile(self):
 
     async def run(
         self,
-        inputs: List[str | ImageMessage],
+        inputs: List[str | ImageMessage | DocumentMessage],
         variables: Optional[Dict[str, Any]] = None,
         event_callback: Optional[Callable[[AriumEvent], None]] = None,
         events_filter: Optional[List[AriumEventType]] = None,
@@ -114,7 +115,7 @@ def _emit_event(
 
     async def _execute_graph(
         self,
-        inputs: List[str | ImageMessage],
+        inputs: List[str | ImageMessage | DocumentMessage],
         event_callback: Optional[Callable[[AriumEvent], None]] = None,
         events_filter: Optional[List[AriumEventType]] = None,
     ):
@@ -221,7 +222,9 @@ async def _execute_graph(
         return self.memory.get()
 
     def _extract_and_validate_variables(
-        self, inputs: List[str | ImageMessage], variables: Dict[str, Any]
+        self,
+        inputs: List[str | ImageMessage | DocumentMessage],
+        variables: Dict[str, Any],
     ) -> None:
         """Extract variables from inputs and agents, then validate them.
 
@@ -258,8 +261,10 @@ def _extract_and_validate_variables(
             validate_multi_agent_variables(agents_variables, variables)
 
     def _resolve_inputs(
-        self, inputs: List[str | ImageMessage], variables: Dict[str, Any]
-    ) -> List[str | ImageMessage]:
+        self,
+        inputs: List[str | ImageMessage | DocumentMessage],
+        variables: Dict[str, Any],
+    ) -> List[str | ImageMessage | DocumentMessage]:
         """Resolve variables in input messages.
 
         Args:
@@ -276,7 +281,7 @@ def _resolve_inputs(
                 resolved_input = resolve_variables(input_item, variables)
                 resolved_inputs.append(resolved_input)
             else:
-                # ImageMessage objects don't need variable resolution
+                # ImageMessage and DocumentMessage objects don't need variable resolution
                 resolved_inputs.append(input_item)
         return resolved_inputs
 

diff --git a/flo_ai/flo_ai/arium/builder.py b/flo_ai/flo_ai/arium/builder.py
@@ -4,6 +4,7 @@
 from flo_ai.models.agent import Agent
 from flo_ai.tool.base_tool import Tool
 from flo_ai.llm.base_llm import ImageMessage
+from flo_ai.models.document import DocumentMessage
 import yaml
 from flo_ai.builder.agent_builder import AgentBuilder
 from flo_ai.llm import BaseLLM
@@ -134,7 +135,7 @@ def build(self) -> Arium:
 
     async def build_and_run(
         self,
-        inputs: List[Union[str, ImageMessage]],
+        inputs: List[Union[str, ImageMessage, DocumentMessage]],
         variables: Optional[Dict[str, Any]] = None,
     ) -> List[dict]:
         """Build the Arium and run it with the given inputs and optional runtime variables."""

diff --git a/flo_ai/flo_ai/llm/base_llm.py b/flo_ai/flo_ai/llm/base_llm.py
@@ -1,7 +1,10 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, List, Optional
 from flo_ai.tool.base_tool import Tool
+from flo_ai.utils.document_processor import get_default_processor
+from flo_ai.utils.logger import logger
 from dataclasses import dataclass
+from flo_ai.models.document import DocumentMessage
 
 
 @dataclass
@@ -65,3 +68,25 @@ def format_tools_for_llm(self, tools: List['Tool']) -> List[Dict[str, Any]]:
     def format_image_in_message(self, image: ImageMessage) -> str:
         """Format a image in the message"""
         pass
+
+    async def format_document_in_message(self, document: 'DocumentMessage') -> str:
+        """Return the text the default document processor extracts from ``document``.
+
+        Returns the result's 'extracted_text' entry ('' when it is absent). Any
+        failure is logged and re-raised as ``Exception`` chained to the original.
+        """
+        try:
+            # The processor result is read like a mapping, via .get() with defaults.
+            result = await get_default_processor().process_document(document)
+            extracted_text = result.get('extracted_text', '')
+            doc_type = result.get('document_type', 'unknown')
+            logger.info(
+                f'Successfully formatted {doc_type} document for {self.__class__.__name__} LLM'
+            )
+            return extracted_text
+        except Exception as e:
+            logger.error(
+                f'Error formatting document for {self.__class__.__name__}: {e}'
+            )
+            # Chain explicitly so the underlying error is preserved as __cause__.
+            raise Exception(f'Failed to format document: {str(e)}') from e
diff --git a/flo_ai/flo_ai/models/__init__.py b/flo_ai/flo_ai/models/__init__.py
@@ -5,5 +5,14 @@
 from .agent import Agent
 from .agent_error import AgentError
 from .base_agent import BaseAgent, AgentType, ReasoningPattern
+from .document import DocumentMessage, DocumentType
 
-__all__ = ['Agent', 'AgentError', 'BaseAgent', 'AgentType', 'ReasoningPattern']
+__all__ = [
+    'Agent',
+    'AgentError',
+    'BaseAgent',
+    'AgentType',
+    'ReasoningPattern',
+    'DocumentMessage',
+    'DocumentType',
+]
diff --git a/flo_ai/flo_ai/models/agent.py b/flo_ai/flo_ai/models/agent.py
@@ -2,6 +2,7 @@
 from typing import Dict, Any, List, Optional
 from flo_ai.models.base_agent import BaseAgent, AgentType, ReasoningPattern
 from flo_ai.llm.base_llm import BaseLLM, ImageMessage
+from flo_ai.models.document import DocumentMessage
 from flo_ai.tool.base_tool import Tool, ToolExecutionError
 from flo_ai.models.agent_error import AgentError
 from flo_ai.utils.logger import logger
@@ -48,7 +49,7 @@ def __init__(
 
     async def run(
         self,
-        inputs: List[str | ImageMessage] | str,
+        inputs: List[str | ImageMessage | DocumentMessage] | str,
         variables: Optional[Dict[str, Any]] = None,
     ) -> str:
         variables = variables or {}
@@ -75,6 +76,9 @@ async def run(
             for input in inputs:
                 if isinstance(input, ImageMessage):
                     self.add_to_history('user', self.llm.format_image_in_message(input))
+                elif isinstance(input, DocumentMessage):
+                    formatted_doc = await self.llm.format_document_in_message(input)
+                    self.add_to_history('user', formatted_doc)
                 else:
                     # Resolve variables in text input
                     resolved_input = resolve_variables(input, variables)
@@ -88,6 +92,9 @@ async def run(
             for input in inputs:
                 if isinstance(input, ImageMessage):
                     self.add_to_history('user', self.llm.format_image_in_message(input))
+                elif isinstance(input, DocumentMessage):
+                    formatted_doc = await self.llm.format_document_in_message(input)
+                    self.add_to_history('user', formatted_doc)
                 else:
                     self.add_to_history('user', input)
 

diff --git a/flo_ai/flo_ai/models/document.py b/flo_ai/flo_ai/models/document.py
@@ -0,0 +1,33 @@
+"""
+Document-related data models for Flo AI framework.
+
+This module contains document types and message classes to avoid circular imports.
+"""
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Dict, Any, Optional
+
+
+class DocumentType(Enum):
+    """Document formats that a DocumentMessage can declare (PDF or plain TXT)."""
+
+    PDF = 'pdf'  # member value is the lowercase string 'pdf'
+    TXT = 'txt'  # member value is the lowercase string 'txt'
+
+
+@dataclass
+class DocumentMessage:
+    """
+    Data structure for document inputs to LLMs; only ``document_type`` is required.
+
+    Every other field defaults to ``None`` and is not validated by this class.
+    """
+
+    document_type: DocumentType  # required; the only field without a default
+    document_url: Optional[str] = None  # presumably a remote location of the document - TODO confirm
+    document_bytes: Optional[bytes] = None  # presumably the raw document content - TODO confirm
+    document_file_path: Optional[str] = None  # presumably a local filesystem path - TODO confirm
+    document_base64: Optional[str] = None  # presumably base64-encoded content - TODO confirm
+    mime_type: Optional[str] = None  # optional; no default is derived from document_type here
+    metadata: Optional[Dict[str, Any]] = None  # optional free-form mapping; None rather than {} by default