run-llama · david20571015 · Mar 10, 2025 · Mar 10, 2025 · Mar 8, 2025 · logan-markewich
diff --git a/...ndex-integrations/readers/llama-index-readers-mongodb/llama_index/readers/mongodb/base.py b/...ndex-integrations/readers/llama-index-readers-mongodb/llama_index/readers/mongodb/base.py
@@ -1,6 +1,7 @@
 """Mongo client."""
 
-from typing import Dict, Iterable, List, Optional, Union
+from collections.abc import Callable
+from typing import Dict, Iterable, List, Optional
 
 from llama_index.core.readers.base import BaseReader
 from llama_index.core.schema import Document
@@ -40,12 +41,6 @@ def __init__(
 
         self.client = client
 
-    def _flatten(self, texts: List[Union[str, List[str]]]) -> List[str]:
-        result = []
-        for text in texts:
-            result += text if isinstance(text, list) else [text]
-        return result
-
     def lazy_load_data(
         self,
         db_name: str,
@@ -55,6 +50,7 @@ def lazy_load_data(
         query_dict: Optional[Dict] = None,
         max_docs: int = 0,
         metadata_names: Optional[List[str]] = None,
+        field_extractors: Optional[Dict[str, Callable[..., str]]] = None,
     ) -> Iterable[Document]:
         """Load data from the input directory.
 
@@ -72,6 +68,9 @@ def lazy_load_data(
                 Defaults to 0 (no limit)
             metadata_names (Optional[List[str]]): names of the fields to be added
                 to the metadata attribute of the Document. Defaults to None
+            field_extractors (Optional[Dict[str, Callable[..., str]]]): dictionary
+                mapping a field name to a function that extracts text from that
+                field's value. Fields without an entry use `str`. Defaults to None.
 
         Returns:
             List[Document]: A list of documents.
@@ -84,15 +83,18 @@ def lazy_load_data(
             projection={name: 1 for name in field_names + (metadata_names or [])},
         )
 
+        field_extractors = field_extractors or {}
+
         for item in cursor:
             try:
-                texts = [str(item[name]) for name in field_names]
+                texts = [
+                    field_extractors.get(name, str)(item[name]) for name in field_names
+                ]
             except KeyError as err:
                 raise ValueError(
                     f"{err.args[0]} field not found in Mongo document."
                 ) from err
 
-            texts = self._flatten(texts)
             text = separator.join(texts)
 
             if metadata_names is None:

diff --git a/llama-index-integrations/readers/llama-index-readers-mongodb/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-mongodb/pyproject.toml
@@ -28,7 +28,7 @@ license = "MIT"
 maintainers = ["jerryjliu"]
 name = "llama-index-readers-mongodb"
 readme = "README.md"
-version = "0.3.0"
+version = "0.3.1"
 
 [tool.poetry.dependencies]
 python = ">=3.9,<4.0"