14 changes: 7 additions & 7 deletions core/embedders/__init__.py
@@ -29,13 +29,13 @@
# Convenience function for backward compatibility
def create_embedder(model_name: str = "jinaai/jina-embeddings-v4", **kwargs):
"""
Create an embedder instance (backward compatibility).

Args:
model_name: Model name or path
**kwargs: Additional configuration

Create an embedder instance by delegating to EmbedderFactory.
Parameters:
model_name (str): Model identifier or factory key (defaults to "jinaai/jina-embeddings-v4").
**kwargs: Additional keyword arguments forwarded to EmbedderFactory.create.
Returns:
Embedder instance
EmbedderBase: A new embedder instance for the requested model.
"""
return EmbedderFactory.create(model_name, **kwargs)
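A quick usage sketch of this helper, assuming the package re-exports create_embedder from core.embedders and that device/use_fp16 are valid EmbeddingConfig fields (both assumptions, not confirmed by this diff):

# Hypothetical usage of the backward-compatibility helper shown above.
# The import path and the extra keyword arguments are assumptions.
from core.embedders import create_embedder

embedder = create_embedder("jinaai/jina-embeddings-v4", device="cuda", use_fp16=True)
vectors = embedder.embed_texts(["hello world"], task="retrieval")
print(vectors.shape)  # expected (1, embedder.embedding_dimension)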
123 changes: 76 additions & 47 deletions core/embedders/embedders_base.py
@@ -40,10 +40,11 @@ class EmbedderBase(ABC):

def __init__(self, config: Optional[EmbeddingConfig] = None):
"""
Initialize embedder with configuration.

Args:
config: Embedding configuration
Initialize the embedder with an EmbeddingConfig.

If no config is provided, a default EmbeddingConfig with model_name="default" is created and assigned.
Parameters:
config (Optional[EmbeddingConfig]): Configuration for the embedder; when None a default config is used.
"""
self.config = config or EmbeddingConfig(model_name="default")
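A small sketch of the defaulting behavior described above; the EmbeddingConfig import path and the concrete subclass name are assumptions:

from core.embedders.embedders_base import EmbeddingConfig  # path is an assumption

# With an explicit config, __init__ stores it unchanged.
cfg = EmbeddingConfig(model_name="jinaai/jina-embeddings-v4")
# embedder = SomeConcreteEmbedder(config=cfg)

# With no config, __init__ falls back to EmbeddingConfig(model_name="default"):
# embedder = SomeConcreteEmbedder()
# assert embedder.config.model_name == "default"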

@@ -53,74 +54,91 @@ def embed_texts(self,
task: str = "retrieval",
batch_size: Optional[int] = None) -> np.ndarray:
"""
Embed a list of texts.

Args:
texts: List of texts to embed
task: Task type (retrieval, classification, etc.)
batch_size: Override default batch size

Returns:
Array of embeddings (N x D)
"""
Embed a list of texts into dense vectors.

Returns a 2-D NumPy array of shape (len(texts), embedding_dimension) where each row is the embedding for the corresponding input text.

Parameters:
texts: List of input texts to embed.
task: Optional task hint that may affect embedding behavior (default "retrieval").
batch_size: Optional override for the embedding batch size; if omitted the embedder's configured batch size is used.

Returns:
np.ndarray: 2-D array with one embedding vector per input text.
"""
pass

@abstractmethod
def embed_single(self,
text: str,
task: str = "retrieval") -> np.ndarray:
"""
Embed a single text.

Args:
text: Text to embed
task: Task type

Returns:
Embedding vector (1D array)
"""
Embed a single piece of text and return its embedding vector.
Parameters:
text (str): The input text to embed.
task (str): Optional task hint affecting embedding behavior (default "retrieval").
Returns:
numpy.ndarray: 1-D embedding vector whose length equals the embedder's embedding_dimension.
"""
pass

def embed_queries(self,
queries: List[str],
batch_size: Optional[int] = None) -> np.ndarray:
"""
Embed search queries (convenience method).

Args:
queries: List of search queries
batch_size: Override default batch size

Returns:
Array of query embeddings
"""
Embed a list of search queries into embedding vectors (convenience wrapper).

This is a thin convenience method that produces embeddings for the provided queries using the model's retrieval embedding pathway.

Parameters:
queries: List of query strings to embed.
batch_size: Optional override for the embedding batch size.

Returns:
A 2D numpy array of shape (len(queries), embedding_dimension) containing the embeddings.
"""
return self.embed_texts(queries, task="retrieval", batch_size=batch_size)

def embed_documents(self,
documents: List[str],
batch_size: Optional[int] = None) -> np.ndarray:
"""
Embed documents for retrieval (convenience method).

Args:
documents: List of documents
batch_size: Override default batch size

Returns:
Array of document embeddings
"""
Convenience wrapper to embed a list of documents for retrieval.
Parameters:
documents (List[str]): Documents to embed.
batch_size (Optional[int]): Optional override for the batch size; if None uses the embedder's configured batch size.
Returns:
np.ndarray: 2D array of shape (len(documents), embedding_dimension) containing the document embeddings.
"""
return self.embed_texts(documents, task="retrieval", batch_size=batch_size)
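Both convenience wrappers route through embed_texts(..., task="retrieval"), so a small retrieval loop can be written against the base class alone. A sketch, where rank_documents is a hypothetical helper and the import path is an assumption:

from typing import List

import numpy as np

from core.embedders.embedders_base import EmbedderBase  # path is an assumption


def rank_documents(embedder: EmbedderBase, query: str, documents: List[str]) -> List[int]:
    # Both calls below use the convenience wrappers documented above.
    doc_vecs = embedder.embed_documents(documents)        # (N, D)
    query_vec = embedder.embed_queries([query])[0]        # (D,)
    # Cosine similarity of the query against every document embedding.
    scores = doc_vecs @ query_vec / (
        np.linalg.norm(doc_vecs, axis=1) * np.linalg.norm(query_vec) + 1e-12
    )
    return np.argsort(-scores).tolist()                   # best match first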

@property
@abstractmethod
def embedding_dimension(self) -> int:
"""Get the dimension of embeddings produced by this model."""
"""
Return the dimensionality of vectors produced by this embedder.

This is an abstract property that concrete embedder implementations must provide;
it indicates the length of each embedding vector (number of dimensions).

Returns:
int: Number of dimensions in each embedding vector.
"""
pass

@property
@abstractmethod
def max_sequence_length(self) -> int:
"""Get the maximum sequence length supported."""
"""
Maximum input sequence length supported by the embedder.

Returns:
int: The model's maximum number of input tokens (or sequence units) accepted for a single inference. Implementations should return the hard limit used for chunking and validation.
"""
pass

@property
@@ -130,15 +148,26 @@ def supports_late_chunking(self) -> bool:

@property
def supports_multimodal(self) -> bool:
"""Whether this embedder supports multimodal inputs."""
"""
Whether the embedder accepts multimodal (non-text) inputs such as images or audio.

Implementations that support embedding inputs beyond plain text should override this to return True. Defaults to False.
"""
return False

def get_model_info(self) -> Dict[str, Any]:
"""
Get information about the model.

Return a dictionary of metadata describing the embedder and its configured model.
Returns:
Dictionary with model metadata
dict: Metadata keys:
- model_name (str): Name of the configured model.
- embedding_dimension (int): Dimensionality of produced embedding vectors.
- max_sequence_length (int): Maximum supported input sequence length (tokens).
- supports_late_chunking (bool): Whether the embedder supports late chunking.
- supports_multimodal (bool): Whether the embedder accepts multimodal inputs.
- device (str): Device configured for the model (e.g., "cuda", "cpu").
- use_fp16 (bool): Whether mixed/half precision is enabled for the model.
"""
return {
"model_name": self.config.model_name,
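The rest of get_model_info is folded out of the hunk above. To make the abstract contract concrete, a minimal illustrative subclass is sketched below; the hash-seeded vectors are not a real embedding model, the import path is an assumption, and supports_late_chunking / supports_multimodal are assumed to keep their concrete defaults:

import zlib
from typing import List, Optional

import numpy as np

from core.embedders.embedders_base import EmbedderBase  # path is an assumption


class ToyEmbedder(EmbedderBase):
    """Illustration only: deterministic pseudo-embeddings, not a real model."""

    def embed_texts(self, texts: List[str], task: str = "retrieval",
                    batch_size: Optional[int] = None) -> np.ndarray:
        # Shape contract from embed_texts: (len(texts), embedding_dimension).
        return np.stack([self.embed_single(t, task=task) for t in texts])

    def embed_single(self, text: str, task: str = "retrieval") -> np.ndarray:
        # Seed from a stable checksum so the same text always maps to the same vector.
        rng = np.random.default_rng(zlib.crc32(text.encode("utf-8")))
        return rng.standard_normal(self.embedding_dimension).astype(np.float32)

    @property
    def embedding_dimension(self) -> int:
        return 128

    @property
    def max_sequence_length(self) -> int:
        return 512

With this in place, ToyEmbedder().get_model_info() would report model_name "default" (from the fallback config) plus the property values above, along with whatever device and use_fp16 defaults EmbeddingConfig defines.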
77 changes: 46 additions & 31 deletions core/embedders/embedders_factory.py
@@ -32,11 +32,11 @@ class EmbedderFactory:
@classmethod
def register(cls, name: str, embedder_class: type):
"""
Register an embedder class.

Args:
name: Name to register under
embedder_class: Embedder class to register
Register an embedder class under a given name in the factory registry.
Adds the provided embedder class to the class-level registry mapping used by
EmbedderFactory. If a different class is already registered under the same
name, it will be replaced.
"""
cls._embedders[name] = embedder_class
logger.info(f"Registered embedder: {name}")
@@ -47,19 +47,21 @@ def create(cls,
config: Optional[EmbeddingConfig] = None,
**kwargs) -> EmbedderBase:
"""
Create an embedder instance.

Args:
model_name: Name or path of the model
config: Embedding configuration
**kwargs: Additional arguments for the embedder

Returns:
Embedder instance

Raises:
ValueError: If no suitable embedder found
"""
Create and return an EmbedderBase instance for the given model.

If `config` is not provided, an EmbeddingConfig is constructed from `model_name` and any extra keyword arguments. The factory determines an embedder type from `model_name`, attempts on-demand registration for that type, and instantiates the registered embedder class.

Parameters:
model_name (str): Model identifier or path used to pick and configure the embedder.
config (Optional[EmbeddingConfig]): Pre-built embedding configuration. If omitted, one is created.
**kwargs: Additional fields forwarded to EmbeddingConfig when `config` is not supplied.

Returns:
EmbedderBase: An instance of the selected embedder class initialized with `config`.

Raises:
ValueError: If no embedder is registered (after auto-registration) for the determined embedder type.
"""
# Create config if not provided
if config is None:
config = EmbeddingConfig(model_name=model_name, **kwargs)
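A usage sketch of the two calling conventions described in this docstring; import paths and the extra keyword arguments are assumptions:

from core.embedders.embedders_base import EmbeddingConfig     # paths are assumptions
from core.embedders.embedders_factory import EmbedderFactory

# Path 1: let the factory build an EmbeddingConfig from kwargs.
embedder = EmbedderFactory.create("jinaai/jina-embeddings-v4", device="cuda")

# Path 2: pass a pre-built config; kwargs are only used when config is None.
cfg = EmbeddingConfig(model_name="jinaai/jina-embeddings-v4", use_fp16=True)
embedder = EmbedderFactory.create("jinaai/jina-embeddings-v4", config=cfg)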
@@ -86,13 +88,15 @@
@classmethod
def _determine_embedder_type(cls, model_name: str) -> str:
"""
Determine embedder type from model name.

Args:
model_name: Model name or path

Determine the embedder type from a model name.

Performs a case-insensitive substring check against the provided model_name and returns one of the supported embedder type keys: "jina", "sentence", "openai", or "cohere". If no known marker is found, defaults to "jina".

Parameters:
model_name: Model identifier or path used to infer the embedder type.

Returns:
Embedder type string
A string key identifying the embedder type: "jina", "sentence", "openai", or "cohere".
"""
model_lower = model_name.lower()

@@ -111,10 +115,16 @@ def _determine_embedder_type(cls, model_name: str) -> str:
@classmethod
def _auto_register(cls, embedder_type: str):
"""
Attempt to auto-register an embedder type.

Args:
embedder_type: Type of embedder to register
Auto-registers a known embedder implementation for the given embedder type.

If embedder_type == "jina", attempts to import JinaV4Embedder and register it under the name "jina".
If embedder_type == "sentence", emits a warning that the sentence-transformers embedder is not yet migrated.
For any other embedder_type, emits a warning about the unknown type.

Side effects:
- May call cls.register(...) to add an embedder to the factory registry.
- Logs warnings for unimplemented or unknown types.
- Logs an error if an ImportError occurs while attempting to import a backend (the exception is not propagated).
"""
try:
if embedder_type == "jina":
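The body of this try block is mostly folded. Per the docstring, the flow is roughly the standalone sketch below; auto_register_sketch is a hypothetical stand-in and the JinaV4Embedder import path is an assumption:

import logging

logger = logging.getLogger(__name__)


def auto_register_sketch(factory, embedder_type: str) -> None:
    # Hypothetical reconstruction of the behavior described in the docstring above.
    try:
        if embedder_type == "jina":
            from core.embedders.embedders_jina import JinaV4Embedder  # assumed path
            factory.register("jina", JinaV4Embedder)
        elif embedder_type == "sentence":
            logger.warning("sentence-transformers embedder not yet migrated")
        else:
            logger.warning(f"Unknown embedder type: {embedder_type}")
    except ImportError as exc:
        # Logged but deliberately not re-raised, matching the docstring.
        logger.error(f"Failed to import backend for {embedder_type!r}: {exc}")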
@@ -131,10 +141,15 @@ def _auto_register(cls, embedder_type: str):
@classmethod
def list_available(cls) -> Dict[str, Any]:
"""
List available embedders.

Return a mapping of registered embedder names to their basic metadata.

For each embedder registered in the factory registry, the mapping contains either:
- a dict with keys `"class"` (the embedder class name) and `"module"` (the class' module path),
or
- a dict with key `"error"` containing a string message if retrieving the info failed.

Returns:
Dictionary of available embedders with their info
Dict[str, Any]: Mapping from embedder registry name to metadata or error information.
"""
available = {}
for name, embedder_class in cls._embedders.items():
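To close the loop, a sketch of inspecting the registry after a create() call has triggered auto-registration; the import path is an assumption and the printed fields follow the docstring above:

from core.embedders.embedders_factory import EmbedderFactory  # path is an assumption

EmbedderFactory.create("jinaai/jina-embeddings-v4")  # auto-registers the "jina" backend
for name, info in EmbedderFactory.list_available().items():
    print(name, info.get("class"), info.get("module"), info.get("error"))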