Skip to content

Commit ebbeb45

Browse files
committed
Fix pre-commit checks and add proper type safety
1 parent 9827068 commit ebbeb45

File tree

7 files changed

+135
-94
lines changed

7 files changed

+135
-94
lines changed

docling/cli/main.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -579,17 +579,27 @@ def convert( # noqa: C901
579579
ocr_options.lang = ocr_lang_list
580580

581581
accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
582-
582+
583583
# Auto-detect pipeline based on input file formats
584584
if pipeline == ProcessingPipeline.STANDARD:
585585
# Check if any input files are audio files by extension
586-
audio_extensions = {'.mp3', '.wav', '.m4a', '.aac', '.ogg', '.flac', '.mp4', '.avi', '.mov'}
586+
audio_extensions = {
587+
".mp3",
588+
".wav",
589+
".m4a",
590+
".aac",
591+
".ogg",
592+
".flac",
593+
".mp4",
594+
".avi",
595+
".mov",
596+
}
587597
for path in input_doc_paths:
588598
if path.suffix.lower() in audio_extensions:
589599
pipeline = ProcessingPipeline.ASR
590600
_log.info(f"Auto-detected ASR pipeline for audio file: {path}")
591601
break
592-
602+
593603
# pipeline_options: PaginatedPipelineOptions
594604
pipeline_options: PipelineOptions
595605

docling/datamodel/asr_model_specs.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,34 +10,37 @@
1010
# AsrResponseFormat,
1111
# ApiAsrOptions,
1212
InferenceAsrFramework,
13-
InlineAsrNativeWhisperOptions,
1413
InlineAsrMlxWhisperOptions,
14+
InlineAsrNativeWhisperOptions,
1515
TransformersModelType,
1616
)
1717

1818
_log = logging.getLogger(__name__)
1919

20+
2021
def _get_whisper_tiny_model():
2122
"""
2223
Get the best Whisper Tiny model for the current hardware.
23-
24+
2425
Automatically selects MLX Whisper Tiny for Apple Silicon (MPS) if available,
2526
otherwise falls back to native Whisper Tiny.
2627
"""
2728
# Check if MPS is available (Apple Silicon)
2829
try:
2930
import torch
31+
3032
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
3133
except ImportError:
3234
has_mps = False
33-
35+
3436
# Check if mlx-whisper is available
3537
try:
3638
import mlx_whisper # type: ignore
39+
3740
has_mlx_whisper = True
3841
except ImportError:
3942
has_mlx_whisper = False
40-
43+
4144
# Use MLX Whisper if both MPS and mlx-whisper are available
4245
if has_mps and has_mlx_whisper:
4346
return InlineAsrMlxWhisperOptions(
@@ -66,27 +69,30 @@ def _get_whisper_tiny_model():
6669
# Create the model instance
6770
WHISPER_TINY = _get_whisper_tiny_model()
6871

72+
6973
def _get_whisper_small_model():
7074
"""
7175
Get the best Whisper Small model for the current hardware.
72-
76+
7377
Automatically selects MLX Whisper Small for Apple Silicon (MPS) if available,
7478
otherwise falls back to native Whisper Small.
7579
"""
7680
# Check if MPS is available (Apple Silicon)
7781
try:
7882
import torch
83+
7984
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
8085
except ImportError:
8186
has_mps = False
82-
87+
8388
# Check if mlx-whisper is available
8489
try:
8590
import mlx_whisper # type: ignore
91+
8692
has_mlx_whisper = True
8793
except ImportError:
8894
has_mlx_whisper = False
89-
95+
9096
# Use MLX Whisper if both MPS and mlx-whisper are available
9197
if has_mps and has_mlx_whisper:
9298
return InlineAsrMlxWhisperOptions(
@@ -115,27 +121,30 @@ def _get_whisper_small_model():
115121
# Create the model instance
116122
WHISPER_SMALL = _get_whisper_small_model()
117123

124+
118125
def _get_whisper_medium_model():
119126
"""
120127
Get the best Whisper Medium model for the current hardware.
121-
128+
122129
Automatically selects MLX Whisper Medium for Apple Silicon (MPS) if available,
123130
otherwise falls back to native Whisper Medium.
124131
"""
125132
# Check if MPS is available (Apple Silicon)
126133
try:
127134
import torch
135+
128136
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
129137
except ImportError:
130138
has_mps = False
131-
139+
132140
# Check if mlx-whisper is available
133141
try:
134142
import mlx_whisper # type: ignore
143+
135144
has_mlx_whisper = True
136145
except ImportError:
137146
has_mlx_whisper = False
138-
147+
139148
# Use MLX Whisper if both MPS and mlx-whisper are available
140149
if has_mps and has_mlx_whisper:
141150
return InlineAsrMlxWhisperOptions(
@@ -164,27 +173,30 @@ def _get_whisper_medium_model():
164173
# Create the model instance
165174
WHISPER_MEDIUM = _get_whisper_medium_model()
166175

176+
167177
def _get_whisper_base_model():
168178
"""
169179
Get the best Whisper Base model for the current hardware.
170-
180+
171181
Automatically selects MLX Whisper Base for Apple Silicon (MPS) if available,
172182
otherwise falls back to native Whisper Base.
173183
"""
174184
# Check if MPS is available (Apple Silicon)
175185
try:
176186
import torch
187+
177188
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
178189
except ImportError:
179190
has_mps = False
180-
191+
181192
# Check if mlx-whisper is available
182193
try:
183194
import mlx_whisper # type: ignore
195+
184196
has_mlx_whisper = True
185197
except ImportError:
186198
has_mlx_whisper = False
187-
199+
188200
# Use MLX Whisper if both MPS and mlx-whisper are available
189201
if has_mps and has_mlx_whisper:
190202
return InlineAsrMlxWhisperOptions(
@@ -213,27 +225,30 @@ def _get_whisper_base_model():
213225
# Create the model instance
214226
WHISPER_BASE = _get_whisper_base_model()
215227

228+
216229
def _get_whisper_large_model():
217230
"""
218231
Get the best Whisper Large model for the current hardware.
219-
232+
220233
Automatically selects MLX Whisper Large for Apple Silicon (MPS) if available,
221234
otherwise falls back to native Whisper Large.
222235
"""
223236
# Check if MPS is available (Apple Silicon)
224237
try:
225238
import torch
239+
226240
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
227241
except ImportError:
228242
has_mps = False
229-
243+
230244
# Check if mlx-whisper is available
231245
try:
232246
import mlx_whisper # type: ignore
247+
233248
has_mlx_whisper = True
234249
except ImportError:
235250
has_mlx_whisper = False
236-
251+
237252
# Use MLX Whisper if both MPS and mlx-whisper are available
238253
if has_mps and has_mlx_whisper:
239254
return InlineAsrMlxWhisperOptions(
@@ -262,27 +277,30 @@ def _get_whisper_large_model():
262277
# Create the model instance
263278
WHISPER_LARGE = _get_whisper_large_model()
264279

280+
265281
def _get_whisper_turbo_model():
266282
"""
267283
Get the best Whisper Turbo model for the current hardware.
268-
284+
269285
Automatically selects MLX Whisper Turbo for Apple Silicon (MPS) if available,
270286
otherwise falls back to native Whisper Turbo.
271287
"""
272288
# Check if MPS is available (Apple Silicon)
273289
try:
274290
import torch
291+
275292
has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
276293
except ImportError:
277294
has_mps = False
278-
295+
279296
# Check if mlx-whisper is available
280297
try:
281298
import mlx_whisper # type: ignore
299+
282300
has_mlx_whisper = True
283301
except ImportError:
284302
has_mlx_whisper = False
285-
303+
286304
# Use MLX Whisper if both MPS and mlx-whisper are available
287305
if has_mps and has_mlx_whisper:
288306
return InlineAsrMlxWhisperOptions(

docling/datamodel/pipeline_options_asr_model.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,10 @@ class InlineAsrNativeWhisperOptions(InlineAsrOptions):
6060
class InlineAsrMlxWhisperOptions(InlineAsrOptions):
6161
"""
6262
MLX Whisper options for Apple Silicon optimization.
63-
63+
6464
Uses mlx-whisper library for efficient inference on Apple Silicon devices.
6565
"""
66+
6667
inference_framework: InferenceAsrFramework = InferenceAsrFramework.MLX
6768

6869
language: str = "en"

docling/pipeline/asr_pipeline.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44
from io import BytesIO
55
from pathlib import Path
6-
from typing import List, Optional, Union, cast
6+
from typing import TYPE_CHECKING, List, Optional, Union, cast
77

88
from docling_core.types.doc import DoclingDocument, DocumentOrigin
99

@@ -31,8 +31,8 @@
3131
AsrPipelineOptions,
3232
)
3333
from docling.datamodel.pipeline_options_asr_model import (
34-
InlineAsrNativeWhisperOptions,
3534
InlineAsrMlxWhisperOptions,
35+
InlineAsrNativeWhisperOptions,
3636
# AsrResponseFormat,
3737
InlineAsrOptions,
3838
)
@@ -236,7 +236,7 @@ def __init__(
236236

237237
self.model_name = asr_options.repo_id
238238
_log.info(f"loading _MlxWhisperModel({self.model_name})")
239-
239+
240240
# MLX Whisper models are loaded differently - they use HuggingFace repos
241241
self.model_path = self.model_name
242242

@@ -281,10 +281,10 @@ def run(self, conv_res: ConversionResult) -> ConversionResult:
281281
def transcribe(self, fpath: Path) -> list[_ConversationItem]:
282282
"""
283283
Transcribe audio using MLX Whisper.
284-
284+
285285
Args:
286286
fpath: Path to audio file
287-
287+
288288
Returns:
289289
List of conversation items with timestamps
290290
"""
@@ -300,16 +300,16 @@ def transcribe(self, fpath: Path) -> list[_ConversationItem]:
300300
)
301301

302302
convo: list[_ConversationItem] = []
303-
303+
304304
# MLX Whisper returns segments similar to native Whisper
305305
for segment in result.get("segments", []):
306306
item = _ConversationItem(
307307
start_time=segment.get("start"),
308308
end_time=segment.get("end"),
309309
text=segment.get("text", "").strip(),
310-
words=[]
310+
words=[],
311311
)
312-
312+
313313
# Add word-level timestamps if available
314314
if self.word_timestamps and "words" in segment:
315315
item.words = []
@@ -332,26 +332,27 @@ def __init__(self, pipeline_options: AsrPipelineOptions):
332332
self.keep_backend = True
333333

334334
self.pipeline_options: AsrPipelineOptions = pipeline_options
335+
self._model: Union[_NativeWhisperModel, _MlxWhisperModel]
335336

336337
if isinstance(self.pipeline_options.asr_options, InlineAsrNativeWhisperOptions):
337-
asr_options: InlineAsrNativeWhisperOptions = (
338+
native_asr_options: InlineAsrNativeWhisperOptions = (
338339
self.pipeline_options.asr_options
339340
)
340341
self._model = _NativeWhisperModel(
341342
enabled=True, # must be always enabled for this pipeline to make sense.
342343
artifacts_path=self.artifacts_path,
343344
accelerator_options=pipeline_options.accelerator_options,
344-
asr_options=asr_options,
345+
asr_options=native_asr_options,
345346
)
346347
elif isinstance(self.pipeline_options.asr_options, InlineAsrMlxWhisperOptions):
347-
asr_options: InlineAsrMlxWhisperOptions = (
348+
mlx_asr_options: InlineAsrMlxWhisperOptions = (
348349
self.pipeline_options.asr_options
349350
)
350351
self._model = _MlxWhisperModel(
351352
enabled=True, # must be always enabled for this pipeline to make sense.
352353
artifacts_path=self.artifacts_path,
353354
accelerator_options=pipeline_options.accelerator_options,
354-
asr_options=asr_options,
355+
asr_options=mlx_asr_options,
355356
)
356357
else:
357358
_log.error(f"No model support for {self.pipeline_options.asr_options}")

docs/examples/minimal_asr_pipeline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def get_asr_converter():
4343
implementation for your hardware:
4444
- MLX Whisper Turbo for Apple Silicon (M1/M2/M3) with mlx-whisper installed
4545
- Native Whisper Turbo as fallback
46-
46+
4747
You can swap in another model spec from `docling.datamodel.asr_model_specs`
4848
to experiment with different model sizes.
4949
"""

0 commit comments

Comments
 (0)