test(asr): add coverage for MLX options, pipeline helpers, and VLM prompts

kensteele · kensteele · commit f3a2ba20b0cd · 2025-10-20T01:06:17.000-07:00
- tests/test_asr_mlx_whisper.py: verify explicit MLX options (framework, repo ids)
- tests/test_asr_pipeline.py: cover _has_text/_determine_status and backend support with proper InputDocument/NoOpBackend wiring
- tests/test_interfaces.py: add BaseVlmPageModel.formulate_prompt tests (RAW/NONE/CHAT, invalid style), with minimal InlineVlmOptions scaffold

Improves reliability of ASR and VLM components by validating configuration paths and helper logic.

Signed-off-by: Ken Steele &lt;ksteele@gmail.com&gt;
diff --git a/tests/test_asr_mlx_whisper.py b/tests/test_asr_mlx_whisper.py
@@ -10,7 +10,9 @@
 from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
 from docling.datamodel.asr_model_specs import (
     WHISPER_BASE,
+    WHISPER_BASE_MLX,
     WHISPER_LARGE,
+    WHISPER_LARGE_MLX,
     WHISPER_MEDIUM,
     WHISPER_SMALL,
     WHISPER_TINY,
@@ -58,6 +60,12 @@ def test_whisper_models_auto_select_mlx(self):
         assert hasattr(WHISPER_SMALL, "inference_framework")
         assert hasattr(WHISPER_SMALL, "repo_id")
 
+    def test_explicit_mlx_models_shape(self):
+        """Explicit MLX options should have MLX framework and valid repos."""
+        assert WHISPER_BASE_MLX.inference_framework.name == "MLX"
+        assert WHISPER_LARGE_MLX.inference_framework.name == "MLX"
+        assert WHISPER_BASE_MLX.repo_id.startswith("mlx-community/")
+
     @patch("builtins.__import__")
     def test_mlx_whisper_model_initialization(self, mock_import):
         """Test MLX Whisper model initialization."""
diff --git a/tests/test_asr_pipeline.py b/tests/test_asr_pipeline.py
@@ -1,10 +1,11 @@
 from pathlib import Path
+from unittest.mock import Mock
 
 import pytest
 
 from docling.datamodel import asr_model_specs
 from docling.datamodel.base_models import ConversionStatus, InputFormat
-from docling.datamodel.document import ConversionResult
+from docling.datamodel.document import ConversionResult, InputDocument
 from docling.datamodel.pipeline_options import AsrPipelineOptions
 from docling.document_converter import AudioFormatOption, DocumentConverter
 from docling.pipeline.asr_pipeline import AsrPipeline
@@ -83,3 +84,54 @@ def test_asr_pipeline_with_silent_audio(silent_audio_path):
     assert len(doc_result.document.texts) == 0, (
         "Document should contain zero text items"
     )
+
+
+def test_has_text_and_determine_status_helpers():
+    """Unit-test _has_text and _determine_status on a minimal ConversionResult."""
+    pipeline_options = AsrPipelineOptions()
+    pipeline_options.asr_options = asr_model_specs.WHISPER_TINY
+    pipeline = AsrPipeline(pipeline_options)
+
+    # Create an empty ConversionResult with proper InputDocument
+    doc_path = Path("./tests/data/audio/sample_10s.mp3")
+    from docling.backend.noop_backend import NoOpBackend
+    from docling.datamodel.base_models import InputFormat
+
+    input_doc = InputDocument(
+        path_or_stream=doc_path,
+        format=InputFormat.AUDIO,
+        backend=NoOpBackend,
+    )
+    conv_res = ConversionResult(input=input_doc)
+
+    # Simulate run result with empty document/texts
+    conv_res.status = ConversionStatus.SUCCESS
+    assert pipeline._has_text(conv_res.document) is False
+    assert pipeline._determine_status(conv_res) in (
+        ConversionStatus.PARTIAL_SUCCESS,
+        ConversionStatus.SUCCESS,
+        ConversionStatus.FAILURE,
+    )
+
+
+def test_is_backend_supported_noop_backend():
+    from pathlib import Path
+
+    from docling.backend.noop_backend import NoOpBackend
+    from docling.datamodel.base_models import InputFormat
+    from docling.datamodel.document import InputDocument
+
+    class _Dummy:
+        pass
+
+    # Create a proper NoOpBackend instance
+    doc_path = Path("./tests/data/audio/sample_10s.mp3")
+    input_doc = InputDocument(
+        path_or_stream=doc_path,
+        format=InputFormat.AUDIO,
+        backend=NoOpBackend,
+    )
+    noop_backend = NoOpBackend(input_doc, doc_path)
+
+    assert AsrPipeline.is_backend_supported(noop_backend) is True
+    assert AsrPipeline.is_backend_supported(_Dummy()) is False
diff --git a/tests/test_interfaces.py b/tests/test_interfaces.py
@@ -1,12 +1,19 @@
 from io import BytesIO
 from pathlib import Path
+from unittest.mock import Mock
 
 import pytest
 
 from docling.datamodel.accelerator_options import AcceleratorDevice
 from docling.datamodel.base_models import DocumentStream, InputFormat
-from docling.datamodel.pipeline_options import PdfPipelineOptions
+from docling.datamodel.pipeline_options_vlm_model import (
+    InferenceFramework,
+    InlineVlmOptions,
+    ResponseFormat,
+    TransformersPromptStyle,
+)
 from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.models.base_model import BaseVlmPageModel
 
 from .test_data_gen_flag import GEN_TEST_DATA
 from .verify_utils import verify_conversion_result_v2
@@ -21,6 +28,8 @@ def get_pdf_path():
 
 @pytest.fixture
 def converter():
+    from docling.datamodel.pipeline_options import PdfPipelineOptions
+
     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = False
     pipeline_options.do_table_structure = True
@@ -61,3 +70,56 @@ def test_convert_stream(converter: DocumentConverter):
     verify_conversion_result_v2(
         input_path=pdf_path, doc_result=doc_result, generate=GENERATE
     )
+
+
+class _DummyVlm(BaseVlmPageModel):
+    def __init__(self, prompt_style: TransformersPromptStyle, repo_id: str = ""):  # type: ignore[no-untyped-def]
+        self.vlm_options = InlineVlmOptions(
+            repo_id=repo_id or "dummy/repo",
+            prompt="test prompt",
+            inference_framework=InferenceFramework.TRANSFORMERS,
+            response_format=ResponseFormat.PLAINTEXT,
+            transformers_prompt_style=prompt_style,
+        )
+        self.processor = Mock()
+
+    def __call__(self, conv_res, page_batch):  # type: ignore[no-untyped-def]
+        return []
+
+    def process_images(self, image_batch, prompt):  # type: ignore[no-untyped-def]
+        return []
+
+
+def test_formulate_prompt_raw():
+    model = _DummyVlm(TransformersPromptStyle.RAW)
+    assert model.formulate_prompt("hello") == "hello"
+
+
+def test_formulate_prompt_none():
+    model = _DummyVlm(TransformersPromptStyle.NONE)
+    assert model.formulate_prompt("ignored") == ""
+
+
+def test_formulate_prompt_phi4_special_case():
+    model = _DummyVlm(
+        TransformersPromptStyle.RAW, repo_id="ibm-granite/granite-docling-258M"
+    )
+    # RAW style with granite-docling should still invoke the special path only when style not RAW;
+    # ensure RAW returns the user text
+    assert model.formulate_prompt("describe image") == "describe image"
+
+
+def test_formulate_prompt_chat_uses_processor_template():
+    model = _DummyVlm(TransformersPromptStyle.CHAT)
+    model.processor.apply_chat_template.return_value = "templated"
+    out = model.formulate_prompt("summarize")
+    assert out == "templated"
+    model.processor.apply_chat_template.assert_called()
+
+
+def test_formulate_prompt_unknown_style_raises():
+    # Create an InlineVlmOptions with an invalid enum by patching attribute directly
+    model = _DummyVlm(TransformersPromptStyle.RAW)
+    model.vlm_options.transformers_prompt_style = "__invalid__"  # type: ignore[assignment]
+    with pytest.raises(RuntimeError):
+        model.formulate_prompt("x")