Skip to content

Commit f3a2ba2

Browse files
committed
test(asr): add coverage for MLX options, pipeline helpers, and VLM prompts
- tests/test_asr_mlx_whisper.py: verify explicit MLX options (framework, repo ids)
- tests/test_asr_pipeline.py: cover _has_text/_determine_status and backend support with proper InputDocument/NoOpBackend wiring
- tests/test_interfaces.py: add BaseVlmPageModel.formulate_prompt tests (RAW/NONE/CHAT, invalid style), with minimal InlineVlmOptions scaffold

Improves reliability of ASR and VLM components by validating configuration paths and helper logic.

Signed-off-by: Ken Steele <ksteele@gmail.com>
1 parent ef8590b commit f3a2ba2

File tree

3 files changed

+124
-2
lines changed

3 files changed

+124
-2
lines changed

tests/test_asr_mlx_whisper.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
1111
from docling.datamodel.asr_model_specs import (
1212
WHISPER_BASE,
13+
WHISPER_BASE_MLX,
1314
WHISPER_LARGE,
15+
WHISPER_LARGE_MLX,
1416
WHISPER_MEDIUM,
1517
WHISPER_SMALL,
1618
WHISPER_TINY,
@@ -58,6 +60,12 @@ def test_whisper_models_auto_select_mlx(self):
5860
assert hasattr(WHISPER_SMALL, "inference_framework")
5961
assert hasattr(WHISPER_SMALL, "repo_id")
6062

63+
def test_explicit_mlx_models_shape(self):
    """Explicit MLX specs must declare the MLX framework and an mlx-community repo."""
    # Apply the same two checks to every explicit MLX spec imported by this
    # module, so neither the framework nor the repo id is verified for only
    # one of them.
    # NOTE(review): assumes WHISPER_LARGE_MLX is also hosted under
    # "mlx-community/" like WHISPER_BASE_MLX -- confirm against asr_model_specs.
    for spec in (WHISPER_BASE_MLX, WHISPER_LARGE_MLX):
        assert spec.inference_framework.name == "MLX"
        assert spec.repo_id.startswith("mlx-community/")
68+
6169
@patch("builtins.__import__")
6270
def test_mlx_whisper_model_initialization(self, mock_import):
6371
"""Test MLX Whisper model initialization."""

tests/test_asr_pipeline.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
from pathlib import Path
2+
from unittest.mock import Mock
23

34
import pytest
45

56
from docling.datamodel import asr_model_specs
67
from docling.datamodel.base_models import ConversionStatus, InputFormat
7-
from docling.datamodel.document import ConversionResult
8+
from docling.datamodel.document import ConversionResult, InputDocument
89
from docling.datamodel.pipeline_options import AsrPipelineOptions
910
from docling.document_converter import AudioFormatOption, DocumentConverter
1011
from docling.pipeline.asr_pipeline import AsrPipeline
@@ -83,3 +84,54 @@ def test_asr_pipeline_with_silent_audio(silent_audio_path):
8384
assert len(doc_result.document.texts) == 0, (
8485
"Document should contain zero text items"
8586
)
87+
88+
89+
def test_has_text_and_determine_status_helpers():
    """Exercise ``_has_text`` and ``_determine_status`` on an empty ConversionResult."""
    from docling.backend.noop_backend import NoOpBackend

    options = AsrPipelineOptions()
    options.asr_options = asr_model_specs.WHISPER_TINY
    asr_pipeline = AsrPipeline(options)

    # Wrap the sample audio path in an InputDocument backed by NoOpBackend so a
    # ConversionResult can be built without running the pipeline.
    audio_path = Path("./tests/data/audio/sample_10s.mp3")
    empty_result = ConversionResult(
        input=InputDocument(
            path_or_stream=audio_path,
            format=InputFormat.AUDIO,
            backend=NoOpBackend,
        )
    )
    empty_result.status = ConversionStatus.SUCCESS

    # The freshly created result's document is expected to hold no text.
    assert asr_pipeline._has_text(empty_result.document) is False

    # NOTE(review): this only checks that one of the known statuses comes
    # back; tighten it once the intended status for an empty document is
    # confirmed.
    acceptable_statuses = (
        ConversionStatus.PARTIAL_SUCCESS,
        ConversionStatus.SUCCESS,
        ConversionStatus.FAILURE,
    )
    assert asr_pipeline._determine_status(empty_result) in acceptable_statuses
115+
116+
117+
def test_is_backend_supported_noop_backend():
118+
from pathlib import Path
119+
120+
from docling.backend.noop_backend import NoOpBackend
121+
from docling.datamodel.base_models import InputFormat
122+
from docling.datamodel.document import InputDocument
123+
124+
class _Dummy:
125+
pass
126+
127+
# Create a proper NoOpBackend instance
128+
doc_path = Path("./tests/data/audio/sample_10s.mp3")
129+
input_doc = InputDocument(
130+
path_or_stream=doc_path,
131+
format=InputFormat.AUDIO,
132+
backend=NoOpBackend,
133+
)
134+
noop_backend = NoOpBackend(input_doc, doc_path)
135+
136+
assert AsrPipeline.is_backend_supported(noop_backend) is True
137+
assert AsrPipeline.is_backend_supported(_Dummy()) is False

tests/test_interfaces.py

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
from io import BytesIO
22
from pathlib import Path
3+
from unittest.mock import Mock
34

45
import pytest
56

67
from docling.datamodel.accelerator_options import AcceleratorDevice
78
from docling.datamodel.base_models import DocumentStream, InputFormat
8-
from docling.datamodel.pipeline_options import PdfPipelineOptions
9+
from docling.datamodel.pipeline_options_vlm_model import (
10+
InferenceFramework,
11+
InlineVlmOptions,
12+
ResponseFormat,
13+
TransformersPromptStyle,
14+
)
915
from docling.document_converter import DocumentConverter, PdfFormatOption
16+
from docling.models.base_model import BaseVlmPageModel
1017

1118
from .test_data_gen_flag import GEN_TEST_DATA
1219
from .verify_utils import verify_conversion_result_v2
@@ -21,6 +28,8 @@ def get_pdf_path():
2128

2229
@pytest.fixture
2330
def converter():
31+
from docling.datamodel.pipeline_options import PdfPipelineOptions
32+
2433
pipeline_options = PdfPipelineOptions()
2534
pipeline_options.do_ocr = False
2635
pipeline_options.do_table_structure = True
@@ -61,3 +70,56 @@ def test_convert_stream(converter: DocumentConverter):
6170
verify_conversion_result_v2(
6271
input_path=pdf_path, doc_result=doc_result, generate=GENERATE
6372
)
73+
74+
75+
class _DummyVlm(BaseVlmPageModel):
    """Minimal ``BaseVlmPageModel`` stand-in for exercising ``formulate_prompt``.

    Only ``vlm_options`` and ``processor`` are populated; the base-class
    initializer is not invoked.
    """

    def __init__(self, prompt_style: TransformersPromptStyle, repo_id: str = ""):  # type: ignore[no-untyped-def]
        # An empty repo_id falls back to a placeholder repository name.
        effective_repo = repo_id if repo_id else "dummy/repo"
        options = InlineVlmOptions(
            repo_id=effective_repo,
            prompt="test prompt",
            inference_framework=InferenceFramework.TRANSFORMERS,
            response_format=ResponseFormat.PLAINTEXT,
            transformers_prompt_style=prompt_style,
        )
        self.vlm_options = options
        # A Mock processor lets individual tests stub its methods.
        self.processor = Mock()

    def __call__(self, conv_res, page_batch):  # type: ignore[no-untyped-def]
        """Stub implementation: always returns an empty list."""
        return []

    def process_images(self, image_batch, prompt):  # type: ignore[no-untyped-def]
        """Stub implementation: always returns an empty list."""
        return []
91+
92+
93+
def test_formulate_prompt_raw():
    """RAW prompt style hands the user prompt back unchanged."""
    user_prompt = "hello"
    raw_model = _DummyVlm(TransformersPromptStyle.RAW)
    assert raw_model.formulate_prompt(user_prompt) == user_prompt
96+
97+
98+
def test_formulate_prompt_none():
    """NONE prompt style produces an empty prompt for the given user text."""
    rendered = _DummyVlm(TransformersPromptStyle.NONE).formulate_prompt("ignored")
    assert rendered == ""
101+
102+
103+
def test_formulate_prompt_phi4_special_case():
    """RAW prompt style returns the user text even when an explicit repo_id is set.

    NOTE(review): despite the test name, the repo used here is granite-docling,
    not a Phi-4 model -- confirm which repo_id the special-case path in
    ``formulate_prompt`` is keyed on and whether this test should target it.
    """
    model = _DummyVlm(
        TransformersPromptStyle.RAW, repo_id="ibm-granite/granite-docling-258M"
    )
    # Intent: with RAW style no repo-specific prompt handling should apply,
    # so the user text must come back verbatim.
    assert model.formulate_prompt("describe image") == "describe image"
110+
111+
112+
def test_formulate_prompt_chat_uses_processor_template():
    """CHAT prompt style returns what the processor's chat template produces."""
    chat_model = _DummyVlm(TransformersPromptStyle.CHAT)
    # Stub the mocked processor so the template output is recognisable.
    template_mock = chat_model.processor.apply_chat_template
    template_mock.return_value = "templated"

    assert chat_model.formulate_prompt("summarize") == "templated"
    template_mock.assert_called()
118+
119+
120+
def test_formulate_prompt_unknown_style_raises():
    """An unrecognised prompt style makes ``formulate_prompt`` raise RuntimeError."""
    broken_model = _DummyVlm(TransformersPromptStyle.RAW)
    # Overwrite the style on the already-built options with a value that is
    # not a TransformersPromptStyle member.
    options = broken_model.vlm_options
    options.transformers_prompt_style = "__invalid__"  # type: ignore[assignment]

    with pytest.raises(RuntimeError):
        broken_model.formulate_prompt("x")

0 commit comments

Comments
 (0)