Skip to content

Commit e3d1045

Browse files
NathanHB and qubvel authored
Adds multimodal support and MMMU pro (#675)
``` uv run lighteval accelerate "model_name=HuggingFaceTB/SmolVLM-Instruct" "lighteval|mmmu_pro|0|0" --use-chat-template --vision-model ``` --------- Co-authored-by: qubvel <qubvel@gmail.com>
1 parent 6366ada commit e3d1045

13 files changed

+786
-32
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
model_parameters:
2+
model_name: "Qwen/Qwen2.5-VL-3B-Instruct"
3+
revision: "main"
4+
dtype: "float16"
5+
compile: false
6+
model_parallel: false
7+
batch_size: 1
8+
generation_parameters:
9+
temperature: 0.2
10+
top_p: 0.9

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ classifiers = [
5454
keywords = ["evaluation", "nlp", "llm"]
5555
dependencies = [
5656
# Base dependencies
57-
"transformers>=4.38.0",
57+
"transformers>=4.51.0",
5858
"accelerate",
5959
"huggingface_hub[hf_xet]>=0.30.2",
6060
"torch>=2.0,<3.0",

src/lighteval/main_accelerate.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ def accelerate( # noqa C901
4848
use_chat_template: Annotated[
4949
bool, Option(help="Use chat template for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
5050
] = False,
51+
vision_model: Annotated[
52+
bool, Option(help="Use vision model for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
53+
] = False,
5154
system_prompt: Annotated[
5255
Optional[str], Option(help="Use system prompt for evaluation.", rich_help_panel=HELP_PANEL_NAME_4)
5356
] = None,
@@ -109,6 +112,7 @@ def accelerate( # noqa C901
109112
from lighteval.models.transformers.adapter_model import AdapterModelConfig
110113
from lighteval.models.transformers.delta_model import DeltaModelConfig
111114
from lighteval.models.transformers.transformers_model import TransformersModelConfig
115+
from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModelConfig
112116
from lighteval.models.utils import ModelConfig
113117
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
114118

@@ -147,7 +151,10 @@ def accelerate( # noqa C901
147151
elif config.get("adapter_weights", False):
148152
model_config = AdapterModelConfig(**config)
149153
else:
150-
model_config = TransformersModelConfig(**config)
154+
if vision_model:
155+
model_config = VLMTransformersModelConfig(**config)
156+
else:
157+
model_config = TransformersModelConfig(**config)
151158

152159
pipeline = Pipeline(
153160
tasks=tasks,

src/lighteval/models/model_loader.py

Lines changed: 13 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
from lighteval.models.transformers.adapter_model import AdapterModel, AdapterModelConfig
4343
from lighteval.models.transformers.delta_model import DeltaModel, DeltaModelConfig
4444
from lighteval.models.transformers.transformers_model import TransformersModel, TransformersModelConfig
45+
from lighteval.models.transformers.vlm_transformers_model import VLMTransformersModel, VLMTransformersModelConfig
46+
from lighteval.models.utils import ModelConfig
4547
from lighteval.models.vllm.vllm_model import VLLMModel, VLLMModelConfig
4648
from lighteval.utils.imports import (
4749
NO_LITELLM_ERROR_MSG,
@@ -60,21 +62,8 @@
6062

6163

6264
def load_model( # noqa: C901
63-
config: Union[
64-
TransformersModelConfig,
65-
AdapterModelConfig,
66-
DeltaModelConfig,
67-
TGIModelConfig,
68-
InferenceEndpointModelConfig,
69-
DummyModelConfig,
70-
VLLMModelConfig,
71-
CustomModelConfig,
72-
OpenAIModelConfig,
73-
LiteLLMModelConfig,
74-
SGLangModelConfig,
75-
InferenceProvidersModelConfig,
76-
],
77-
) -> Union[TransformersModel, AdapterModel, DeltaModel, ModelClient, DummyModel]:
65+
config: ModelConfig,
66+
) -> LightevalModel:
7867
"""Will load either a model from an inference server or a model from a checkpoint, depending
7968
on the config type.
8069
@@ -100,6 +89,9 @@ def load_model( # noqa: C901
10089
if isinstance(config, TransformersModelConfig):
10190
return load_model_with_accelerate_or_default(config)
10291

92+
if isinstance(config, VLMTransformersModelConfig):
93+
return load_model_with_accelerate_or_default(config)
94+
10395
if isinstance(config, DummyModelConfig):
10496
return load_dummy_model(config)
10597

@@ -186,7 +178,9 @@ def load_model_with_inference_endpoints(config: Union[InferenceEndpointModelConf
186178

187179

188180
def load_model_with_accelerate_or_default(
189-
config: Union[AdapterModelConfig, TransformersModelConfig, DeltaModelConfig],
181+
config: Union[
182+
AdapterModelConfig, TransformersModelConfig, DeltaModelConfig, VLLMModelConfig, VLMTransformersModelConfig
183+
],
190184
):
191185
if isinstance(config, AdapterModelConfig):
192186
model = AdapterModel(config=config)
@@ -197,6 +191,9 @@ def load_model_with_accelerate_or_default(
197191
raise ImportError(NO_VLLM_ERROR_MSG)
198192
model = VLLMModel(config=config)
199193
return model
194+
elif isinstance(config, VLMTransformersModelConfig):
195+
model = VLMTransformersModel(config=config)
196+
return model
200197
else:
201198
model = TransformersModel(config=config)
202199

0 commit comments

Comments (0)