18
18
# - Different models result in differences of more than 1e-3
19
19
# 1e-4 is a good tolerance threshold; NOTE(review): MTEB_EMBED_TOL is set to 0.02 below — confirm the relaxed value is intended
20
20
MTEB_EMBED_TASKS = ["STS12" ]
21
- MTEB_EMBED_TOL = 1e-4
21
+ MTEB_EMBED_TOL = 0.02
22
22
23
23
# See #19344
24
24
MTEB_RERANK_TASKS = ["NFCorpus" ]
@@ -175,6 +175,7 @@ def mteb_test_embed_models(hf_runner,
175
175
with vllm_runner (model_info .name ,
176
176
runner = "pooling" ,
177
177
max_model_len = None ,
178
+ enforce_eager = True ,
178
179
** vllm_extra_kwargs ) as vllm_model :
179
180
180
181
model_config = vllm_model .llm .llm_engine .model_config
@@ -198,6 +199,7 @@ def mteb_test_embed_models(hf_runner,
198
199
st_main_score = run_mteb_embed_task (hf_model , MTEB_EMBED_TASKS )
199
200
st_dtype = next (hf_model .model .parameters ()).dtype
200
201
202
+ print ("Model:" , model_info .name )
201
203
print ("VLLM:" , vllm_dtype , vllm_main_score )
202
204
print ("SentenceTransformers:" , st_dtype , st_main_score )
203
205
print ("Difference:" , st_main_score - vllm_main_score )
@@ -286,6 +288,7 @@ def mteb_test_rerank_models(hf_runner,
286
288
runner = "pooling" ,
287
289
max_model_len = None ,
288
290
max_num_seqs = 8 ,
291
+ enforce_eager = True ,
289
292
** vllm_extra_kwargs ) as vllm_model :
290
293
291
294
model_config = vllm_model .llm .llm_engine .model_config
@@ -304,6 +307,7 @@ def mteb_test_rerank_models(hf_runner,
304
307
st_main_score , st_dtype = mteb_test_rerank_models_hf (
305
308
hf_runner , model_info .name , hf_model_callback )
306
309
310
+ print ("Model:" , model_info .name )
307
311
print ("VLLM:" , vllm_dtype , vllm_main_score )
308
312
print ("SentenceTransformers:" , st_dtype , st_main_score )
309
313
print ("Difference:" , st_main_score - vllm_main_score )
0 commit comments