From b4dc3bb17180405653e5458658dce528aaf252b7 Mon Sep 17 00:00:00 2001
From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>
Date: Tue, 28 May 2024 08:32:42 -0700
Subject: [PATCH] [BugFix] Fix Embedding Models with TP>1 (#5075)

---
 vllm/worker/embedding_model_runner.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/worker/embedding_model_runner.py b/vllm/worker/embedding_model_runner.py
index ef02de95fc54e..0ba1200696cab 100644
--- a/vllm/worker/embedding_model_runner.py
+++ b/vllm/worker/embedding_model_runner.py
@@ -79,6 +79,10 @@ def execute_model(
             execute_model_kwargs.update({"image_input": multi_modal_input})
         hidden_states = model_executable(**execute_model_kwargs)
 
+        # Only perform pooling in the driver worker.
+        if not self.is_driver_worker:
+            return None
+
         return self.model.pooler(hidden_states=hidden_states,
                                  pooling_metadata=pooling_metadata)
 