Skip to content

Commit

Permalink
Address Nick's nits and fix CUDAGraph correctness
Browse files Browse the repository at this point in the history
Signed-off-by: Muralidhar Andoorveedu <muralidhar.andoorveedu@centml.ai>
  • Loading branch information
andoorve committed Jul 2, 2024
1 parent 5a4b323 commit c92257c
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
8 changes: 4 additions & 4 deletions vllm/model_executor/models/gpt2.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,12 +219,12 @@ def forward(
kv_caches[i - self.start_layer],
attn_metadata)

if get_pp_group().is_last_rank:
hidden_states = self.ln_f(hidden_states)
return hidden_states
else:
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

hidden_states = self.ln_f(hidden_states)
return hidden_states


class GPT2LMHeadModel(nn.Module):

Expand Down
8 changes: 4 additions & 4 deletions vllm/model_executor/models/llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,15 +311,15 @@ def forward(
residual,
)

if get_pp_group().is_last_rank:
hidden_states, _ = self.norm(hidden_states, residual)
return hidden_states
else:
if not get_pp_group().is_last_rank:
return IntermediateTensors({
"hidden_states": hidden_states,
"residual": residual
})

hidden_states, _ = self.norm(hidden_states, residual)
return hidden_states


class LlamaForCausalLM(nn.Module, SupportsLoRA):
packed_modules_mapping = {
Expand Down
4 changes: 2 additions & 2 deletions vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1359,8 +1359,8 @@ def forward(
# Return the output tensor.
if get_pp_group().is_last_rank:
return self.output_buffers["hidden_states"]
else:
return self.output_buffers

return self.output_buffers

def __call__(self, *args, **kwargs):
return self.forward(*args, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion vllm/worker/worker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def from_broadcasted_tensor_dict(
blocks_to_swap_in=tensor_dict.pop("blocks_to_swap_in"),
blocks_to_swap_out=tensor_dict.pop("blocks_to_swap_out"),
blocks_to_copy=tensor_dict.pop("blocks_to_copy"),
virtual_engine=tensor_dict.pop("virtual_engine"),
virtual_engine=tensor_dict["virtual_engine"],
)

def as_broadcastable_tensor_dict(
Expand Down

0 comments on commit c92257c

Please sign in to comment.