Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip/spmd] Serialization Optimization #6903

Closed
wants to merge 14 commits into from
Prev Previous commit
Next Next commit
working
  • Loading branch information
rkooo567 committed Jul 25, 2024
commit 36e786d66d108c3a78ec85d222b451dcc34ca4aa
4 changes: 3 additions & 1 deletion vllm/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dataclasses import dataclass, field
from typing import Deque, Dict, Iterable, List, Optional, Set, Tuple, Union

import vllm.envs as envs
from vllm.config import CacheConfig, LoRAConfig, SchedulerConfig
from vllm.core.interfaces import AllocStatus, BlockSpaceManager
from vllm.core.policy import Policy, PolicyFactory
Expand Down Expand Up @@ -1019,7 +1020,7 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]:
# It assumes the scheduled_seq_groups is ordered by
# prefill < decoding.
is_prompt = seq_group.is_prefill()
if is_prompt:
if is_prompt or not envs.VLLM_USE_RAY_SPMD_WORKER:
seq_group_metadata = SequenceGroupMetadata(
request_id=seq_group.request_id,
is_prompt=is_prompt,
Expand All @@ -1041,6 +1042,7 @@ def schedule(self) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs]:
prompt_adapter_request=seq_group.prompt_adapter_request,
)
else:
# Sequence-data deltas are sent only when SPMD workers are enabled
# (VLLM_USE_RAY_SPMD_WORKER), to reduce serialization overhead.
seq_data_delta = {}
for id, data in seq_data.items():
seq_data_delta[id] = data.get_delta()
Expand Down
5 changes: 0 additions & 5 deletions vllm/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,11 +315,6 @@ def execute_model(
self,
execute_model_req: Optional[ExecuteModelRequest] = None
) -> Optional[List[SamplerOutput]]:
if execute_model_req is not None:
new_seq_group_metadata_list = self._get_cached_seq_group_metadata(
execute_model_req.seq_group_metadata_list)
execute_model_req.seq_group_metadata_list = (
new_seq_group_metadata_list)
output = super().execute_model(execute_model_req)
return output

Expand Down
Loading