Skip to content

Commit

Permalink
Multi step scheduling (vllm-project#441)
Browse files Browse the repository at this point in the history
Implementation of multi-step scheduling. To use the feature, pass
--num_scheduler_steps=<n> (where <n> is the number of scheduler steps)
as a server parameter. In my tests, the best results were achieved with
n=64, but the optimal value will vary depending on the model.

---------

Co-authored-by: Karol Damaszke <kdamaszke@habana.ai>
Co-authored-by: jmaksymczuk <jmaksymczuk@habana.ai>
  • Loading branch information
3 people authored Oct 29, 2024
1 parent 1dcdb37 commit 78e947a
Show file tree
Hide file tree
Showing 4 changed files with 361 additions and 118 deletions.
11 changes: 9 additions & 2 deletions vllm/executor/hpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,16 @@ def _create_worker(self,
local_rank: int = 0,
rank: int = 0,
distributed_init_method: Optional[str] = None):
if self.scheduler_config.is_multi_step:
module_name = "vllm.worker.multi_step_hpu_worker"
class_name = "MultiStepHPUWorker"
else:
module_name = "vllm.worker.hpu_worker"
class_name = "HPUWorker"

wrapper = WorkerWrapperBase(
worker_module_name="vllm.worker.hpu_worker",
worker_class_name="HPUWorker",
worker_module_name=module_name,
worker_class_name=class_name,
)
wrapper.init_worker(**self._get_worker_kwargs(local_rank, rank,
distributed_init_method))
Expand Down
4 changes: 2 additions & 2 deletions vllm/executor/ray_hpu_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ def _get_worker_module_and_class(
Type[WorkerBase]]]]: # noqa: F821
worker_class_fn = None
if self.scheduler_config.is_multi_step:
raise NotImplementedError(
"Multi-step execution is not implemented for HPU")
worker_module_name = "vllm.worker.multi_step_hpu_worker"
worker_class_name = "MultiStepHPUWorker"
elif self.speculative_config:
raise NotImplementedError(
"Speculative decoding is not implemented for HPU")
Expand Down
Loading

0 comments on commit 78e947a

Please sign in to comment.