Skip to content

Commit 102dcbe

Browse files
samzongchoprahetarth
authored and committed
[Docs] Improve code formatting and comments to eliminate the griffe build warning. (vllm-project#25010)
Signed-off-by: samzong <samzong.lu@gmail.com>
1 parent 8ce66f0 commit 102dcbe

File tree

3 files changed

+20
-14
lines changed

3 files changed

+20
-14
lines changed

vllm/benchmarks/serve.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ async def get_request(
139139
A lower burstiness value (0 < burstiness < 1) results
140140
in more bursty requests, while a higher burstiness value
141141
(burstiness > 1) results in a more uniform arrival of requests.
142-
ramp_up_strategy (optional):
142+
ramp_up_strategy (optional):
143143
The ramp-up strategy. Can be "linear" or "exponential".
144144
If None, uses constant request rate (specified by request_rate).
145145
ramp_up_start_rps (optional):

vllm/distributed/eplb/eplb_state.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -337,11 +337,12 @@ def step(self,
337337
Args:
338338
model (MixtureOfExperts): The MoE model.
339339
is_dummy (bool): If `True`, this is a dummy step and the load
340-
metrics recorded in this forward pass will not count. Defaults
341-
to `False`.
340+
metrics recorded in this forward pass will not count.
341+
Defaults to `False`.
342342
is_profile (bool): If `True`, perform a dummy rearrangement
343-
with maximum communication cost. This is used in `profile_run`
344-
to reserve enough memory for the communication buffer.
343+
with maximum communication cost. This is used in
344+
`profile_run` to reserve enough memory
345+
for the communication buffer.
345346
log_stats (bool): If `True`, log the expert load metrics.
346347
347348
# Stats

vllm/distributed/eplb/rebalance_algo.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,16 @@ def rebalance_experts_hierarchical(
109109
num_physical_experts: number of physical experts after replication
110110
num_groups: number of expert groups
111111
num_nodes: number of server nodes, where the intra-node network
112-
(e.g, NVLink) is faster
112+
(e.g., NVLink) is faster
113113
num_gpus: number of GPUs, must be a multiple of `num_nodes`
114114
115115
Returns:
116-
physical_to_logical_map: [num_moe_layers, num_physical_experts]
117-
logical_to_physical_map: [num_moe_layers, num_logical_experts, X]
118-
logical_count: [num_moe_layers, num_logical_experts]
116+
physical_to_logical_map (torch.Tensor):
117+
[num_moe_layers, num_physical_experts]
118+
logical_to_physical_map (torch.Tensor):
119+
[num_moe_layers, num_logical_experts, X]
120+
logical_count (torch.Tensor):
121+
[num_moe_layers, num_logical_experts]
119122
"""
120123
num_layers, num_logical_experts = weight.shape
121124
assert num_logical_experts % num_groups == 0
@@ -197,11 +200,13 @@ def rebalance_experts(
197200
num_gpus: number of GPUs, must be a multiple of `num_nodes`
198201
199202
Returns:
200-
physical_to_logical_map: [layers, num_replicas], the expert index of
201-
each replica
202-
logical_to_physical_map: [layers, num_logical_experts, X], the replica
203-
indices for each expert
204-
expert_count: [layers, num_logical_experts], number of physical
203+
physical_to_logical_map:
204+
[layers, num_replicas], the expert index of each replica
205+
logical_to_physical_map:
206+
[layers, num_logical_experts, X], the replica indices for each
207+
expert
208+
expert_count:
209+
[layers, num_logical_experts], number of physical
205210
replicas for each logical expert
206211
"""
207212
num_layers, num_logical_experts = weight.shape

0 commit comments

Comments
 (0)