
Commit 65a0b1f

Fixed some bugs
1 parent 0a4d391

2 files changed: 6 additions, 2 deletions

python/sglang/srt/distributed/parallel_state.py

Lines changed: 5 additions & 2 deletions
@@ -258,13 +258,16 @@ def __init__(
         self.local_size = get_int_env_var("LOCAL_SIZE", 0)

         for ranks in group_ranks:
+            from mooncake.ep import MooncakeBackendOptions
             device_group = torch.distributed.new_group(
-                ranks, backend=torch_distributed_backend
+                ranks, backend=torch_distributed_backend, pg_options=MooncakeBackendOptions(active_ranks) if active_ranks is not None else None
             )
             # a cpu_group to allow direct coordination between processes through
             # the CPU. The backend is chosen based on `torch_distributed_backend`
             if "mooncake" in torch_distributed_backend:
-                cpu_group = torch.distributed.new_group(ranks, backend="mooncake-cpu")
+                cpu_group = torch.distributed.new_group(
+                    ranks, backend="mooncake-cpu", pg_options=MooncakeBackendOptions(active_ranks_cpu) if active_ranks_cpu is not None else None
+                )
             else:
                 cpu_group = torch.distributed.new_group(
                     ranks, backend="gloo", timeout=gloo_timeout
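The hunk above threads an optional set of active ranks into process-group creation via pg_options. Below is a minimal sketch of that pattern, assuming active_ranks is an optional mask/tensor and that MooncakeBackendOptions accepts it as a single positional argument (as the diff itself shows); the helper name new_group_with_active_ranks is illustrative, not part of the codebase.

import torch.distributed


def new_group_with_active_ranks(ranks, backend, active_ranks=None):
    # Only attach backend-specific options when an active-rank mask is given;
    # pg_options=None falls back to the backend's defaults.
    pg_options = None
    if active_ranks is not None:
        # Imported lazily, mirroring the diff, so builds without mooncake
        # never touch the dependency.
        from mooncake.ep import MooncakeBackendOptions

        pg_options = MooncakeBackendOptions(active_ranks)
    return torch.distributed.new_group(ranks, backend=backend, pg_options=pg_options)

The same wrapper would cover both call sites in the hunk: the device group (backend=torch_distributed_backend with active_ranks) and the CPU group (backend="mooncake-cpu" with active_ranks_cpu).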

python/sglang/srt/managers/scheduler.py

Lines changed: 1 addition & 0 deletions
@@ -2274,6 +2274,7 @@ def run_batch(
            batch_result.extend_logprob_start_len_per_req = (
                extend_logprob_start_len_per_req
            )
+           self.send_to_tokenizer.send_pyobj(Ranks(status=get_tp_active_ranks_cpu().tolist()))
            return batch_result
        else:  # embedding or reward model
            model_worker_batch = batch.get_model_worker_batch()
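This one-line addition has the scheduler publish the tensor-parallel active-rank status to the tokenizer after each generation batch. A hedged sketch of that message flow follows, modeling Ranks as a dataclass with the status field used in the diff (the real definition lives elsewhere in sglang and may differ); send_pyobj is the standard pyzmq method that pickles an object onto a ZeroMQ socket.

from dataclasses import dataclass
from typing import List

import zmq


@dataclass
class Ranks:
    # Assumed shape: one integer liveness flag per tensor-parallel rank,
    # as produced by get_tp_active_ranks_cpu().tolist() in the diff.
    status: List[int]


def notify_tokenizer(send_to_tokenizer: zmq.Socket, active_ranks_cpu) -> None:
    # send_pyobj pickles the object and sends it over the socket, letting the
    # tokenizer process track which TP ranks are currently active.
    send_to_tokenizer.send_pyobj(Ranks(status=active_ranks_cpu.tolist()))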
