2 files changed: +6 −2

@@ -258,13 +258,16 @@ def __init__(
         self.local_size = get_int_env_var("LOCAL_SIZE", 0)

         for ranks in group_ranks:
+            from mooncake.ep import MooncakeBackendOptions
             device_group = torch.distributed.new_group(
-                ranks, backend=torch_distributed_backend
+                ranks, backend=torch_distributed_backend, pg_options=MooncakeBackendOptions(active_ranks) if active_ranks is not None else None
             )
             # a cpu_group to allow direct coordination between processes through
             # the CPU. The backend is chosen based on `torch_distributed_backend`
             if "mooncake" in torch_distributed_backend:
-                cpu_group = torch.distributed.new_group(ranks, backend="mooncake-cpu")
+                cpu_group = torch.distributed.new_group(
+                    ranks, backend="mooncake-cpu", pg_options=MooncakeBackendOptions(active_ranks_cpu) if active_ranks_cpu is not None else None
+                )
             else:
                 cpu_group = torch.distributed.new_group(
                     ranks, backend="gloo", timeout=gloo_timeout
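For context, this hunk forwards an optional active-rank mask into both `torch.distributed.new_group` calls through `pg_options`. The standalone sketch below restates that pattern outside the diff; it assumes (not verified here) that `MooncakeBackendOptions` takes the mask as its only constructor argument, that a process group is already initialized, and that `active_ranks` / `active_ranks_cpu` are `None` unless the Mooncake backend is in use, which is why the import is guarded rather than done unconditionally inside the loop as in the patch.

```python
from typing import Optional

import torch
import torch.distributed as dist


def create_groups(
    group_ranks: list[list[int]],
    torch_distributed_backend: str,
    gloo_timeout,
    active_ranks: Optional[torch.Tensor] = None,      # assumed per-rank liveness mask
    active_ranks_cpu: Optional[torch.Tensor] = None,  # assumed CPU-side counterpart
):
    """Illustrative restatement of the patched loop, not the actual sglang code."""
    groups = []
    for ranks in group_ranks:
        device_opts = None
        cpu_opts = None
        if active_ranks is not None or active_ranks_cpu is not None:
            # Import lazily so deployments without mooncake installed never need it.
            from mooncake.ep import MooncakeBackendOptions

            if active_ranks is not None:
                device_opts = MooncakeBackendOptions(active_ranks)
            if active_ranks_cpu is not None:
                cpu_opts = MooncakeBackendOptions(active_ranks_cpu)

        # pg_options stays None when no mask is given, so behavior is unchanged.
        device_group = dist.new_group(
            ranks, backend=torch_distributed_backend, pg_options=device_opts
        )
        if "mooncake" in torch_distributed_backend:
            cpu_group = dist.new_group(
                ranks, backend="mooncake-cpu", pg_options=cpu_opts
            )
        else:
            cpu_group = dist.new_group(ranks, backend="gloo", timeout=gloo_timeout)
        groups.append((device_group, cpu_group))
    return groups
```

If the unconditional in-loop import in the patch is intentional (for example, mooncake is always installed in these deployments), the guard above is unnecessary; otherwise guarding or hoisting it avoids an ImportError on non-Mooncake builds.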
@@ -2274,6 +2274,7 @@ def run_batch(
                 batch_result.extend_logprob_start_len_per_req = (
                     extend_logprob_start_len_per_req
                 )
+            self.send_to_tokenizer.send_pyobj(Ranks(status=get_tp_active_ranks_cpu().tolist()))
             return batch_result
         else:  # embedding or reward model
             model_worker_batch = batch.get_model_worker_batch()
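This hunk makes the scheduler push the current TP active-rank status to the tokenizer after every generation batch via `send_pyobj`. The sketch below is a hypothetical consumer for that message: the `Ranks` dataclass definition, the socket type, and the endpoint name are stand-ins for whatever the tokenizer manager actually uses; only the payload shape (a `Ranks` object whose `status` is a plain list, as produced by `get_tp_active_ranks_cpu().tolist()`) comes from the diff.

```python
from dataclasses import dataclass

import zmq


@dataclass
class Ranks:
    # Assumed message shape: one truthy/falsy liveness flag per TP rank.
    status: list


def poll_rank_status(endpoint: str = "ipc:///tmp/scheduler_to_tokenizer") -> None:
    # Hypothetical tokenizer-side loop; endpoint and socket type are assumptions.
    ctx = zmq.Context.instance()
    sock = ctx.socket(zmq.PULL)
    sock.connect(endpoint)
    while True:
        msg = sock.recv_pyobj()  # blocks until the scheduler sends the next object
        if isinstance(msg, Ranks):
            inactive = [rank for rank, ok in enumerate(msg.status) if not ok]
            if inactive:
                print(f"TP ranks reported inactive: {inactive}")
        # other scheduler -> tokenizer messages would be dispatched here
```

Since the send fires once per generation batch, repeated identical statuses are expected; deduplicating on either side may be worth considering if the message volume matters.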