Skip to content

Commit f4826ef

Browse files
committed
support optim cases
1 parent 0f21fa8 commit f4826ef

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

torch/distributed/algorithms/ddp_comm_hooks/ddp_zero_hook.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -233,12 +233,12 @@ def hook_with_zero_step(
233 233       ddp_ref = weakref.ref(ddp)
234 234
235 235       # NOTE: Gloo may hang with this overlapping approach, so we require
236     -     # NCCL/HCCL backend for now; see https://github.com/pytorch/pytorch/issues/62300
    236 +     # NCCL/HCCL/XCCL backend for now; see https://github.com/pytorch/pytorch/issues/62300
237 237       pg = dist.get_backend(ddp_ref().process_group) # type: ignore[union-attr]
238     -     if (pg != dist.Backend.NCCL) and (pg != "hccl"):
    238 +     if (pg != dist.Backend.NCCL) and (pg != "hccl") and (pg != "xccl"):
239 239           raise RuntimeError(
240 240               "Overlapping DDP with ZeRO using this approach currently requires "
241     -             "NCCL/HCCL backend to avoid hangs"
    241 +             "NCCL/HCCL/XCCL backend to avoid hangs"
242 242           )
243 243
244 244       if shard_buckets:
@@ -395,12 +395,12 @@ def hook_with_zero_step_interleaved(
395 395       ddp_ref = weakref.ref(ddp)
396 396
397 397       # NOTE: Gloo may hang with this overlapping approach, so we require
398     -     # NCCL/HCCL backend for now; see https://github.com/pytorch/pytorch/issues/62300
    398 +     # NCCL/HCCL/XCCL backend for now; see https://github.com/pytorch/pytorch/issues/62300
399 399       pg = dist.get_backend(ddp_ref().process_group) # type: ignore[union-attr]
400     -     if (pg != dist.Backend.NCCL) and (pg != "hccl"):
    400 +     if (pg != dist.Backend.NCCL) and (pg != "hccl") and (pg != "xccl"):
401 401           raise RuntimeError(
402 402               "Overlapping DDP with ZeRO using this approach currently requires "
403     -             "NCCL/HCCL backend to avoid hangs"
    403 +             "NCCL/HCCL/XCCL backend to avoid hangs"
404 404           )
405 405
406 406       if shard_buckets:

0 commit comments

Comments
 (0)