Skip to content

Commit ffa2aba

Browse files
committed
Z3: use .contiguous() for NCCL allgather send buffer; add comment
Signed-off-by: Abhishek <dalakotiashu150@gmail.com>
1 parent 94e3dd6 commit ffa2aba

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

csrc/compile/z3.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ class Z3CustomOpExecutor : public CustomOpExecutor {
7474
const int64_t shard_elems = ds_tensor.numel();
7575

7676
// Perform all-gather directly into the pre-allocated padded output buffer
77-
ncclResult_t result = ncclAllGather(ds_tensor.flatten().data_ptr(),
77+
// NCCL reads the send buffer as a flat run of shard_elems elements from a raw pointer, so take the pointer from a contiguous tensor
78+
ncclResult_t result = ncclAllGather(ds_tensor.contiguous().data_ptr(),
7879
output_buf.data_ptr(),
7980
shard_elems,
8081
get_nccl_data_type(ds_tensor.scalar_type()),

0 commit comments

Comments
 (0)