We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 94e3dd6 · commit ffa2aba (Copy full SHA for ffa2aba)
csrc/compile/z3.cpp
@@ -74,7 +74,8 @@ class Z3CustomOpExecutor : public CustomOpExecutor {
74
const int64_t shard_elems = ds_tensor.numel();
75
76
// Perform all-gather directly into the pre-allocated padded output buffer
77
- ncclResult_t result = ncclAllGather(ds_tensor.flatten().data_ptr(),
+ // NCCL requires contiguous storage; use .contiguous() explicitly
78
+ ncclResult_t result = ncclAllGather(ds_tensor.contiguous().data_ptr(),
79
output_buf.data_ptr(),
80
shard_elems,
81
get_nccl_data_type(ds_tensor.scalar_type()),
0 commit comments