Skip to content

Commit

Permalink
cann: fix multi-npu exec error
Browse files Browse the repository at this point in the history
  • Loading branch information
wangshuai09 committed Jul 27, 2024
1 parent 01245f5 commit 937a12c
Showing 1 changed file with 15 additions and 15 deletions.
30 changes: 15 additions & 15 deletions ggml/src/ggml-cann.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1559,23 +1559,18 @@ GGML_CALL static bool ggml_backend_cann_cpy_tensor_async(
return false;
}

// need open both directions for memcpyasync between devices.
ggml_cann_set_device(cann_ctx_dst->device);
ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_src->device, 0));
ggml_cann_set_device(cann_ctx_src->device);
ACL_CHECK(aclrtDeviceEnablePeerAccess(cann_ctx_dst->device, 0));

ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size,
ACL_MEMCPY_DEVICE_TO_DEVICE,
cann_ctx_dst->stream()));

// record event on src stream
if (!cann_ctx_src->copy_event) {
ACL_CHECK(aclrtCreateEvent(&cann_ctx_src->copy_event));
}

ACL_CHECK(
aclrtRecordEvent(cann_ctx_src->copy_event, cann_ctx_src->stream()));
cann_ctx_src->stream()));

// wait on dst stream for the copy to complete
ACL_CHECK(aclrtStreamWaitEvent(cann_ctx_dst->stream(),
cann_ctx_src->copy_event));
// TODO: workaround, because event-based synchronization didn't work here.
aclrtSynchronizeStream(cann_ctx_src->stream());
} else {
// src and dst are on the same backend
ACL_CHECK(aclrtMemcpyAsync(dst->data, copy_size, src->data, copy_size,
Expand Down Expand Up @@ -1773,9 +1768,14 @@ static bool ggml_backend_buft_is_cann(ggml_backend_buffer_type_t buft) {
*/
GGML_CALL static bool ggml_backend_cann_supports_buft(
    ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
    // Reject anything that is not a CANN buffer type. The cast of
    // buft->context below assumes a CANN buffer type context, so this
    // check must come first.
    if (!ggml_backend_buft_is_cann(buft)) {
        return false;
    }

    ggml_backend_cann_context * cann_ctx =
        (ggml_backend_cann_context *)backend->context;
    ggml_backend_cann_buffer_type_context * buft_ctx =
        (ggml_backend_cann_buffer_type_context *)buft->context;

    // A CANN buffer type is only supported by the backend bound to the same
    // device. Matching on the buffer type name alone would also accept
    // buffer types that belong to a different device.
    return buft_ctx->device == cann_ctx->device;
}

/**
Expand Down

0 comments on commit 937a12c

Please sign in to comment.