Skip to content

Commit 66e7c14

Browse files
author
luyuhong
committed
[CANN]: Replace aclrtMemsetSync with the aclnnInplaceZero operator to create zero tensors more efficiently and consistently within the computation graph
[CANN]: Replace aclrtMemsetSync with the aclnnInplaceZero operator to create zero tensors more efficiently and consistently within the computation graph
1 parent caf5681 commit 66e7c14

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include <aclnnop/aclnn_pow.h>
6868
#include <aclnnop/aclnn_grouped_matmul_v2.h>
6969
#include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
70+
#include <aclnnop/aclnn_zero.h>
7071
#include <float.h>
7172

7273
#include <cmath>
@@ -804,10 +805,11 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
804805
nb[i] = nb[i - 1] * ne[i - 1];
805806
}
806807

807-
ggml_cann_async_memset(ctx, buffer, n_bytes, 0);
808808
aclTensor* zero =
809809
ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
810+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero);
810811
return zero;
812+
GGML_UNUSED(n_bytes);
811813
}
812814

813815
/**

0 commit comments

Comments
 (0)