Skip to content

Commit 490da45

Browse files
committed
feat: Add host buffer type for Ascend NPU(CANN backend)
1 parent 436787f commit 490da45

File tree

3 files changed

+78
-0
lines changed

3 files changed

+78
-0
lines changed

ggml/include/ggml-cann.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
8080
*/
8181
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
8282

83+
/**
84+
* @brief Retrieves the pinned host buffer type, for use with the CPU backend for faster copies between CPU and NPU.
85+
*
86+
* @return A pointer to the host buffer type interface.
87+
*/
88+
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
89+
8390
/**
8491
* @brief Retrieves the description of a specific CANN device.
8592
*

ggml/src/ggml-cann.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,6 +1220,73 @@ ggml_backend_cann_buffer_type(int32_t device) {
12201220
return &ggml_backend_cann_buffer_types[device];
12211221
}
12221222

1223+
// host buffer type
1224+
1225+
/**
 * @brief Reports the name of the CANN host buffer type.
 *
 * @param buft Buffer type being queried (not used).
 * @return The constant string "CANN_Host".
 */
GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
    // The name does not depend on the buffer type instance.
    GGML_UNUSED(buft);

    return "CANN_Host";
}
1230+
1231+
/**
 * @brief Reports the name of a CANN host buffer.
 *
 * @param buffer Buffer being queried (not used).
 * @return The constant string "CANN_Host".
 */
GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
    // The name does not depend on the buffer instance.
    GGML_UNUSED(buffer);

    return "CANN_Host";
}
1236+
1237+
/**
 * @brief Releases the host memory owned by a CANN host buffer.
 *
 * Passes the buffer's context pointer to aclrtFreeHost() and checks the
 * result with ACL_CHECK.
 *
 * @param buffer Buffer whose context pointer is handed to aclrtFreeHost().
 */
GGML_CALL static void ggml_backend_cann_host_buffer_free_buffer(ggml_backend_buffer_t buffer) {
    // NOTE(review): statement kept verbatim; ACL_CHECK is defined elsewhere
    // and may embed the expression text in its diagnostics.
    ACL_CHECK(aclrtFreeHost(buffer->context));
}
1240+
1241+
static void * ggml_cann_host_malloc(size_t size) {
1242+
if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
1243+
return nullptr;
1244+
}
1245+
1246+
void * ptr = nullptr;
1247+
aclError err = aclrtMallocHost((void **) &ptr, size);
1248+
if (err != ACL_SUCCESS) {
1249+
1250+
GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
1251+
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
1252+
return nullptr;
1253+
}
1254+
1255+
return ptr;
1256+
}
1257+
1258+
/**
 * @brief Allocates a buffer of the CANN host buffer type.
 *
 * Tries ggml_cann_host_malloc() first. On success the memory is wrapped with
 * ggml_backend_cpu_buffer_from_ptr() and the resulting buffer is retargeted
 * to this buffer type with CANN-specific name and free callbacks. If the
 * host allocation returns nullptr, a plain CPU buffer is allocated instead.
 *
 * @param buft Buffer type recorded on the returned buffer (pinned path only).
 * @param size Requested buffer size in bytes.
 * @return The newly created buffer.
 */
GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
    void * host_mem = ggml_cann_host_malloc(size);

    if (host_mem != nullptr) {
        ggml_backend_buffer_t host_buffer = ggml_backend_cpu_buffer_from_ptr(host_mem, size);

        // Override the CPU defaults so the memory is released via the CANN free callback.
        host_buffer->buft              = buft;
        host_buffer->iface.get_name    = ggml_backend_cann_host_buffer_name;
        host_buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free_buffer;

        return host_buffer;
    }

    // fallback to cpu buffer
    return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
}
1273+
1274+
/**
 * @brief Returns the CANN host buffer type.
 *
 * The buffer type is a function-local static object. Its name and allocation
 * callbacks are CANN-specific; alignment, allocation size and is_host are
 * taken from the CPU buffer type's interface.
 *
 * @return Pointer to the static host buffer type object.
 */
GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
    static struct ggml_backend_buffer_type host_buft = {
        /* .iface    = */ {
            /* .get_name         = */ ggml_backend_cann_host_buffer_type_name,
            /* .alloc_buffer     = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
            /* .get_alignment    = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
            /* .get_max_size     = */ NULL, // defaults to SIZE_MAX
            /* .get_alloc_size   = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
            /* .is_host          = */ ggml_backend_cpu_buffer_type()->iface.is_host,
        },
        /* .context  = */ nullptr,
    };

    return &host_buft;
}
1289+
12231290
/**
12241291
* @brief Computes the forward operation for a given tensor using CANN
12251292
* operations.

src/llama.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2088,6 +2088,10 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
20882088
if (host_buffer) {
20892089
buft = ggml_backend_sycl_host_buffer_type();
20902090
}
2091+
#elif defined(GGML_USE_CANN)
2092+
if (host_buffer) {
2093+
buft = ggml_backend_cann_host_buffer_type();
2094+
}
20912095
#elif defined(GGML_USE_CPU_HBM)
20922096
buft = ggml_backend_cpu_hbm_buffer_type();
20932097
#elif defined(GGML_USE_VULKAN)

0 commit comments

Comments
 (0)