Skip to content

Commit e6b7801

Browse files
authored
cann: Add host buffer type for Ascend NPU (#9406)
* feat: Add host buffer type for Ascend NPU(CANN backend) * fix some checking errors * Add a few comments
1 parent e665744 commit e6b7801

File tree

3 files changed

+121
-0
lines changed

3 files changed

+121
-0
lines changed

ggml/include/ggml-cann.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,13 @@ ggml_backend_cann_buffer_type(int32_t device);
8080
*/
8181
GGML_API GGML_CALL int32_t ggml_backend_cann_get_device_count(void);
8282

83+
/**
84+
* @brief Returns the pinned host buffer type, for use with the CPU backend to get faster copies between CPU and NPU.
85+
*
86+
* @return A pointer to the host buffer type interface.
87+
*/
88+
GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type(void);
89+
8390
/**
8491
* @brief Retrieves the description of a specific CANN device.
8592
*

ggml/src/ggml-cann.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1221,6 +1221,116 @@ ggml_backend_cann_buffer_type(int32_t device) {
12211221
return &ggml_backend_cann_buffer_types[device];
12221222
}
12231223

1224+
/**
1225+
* @brief Retrieves the name associated with a CANN host buffer type.
1226+
*
1227+
* This function returns the descriptive name associated with the specified
1228+
* CANN host buffer type context.
1229+
*
1230+
* @param buft Pointer to the host buffer type context.
1231+
* @return Const pointer to the C-style string containing the name.
1232+
*/
1233+
GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name(ggml_backend_buffer_type_t buft) {
1234+
return "CANN_Host";
1235+
1236+
GGML_UNUSED(buft);
1237+
}
1238+
1239+
/**
1240+
* @brief Retrieves the name associated with a CANN host buffer.
1241+
*
1242+
* This function returns the descriptive name associated with the specified
1243+
* CANN host buffer context.
1244+
*
1245+
* @param buft Pointer to the host buffer context.
1246+
* @return Const pointer to the C-style string containing the name.
1247+
*/
1248+
GGML_CALL static const char * ggml_backend_cann_host_buffer_name(ggml_backend_buffer_t buffer) {
1249+
return "CANN_Host";
1250+
1251+
GGML_UNUSED(buffer);
1252+
}
1253+
1254+
/**
1255+
* @brief Free resources associated with a CANN host buffer.
1256+
*
1257+
* This function frees the resources associated with a CANN host buffer, including
1258+
* its context.
1259+
*
1260+
* @param buffer The CANN host buffer to free.
1261+
*/
1262+
GGML_CALL static void ggml_backend_cann_host_buffer_free(ggml_backend_buffer_t buffer) {
1263+
ACL_CHECK(aclrtFreeHost(buffer->context));
1264+
}
1265+
1266+
/**
1267+
* @brief Allocates a new CANN host buffer of the specified size.
1268+
*
1269+
* This function allocates a new CANN host buffer with the given size.
1270+
* @param size Size in bytes of the host buffer to allocate.
1271+
* @return Pointer to the allocated host buffer, or nullptr if allocation fails.
1272+
*/
1273+
static void * ggml_cann_host_malloc(size_t size) {
1274+
if (getenv("GGML_CANN_NO_PINNED") != nullptr) {
1275+
return nullptr;
1276+
}
1277+
1278+
void * hostPtr = nullptr;
1279+
aclError err = aclrtMallocHost((void **) &hostPtr, size);
1280+
if (err != ACL_SUCCESS) {
1281+
1282+
GGML_CANN_LOG_WARN("%s: failed to allocate %.2f MiB of pinned memory: %s\n", __func__,
1283+
size / 1024.0 / 1024.0, aclGetRecentErrMsg());
1284+
return nullptr;
1285+
}
1286+
return hostPtr;
1287+
}
1288+
1289+
/**
1290+
* @brief Allocates a new CANN host buffer of the specified type and size.
1291+
*
1292+
* @param buft Pointer to the host buffer type context.
1293+
* @param size Size in bytes of the host buffer to allocate.
1294+
* @return Pointer to the allocated host buffer, or CPU buffer pointer if allocation fails.
1295+
*/
1296+
GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
1297+
void * hostPtr = ggml_cann_host_malloc(size);
1298+
1299+
if (hostPtr == nullptr) {
1300+
// fallback to cpu buffer
1301+
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
1302+
}
1303+
1304+
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(hostPtr, size);
1305+
buffer->buft = buft;
1306+
buffer->iface.get_name = ggml_backend_cann_host_buffer_name;
1307+
buffer->iface.free_buffer = ggml_backend_cann_host_buffer_free;
1308+
1309+
return buffer;
1310+
}
1311+
1312+
/**
1313+
* @brief Interface for managing CANN host buffer types in the GGML backend.
1314+
*
1315+
* Provides function pointers for allocating, querying properties, and managing
1316+
* memory for CANN buffer types in the GGML backend.
1317+
*/
1318+
GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type() {
1319+
static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
1320+
/* .iface = */ {
1321+
/* .get_name = */ ggml_backend_cann_host_buffer_type_name,
1322+
/* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
1323+
/* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
1324+
/* .get_max_size = */ NULL, // defaults to SIZE_MAX
1325+
/* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
1326+
/* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
1327+
},
1328+
/* .context = */ nullptr,
1329+
};
1330+
1331+
return &ggml_backend_cann_buffer_type_host;
1332+
}
1333+
12241334
/**
12251335
* @brief Computes the forward operation for a given tensor using CANN
12261336
* operations.

src/llama.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,10 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_cpu(bool host_buffer
21562156
if (host_buffer) {
21572157
buft = ggml_backend_sycl_host_buffer_type();
21582158
}
2159+
#elif defined(GGML_USE_CANN)
2160+
if (host_buffer) {
2161+
buft = ggml_backend_cann_host_buffer_type();
2162+
}
21592163
#elif defined(GGML_USE_CPU_HBM)
21602164
buft = ggml_backend_cpu_hbm_buffer_type();
21612165
#elif defined(GGML_USE_VULKAN)

0 commit comments

Comments
 (0)