@@ -1220,6 +1220,73 @@ ggml_backend_cann_buffer_type(int32_t device) {
1220
1220
return &ggml_backend_cann_buffer_types[device];
1221
1221
}
1222
1222
1223
+ // host buffer type
1224
+
1225
+ GGML_CALL static const char * ggml_backend_cann_host_buffer_type_name (ggml_backend_buffer_type_t buft) {
1226
+ return " CANN_Host" ;
1227
+
1228
+ GGML_UNUSED (buft);
1229
+ }
1230
+
1231
+ GGML_CALL static const char * ggml_backend_cann_host_buffer_name (ggml_backend_buffer_t buffer) {
1232
+ return " CANN_Host" ;
1233
+
1234
+ GGML_UNUSED (buffer);
1235
+ }
1236
+
1237
+ GGML_CALL static void ggml_backend_cann_host_buffer_free_buffer (ggml_backend_buffer_t buffer) {
1238
+ ACL_CHECK (aclrtFreeHost (buffer->context ));
1239
+ }
1240
+
1241
+ static void * ggml_cann_host_malloc (size_t size) {
1242
+ if (getenv (" GGML_CANN_NO_PINNED" ) != nullptr ) {
1243
+ return nullptr ;
1244
+ }
1245
+
1246
+ void * ptr = nullptr ;
1247
+ aclError err = aclrtMallocHost ((void **) &ptr, size);
1248
+ if (err != ACL_SUCCESS) {
1249
+
1250
+ GGML_CANN_LOG_WARN (" %s: failed to allocate %.2f MiB of pinned memory: %s\n " , __func__,
1251
+ size / 1024.0 / 1024.0 , aclGetRecentErrMsg ());
1252
+ return nullptr ;
1253
+ }
1254
+
1255
+ return ptr;
1256
+ }
1257
+
1258
+ GGML_CALL static ggml_backend_buffer_t ggml_backend_cann_host_buffer_type_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size) {
1259
+ void * ptr = ggml_cann_host_malloc (size);
1260
+
1261
+ if (ptr == nullptr ) {
1262
+ // fallback to cpu buffer
1263
+ return ggml_backend_buft_alloc_buffer (ggml_backend_cpu_buffer_type (), size);
1264
+ }
1265
+
1266
+ ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr (ptr, size);
1267
+ buffer->buft = buft;
1268
+ buffer->iface .get_name = ggml_backend_cann_host_buffer_name;
1269
+ buffer->iface .free_buffer = ggml_backend_cann_host_buffer_free_buffer;
1270
+
1271
+ return buffer;
1272
+ }
1273
+
1274
+ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cann_host_buffer_type () {
1275
+ static struct ggml_backend_buffer_type ggml_backend_cann_buffer_type_host = {
1276
+ /* .iface = */ {
1277
+ /* .get_name = */ ggml_backend_cann_host_buffer_type_name,
1278
+ /* .alloc_buffer = */ ggml_backend_cann_host_buffer_type_alloc_buffer,
1279
+ /* .get_alignment = */ ggml_backend_cpu_buffer_type ()->iface .get_alignment ,
1280
+ /* .get_max_size = */ NULL , // defaults to SIZE_MAX
1281
+ /* .get_alloc_size = */ ggml_backend_cpu_buffer_type ()->iface .get_alloc_size ,
1282
+ /* .is_host = */ ggml_backend_cpu_buffer_type ()->iface .is_host ,
1283
+ },
1284
+ /* .context = */ nullptr ,
1285
+ };
1286
+
1287
+ return &ggml_backend_cann_buffer_type_host;
1288
+ }
1289
+
1223
1290
/* *
1224
1291
* @brief Computes the forward operation for a given tensor using CANN
1225
1292
* operations.
0 commit comments