@@ -35,6 +35,13 @@ bool ggml_backend_buft_supports_backend(ggml_backend_buffer_type_t buft, ggml_ba
     return buft->iface.supports_backend(buft, backend);
 }
 
+bool ggml_backend_buft_is_host(ggml_backend_buffer_type_t buft) {
+    if (buft->iface.is_host) {
+        return buft->iface.is_host(buft);
+    }
+    return false;
+}
+
 // backend buffer
 
 ggml_backend_buffer_t ggml_backend_buffer_init(
@@ -94,6 +101,14 @@ size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct g
     return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type(buffer), tensor);
 }
 
+void ggml_backend_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    buffer->iface.clear(buffer, value);
+}
+
+bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) {
+    return ggml_backend_buft_is_host(ggml_backend_buffer_type(buffer));
+}
+
 ggml_backend_buffer_type_t ggml_backend_buffer_type(ggml_backend_buffer_t buffer) {
     return buffer->buft;
 }
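
The two wrappers above expose clearing and host visibility as public API. A minimal usage sketch, assuming a CPU backend created with the existing ggml_backend_cpu_init() and ggml_backend_alloc_buffer() APIs (the 1 MB size is arbitrary):

    // sketch: allocate a CPU buffer, zero it, and check host visibility
    ggml_backend_t backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf = ggml_backend_alloc_buffer(backend, 1024*1024);

    ggml_backend_buffer_clear(buf, 0); // dispatches to iface.clear (a memset for the CPU backend)

    if (ggml_backend_buffer_is_host(buf)) {
        // host-visible memory can be accessed directly through the base pointer
        void * base = ggml_backend_buffer_get_base(buf);
        memset(base, 0, 16); // e.g. touch the first 16 bytes directly
    }

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);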
@@ -378,7 +393,6 @@ static void * ggml_backend_cpu_buffer_get_base(ggml_backend_buffer_t buffer) {
 
 static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
     free(buffer->context);
-    GGML_UNUSED(buffer);
 }
 
 static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
@@ -411,6 +425,10 @@ static void ggml_backend_cpu_buffer_cpy_tensor_to(ggml_backend_buffer_t buffer,
     GGML_UNUSED(buffer);
 }
 
+static void ggml_backend_cpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+    memset(buffer->context, value, buffer->size);
+}
+
 static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
     /* .free_buffer     = */ ggml_backend_cpu_buffer_free_buffer,
     /* .get_base        = */ ggml_backend_cpu_buffer_get_base,
@@ -419,6 +437,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i = {
     /* .get_tensor      = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor_from = */ ggml_backend_cpu_buffer_cpy_tensor_from,
     /* .cpy_tensor_to   = */ ggml_backend_cpu_buffer_cpy_tensor_to,
+    /* .clear           = */ ggml_backend_cpu_buffer_clear,
 };
 
 // for buffers from ptr, free is not called
@@ -430,6 +449,7 @@ static struct ggml_backend_buffer_i cpu_backend_buffer_i_from_ptr = {
     /* .get_tensor      = */ ggml_backend_cpu_buffer_get_tensor,
     /* .cpy_tensor_from = */ ggml_backend_cpu_buffer_cpy_tensor_from,
     /* .cpy_tensor_to   = */ ggml_backend_cpu_buffer_cpy_tensor_to,
+    /* .clear           = */ ggml_backend_cpu_buffer_clear,
 };
 
 static const size_t TENSOR_ALIGNMENT = 64; // should be enough for AVX 512
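
Both CPU interface tables now share the same memset-based clear; they differ only in free_buffer, because buffers created from a caller-owned pointer must not free the memory. A small sketch of that from_ptr path (the array and fill value are illustrative):

    // sketch: wrap caller-owned memory; ggml frees the wrapper, not the data
    uint8_t data[256];
    ggml_backend_buffer_t buf = ggml_backend_cpu_buffer_from_ptr(data, sizeof(data));
    ggml_backend_buffer_clear(buf, 0xAA);  // memset(data, 0xAA, 256) via iface.clear
    ggml_backend_buffer_free(buf);         // releases the wrapper only, not data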
@@ -455,20 +475,70 @@ static bool ggml_backend_cpu_buffer_type_supports_backend(ggml_backend_buffer_ty
     GGML_UNUSED(buft);
 }
 
+static bool ggml_backend_cpu_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
+    return true;
+
+    GGML_UNUSED(buft);
+}
+
 ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
-    static struct ggml_backend_buffer_type ggml_backend_buffer_type_cpu = {
+    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type = {
         /* .iface = */ {
             /* .alloc_buffer     = */ ggml_backend_cpu_buffer_type_alloc_buffer,
             /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
             /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
             /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
+            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
         },
         /* .context = */ NULL,
     };
 
-    return &ggml_backend_buffer_type_cpu;
+    return &ggml_backend_cpu_buffer_type;
 }
 
+#ifdef GGML_USE_CPU_HBM
+
+// buffer type HBM
+
+#include <hbwmalloc.h>
+
+static void ggml_backend_cpu_hbm_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+    hbw_free(buffer->context);
+}
+
+static ggml_backend_buffer_t ggml_backend_cpu_hbm_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
+    //void * ptr = hbw_malloc(size);
+    void * ptr;
+    int result = hbw_posix_memalign(&ptr, ggml_backend_cpu_buffer_type_get_alignment(buft), size);
+    if (result != 0) {
+        fprintf(stderr, "failed to allocate HBM buffer of size %zu\n", size);
+        return NULL;
+    }
+
+    // FIXME: this is a hack to avoid having to implement a new buffer type
+    ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(ptr, size);
+    buffer->buft = buft;
+    buffer->iface.free_buffer = ggml_backend_cpu_hbm_buffer_free_buffer;
+
+    return buffer;
+}
+
+ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type() {
+    static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_hbm = {
+        /* .iface = */ {
+            /* .alloc_buffer     = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
+            /* .get_alignment    = */ ggml_backend_cpu_buffer_type_get_alignment,
+            /* .get_alloc_size   = */ NULL, // defaults to ggml_nbytes
+            /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
+            /* .is_host          = */ ggml_backend_cpu_buffer_type_is_host,
+        },
+        /* .context = */ NULL,
+    };
+
+    return &ggml_backend_cpu_buffer_type_hbm;
+}
+#endif
+
 struct ggml_backend_cpu_context {
     int n_threads;
     void * work_data;
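
When ggml is built with GGML_USE_CPU_HBM, this alternative buffer type allocates from high-bandwidth memory via memkind's hbw_posix_memalign and reuses the whole CPU buffer interface, overriding only free_buffer. A hedged sketch of selecting it at runtime, mirroring the same build flag as the patch above (the 1 MB size is arbitrary):

    // sketch: prefer the HBM-backed buffer type when compiled in
    #ifdef GGML_USE_CPU_HBM
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_hbm_buffer_type();
    #else
    ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
    #endif
    ggml_backend_buffer_t buf = ggml_backend_buft_alloc_buffer(buft, 1024*1024);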
@@ -505,7 +575,7 @@ static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(ggml_backend
     struct ggml_backend_plan_cpu * cpu_plan = malloc(sizeof(struct ggml_backend_plan_cpu));
 
     cpu_plan->cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
-    cpu_plan->cgraph = *cgraph;
+    cpu_plan->cgraph = *cgraph; // FIXME: deep copy
 
     if (cpu_plan->cplan.work_size > 0) {
         cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
@@ -1180,7 +1250,7 @@ void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml
 // utils
 void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->buffer == NULL);
-    GGML_ASSERT(tensor->data == NULL);
+    //GGML_ASSERT(tensor->data == NULL); // views of pre-allocated tensors may have the data set, but still need to be initialized
     GGML_ASSERT(tensor->view_src != NULL);
     GGML_ASSERT(tensor->view_src->buffer != NULL);
     GGML_ASSERT(tensor->view_src->data != NULL);
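
The relaxed assertion lets ggml_backend_view_init accept views whose data pointer was already filled in during pre-allocation, while still requiring a fully allocated view_src. A hedged sketch of the call pattern (ctx is an existing ggml_context and parent an already-allocated tensor; both are assumed for illustration):

    // sketch: initialize a view over an already-allocated tensor
    struct ggml_tensor * view = ggml_view_1d(ctx, parent, parent->ne[0], 0);
    // view->data may already point into parent's buffer here; view_init
    // must still run so the view's buffer (and backend extras) are set up
    ggml_backend_view_init(parent->buffer, view);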