@@ -87,7 +87,7 @@ struct gpt2_model {
     //
     struct ggml_context * ctx;
 
-    ggml_backend_t backends = NULL;
+    ggml_backend_t backend = NULL;
 
     ggml_backend_buffer_t buffer_w;
     ggml_backend_buffer_t buffer_kv;
@@ -238,8 +238,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 #ifdef GGML_USE_CUBLAS
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: using CUDA backend\n", __func__);
-        model.backends = ggml_backend_cuda_init();
-        if (!model.backends) {
+        model.backend = ggml_backend_cuda_init();
+        if (!model.backend) {
             fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
         }
     }
@@ -256,19 +256,19 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
     }
 #endif
 
-    if (!model.backends) {
+    if (!model.backend) {
         // fallback to CPU backend
         fprintf(stderr, "%s: using CPU backend\n", __func__);
-        model.backends = ggml_backend_cpu_init();
+        model.backend = ggml_backend_cpu_init();
     }
 
-    if (!model.backends) {
+    if (!model.backend) {
         fprintf(stderr, "%s: ggml_backend_cpu_init() failed\n", __func__);
         return false;
     }
 
     // allocate weights buffer
-    model.buffer_w = ggml_backend_alloc_buffer(model.backends, buffer_size);
+    model.buffer_w = ggml_backend_alloc_buffer(model.backend, buffer_size);
 
     // prepare memory for the weights
     {
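
The hunks above are the core of the backend abstraction: try the preferred backend first, fall back to the CPU backend, and keep everything behind a single ggml_backend_t handle that all later buffer allocations come from. A condensed sketch of the pattern under the same ggml-backend API (error logging trimmed):

    ggml_backend_t backend = NULL;
#ifdef GGML_USE_CUBLAS
    if (n_gpu_layers > 0) {
        backend = ggml_backend_cuda_init(); // may return NULL on failure
    }
#endif
    if (!backend) {
        backend = ggml_backend_cpu_init();  // CPU fallback
    }
    if (!backend) {
        return false;                       // no usable backend at all
    }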
@@ -357,7 +357,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
         printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
 
         // create a backend buffer (can be in host or device memory)
-        model.buffer_kv = ggml_backend_alloc_buffer(model.backends, memory_size + 256);
+        model.buffer_kv = ggml_backend_alloc_buffer(model.backend, memory_size + 256);
 
         // allocate the tensors into the backend buffer
         {
@@ -439,7 +439,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 
             ggml_allocr_alloc(alloc, tensor);
 
-            if (ggml_backend_is_cpu(model.backends)
+            if (ggml_backend_is_cpu(model.backend)
 #ifdef GGML_USE_METAL
                 || ggml_backend_is_metal(model.backend)
 #endif
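
This check decides how each weight tensor is read in: CPU and Metal buffers are host-addressable (Metal uses unified memory on Apple hardware), so data can be read straight into the tensor, while other device buffers need a staging copy. A sketch of both paths, assuming fin is the open model file stream as in the rest of the example:

    if (/* buffer is host-addressable, i.e. the CPU/Metal case above */) {
        // read straight into the tensor's memory
        fin.read(reinterpret_cast<char *>(tensor->data), ggml_nbytes(tensor));
    } else {
        // read into a temporary host buffer, then upload to the device
        std::vector<char> read_buf(ggml_nbytes(tensor));
        fin.read(read_buf.data(), read_buf.size());
        ggml_backend_tensor_set(tensor, read_buf.data(), 0, read_buf.size());
    }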
@@ -799,15 +799,15 @@ bool gpt2_eval(
     ggml_allocr_alloc_graph(allocr, gf);
 
     // run the computation
-    if (ggml_backend_is_cpu(model.backends)) {
-        ggml_backend_cpu_set_n_threads(model.backends, n_threads);
+    if (ggml_backend_is_cpu(model.backend)) {
+        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
     }
 #ifdef GGML_USE_METAL
     if (ggml_backend_is_metal(model.backend)) {
         ggml_backend_metal_set_n_cb(model.backend, n_threads);
     }
 #endif
-    ggml_backend_graph_compute(model.backends, gf);
+    ggml_backend_graph_compute(model.backend, gf);
 
     // if (n_past%100 == 0) {
     //     ggml_graph_print(&gf);
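
Only the thread and callback knobs are backend-specific; the compute call itself is uniform across backends. In context, the per-eval flow is roughly the following (a sketch; gpt2_graph stands in for the example's graph-building function, which is not part of this diff):

    ggml_allocr_reset(allocr);
    struct ggml_cgraph * gf = gpt2_graph(model, allocr, embd); // hypothetical builder call
    ggml_allocr_alloc_graph(allocr, gf);

    if (ggml_backend_is_cpu(model.backend)) {
        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
    }
    ggml_backend_graph_compute(model.backend, gf);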
@@ -876,7 +876,7 @@ int main(int argc, char ** argv) {
     // allocate the compute buffer
     {
         // alignment required by the backend
-        size_t align = ggml_backend_get_alignment(model.backends);
+        size_t align = ggml_backend_get_alignment(model.backend);
         allocr = ggml_allocr_new_measure(align);
 
         // create the worst case graph for memory usage estimation
@@ -889,7 +889,7 @@ int main(int argc, char ** argv) {
 
         // recreate the allocator with the required memory
         ggml_allocr_free(allocr);
-        buf_compute = ggml_backend_alloc_buffer(model.backends, mem_size);
+        buf_compute = ggml_backend_alloc_buffer(model.backend, mem_size);
         allocr = ggml_allocr_new_from_buffer(buf_compute);
 
         fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0/1024.0);
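
The compute buffer is sized with a measure pass: a measuring allocator replays the worst-case graph to find peak memory usage, and only then is the real buffer allocated on the backend. Condensed, under the same ggml-alloc API (worst-case graph construction elided; gf is that graph):

    size_t align = ggml_backend_get_alignment(model.backend);
    allocr = ggml_allocr_new_measure(align);                // measure pass, no real memory committed
    size_t mem_size = ggml_allocr_alloc_graph(allocr, gf);  // returns peak usage of the graph
    ggml_allocr_free(allocr);

    buf_compute = ggml_backend_alloc_buffer(model.backend, mem_size);
    allocr = ggml_allocr_new_from_buffer(buf_compute);      // real allocator, reused for every eval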
@@ -993,7 +993,7 @@ int main(int argc, char ** argv) {
     ggml_backend_buffer_free(model.buffer_w);
     ggml_backend_buffer_free(model.buffer_kv);
     ggml_backend_buffer_free(buf_compute);
-    ggml_backend_free(model.backends);
+    ggml_backend_free(model.backend);
 
     return 0;
 }