
Commit 0f80411

fix metal build
1 parent 38f61d9 commit 0f80411

File tree

4 files changed: +26 -35 lines changed

examples/gpt-2/main-backend.cpp

Lines changed: 15 additions & 15 deletions
@@ -87,7 +87,7 @@ struct gpt2_model {
     //
     struct ggml_context * ctx;
 
-    ggml_backend_t backends = NULL;
+    ggml_backend_t backend = NULL;
 
     ggml_backend_buffer_t buffer_w;
     ggml_backend_buffer_t buffer_kv;
@@ -238,8 +238,8 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 #ifdef GGML_USE_CUBLAS
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: using CUDA backend\n", __func__);
-        model.backends = ggml_backend_cuda_init();
-        if (!model.backends) {
+        model.backend = ggml_backend_cuda_init();
+        if (!model.backend) {
             fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__);
         }
     }
@@ -256,19 +256,19 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
     }
 #endif
 
-    if (!model.backends) {
+    if (!model.backend) {
         // fallback to CPU backend
         fprintf(stderr, "%s: using CPU backend\n", __func__);
-        model.backends = ggml_backend_cpu_init();
+        model.backend = ggml_backend_cpu_init();
     }
 
-    if (!model.backends) {
+    if (!model.backend) {
         fprintf(stderr, "%s: ggml_backend_cpu_init() failed\n", __func__);
         return false;
     }
 
     // allocate weights buffer
-    model.buffer_w = ggml_backend_alloc_buffer(model.backends, buffer_size);
+    model.buffer_w = ggml_backend_alloc_buffer(model.backend, buffer_size);
 
     // prepare memory for the weights
     {
@@ -357,7 +357,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
         printf("%s: memory size = %8.2f MB, n_mem = %d\n", __func__, memory_size/1024.0/1024.0, n_mem);
 
         // create a backend buffer (can be in host or device memory)
-        model.buffer_kv = ggml_backend_alloc_buffer(model.backends, memory_size + 256);
+        model.buffer_kv = ggml_backend_alloc_buffer(model.backend, memory_size + 256);
 
         // allocate the tensors into the backend buffer
         {
@@ -439,7 +439,7 @@ bool gpt2_model_load(const std::string & fname, gpt2_model & model, gpt_vocab &
 
             ggml_allocr_alloc(alloc, tensor);
 
-            if (ggml_backend_is_cpu (model.backends)
+            if (ggml_backend_is_cpu (model.backend)
 #ifdef GGML_USE_METAL
                 || ggml_backend_is_metal(model.backend)
 #endif
@@ -799,15 +799,15 @@ bool gpt2_eval(
     ggml_allocr_alloc_graph(allocr, gf);
 
     // run the computation
-    if (ggml_backend_is_cpu(model.backends)) {
-        ggml_backend_cpu_set_n_threads(model.backends, n_threads);
+    if (ggml_backend_is_cpu(model.backend)) {
+        ggml_backend_cpu_set_n_threads(model.backend, n_threads);
     }
 #ifdef GGML_USE_METAL
     if (ggml_backend_is_metal(model.backend)) {
         ggml_backend_metal_set_n_cb(model.backend, n_threads);
     }
 #endif
-    ggml_backend_graph_compute(model.backends, gf);
+    ggml_backend_graph_compute(model.backend, gf);
 
     //if (n_past%100 == 0) {
     //    ggml_graph_print (&gf);
@@ -876,7 +876,7 @@ int main(int argc, char ** argv) {
     // allocate the compute buffer
     {
         // alignment required by the backend
-        size_t align = ggml_backend_get_alignment(model.backends);
+        size_t align = ggml_backend_get_alignment(model.backend);
         allocr = ggml_allocr_new_measure(align);
 
         // create the worst case graph for memory usage estimation
@@ -889,7 +889,7 @@ int main(int argc, char ** argv) {
 
         // recreate the allocator with the required memory
         ggml_allocr_free(allocr);
-        buf_compute = ggml_backend_alloc_buffer(model.backends, mem_size);
+        buf_compute = ggml_backend_alloc_buffer(model.backend, mem_size);
         allocr = ggml_allocr_new_from_buffer(buf_compute);
 
         fprintf(stderr, "%s: compute buffer size: %.2f MB\n", __func__, mem_size/1024.0/1024.0);
@@ -993,7 +993,7 @@ int main(int argc, char ** argv) {
     ggml_backend_buffer_free(model.buffer_w);
     ggml_backend_buffer_free(model.buffer_kv);
     ggml_backend_buffer_free(buf_compute);
-    ggml_backend_free(model.backends);
+    ggml_backend_free(model.backend);
 
     return 0;
 }
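
The change in this file is purely mechanical: the field is renamed from `model.backends` to `model.backend` so that the `#ifdef GGML_USE_METAL` branches, which already referred to `model.backend`, compile again. For reference, a minimal sketch of the init-with-fallback pattern the renamed field participates in, using only the init calls shown in the diff; the standalone helper and its bare `int` parameter are illustrative, not the example's exact code:

```cpp
// Minimal sketch (not the example's exact code): try the GPU backend if
// requested, otherwise fall back to the CPU backend.
#include <cstdio>
#include "ggml-backend.h"
#ifdef GGML_USE_CUBLAS
#include "ggml-cuda.h"
#endif

static ggml_backend_t init_single_backend(int n_gpu_layers) {
    ggml_backend_t backend = NULL;

#ifdef GGML_USE_CUBLAS
    if (n_gpu_layers > 0) {
        backend = ggml_backend_cuda_init(); // may return NULL if CUDA init fails
    }
#endif

    if (!backend) {
        // fallback to CPU backend
        backend = ggml_backend_cpu_init();
    }

    // caller checks for NULL and later releases it with ggml_backend_free()
    return backend;
}
```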

examples/gpt-2/main.cpp

Lines changed: 7 additions & 16 deletions
@@ -119,18 +119,21 @@ void init_backends(gpt2_model & model, const gpt_params & params) {
         fprintf(stderr, "%s: using Metal backend\n", __func__);
         ggml_metal_log_set_callback(ggml_log_callback_default, nullptr);
         gpu_backend = ggml_backend_metal_init();
-        if (gpu_backend) {
+        if (!gpu_backend) {
             fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__);
+        } else {
+            ggml_backend_metal_set_n_cb(gpu_backend, params.n_threads);
         }
     }
 #endif
-
     if (gpu_backend) {
         model.backends.push_back(gpu_backend);
     }
 
     // always add the CPU backend as a fallback
-    model.backends.push_back(ggml_backend_cpu_init());
+    ggml_backend_t cpu_backend = ggml_backend_cpu_init();
+    ggml_backend_cpu_set_n_threads(cpu_backend, params.n_threads);
+    model.backends.push_back(cpu_backend);
 }
 
 // load the model's weights from a file
@@ -874,7 +877,6 @@ struct ggml_cgraph * gpt2_graph(
 bool gpt2_eval(
         const gpt2_model & model,
         ggml_backend_sched_t sched,
-        const int n_threads,
         const int n_past,
         const std::vector<gpt_vocab::id> & embd_inp,
         std::vector<float> & embd_w) {
@@ -889,17 +891,6 @@ bool gpt2_eval(
     // allocate tensors
 
     // run the computation
-#if 0
-    ggml_backend_t backend = model.backends.front();
-    if (ggml_backend_is_cpu(backend)) {
-        ggml_backend_cpu_set_n_threads(backend, n_threads);
-    }
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(backend)) {
-        ggml_backend_metal_set_n_cb(backend, n_threads);
-    }
-#endif
-#endif
     ggml_backend_sched_graph_compute(sched, gf);
 
     //if (n_past%100 == 0) {
@@ -1020,7 +1011,7 @@ int main(int argc, char ** argv) {
         if (embd.size() > 0) {
            const int64_t t_start_us = ggml_time_us();
 
-            if (!gpt2_eval(model, sched, params.n_threads, n_past, embd, logits)) {
+            if (!gpt2_eval(model, sched, n_past, embd, logits)) {
                 printf("Failed to predict\n");
                 return 1;
             }
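
In the scheduler-based main.cpp, the thread count is now applied once per backend inside `init_backends()` (via `ggml_backend_metal_set_n_cb` and `ggml_backend_cpu_set_n_threads`), so `gpt2_eval()` drops its `n_threads` parameter and the dead `#if 0` block that duplicated this per-eval configuration is removed. A rough sketch of the resulting flow, assuming a `std::vector<ggml_backend_t>` of backends like `model.backends` in this example; the helper below is illustrative, not the file's exact code:

```cpp
// Sketch: configure threads when each backend is created, then evaluation only
// needs the scheduler.
#include <vector>
#include "ggml-backend.h"
#ifdef GGML_USE_METAL
#include "ggml-metal.h"
#endif

static void init_backends_sketch(std::vector<ggml_backend_t> & backends, int n_threads) {
    ggml_backend_t gpu_backend = NULL;

#ifdef GGML_USE_METAL
    gpu_backend = ggml_backend_metal_init();
    if (gpu_backend) {
        ggml_backend_metal_set_n_cb(gpu_backend, n_threads); // configured once, up front
    }
#endif
    if (gpu_backend) {
        backends.push_back(gpu_backend);
    }

    // always add the CPU backend as a fallback, also configured here
    ggml_backend_t cpu_backend = ggml_backend_cpu_init();
    ggml_backend_cpu_set_n_threads(cpu_backend, n_threads);
    backends.push_back(cpu_backend);
}

// With the backends configured at init time, gpt2_eval() reduces to building
// the graph and calling:
//     ggml_backend_sched_graph_compute(sched, gf);
```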

src/ggml-alloc.c

Lines changed: 2 additions & 2 deletions
@@ -17,8 +17,8 @@
 
 //#define GGML_ALLOCATOR_DEBUG
 
-#define AT_PRINTF(...) fprintf(stderr, __VA_ARGS__)
-//#define AT_PRINTF(...) ((void)0)
+//#define AT_PRINTF(...) fprintf(stderr, __VA_ARGS__)
+#define AT_PRINTF(...)
 
 // TODO: GGML_PAD ?
 static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
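
This hunk only silences the allocator's trace output: `AT_PRINTF` now expands to nothing, so every `AT_PRINTF(...)` call in ggml-alloc.c compiles away. A small sketch of the same compile-time trace-macro pattern, using a hypothetical `ALLOC_TRACE` switch instead of the file's comment-toggled definitions:

```cpp
// Compile-time trace macro: expands to fprintf when ALLOC_TRACE is defined,
// and to nothing otherwise, so normal builds pay no cost for trace calls.
#include <cstdio>

//#define ALLOC_TRACE // hypothetical switch; ggml-alloc.c toggles by commenting the define itself

#ifdef ALLOC_TRACE
#define AT_PRINTF(...) fprintf(stderr, __VA_ARGS__)
#else
#define AT_PRINTF(...)
#endif

int main() {
    AT_PRINTF("allocating %d bytes\n", 128); // no output unless ALLOC_TRACE is defined
    return 0;
}
```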

src/ggml-backend.c

Lines changed: 2 additions & 2 deletions
@@ -418,7 +418,7 @@ ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(ggml_backend_t backend_cp
 // scheduler
 
 #define GGML_MAX_BACKENDS 4
-#define GGML_MAX_SPLITS 64
+#define GGML_MAX_SPLITS 256
 #define GGML_MAX_SPLIT_INPUTS 16
 
 struct ggml_backend_sched_split {
@@ -733,7 +733,7 @@ static void sched_split_graph(ggml_backend_sched_t sched) {
     sched->splits[cur_split].i_end = graph->n_nodes;
     sched->n_splits = cur_split + 1;
 
-    fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fflush(stdout);
+    //fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph); fflush(stdout);
 
 #if 1
     // sanity check: all sources should have the same backend as the node
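
Raising `GGML_MAX_SPLITS` from 64 to 256 matters because the scheduler records each contiguous run of nodes assigned to one backend in a fixed-size array, and a larger graph split across backends can exceed 64 such runs. A rough sketch of that kind of fixed-capacity split table; the field names are illustrative and the real `ggml_backend_sched_split` layout may differ:

```cpp
// Illustrative fixed-capacity split table: each split is a contiguous node
// range [i_start, i_end) assigned to a single backend. Exceeding the capacity
// would overflow the array, hence the bump from 64 to 256 splits.
#define SKETCH_MAX_SPLITS 256

struct sched_split_sketch {
    int backend_id; // which backend runs this range of nodes
    int i_start;    // first node index of the split
    int i_end;      // one past the last node index
};

struct sched_sketch {
    struct sched_split_sketch splits[SKETCH_MAX_SPLITS];
    int n_splits;
};
```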
