Skip to content

Commit

Permalink
Fix Metal backend broken from the allocator changes (ggerganov#2455)
Browse files: browse the repository at this point in the history.
* fix Metal backend broken from the allocator changes
  • Loading branch information
slaren authored Jul 31, 2023
1 parent a113689 commit 9d2382b
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions llama.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -1812,6 +1812,12 @@ static bool llama_eval_internal(
// otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;

struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];

LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);

#if GGML_USE_MPI
const int64_t n_layer = hparams.n_layer;
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
Expand All @@ -1825,7 +1831,10 @@ static bool llama_eval_internal(
//}
ggml_metal_set_n_cb (lctx.ctx_metal, n_threads);
ggml_metal_graph_compute(lctx.ctx_metal, gf);
ggml_metal_get_tensor (lctx.ctx_metal, cur);
ggml_metal_get_tensor (lctx.ctx_metal, res);
if (!lctx.embedding.empty()) {
ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
}
} else {
// IMPORTANT:
// Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
Expand Down Expand Up @@ -1856,12 +1865,6 @@ static bool llama_eval_internal(
// update kv token count
lctx.kv_self.n = n_past + N;

struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];

LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);

if (cgraph_fname) {
ggml_graph_export(gf, cgraph_fname);
}
Expand Down

0 comments on commit 9d2382b

Please sign in to comment.