ggml-backend v2 : add ggml_backend_sched (#586)
* ggml-backend-v2 wip

* fix metal build

* ggml-alloc : use a real backend buffer in measure mode

* backend sched : ignore view ops to reduce the number of splits

* dynamic ggml_cgraph wip

* dyn graphs : remove n_tasks from ggml_cplan

* dyn graphs : update ggml_graph_import

* reset hash table in ggml_build_forward

* ggml-alloc : split into tensor and graph allocators

* add ggml_backend_sched_set_node_backend

* remove ggml_build_forward_ctx, ggml_build_backward_ctx
add ggml_opt_params::graph_size
add ggml_new_graph_custom, ggml_graph_overhead_custom
add ggml_graph_clear

* update examples and tests, fix issues

* update more examples

* update gpt-2/main-backend.cpp from master

* ggml : fix compile warning

* ci : update yolo, fix mnist, use gpt-2-backend

* ggml : fix uninit warning

* ci : switch to gpt-2-backend2

ggml-ci

* metal : skip noops early to avoid warnings from ggml_metal_get_buffer

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
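
For orientation, the sketch below shows how the new scheduler is meant to be used: create it over a list of backends, run a measure pass on a worst-case graph, then let it split and compute per-eval graphs. This is a hypothetical sketch, not code from the diff: the function names come from the commit title and message, their exact signatures are assumed, and build_graph() is a placeholder for the model's own graph construction.

// Hypothetical sketch -- names from the commit message, signatures assumed.
// build_graph() is a placeholder for the model's graph-building routine.
#include "ggml.h"
#include "ggml-backend.h"

struct ggml_cgraph * build_graph(struct ggml_context * ctx);   // placeholder, defined by the model code

void compute_with_sched(struct ggml_context * ctx, ggml_backend_t gpu) {
    // earlier backends in the list are preferred when a node could run on either
    ggml_backend_t backends[2] = { gpu, ggml_backend_cpu_init() };
    ggml_backend_sched_t sched = ggml_backend_sched_new(backends, 2);

    // measure pass: size the per-backend buffers from a worst-case graph
    ggml_backend_sched_init_measure(sched, build_graph(ctx));

    // optionally pin a node to a backend (new in this commit):
    // ggml_backend_sched_set_node_backend(sched, node, backends[1]);

    // per evaluation: the scheduler splits the graph between backends,
    // ignoring view ops to keep the number of splits low, and computes it
    ggml_backend_sched_graph_compute(sched, build_graph(ctx));

    ggml_backend_sched_free(sched);
    ggml_backend_free(backends[1]);
}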
slaren and ggerganov authored Oct 30, 2023
1 parent 05ff36f commit 08d748b
Showing 42 changed files with 5,066 additions and 1,095 deletions.
4 changes: 2 additions & 2 deletions ci/run.sh
@@ -145,8 +145,8 @@ function gg_run_gpt_2 {
 model="../models-mnt/gpt-2/ggml-model-gpt-2-117M.bin"
 prompts="../examples/prompts/gpt-2.txt"

-(time ./bin/gpt-2 --model ${model} -s 1234 -n 64 -tt ${prompts} ) 2>&1 | tee -a $OUT/${ci}-tg.log
-(time ./bin/gpt-2 --model ${model} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
+(time ./bin/gpt-2-backend2 --model ${model} -s 1234 -n 64 -tt ${prompts} ) 2>&1 | tee -a $OUT/${ci}-tg.log
+(time ./bin/gpt-2-backend2 --model ${model} -s 1234 -n 64 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log

 (time ./bin/gpt-2-batched --model ${model} -s 1234 -n 64 -np 8 -p "I believe the meaning of life is") 2>&1 | tee -a $OUT/${ci}-tg.log
10 changes: 5 additions & 5 deletions examples/dolly-v2/main.cpp
@@ -497,7 +497,7 @@ bool dollyv2_eval(
 };

 struct ggml_context * ctx0 = ggml_init(params);
-struct ggml_cgraph gf = { };
+struct ggml_cgraph * gf = ggml_new_graph(ctx0);

 // KQ_pos - contains the positions
 struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -555,8 +555,8 @@ bool dollyv2_eval(
 ( n_ctx)*ggml_element_size(model.memory_v),
 (il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));

-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
-ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
+ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
 }

 // Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -666,8 +666,8 @@ bool dollyv2_eval(
 //inpL = ggml_soft_max_inplace(ctx0, inpL);

 // run the computation
-ggml_build_forward_expand(&gf, inpL);
-ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
+ggml_build_forward_expand(gf, inpL);
+ggml_graph_compute_with_ctx(ctx0, gf, n_threads);

 //if (n_past%100 == 0) {
 // ggml_graph_print (&gf);
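
The dolly-v2 change above captures the pattern applied across the examples: ggml_cgraph is no longer a fixed-size struct on the stack but is allocated inside a ggml_context, and the resulting pointer is what gets passed to ggml_build_forward_expand and ggml_graph_compute_with_ctx. Below is a minimal self-contained sketch of that pattern; the memory size, tensor shapes, and thread count are illustrative and not taken from the diff.

// Minimal sketch of the dynamic-graph pattern; sizes and tensors are illustrative.
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,   // room for tensor data plus graph metadata
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx0 = ggml_init(params);

    // was: struct ggml_cgraph gf = { };  -- now the graph lives in the context
    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
    // a custom node capacity can be requested with the helper named in the commit message:
    // struct ggml_cgraph * gf2 = ggml_new_graph_custom(ctx0, /*size=*/4096, /*grads=*/false);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 16);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 16);

    ggml_build_forward_expand(gf, ggml_add(ctx0, a, b));
    ggml_graph_compute_with_ctx(ctx0, gf, /*n_threads=*/1);

    ggml_free(ctx0);
    return 0;
}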
14 changes: 13 additions & 1 deletion examples/gpt-2/CMakeLists.txt
@@ -1,7 +1,19 @@
 #
 # gpt-2

-set(TEST_TARGET gpt-2)
+set(TEST_TARGET gpt-2-ctx)
 add_executable(${TEST_TARGET} main-ctx.cpp)
 target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)

+set(TEST_TARGET gpt-2-alloc)
+add_executable(${TEST_TARGET} main-alloc.cpp)
+target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)
+
+set(TEST_TARGET gpt-2-backend)
+add_executable(${TEST_TARGET} main-backend.cpp)
+target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)
+
+set(TEST_TARGET gpt-2-backend2)
+add_executable(${TEST_TARGET} main.cpp)
+target_link_libraries(${TEST_TARGET} PRIVATE ggml common common-ggml)
+