Skip to content

Commit 96e56b4

Browse files
committed
update more examples
1 parent 39e63fa commit 96e56b4

File tree

11 files changed: +57 additions, -57 deletions

examples/dolly-v2/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,7 @@ bool dollyv2_eval(
497497
};
498498

499499
struct ggml_context * ctx0 = ggml_init(params);
500-
struct ggml_cgraph gf = { };
500+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
501501

502502
// KQ_pos - contains the positions
503503
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -555,8 +555,8 @@ bool dollyv2_eval(
555555
( n_ctx)*ggml_element_size(model.memory_v),
556556
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));
557557

558-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
559-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
558+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
559+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
560560
}
561561

562562
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -666,8 +666,8 @@ bool dollyv2_eval(
666666
//inpL = ggml_soft_max_inplace(ctx0, inpL);
667667

668668
// run the computation
669-
ggml_build_forward_expand(&gf, inpL);
670-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
669+
ggml_build_forward_expand(gf, inpL);
670+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
671671

672672
//if (n_past%100 == 0) {
673673
// ggml_graph_print (&gf);

examples/gpt-2/main-ctx.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -429,7 +429,7 @@ bool gpt2_eval(
429429
};
430430

431431
struct ggml_context * ctx0 = ggml_init(params);
432-
struct ggml_cgraph gf = {};
432+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
433433

434434
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
435435
memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -491,8 +491,8 @@ bool gpt2_eval(
491491
struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past));
492492
struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past));
493493

494-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
495-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
494+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
495+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
496496
}
497497

498498
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -673,8 +673,8 @@ bool gpt2_eval(
673673
//inpL = ggml_soft_max_inplace(ctx0, inpL);
674674

675675
// run the computation
676-
ggml_build_forward_expand(&gf, inpL);
677-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
676+
ggml_build_forward_expand(gf, inpL);
677+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
678678

679679
//if (n_past%100 == 0) {
680680
// ggml_graph_print (&gf);
@@ -767,7 +767,7 @@ int main(int argc, char ** argv) {
767767
size_t mem_per_token = 0;
768768
gpt2_eval(model, params.n_threads, 0, { 0, 1, 2, 3 }, logits, mem_per_token);
769769

770-
for (int i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
770+
for (size_t i = embd.size(); i < embd_inp.size() + params.n_predict; i++) {
771771
// predict
772772
if (embd.size() > 0) {
773773
const int64_t t_start_us = ggml_time_us();
@@ -805,9 +805,9 @@ int main(int argc, char ** argv) {
805805
embd.push_back(id);
806806
} else {
807807
// if here, it means we are still processing the input prompt
808-
for (int k = i; k < embd_inp.size(); k++) {
808+
for (size_t k = i; k < embd_inp.size(); k++) {
809809
embd.push_back(embd_inp[k]);
810-
if (embd.size() >= params.n_batch) {
810+
if (int32_t(embd.size()) >= params.n_batch) {
811811
break;
812812
}
813813
}

examples/gpt-j/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -425,7 +425,7 @@ bool gptj_eval(
425425
};
426426

427427
struct ggml_context * ctx0 = ggml_init(params);
428-
struct ggml_cgraph gf = {};
428+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
429429

430430
// KQ_pos - contains the positions
431431
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -471,8 +471,8 @@ bool gptj_eval(
471471
( n_ctx)*ggml_element_size(model.memory_v),
472472
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));
473473

474-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
475-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
474+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
475+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
476476
}
477477

478478
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -590,8 +590,8 @@ bool gptj_eval(
590590
//inpL = ggml_soft_max_inplace(ctx0, inpL);
591591

592592
// run the computation
593-
ggml_build_forward_expand(&gf, inpL);
594-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
593+
ggml_build_forward_expand(gf, inpL);
594+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
595595

596596
//if (n_past%100 == 0) {
597597
// ggml_graph_print (&gf);

examples/gpt-neox/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -477,7 +477,7 @@ bool gpt_neox_eval(
477477
};
478478

479479
struct ggml_context * ctx0 = ggml_init(params);
480-
struct ggml_cgraph gf = {};
480+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
481481

482482
// KQ_pos - contains the positions
483483
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -537,8 +537,8 @@ bool gpt_neox_eval(
537537
( n_ctx)*ggml_element_size(model.memory_v),
538538
(il*n_ctx)*ggml_element_size(model.memory_v)*n_embd + n_past*ggml_element_size(model.memory_v));
539539

540-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
541-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
540+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
541+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
542542
}
543543

544544
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -653,8 +653,8 @@ bool gpt_neox_eval(
653653
//inpL = ggml_soft_max_inplace(ctx0, inpL);
654654

655655
// run the computation
656-
ggml_build_forward_expand(&gf, inpL);
657-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
656+
ggml_build_forward_expand(gf, inpL);
657+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
658658

659659
//if (n_past%100 == 0) {
660660
// ggml_graph_print (&gf);

examples/mnist/main-cnn.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ int mnist_eval(
6161
};
6262

6363
struct ggml_context * ctx0 = ggml_init(params);
64-
struct ggml_cgraph gf = {};
64+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
6565

6666
struct ggml_tensor * input = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, 28, 28, 1, 1);
6767
memcpy(input->data, digit.data(), ggml_nbytes(input));
@@ -86,16 +86,16 @@ int mnist_eval(
8686
ggml_tensor * probs = ggml_soft_max(ctx0, cur);
8787
ggml_set_name(probs, "probs");
8888

89-
ggml_build_forward_expand(&gf, probs);
90-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
89+
ggml_build_forward_expand(gf, probs);
90+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
9191

9292
//ggml_graph_print(&gf);
93-
ggml_graph_dump_dot(&gf, NULL, "mnist-cnn.dot");
93+
ggml_graph_dump_dot(gf, NULL, "mnist-cnn.dot");
9494

9595
if (fname_cgraph) {
9696
// export the compute graph for later use
9797
// see the "mnist-cpu" example
98-
ggml_graph_export(&gf, fname_cgraph);
98+
ggml_graph_export(gf, fname_cgraph);
9999

100100
fprintf(stderr, "%s: exported compute graph to '%s'\n", __func__, fname_cgraph);
101101
}

examples/mnist/main-mtl.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ int mnist_eval(
3535
struct ggml_context * ctx_data = NULL;
3636
struct ggml_context * ctx_eval = NULL;
3737

38-
struct ggml_cgraph gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
38+
struct ggml_cgraph * gf = ggml_graph_import(fname_cgraph, &ctx_data, &ctx_eval);
3939

4040
// allocate work context
4141
static size_t buf_size = 128ull*1024*1024; // TODO
@@ -50,12 +50,12 @@ int mnist_eval(
5050
struct ggml_context * ctx_work = ggml_init(params);
5151

5252
// this allocates all Metal resources and memory buffers
53-
auto ctx_mtl = mnist_mtl_init(ctx_data, ctx_eval, ctx_work, &gf);
53+
auto ctx_mtl = mnist_mtl_init(ctx_data, ctx_eval, ctx_work, gf);
5454

5555
int prediction = -1;
5656

5757
for (int i = 0; i < 1; ++i) {
58-
struct ggml_tensor * input = ggml_graph_get_tensor(&gf, "input");
58+
struct ggml_tensor * input = ggml_graph_get_tensor(gf, "input");
5959

6060
if (i % 2 == 0) {
6161
memcpy(input->data, digit.data(), ggml_nbytes(input));
@@ -64,7 +64,7 @@ int mnist_eval(
6464
}
6565

6666
// the actual inference happens here
67-
prediction = mnist_mtl_eval(ctx_mtl, &gf);
67+
prediction = mnist_mtl_eval(ctx_mtl, gf);
6868
}
6969

7070
mnist_mtl_free(ctx_mtl);

examples/mnist/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ int mnist_eval(
188188
};
189189

190190
struct ggml_context * ctx0 = ggml_init(params);
191-
struct ggml_cgraph gf = {};
191+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
192192

193193
struct ggml_tensor * input = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, hparams.n_input);
194194
memcpy(input->data, digit.data(), ggml_nbytes(input));
@@ -203,16 +203,16 @@ int mnist_eval(
203203
ggml_set_name(probs, "probs");
204204

205205
// build / export / run the computation graph
206-
ggml_build_forward_expand(&gf, probs);
207-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
206+
ggml_build_forward_expand(gf, probs);
207+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
208208

209209
//ggml_graph_print (&gf);
210-
ggml_graph_dump_dot(&gf, NULL, "mnist.dot");
210+
ggml_graph_dump_dot(gf, NULL, "mnist.dot");
211211

212212
if (fname_cgraph) {
213213
// export the compute graph for later use
214214
// see the "mnist-cpu" example
215-
ggml_graph_export(&gf, "mnist.ggml");
215+
ggml_graph_export(gf, "mnist.ggml");
216216

217217
fprintf(stderr, "%s: exported compute graph to '%s'\n", __func__, fname_cgraph);
218218
}

examples/mpt/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,7 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
499499
};
500500

501501
struct ggml_context * ctx0 = ggml_init(params);
502-
struct ggml_cgraph gf = {};
502+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
503503

504504
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
505505
memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd));
@@ -544,8 +544,8 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
544544
ggml_view_1d(ctx0, model.memory_v, N * n_embd,
545545
(ggml_element_size(model.memory_v) * n_embd) * (il * n_ctx + n_past));
546546

547-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
548-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
547+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
548+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
549549
}
550550

551551
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0,
@@ -650,8 +650,8 @@ bool mpt_eval(const mpt_model & model, const int n_threads, const int n_past,
650650
// inpL = ggml_soft_max(ctx0, inpL);
651651

652652
// run the computation
653-
ggml_build_forward_expand(&gf, inpL);
654-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
653+
ggml_build_forward_expand(gf, inpL);
654+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
655655

656656
// std::cout << "Qcur" << std::endl;
657657
// print_tensor(Qcur);

examples/replit/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -476,7 +476,7 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
476476
};
477477

478478
struct ggml_context * ctx0 = ggml_init(params);
479-
struct ggml_cgraph gf = {};
479+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
480480

481481
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
482482
memcpy(embd->data, embd_inp.data(), N * ggml_element_size(embd));
@@ -515,8 +515,8 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
515515
ggml_view_1d(ctx0, model.memory_v, N * n_embd,
516516
(ggml_element_size(model.memory_v) * n_embd) * (il * n_ctx + n_past));
517517

518-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
519-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
518+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
519+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
520520
}
521521

522522
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0,
@@ -614,8 +614,8 @@ bool replit_eval(const replit_model & model, const int n_threads, const int n_pa
614614
// inpL = ggml_soft_max(ctx0, inpL);
615615

616616
// run the computation
617-
ggml_build_forward_expand(&gf, inpL);
618-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
617+
ggml_build_forward_expand(gf, inpL);
618+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
619619

620620
// std::cout << "Qcur" << std::endl;
621621
// print_tensor(Qcur);

examples/starcoder/main.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,7 @@ bool starcoder_eval(
464464
};
465465

466466
struct ggml_context * ctx0 = ggml_init(params);
467-
struct ggml_cgraph gf = {};
467+
struct ggml_cgraph * gf = ggml_new_graph(ctx0);
468468

469469
struct ggml_tensor * embd = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
470470
memcpy(embd->data, embd_inp.data(), N*ggml_element_size(embd));
@@ -528,8 +528,8 @@ bool starcoder_eval(
528528
struct ggml_tensor * k = ggml_view_1d(ctx0, model.memory_k, N*n_embd, (ggml_element_size(model.memory_k)*n_embd)*(il*n_ctx + n_past));
529529
struct ggml_tensor * v = ggml_view_1d(ctx0, model.memory_v, N*n_embd, (ggml_element_size(model.memory_v)*n_embd)*(il*n_ctx + n_past));
530530

531-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Kcur, k));
532-
ggml_build_forward_expand(&gf, ggml_cpy(ctx0, Vcur, v));
531+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Kcur, k));
532+
ggml_build_forward_expand(gf, ggml_cpy(ctx0, Vcur, v));
533533
}
534534

535535
// Q = Qcur.contiguous().view(n_embd/n_head, n_head, N).permute(0, 2, 1, 3)
@@ -716,8 +716,8 @@ bool starcoder_eval(
716716
//inpL = ggml_soft_max_inplace(ctx0, inpL);
717717

718718
// run the computation
719-
ggml_build_forward_expand(&gf, inpL);
720-
ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);
719+
ggml_build_forward_expand(gf, inpL);
720+
ggml_graph_compute_with_ctx(ctx0, gf, n_threads);
721721

722722
//if (n_past%100 == 0) {
723723
// ggml_graph_print (&gf);

Comments (0)