@@ -466,9 +466,7 @@ struct llm_graph_params {
466466
467467class llm_graph_result : public llm_graph_result_i {
468468public:
// Constructor. The '-' lines are the old inline definition: it stored max_nodes in the
// member of the same name and then called reset(). The '+' line keeps only the
// declaration; the out-of-line definition is not visible in this view.
469- llm_graph_result (int64_t max_nodes) : max_nodes(max_nodes) {
470- reset ();
471- }
469+ llm_graph_result (int64_t max_nodes);
472470
// Defaulted virtual destructor.
473471 virtual ~llm_graph_result () = default ;
474472
@@ -480,60 +478,20 @@ class llm_graph_result : public llm_graph_result_i {
// Returns the gf member (the graph pointer held by this result).
480478 ggml_cgraph * get_gf () override { return gf; }
// Returns the raw pointer obtained from ctx_compute.get(); ctx_compute itself is not released here.
481479 ggml_context * get_ctx () override { return ctx_compute.get (); }
482480
// Old inline accessor (removed by this diff): returned the max_nodes member unchanged.
483- int64_t get_max_nodes () const {
484- return max_nodes;
485- }
486-
// Old inline reset() (the '-' lines, removed by this diff). In order, it:
//   1. set the result tensor pointers t_tokens, t_logits, t_embd and t_embd_pooled to nullptr
//   2. emptied the inputs container
//   3. resized buf_compute_meta and created a new ggml context over that buffer
//   4. created a new graph in that context and stored it in gf
// The two '+' lines interleaved below are the new declaration-only forms of
// get_max_nodes() and reset(); they are not part of the old body.
487- void reset () override {
488- t_tokens = nullptr ;
489- t_logits = nullptr ;
490- t_embd = nullptr ;
491- t_embd_pooled = nullptr ;
492-
493- inputs.clear ();
494-
// buffer size = per-tensor overhead times max_nodes, plus the graph overhead for max_nodes
495- buf_compute_meta.resize (ggml_tensor_overhead ()*max_nodes + ggml_graph_overhead_custom (max_nodes, false ));
481+ int64_t get_max_nodes () const ;
496482
// the context is given buf_compute_meta as its memory buffer, with no_alloc set to true
497- ggml_init_params params = {
498- /* .mem_size =*/ buf_compute_meta.size (),
499- /* .mem_buffer =*/ buf_compute_meta.data (),
500- /* .no_alloc =*/ true ,
501- };
483+ void reset () override ;
502484
// replaces whatever ctx_compute held before with the newly initialized context
503- ctx_compute.reset (ggml_init (params));
504-
// the graph is created with the same max_nodes used to size the buffer above
505- gf = ggml_new_graph_custom (ctx_compute.get (), max_nodes, false );
506- }
507-
// Old inline set_inputs() (the '-' lines): called set_input(ubatch) on every element of
// inputs, in container order. The '+' line keeps only the declaration; the out-of-line
// definition is not visible in this view.
508- void set_inputs (const llama_ubatch * ubatch) override {
509- for (auto & input : inputs) {
510- input->set_input (ubatch);
511- }
512- }
485+ void set_inputs (const llama_ubatch * ubatch) override ;
513486
514487 // try to update the existing graph result using the new graph parameters in order to reuse it
515488 // this can only be done if we determine that the resulting graph using the new graph parameters
516489 // would be identical to the existing graph. in that case, we simply have to update the memory
517490 // contexts of the input tensors of the graph and we can reuse it for another computation
518491 // return true if the graph was updated and can be reused
// Old inline can_reuse() (the '-' lines); the '+' line interleaved below is the new
// declaration-only form.
519- bool can_reuse (const llm_graph_params & params) override {
// early exit: if the stored params do not allow reuse with the new params, no input is consulted
520- if (!this ->params .allow_reuse (params)) {
521- return false ;
522- }
523-
524- bool res = true ;
525-
// note: '&=' does not short-circuit, so can_reuse() is called on every input even after
// one of them has returned false
526- for (auto & input : inputs) {
527- res &= input->can_reuse (params);
528- }
492+ bool can_reuse (const llm_graph_params & params) override ;
529493
// true only if every input reported that it can be reused
530- return res;
531- }
532-
// Old inline add_input() (the '-' lines): moved the given input to the end of inputs and
// returned the raw pointer of that stored element; the container keeps holding the
// llm_graph_input_ptr. The '+' line keeps only the declaration; the out-of-line
// definition is not visible in this view.
533- llm_graph_input_i * add_input (llm_graph_input_ptr input) {
534- inputs.emplace_back (std::move (input));
535- return inputs.back ().get ();
536- }
494+ llm_graph_input_i * add_input (llm_graph_input_ptr input);
537495
538496 // important graph nodes
539497 ggml_tensor * t_tokens = nullptr ;
@@ -556,6 +514,9 @@ class llm_graph_result : public llm_graph_result_i {
556514 // we will use this to determine whether the graph can be reused by comparing them with the new parameters
557515 // note: these are updated after constructing the new graph
558516 llm_graph_params params;
517+
518+ // env: LLAMA_GRAPH_RESULT_DEBUG
519+ int debug = 0 ;
559520};
560521
561522//
0 commit comments