@@ -4970,10 +4970,10 @@ static struct ggml_cgraph * llama_build_graph(
         // allocate input tensors and set input data
         //
 
-        if (batch.token && !alloc_inp_tokens && strcmp(name, "inp_tokens") == 0) {
+        if (!alloc_inp_tokens && strcmp(name, "inp_tokens") == 0) {
             ggml_allocr_alloc(lctx.alloc, cur);
 
-            if (!ggml_allocr_is_measure(lctx.alloc)) {
+            if (!ggml_allocr_is_measure(lctx.alloc) && batch.token) {
                 const int64_t n_tokens = cur->ne[0];
 
                 memcpy(cur->data, batch.token, n_tokens*ggml_element_size(cur));
@@ -4982,10 +4982,10 @@ static struct ggml_cgraph * llama_build_graph(
             alloc_inp_tokens = true;
         }
 
-        if (batch.embd && !alloc_inp_embd && strcmp(name, "inp_embd") == 0) {
+        if (!alloc_inp_embd && strcmp(name, "inp_embd") == 0) {
             ggml_allocr_alloc(lctx.alloc, cur);
 
-            if (!ggml_allocr_is_measure(lctx.alloc)) {
+            if (!ggml_allocr_is_measure(lctx.alloc) && batch.embd) {
                 const int64_t n_embd   = cur->ne[0];
                 const int64_t n_tokens = cur->ne[1];
 
@@ -4995,10 +4995,10 @@ static struct ggml_cgraph * llama_build_graph(
             alloc_inp_embd = true;
         }
 
-        if (batch.pos && !alloc_inp_pos && strcmp(name, "inp_pos") == 0) {
+        if (!alloc_inp_pos && strcmp(name, "inp_pos") == 0) {
             ggml_allocr_alloc(lctx.alloc, cur);
 
-            if (!ggml_allocr_is_measure(lctx.alloc)) {
+            if (!ggml_allocr_is_measure(lctx.alloc) && batch.pos) {
                 const int64_t n_tokens = cur->ne[0];
 
                 int32_t * data = (int32_t *) cur->data;
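
All three hunks apply the same fix. Before the change, a null batch.token (or batch.embd, batch.pos) short-circuited the outer condition, so ggml_allocr_alloc was never called for that input tensor even though it appears in the graph; a batch built from embeddings, for example, carries no token IDs, which left inp_tokens unallocated. After the change, any input tensor reached by the callback is always registered with the allocator, and the batch-field check only guards the data copy. The allocation must also run during the measure pass (ggml_allocr_is_measure), since that pass computes the buffer layout and needs to see every tensor.

A minimal standalone sketch of the pattern, using hypothetical tensor / batch_t / allocr stand-ins for ggml_tensor, llama_batch, and ggml_allocr (not the real ggml API):

    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <new>

    // hypothetical stand-ins, kept just detailed enough to show the control flow
    struct tensor  { int64_t ne0 = 0; void * data = nullptr; };
    struct batch_t { const int32_t * token = nullptr; }; // null for an embeddings-only batch

    struct allocr {
        bool   measure = false; // sizing pass: compute the buffer layout, no data
        size_t needed  = 0;     // bytes the shared graph buffer must hold

        void alloc(tensor & t) {
            needed += t.ne0 * sizeof(int32_t);  // layout is computed in both passes,
            if (!measure) {                     // so alloc must run unconditionally
                t.data = ::operator new(t.ne0 * sizeof(int32_t));
            }
        }
    };

    void handle_inp_tokens(allocr & a, tensor & cur, const batch_t & batch) {
        a.alloc(cur); // after the fix: always register the tensor with the allocator

        // copy only outside the measure pass and only when the batch provides tokens
        if (!a.measure && batch.token) {
            memcpy(cur.data, batch.token, cur.ne0 * sizeof(int32_t));
        }
    }

In other words, "does this tensor need a slot in the buffer?" and "does this batch have data to put in it?" are separate questions, and the patch moves the second one out of the first.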