@@ -2705,6 +2705,7 @@ static struct ggml_cgraph * llm_build_llama(
2705
2705
2706
2706
// KQ_pos - contains the positions
2707
2707
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d (ctx0, GGML_TYPE_I32, n_tokens);
2708
+ offload_func_kq (KQ_pos);
2708
2709
ggml_allocr_alloc (lctx.alloc , KQ_pos);
2709
2710
if (!ggml_allocr_is_measure (lctx.alloc )) {
2710
2711
int * data = (int *) KQ_pos->data ;
@@ -2715,6 +2716,7 @@ static struct ggml_cgraph * llm_build_llama(
2715
2716
2716
2717
// K_shift
2717
2718
struct ggml_tensor * K_shift = ggml_new_tensor_1d (ctx0, GGML_TYPE_I32, n_ctx);
2719
+ offload_func_kq (K_shift);
2718
2720
ggml_allocr_alloc (lctx.alloc , K_shift);
2719
2721
if (!ggml_allocr_is_measure (lctx.alloc )) {
2720
2722
int * data = (int *) K_shift->data ;
@@ -3087,6 +3089,7 @@ static struct ggml_cgraph * llm_build_baichaun(
3087
3089
3088
3090
// KQ_pos - contains the positions
3089
3091
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d (ctx0, GGML_TYPE_I32, n_tokens);
3092
+ offload_func_kq (KQ_pos);
3090
3093
ggml_allocr_alloc (lctx.alloc , KQ_pos);
3091
3094
if (!ggml_allocr_is_measure (lctx.alloc )) {
3092
3095
int * data = (int *) KQ_pos->data ;
@@ -3097,6 +3100,7 @@ static struct ggml_cgraph * llm_build_baichaun(
3097
3100
3098
3101
// K_shift
3099
3102
struct ggml_tensor * K_shift = ggml_new_tensor_1d (ctx0, GGML_TYPE_I32, n_ctx);
3103
+ offload_func_kq (K_shift);
3100
3104
ggml_allocr_alloc (lctx.alloc , K_shift);
3101
3105
if (!ggml_allocr_is_measure (lctx.alloc )) {
3102
3106
int * data = (int *) K_shift->data ;
@@ -3486,6 +3490,7 @@ static struct ggml_cgraph * llm_build_falcon(
3486
3490
3487
3491
// KQ_pos - contains the positions
3488
3492
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d (ctx0, GGML_TYPE_I32, n_tokens);
3493
+ offload_func_kq (KQ_pos);
3489
3494
ggml_allocr_alloc (lctx.alloc , KQ_pos);
3490
3495
if (!ggml_allocr_is_measure (lctx.alloc )) {
3491
3496
int * data = (int *) KQ_pos->data ;
@@ -3496,6 +3501,7 @@ static struct ggml_cgraph * llm_build_falcon(
3496
3501
3497
3502
// K_shift
3498
3503
struct ggml_tensor * K_shift = ggml_new_tensor_1d (ctx0, GGML_TYPE_I32, n_ctx);
3504
+ offload_func_kq (K_shift);
3499
3505
ggml_allocr_alloc (lctx.alloc , K_shift);
3500
3506
if (!ggml_allocr_is_measure (lctx.alloc )) {
3501
3507
int * data = (int *) K_shift->data ;
0 commit comments