From 7372b6227183e37f7aa8737188f71a728c716244 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 9 Dec 2023 13:18:58 +0200 Subject: [PATCH] ggml : ggml_get_rows support 2D indexing [n_tokens, n_experts] (cpu only) --- ggml.c | 17 +++++++++-------- ggml.h | 1 + 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/ggml.c b/ggml.c index 322d0c850af51..1c81e79126a62 100644 --- a/ggml.c +++ b/ggml.c @@ -4735,7 +4735,8 @@ struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) { - GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32); + GGML_ASSERT(a->ne[2] == b->ne[1]); + GGML_ASSERT(ggml_is_matrix(b) && b->type == GGML_TYPE_I32); bool is_node = false; @@ -4745,7 +4746,7 @@ struct ggml_tensor * ggml_get_rows( // TODO: implement non F32 return //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]); - struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, a->ne[0], b->ne[0]); + struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, a->ne[0], b->ne[0], b->ne[1]); result->op = GGML_OP_GET_ROWS; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; @@ -10348,8 +10349,8 @@ static void ggml_compute_forward_get_rows_q( const enum ggml_type type = src0->type; ggml_to_float_t const dequantize_row_q = type_traits[type].to_float; - assert( dst->ne[0] == nc); - assert( dst->ne[1] == nr); + assert( dst->ne[0] == nc); + assert(ggml_nrows(dst) == nr); assert(src0->nb[0] == ggml_type_size(type)); for (int i = 0; i < nr; ++i) { @@ -10375,8 +10376,8 @@ static void ggml_compute_forward_get_rows_f16( const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); - assert( dst->ne[0] == nc); - assert( dst->ne[1] == nr); + assert( dst->ne[0] == nc); + assert(ggml_nrows(dst) == nr); assert(src0->nb[0] == sizeof(ggml_fp16_t)); for (int i = 0; i < nr; ++i) { @@ -10403,8 +10404,8 @@ static void ggml_compute_forward_get_rows_f32( const int nc = src0->ne[0]; const int nr = ggml_nelements(src1); - assert( dst->ne[0] == nc); - assert( dst->ne[1] == nr); + assert( dst->ne[0] == nc); + assert(ggml_nrows(dst) == nr); assert(src0->nb[0] == sizeof(float)); for (int i = 0; i < nr; ++i) { diff --git a/ggml.h b/ggml.h index e0cb3b99bf853..b154b6dae5184 100644 --- a/ggml.h +++ b/ggml.h @@ -1263,6 +1263,7 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // supports 3D: a->ne[2] == b->ne[1] GGML_API struct ggml_tensor * ggml_get_rows( struct ggml_context * ctx, struct ggml_tensor * a,