diff --git a/crates/models/bloom/src/lib.rs b/crates/models/bloom/src/lib.rs
index 9b7331d7..edf821d8 100644
--- a/crates/models/bloom/src/lib.rs
+++ b/crates/models/bloom/src/lib.rs
@@ -145,8 +145,8 @@ impl KnownModel for Bloom {
 
         // normalize embeddings
         input_layer = ctx0.op_norm(&input_layer);
-        input_layer = ctx0.op_mul(&ctx0.op_repeat(&self.norm, &input_layer), &input_layer);
-        input_layer = ctx0.op_add(&ctx0.op_repeat(&self.norm_bias, &input_layer), &input_layer);
+        input_layer = ctx0.op_mul(&input_layer, &self.norm);
+        input_layer = ctx0.op_add(&input_layer, &self.norm_bias);
 
         let mut gf = ggml::ComputationGraph::new();
         for il in 0..n_layer {
@@ -157,21 +157,12 @@ impl KnownModel for Bloom {
             current = ctx0.op_norm(&input_layer);
 
             // cur = attention_norm * cur
-            current = ctx0.op_mul(
-                &ctx0.op_repeat(&self.layers[il].attention_norm, &current),
-                &current,
-            );
-            current = ctx0.op_add(
-                &ctx0.op_repeat(&self.layers[il].attention_norm_b, &current),
-                &current,
-            );
+            current = ctx0.op_mul(&current, &self.layers[il].attention_norm);
+            current = ctx0.op_add(&current, &self.layers[il].attention_norm_b);
 
             //attention
             current = ctx0.op_mul_mat(&self.layers[il].query_key_value, &current);
-            current = ctx0.op_add(
-                &ctx0.op_repeat(&self.layers[il].query_key_value_b, &current),
-                &current,
-            );
+            current = ctx0.op_add(&current, &self.layers[il].query_key_value_b);
 
             // self-attention
             let nb = current.get_nb()[1];
@@ -293,7 +284,7 @@ impl KnownModel for Bloom {
 
             // projection
             current = ctx0.op_mul_mat(&self.layers[il].wo, &current);
-            current = ctx0.op_add(&ctx0.op_repeat(&self.layers[il].wo_b, &current), &current);
+            current = ctx0.op_add(&current, &self.layers[il].wo_b);
 
             let input_feed_forward = ctx0.op_add(&current, &input_self_attention);
 
@@ -302,19 +293,13 @@ impl KnownModel for Bloom {
 
             current = ctx0.op_norm(&input_feed_forward);
 
             // cur = ffn_norm*cur + ffn_norm_b
-            current = ctx0.op_mul(
-                &ctx0.op_repeat(&self.layers[il].ffn_norm, &current),
-                &current,
-            );
+            current = ctx0.op_mul(&current, &self.layers[il].ffn_norm);
 
-            current = ctx0.op_add(
-                &ctx0.op_repeat(&self.layers[il].ffn_norm_b, &current),
-                &current,
-            );
+            current = ctx0.op_add(&current, &self.layers[il].ffn_norm_b);
 
             current = ctx0.op_mul_mat(&self.layers[il].w1, &current);
 
-            current = ctx0.op_add(&ctx0.op_repeat(&self.layers[il].w1_b, &current), &current);
+            current = ctx0.op_add(&current, &self.layers[il].w1_b);
 
             // SILU activation
@@ -322,7 +307,7 @@ impl KnownModel for Bloom {
 
             current = ctx0.op_mul_mat(&self.layers[il].w2, &current);
 
-            current = ctx0.op_add(&ctx0.op_repeat(&self.layers[il].w2_b, &current), &current);
+            current = ctx0.op_add(&current, &self.layers[il].w2_b);
 
             current = ctx0.op_add(&current, &input_feed_forward);
 
@@ -334,15 +319,9 @@ impl KnownModel for Bloom {
 
         input_layer = ctx0.op_norm(&input_layer);
 
        // inpL = norm*inpL
-        input_layer = ctx0.op_mul(
-            &ctx0.op_repeat(&self.output_norm, &input_layer),
-            &input_layer,
-        );
+        input_layer = ctx0.op_mul(&input_layer, &self.output_norm);
 
-        input_layer = ctx0.op_add(
-            &ctx0.op_repeat(&self.output_norm_bias, &input_layer),
-            &input_layer,
-        );
+        input_layer = ctx0.op_add(&input_layer, &self.output_norm_bias);
 
         let embeddings_tensor: ggml::Tensor = input_layer.share();