Commit 5bcbe50: remove repeat from bloom

LLukas22 authored and AmineDiro committed Aug 15, 2023
1 parent 43dade0 commit 5bcbe50
Showing 1 changed file with 12 additions and 33 deletions.
45 changes: 12 additions & 33 deletions crates/models/bloom/src/lib.rs
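Every hunk below applies the same transformation: a tile-then-combine pair such as op_mul(&ctx0.op_repeat(&a, &x), &x) becomes a direct op_mul(&x, &a), and likewise for op_add with the bias tensors, relying on ggml's elementwise ops to broadcast the small norm/bias tensor across the activation. A minimal sketch of why the two forms agree, in plain Rust over Vecs rather than the ggml API (mul_with_repeat and mul_broadcast are illustrative helpers, not functions from this crate):

// Broadcasting a row vector across a matrix gives the same result as
// tiling (repeating) it to the matrix shape first and multiplying.

fn mul_with_repeat(matrix: &[Vec<f32>], row: &[f32]) -> Vec<Vec<f32>> {
    // Tile the row once per matrix row, then multiply elementwise
    // (the op_repeat-then-op_mul pattern this commit removes).
    matrix
        .iter()
        .map(|m| {
            let tiled = row.to_vec();
            m.iter().zip(tiled.iter()).map(|(a, b)| a * b).collect()
        })
        .collect()
}

fn mul_broadcast(matrix: &[Vec<f32>], row: &[f32]) -> Vec<Vec<f32>> {
    // Multiply each matrix row by the row vector directly
    // (the broadcasting behaviour the new op_mul calls rely on).
    matrix
        .iter()
        .map(|m| m.iter().zip(row.iter()).map(|(a, b)| a * b).collect())
        .collect()
}

fn main() {
    let matrix = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
    let row = vec![10.0, 100.0];
    assert_eq!(mul_with_repeat(&matrix, &row), mul_broadcast(&matrix, &row));
    println!("repeat-then-mul == broadcast-mul");
}

The same identity covers the op_add calls: repeating a bias row and adding it elementwise equals adding the row to each matrix row directly.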
@@ -145,8 +145,8 @@ impl KnownModel for Bloom {
 
         // normalize embeddings
         input_layer = ctx0.op_norm(&input_layer);
-        input_layer = ctx0.op_mul(&ctx0.op_repeat(&self.norm, &input_layer), &input_layer);
-        input_layer = ctx0.op_add(&ctx0.op_repeat(&self.norm_bias, &input_layer), &input_layer);
+        input_layer = ctx0.op_mul(&input_layer, &self.norm);
+        input_layer = ctx0.op_add(&input_layer, &self.norm_bias);
 
         let mut gf = ggml::ComputationGraph::new();
         for il in 0..n_layer {
@@ -157,21 +157,12 @@ impl KnownModel for Bloom {
             current = ctx0.op_norm(&input_layer);
 
             // cur = attention_norm * cur
-            current = ctx0.op_mul(
-                &ctx0.op_repeat(&self.layers[il].attention_norm, &current),
-                &current,
-            );
-            current = ctx0.op_add(
-                &ctx0.op_repeat(&self.layers[il].attention_norm_b, &current),
-                &current,
-            );
+            current = ctx0.op_mul(&current, &self.layers[il].attention_norm);
+            current = ctx0.op_add(&current, &self.layers[il].attention_norm_b);
 
             // attention
             current = ctx0.op_mul_mat(&self.layers[il].query_key_value, &current);
-            current = ctx0.op_add(
-                &ctx0.op_repeat(&self.layers[il].query_key_value_b, &current),
-                &current,
-            );
+            current = ctx0.op_add(&current, &self.layers[il].query_key_value_b);
 
             // self-attention
             let nb = current.get_nb()[1];
@@ -293,7 +284,7 @@ impl KnownModel for Bloom {
 
             // projection
             current = ctx0.op_mul_mat(&self.layers[il].wo, &current);
-            current = ctx0.op_add(&ctx0.op_repeat(&self.layers[il].wo_b, &current), &current);
+            current = ctx0.op_add(&current, &self.layers[il].wo_b);
 
             let input_feed_forward = ctx0.op_add(&current, &input_self_attention);

Expand All @@ -302,27 +293,21 @@ impl KnownModel for Bloom {
current = ctx0.op_norm(&input_feed_forward);

// cur = ffn_norm*cur + ffn_norm_b
current = ctx0.op_mul(
&ctx0.op_repeat(&self.layers[il].ffn_norm, &current),
&current,
);
current = ctx0.op_mul(&current, &self.layers[il].ffn_norm);

current = ctx0.op_add(
&ctx0.op_repeat(&self.layers[il].ffn_norm_b, &current),
&current,
);
current = ctx0.op_add(&current, &self.layers[il].ffn_norm_b);

current = ctx0.op_mul_mat(&self.layers[il].w1, &current);

current = ctx0.op_add(&ctx0.op_repeat(&self.layers[il].w1_b, &current), &current);
current = ctx0.op_add(&current, &self.layers[il].w1_b);

// SILU activation

current = ctx0.op_gelu(&current);

current = ctx0.op_mul_mat(&self.layers[il].w2, &current);

current = ctx0.op_add(&ctx0.op_repeat(&self.layers[il].w2_b, &current), &current);
current = ctx0.op_add(&current, &self.layers[il].w2_b);

current = ctx0.op_add(&current, &input_feed_forward);

@@ -334,15 +319,9 @@ impl KnownModel for Bloom {
         input_layer = ctx0.op_norm(&input_layer);
 
         // inpL = norm * inpL
-        input_layer = ctx0.op_mul(
-            &ctx0.op_repeat(&self.output_norm, &input_layer),
-            &input_layer,
-        );
+        input_layer = ctx0.op_mul(&input_layer, &self.output_norm);
 
-        input_layer = ctx0.op_add(
-            &ctx0.op_repeat(&self.output_norm_bias, &input_layer),
-            &input_layer,
-        );
+        input_layer = ctx0.op_add(&input_layer, &self.output_norm_bias);
 
         let embeddings_tensor: ggml::Tensor = input_layer.share();