Skip to content

Commit

Permalink
metal : simplify soft_max encoding
Browse files Browse the repository at this point in the history
ggml-ci
  • Loading branch information
ggerganov committed Nov 29, 2023
1 parent 390a445 commit 580fe20
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 7 deletions.
7 changes: 1 addition & 6 deletions ggml-metal.m
Original file line number Diff line number Diff line change
Expand Up @@ -1040,12 +1040,7 @@ void ggml_metal_graph_compute(
const float scale = ((float *) dst->op_params)[0];

[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
if (id_src1) {
[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
} else {
[encoder setBuffer:nil offset:0 atIndex:1];
}

[encoder setBuffer:id_src1 offset:offs_src1 atIndex:1];
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4];
Expand Down
2 changes: 1 addition & 1 deletion llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3705,8 +3705,8 @@ static struct ggml_tensor * llm_build_kqv(
struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
cb(kq, "kq", il);

// TODO: !!!!!!!!!
if (max_alibi_bias > 0.0f) {
// temporary branch until we figure out how to handle ggml_alibi through ggml_add
kq = ggml_scale(ctx, kq, kq_scale);
cb(kq, "kq_scaled", il);

Expand Down

0 comments on commit 580fe20

Please sign in to comment.