Skip to content

Commit 10fcc41

Browse files
jeffbolznv and slaren authored
vulkan: Update topk_moe fusion to handle gpt's late softmax (#16656)
* vulkan: Update topk_moe fusion to handle gpt's late softmax

  Based on #16649.
* Add ggml_check_edges
* Add sync logging to show fusion effects
* handle clamp added in #16655
* Update ggml/src/ggml-impl.h

Co-authored-by: Diego Devesa <slarengh@gmail.com>
1 parent bcf5bda commit 10fcc41

File tree

3 files changed

+272
-138
lines changed

3 files changed

+272
-138
lines changed

ggml/src/ggml-impl.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,7 @@ static inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
682682
#endif
683683

684684
#ifdef __cplusplus
685+
#include <array>
685686
#include <initializer_list>
686687
#include <vector>
687688

@@ -697,6 +698,21 @@ inline bool ggml_can_fuse_subgraph(const struct ggml_cgraph * cgraph,
697698
return ggml_can_fuse_subgraph(cgraph, start_idx, ops.size(), ops.begin(), outputs.begin(), outputs.size());
698699
}
699700

701+
// Return true if the edges in the graph match expectations.
702+
inline bool ggml_check_edges(const struct ggml_cgraph * cgraph,
703+
int start_idx,
704+
std::initializer_list<std::array<int, 3>> edges) {
705+
for (const auto & edge : edges) {
706+
int dst_node = edge[0];
707+
int src_idx = edge[1];
708+
int src_node = edge[2];
709+
if (cgraph->nodes[start_idx + dst_node]->src[src_idx] != cgraph->nodes[start_idx + src_node]) {
710+
return false;
711+
}
712+
}
713+
return true;
714+
}
715+
700716
// expose GGUF internals for test code
701717
GGML_API size_t gguf_type_size(enum gguf_type type);
702718
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);

0 commit comments

Comments
 (0)