Skip to content

Commit 9899f8e

Browse files
committed
generalise extra_supports_op
1 parent 3a65d72 commit 9899f8e

File tree

7 files changed

+62
-41
lines changed

7 files changed

+62
-41
lines changed

Makefile

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -948,11 +948,11 @@ OBJ_GGML = \
948948
$(DIR_GGML)/src/ggml-quants.o \
949949
$(DIR_GGML)/src/ggml-threading.o \
950950
$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
951-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o \
952-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
951+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp17.o \
952+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64_cpp17.o \
953953
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \
954954
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
955-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \
955+
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits_cpp17.o \
956956
$(OBJ_GGML_EXT)
957957

958958
OBJ_LLAMA = \
@@ -1092,17 +1092,9 @@ DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)
10921092
# Default target
10931093
all: $(BUILD_TARGETS)
10941094

1095-
# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
1096-
# g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
1097-
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o: \
1098-
ggml/src/ggml-cpu/ggml-cpu.cpp \
1099-
ggml/include/ggml-backend.h \
1100-
ggml/include/ggml.h \
1101-
ggml/include/ggml-alloc.h \
1102-
ggml/src/ggml-backend-impl.h \
1103-
ggml/include/ggml-cpu.h \
1104-
ggml/src/ggml-impl.h
1105-
$(CXX) $(CXXFLAGS) -c $< -o $@
1095+
# for c++17 build
1096+
$(DIR_GGML)/%_cpp17.o: $(DIR_GGML)/%.cpp
1097+
$(CXX) $(CXXFLAGS) -MMD -std=c++17 -c $< -o $@
11061098

11071099
# Rules for building object files
11081100
$(DIR_GGML)/%.o: $(DIR_GGML)/%.c

ggml/src/ggml-cpu/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ ggml_add_backend_library(ggml-cpu
1313

1414
target_include_directories(ggml-cpu PRIVATE .)
1515

16+
target_compile_features (ggml-cpu PRIVATE cxx_std_17)
17+
1618
if (APPLE AND GGML_ACCELERATE)
1719
find_library(ACCELERATE_FRAMEWORK Accelerate)
1820
if (ACCELERATE_FRAMEWORK)

ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3865,7 +3865,7 @@ static const struct ggml_cpu_tensor_traits ggml_aarch64_iq4_nl_4x4_q8_0 = {
38653865
/* .gemm = */ ggml_gemm_iq4_nl_4x4_q8_0,
38663866
};
38673867

3868-
const struct ggml_cpu_tensor_traits* ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * cur) {
3868+
static const struct ggml_cpu_tensor_traits* ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * cur) {
38693869
if (cur->type == GGML_TYPE_Q4_0) {
38703870
// TODO: enable for AVX2 - currently disabled due to bad gemv performance
38713871
if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) {
@@ -3931,6 +3931,16 @@ static size_t ggml_backend_cpu_aarch64_buffer_type_get_alignment(ggml_backend_bu
39313931
GGML_UNUSED(buft);
39323932
}
39333933

3934+
namespace ggml::cpu::aarch64 {
3935+
class extra_buffer_type : ggml::cpu::extra_buffer_type {
3936+
bool supports_op(ggml_backend_dev_t , const struct ggml_tensor * op) {
3937+
// TODO: see whether this can be made simpler
3938+
return (op->op == GGML_OP_MUL_MAT && ggml_aarch64_get_optimal_repack_type(op->src[0]));
3939+
}
3940+
3941+
};
3942+
}
3943+
39343944
ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
39353945
static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_aarch64 = {
39363946
/* .iface = */ {
@@ -3942,12 +3952,8 @@ ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void) {
39423952
/* .is_host = */ NULL,
39433953
},
39443954
/* .device = */ ggml_backend_reg_dev_get(ggml_backend_cpu_reg(), 0),
3945-
/* .context = */ NULL,
3955+
/* .context = */ new ggml::cpu::aarch64::extra_buffer_type(),
39463956
};
39473957

39483958
return &ggml_backend_cpu_buffer_type_aarch64;
39493959
}
3950-
3951-
bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft) {
3952-
return buft == ggml_backend_cpu_aarch64_buffer_type();
3953-
}

ggml/src/ggml-cpu/ggml-cpu-aarch64.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,6 @@ extern "C" {
1010
#endif
1111

1212
#ifdef GGML_USE_CPU_AARCH64
13-
const struct ggml_cpu_tensor_traits* ggml_aarch64_get_optimal_repack_type(const struct ggml_tensor * cur);
14-
GGML_BACKEND_API bool ggml_backend_cpu_buft_is_aarch64(ggml_backend_buffer_type_t buft);
15-
1613
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_cpu_aarch64_buffer_type(void);
1714
#endif
1815

ggml/src/ggml-cpu/ggml-cpu-traits.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22
#include "ggml-backend.h"
33
#include "ggml-backend-impl.h"
44

5+
namespace ggml::cpu {
6+
tensor_traits::~tensor_traits(){};
7+
extra_buffer_type::~extra_buffer_type() {}
8+
}
9+
510
const struct ggml_cpu_tensor_traits* ggml_cpu_get_tensor_traits(
611
const struct ggml_tensor * tensor)
712
{

ggml/src/ggml-cpu/ggml-cpu-traits.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,30 @@
11
#pragma once
22
#include "ggml.h"
3+
#include "ggml-backend-impl.h"
34

45
#ifdef __cplusplus
6+
namespace ggml::cpu {
7+
// stored in tensor->extra
8+
class tensor_traits {
9+
public:
10+
~tensor_traits();
11+
virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) = 0;
12+
};
13+
14+
// where should this go?
15+
class extra_buffer_type {
16+
public:
17+
~extra_buffer_type();
18+
virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0;
19+
};
20+
}
521
extern "C" {
22+
// #else
623
#endif
24+
// do we need a C mapping?
25+
//bool ggml_cpu_extra_compute_forward(void* extra, struct ggml_compute_params * params, struct ggml_tensor * tensor);
726

27+
// TODO: move this into a private method for cpu_aarch64
828
typedef int (*ggml_repack_t) (struct ggml_tensor *t, int interleave_block, const void * GGML_RESTRICT data,
929
size_t data_size);
1030
typedef void (*ggml_from_float_to_mat_t)

ggml/src/ggml-cpu/ggml-cpu.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727

2828
// ggml-backend interface
2929

30-
static std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_bufts() {
30+
static std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type() {
3131
static std::vector<ggml_backend_buffer_type_t> bufts = []() {
3232
std::vector<ggml_backend_buffer_type_t> bufts;
3333

@@ -47,14 +47,14 @@ static std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_bufts
4747
return bufts;
4848
}
4949

50-
static ggml_backend_buffer_type_t * ggml_backend_cpu_get_extra_bufts(ggml_backend_dev_t device) {
51-
return ggml_backend_cpu_get_extra_bufts().data();
50+
static ggml_backend_buffer_type_t * ggml_backend_cpu_device_get_extra_buffers_type(ggml_backend_dev_t device) {
51+
return ggml_backend_cpu_get_extra_buffers_type().data();
5252

5353
GGML_UNUSED(device);
5454
}
5555

56-
static bool ggml_backend_cpu_is_extra_buft(ggml_backend_buffer_type_t buft) {
57-
for (auto extra : ggml_backend_cpu_get_extra_bufts()) {
56+
static bool ggml_backend_cpu_is_extra_buffer_type(ggml_backend_buffer_type_t buft) {
57+
for (auto extra : ggml_backend_cpu_get_extra_buffers_type()) {
5858
if (extra && extra == buft) return true;
5959
}
6060
return false;
@@ -364,20 +364,16 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
364364
const struct ggml_tensor * src0 = op->src[0];
365365
const struct ggml_tensor * src1 = op->src[1];
366366

367-
// TODO voir comment reformater ca... type_traits && !type_traits->op_supported() => return false?
368-
#ifdef GGML_USE_CPU_AARCH64
369-
if (src0 && src0->buffer && ggml_backend_cpu_buft_is_aarch64(src0->buffer->buft)) {
370-
if (op->op != GGML_OP_MUL_MAT || ggml_aarch64_get_optimal_repack_type(src0) == nullptr) {
371-
return false;
372-
}
373-
}
374-
375-
for (int i = 1; i < GGML_MAX_SRC; i++) {
376-
if (op->src[i] && op->src[i]->buffer && ggml_backend_cpu_buft_is_aarch64(op->src[i]->buffer->buft)) {
367+
// extra_buffer_op?
368+
for (int i = 0; i < GGML_MAX_SRC; i++) {
369+
if (op->src[i] && op->src[i]->buffer && ggml_backend_cpu_is_extra_buffer_type(op->src[i]->buffer->buft)) {
370+
auto buf = (ggml::cpu::extra_buffer_type*) op->src[i]->buffer->buft->context;
371+
if (buf) {
372+
return buf->supports_op(dev, op);
373+
}
377374
return false;
378375
}
379376
}
380-
#endif
381377

382378
switch (op->op) {
383379
case GGML_OP_CPY:
@@ -402,7 +398,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st
402398
}
403399

404400
static bool ggml_backend_cpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
405-
return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buft(buft);
401+
return ggml_backend_buft_is_host(buft) || ggml_backend_cpu_is_extra_buffer_type(buft);
406402
GGML_UNUSED(dev);
407403
}
408404

@@ -546,7 +542,10 @@ static void * ggml_backend_cpu_get_proc_address(ggml_backend_reg_t reg, const ch
546542
return (void *)fct;
547543
}
548544
if (strcmp(name, "ggml_backend_dev_get_extra_bufts") == 0) {
549-
ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_get_extra_bufts;
545+
// Why not put this together with
546+
//static const struct ggml_backend_device_i ggml_backend_cpu_device_i = {
547+
// /* .get_buffer_type = */ ggml_backend_cpu_device_get_buffer_type,
548+
ggml_backend_dev_get_extra_bufts_t fct = ggml_backend_cpu_device_get_extra_buffers_type;
550549
return (void *)fct;
551550
}
552551
if (strcmp(name, "ggml_backend_get_features") == 0) {

0 commit comments

Comments
 (0)