|
3 | 3 | #include "ggml-backend-impl.h" |
4 | 4 |
|
5 | 5 | #ifdef __cplusplus |
| 6 | +#include <vector> |
| 7 | +extern "C" { |
| 8 | +#endif |
// Per-invocation parameters handed to each worker thread of a CPU op.
struct ggml_compute_params {
    // ith = index of the calling thread, nth = total number of threads
    int ith, nth;

    // scratch work buffer shared by all threads
    size_t wsize;
    void * wdata;

    struct ggml_threadpool * threadpool;
};
| 19 | + |
// Implemented in ggml-cpu.c; possibly not the ideal home for this declaration.
void ggml_barrier(struct ggml_threadpool * tp);

// Returns true when the op is handled by an extra "accelerator" buffer type.
bool ggml_cpu_extra_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op);
// Returns true (and writes *size) when an extra buffer type provides the
// work-buffer size for this op at the given thread count.
bool ggml_cpu_extra_work_size(int n_threads, const struct ggml_tensor * op, size_t * size);
| 27 | +#ifdef __cplusplus |
| 28 | +} |
6 | 29 | namespace ggml::cpu { |
7 | 30 | // enregistré dans tensor->extra |
8 | 31 | class tensor_traits { |
9 | 32 | public: |
10 | 33 | ~tensor_traits(); |
11 | | - virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) = 0; |
| 34 | + virtual bool work_size(int n_threads, const struct ggml_tensor * op, size_t & size) = 0; |
| 35 | + virtual bool compute_forward(struct ggml_compute_params * params, struct ggml_tensor * op) = 0; |
12 | 36 | }; |
13 | 37 |
|
14 | | - // ou mettre ca? |
15 | 38 | class extra_buffer_type { |
16 | 39 | public: |
17 | 40 | ~extra_buffer_type(); |
18 | 41 | virtual bool supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) = 0; |
| 42 | + virtual tensor_traits* get_tensor_traits(const struct ggml_tensor * op) = 0; |
19 | 43 | }; |
| 44 | + |
20 | 45 | } |
21 | | -extern "C" { |
22 | | -// #else |
23 | | -#endif |
24 | | - // a t'on besoin d'un mapping C? |
25 | | - //bool ggml_cpu_extra_compute_forward(void* extra, struct ggml_compute_params * params, struct ggml_tensor * tensor); |
26 | | - |
27 | | - // @ transferer en methode privée pour cpu_aarch64 |
28 | | - typedef int (*ggml_repack_t) (struct ggml_tensor *t, int interleave_block, const void * GGML_RESTRICT data, |
29 | | - size_t data_size); |
30 | | - typedef void (*ggml_from_float_to_mat_t) |
31 | | - (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t nr, int64_t k, int64_t bs); |
32 | | - typedef void (*ggml_gemv_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, |
33 | | - const void * GGML_RESTRICT y, int nr, int nc); |
34 | | - typedef void (*ggml_gemm_t) (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT x, |
35 | | - const void * GGML_RESTRICT y, int nr, int nc); |
36 | | - |
37 | | - struct ggml_cpu_tensor_traits { |
38 | | - ggml_repack_t repack; |
39 | | - int64_t blck_size_interleave; // + interleave elements in blocks |
40 | | - ggml_from_float_to_mat_t from_float_to_mat; // + mis sur le vec_dot_type ... quantize_mat_q8_0 |
41 | | - enum ggml_type vec_dot_type; // + |
42 | | - int64_t nrows; // ? number of rows to process simultaneously |
43 | | - int64_t ncols; // ? number of columns to process simultaneously |
44 | | - ggml_gemv_t gemv; // + |
45 | | - ggml_gemm_t gemm; // + |
46 | | - }; |
47 | 46 |
|
48 | | - const struct ggml_cpu_tensor_traits* ggml_cpu_get_tensor_traits(const struct ggml_tensor * tensor); |
| 47 | +// implementé dans ggml-cpu.cpp. |
| 48 | +std::vector<ggml_backend_buffer_type_t>& ggml_backend_cpu_get_extra_buffers_type(); |
49 | 49 |
|
50 | | -#ifdef __cplusplus |
51 | | -} |
52 | 50 | #endif |
0 commit comments