From b00c58c317df247ec29142d369d7ec742933f606 Mon Sep 17 00:00:00 2001 From: JohannesGaessler Date: Sat, 20 May 2023 19:37:11 +0200 Subject: [PATCH] Define GGML_CUDA_DMMV_BLOCK_Y if not defined --- ggml-cuda.cu | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ggml-cuda.cu b/ggml-cuda.cu index e1e858f218d93..9336fe0a70c79 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -84,7 +84,11 @@ typedef struct { static_assert(sizeof(block_q8_0) == sizeof(float) + QK8_0, "wrong q8_0 block size/padding"); #define CUDA_DEQUANTIZE_BLOCK_SIZE 256 -#define GGML_CUDA_DMMV_BLOCK_X 32 // dmmv = dequantize_mul_mat_vec +// dmmv = dequantize_mul_mat_vec +#define GGML_CUDA_DMMV_BLOCK_X 32 +#ifndef GGML_CUDA_DMMV_BLOCK_Y +#define GGML_CUDA_DMMV_BLOCK_Y 1 // can be set by compiler option LLAMA_CUDA_BY +#endif static __device__ void dequantize_q4_0(const void * vx, const int ib, const int iqs, float & v0, float & v1){ const block_q4_0 * x = (const block_q4_0 *) vx;