Skip to content

Commit

Permalink
Revert "warp size fixes"
Browse files Browse the repository at this point in the history
It seems like 32 is faster, at least for me, and it won't cause so many conflicts.

This reverts commit 5d6eb72.
  • Loading branch information
SlyEcho committed Jun 6, 2023
1 parent 5d6eb72 commit 1ba4ce4
Showing 1 changed file with 2 additions and 6 deletions.
8 changes: 2 additions & 6 deletions ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -182,11 +182,7 @@ typedef struct {
} block_q6_k;
static_assert(sizeof(block_q6_k) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_k block size/padding");

#if defined(GGML_USE_HIPBLAS)
#define WARP_SIZE warpSize
#else
#define WARP_SIZE 32
#endif

#define CUDA_MUL_BLOCK_SIZE 256

Expand Down Expand Up @@ -683,8 +679,8 @@ static __global__ void dequantize_mul_mat_vec(const void * vx, const float * y,
// sum up partial sums and write back result
__syncthreads();
#pragma unroll
for (int mask = WARP_SIZE/2; mask > 0; mask >>= 1) {
tmp += __shfl_xor_sync(0xffffffff, tmp, mask, WARP_SIZE);
for (int mask = 16; mask > 0; mask >>= 1) {
tmp += __shfl_xor_sync(0xffffffff, tmp, mask, 32);
}

if (tid == 0) {
Expand Down

0 comments on commit 1ba4ce4

Please sign in to comment.