vllm-project · WoosukKwon · Oct 22, 2023 · Oct 11, 2023 · Oct 11, 2023 · Oct 11, 2023
diff --git a/csrc/quantization/squeezellm/quant_cuda_kernel.cu b/csrc/quantization/squeezellm/quant_cuda_kernel.cu
@@ -99,12 +99,7 @@ __global__ void NUQ4MatMulKernel(
   int row = BLOCKHEIGHT4 * blockIdx.x;
   int col =  BLOCKWIDTH * blockIdx.y + threadIdx.x;
 
-  // __shared__ __half blockvec[BLOCKWIDTH];
-  // blockvec[threadIdx.x] = vec[(row / BLOCKHEIGHT4) * BLOCKWIDTH + threadIdx.x];
-
   __shared__ half2 blockvec[blockwidth2];
-  // if (threadIdx.x < blockwidth2)
-  //   blockvec[threadIdx.x] = vec[(row / BLOCKHEIGHT4) * blockwidth2 + threadIdx.x];
 
   __shared__ __half deq2[16][BLOCKWIDTH];
   int off = threadIdx.x;
@@ -114,9 +109,6 @@ __global__ void NUQ4MatMulKernel(
     deq2[val][off] = lookup_table[lut_index];
   }
 
-
-  // __syncthreads();
-
   __half res;
   half2 res2;
   half2 tmp2;
@@ -181,7 +173,6 @@ __global__ void NUQ4MatMulKernel(
       res3.y = res;
     }
 
-    // atomicAdd(&mul[b * width + col], res);
     atomicAdd(&mul[b * width / 2 + col / 2], res3);
   }
 }
diff --git a/vllm/model_executor/quantization_utils/squeezellm.py b/vllm/model_executor/quantization_utils/squeezellm.py
@@ -37,8 +37,7 @@ def get_supported_act_dtypes(cls) -> List[torch.dtype]:
 
     @classmethod
     def get_min_capability(cls) -> int:
-        # @ Coleman - TODO check this
-        return 80
+        return 70
 
     @classmethod
     def get_config_filenames(cls) -> List[str]: