Skip to content

Commit

Permalink
Apply code suggestions
Browse files (browse the repository at this point in the history)
  • Loading branch information
casper-hansen committed Jan 26, 2024
1 parent 4149086 commit 77811c6
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 5 deletions.
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,7 @@ def get_torch_arch_list() -> Set[str]:
]

if _is_cuda():
- vllm_extension_sources.extend([
- "csrc/quantization/awq/gemm_kernels.cu",
- ])
+ vllm_extension_sources.append("csrc/quantization/awq/gemm_kernels.cu")

if not _is_neuron():
vllm_extension = CUDAExtension(
Expand Down
4 changes: 2 additions & 2 deletions vllm/model_executor/layers/quantization/awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ def apply_weights(self,
out_shape = (x.shape[:-1] + (qweight.shape[-1] * pack_factor, ))
reshaped_x = x.reshape(-1, x.shape[-1])

- # batch_size*seq_len >= threshold
- FP16_MATMUL_HEURISTIC_CONDITION = x.shape[0] * x.shape[1] >= 256
+ # num_tokens >= threshold
+ FP16_MATMUL_HEURISTIC_CONDITION = x.shape[:-1].numel() >= 256

if FP16_MATMUL_HEURISTIC_CONDITION:
out = ops.awq_dequantize(qweight, scales, qzeros, 0, 0, 0)
Expand Down

0 comments on commit 77811c6

Please sign in to comment.