1 file changed: +3 -1 lines changed
Original file line number | Diff line number | Diff line change
11
11
from vllm .attention .selector import backend_name_to_enum , get_attn_backend
12
12
from vllm .config import CacheConfig , get_current_vllm_config
13
13
from vllm .forward_context import ForwardContext , get_forward_context
14
+ from vllm .model_executor .layers .linear import UnquantizedLinearMethod
14
15
from vllm .model_executor .layers .quantization .base_config import (
15
16
QuantizationConfig )
16
17
from vllm .model_executor .layers .quantization .kv_cache import BaseKVCacheMethod
@@ -97,7 +98,8 @@ def __init__(
97
98
98
99
quant_method = quant_config .get_quant_method (
99
100
self , prefix = prefix ) if quant_config else None
100
- if quant_method is not None :
101
+ if quant_method is not None and not isinstance (
102
+ quant_method , UnquantizedLinearMethod ):
101
103
assert isinstance (quant_method , BaseKVCacheMethod )
102
104
# TODO (mgoin): kv cache dtype should be specified in the FP8
103
105
# checkpoint config and become the "auto" behavior
You can’t perform that action at this time.
0 commit comments