Skip to content

Commit b3f7431

Browse files
committed
respect block_size user setting in V1
Signed-off-by: yan ma <yan.ma@intel.com>
1 parent 363d548 commit b3f7431

File tree

1 file changed

+4
-6
lines changed

1 file changed

+4
-6
lines changed

vllm/platforms/xpu.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -80,13 +80,11 @@ def inference_mode(cls):
8080
@classmethod
8181
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
8282
cache_config = vllm_config.cache_config
83-
# in V1(or with ipex chunked prefill) block_size is 64
84-
if cache_config and \
85-
cache_config.block_size is None and \
86-
envs.VLLM_USE_V1:
87-
cache_config.block_size = 64
8883
if cache_config and cache_config.block_size is None:
89-
cache_config.block_size = 16
84+
if envs.VLLM_USE_V1:
85+
cache_config.block_size = 64
86+
else:
87+
cache_config.block_size = 16
9088

9189
# Instances created using VllmConfig() typically have model_config as
9290
# None by default. The modification involves adding a check to prevent

0 commit comments

Comments
 (0)