Respect user-specified block_size in V1 on the XPU platform

yma11 · yma11 · commit b3f7431e1ba8 · 2025-05-28T06:33:26.000Z
Signed-off-by: yan ma <yan.ma@intel.com>
diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py
@@ -80,13 +80,11 @@ def inference_mode(cls):
     @classmethod
     def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
         cache_config = vllm_config.cache_config
-        # in V1(or with ipex chunked prefill) block_size is 64
-        if cache_config and \
-            cache_config.block_size is None and \
-            envs.VLLM_USE_V1:
-            cache_config.block_size = 64
         if cache_config and cache_config.block_size is None:
-            cache_config.block_size = 16
+            if envs.VLLM_USE_V1:
+                cache_config.block_size = 64
+            else:
+                cache_config.block_size = 16
 
         # Instances created using VllmConfig() typically have model_config as
         # None by default. The modification involves adding a check to prevent