From c03df88a833c7c10e57d07506f7573736c304b0e Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg Date: Wed, 25 Sep 2024 15:02:51 +0000 Subject: [PATCH] Revert "[Kernel] changing fused moe kernel chunk size default to 32k (#7995)" This reverts commit 34a0e96d463d37cf85cee9c2cd01397034e97573. --- vllm/envs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/envs.py b/vllm/envs.py index 501c1eeca8e58..f7f05fd018b2f 100644 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -404,7 +404,7 @@ def get_default_config_root(): os.path.join(get_default_cache_root(), "vllm", "xla_cache"), )), "VLLM_FUSED_MOE_CHUNK_SIZE": - lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "32768")), + lambda: int(os.getenv("VLLM_FUSED_MOE_CHUNK_SIZE", "65536")), # If set, vllm will skip the deprecation warnings. "VLLM_NO_DEPRECATION_WARNING":