From 8c1e77fb585c4f42783a3d88c1efc7c9e15fd89f Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Thu, 28 Nov 2024 08:31:28 -0800
Subject: [PATCH] [Kernel] Update vllm-flash-attn version to reduce CPU overheads (#10742)

Signed-off-by: Woosuk Kwon
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 45a3b484e0360..f43bf8143458b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -522,7 +522,7 @@ else()
   FetchContent_Declare(
           vllm-flash-attn
           GIT_REPOSITORY https://github.com/vllm-project/flash-attention.git
-          GIT_TAG d886f88165702b3c7e7744502772cd98b06be9e1
+          GIT_TAG fdf6d72b48aea41f4ae6a89139a453dae554abc8
           GIT_PROGRESS TRUE
           # Don't share the vllm-flash-attn build between build types
           BINARY_DIR ${CMAKE_BINARY_DIR}/vllm-flash-attn