
Commit b6dcb4d

[Misc] Fix flash attention backend log (#4368)
1 parent b5b4a39 commit b6dcb4d

File tree

1 file changed: +5 −5 lines changed

vllm/attention/selector.py

Lines changed: 5 additions & 5 deletions
@@ -25,7 +25,7 @@ class _Backend(enum.Enum):
 def get_attn_backend(dtype: torch.dtype) -> Type[AttentionBackend]:
     backend = _which_attn_to_use(dtype)
     if backend == _Backend.FLASH_ATTN:
-        logger.info("Using FlashAttention backend.")
+        logger.info("Using FlashAttention-2 backend.")
         from vllm.attention.backends.flash_attn import (  # noqa: F401
             FlashAttentionBackend)
         return FlashAttentionBackend
@@ -62,21 +62,21 @@ def _which_attn_to_use(dtype: torch.dtype) -> _Backend:
     # NVIDIA GPUs.
     if torch.cuda.get_device_capability()[0] < 8:
         # Volta and Turing NVIDIA GPUs.
-        logger.info("Cannot use FlashAttention backend for Volta and Turing "
+        logger.info("Cannot use FlashAttention-2 backend for Volta and Turing "
                     "GPUs.")
         return _Backend.XFORMERS

     if dtype not in (torch.float16, torch.bfloat16):
-        logger.info("Cannot use FlashAttention backend for dtype other than "
+        logger.info("Cannot use FlashAttention-2 backend for dtype other than "
                     "torch.float16 or torch.bfloat16.")
         return _Backend.XFORMERS

     try:
         import flash_attn  # noqa: F401
     except ImportError:
         logger.info(
-            "Cannot use FlashAttention backend because the flash_attn package "
-            "is not found. Please install it for better performance.")
+            "Cannot use FlashAttention-2 backend because the flash_attn "
+            "package is not found. Please install it for better performance.")
         return _Backend.XFORMERS

     backend_by_env_var = os.getenv(VLLM_ATTENTION_BACKEND)
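For context, the selection logic touched by this commit falls back from FlashAttention-2 to xFormers whenever the GPU is pre-Ampere (compute capability below 8), the dtype is not float16/bfloat16, or the flash_attn package is not installed. Below is a minimal standalone sketch of that fallback pattern; pick_backend and the returned strings are illustrative names, not the vLLM API itself.

# Simplified sketch of the fallback checks shown in the diff above.
# pick_backend and the string return values are hypothetical, for illustration only.
import importlib.util

import torch


def pick_backend(dtype: torch.dtype) -> str:
    # Volta and Turing GPUs (compute capability < 8) cannot run FlashAttention-2.
    if torch.cuda.get_device_capability()[0] < 8:
        return "XFORMERS"
    # FlashAttention-2 kernels only support float16 and bfloat16.
    if dtype not in (torch.float16, torch.bfloat16):
        return "XFORMERS"
    # Fall back if the flash_attn package is not importable.
    if importlib.util.find_spec("flash_attn") is None:
        return "XFORMERS"
    return "FLASH_ATTN"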
