
Commit

Make check more obvious
danieldk committed Oct 16, 2024
1 parent aa92e45 commit 751f1bb
Showing 1 changed file with 2 additions and 4 deletions.
server/text_generation_server/layers/attention/kv_cache.py
@@ -24,10 +24,8 @@ def __init__(
     ):
         """Construct the key-value cache for a layer."""

-        if (
-            dtype.itemsize == 1
-            and dtype.is_floating_point
-            and (ATTENTION != "flashinfer" or SYSTEM != "cuda")
+        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
+            ATTENTION != "flashinfer" or SYSTEM != "cuda"
         ):
             raise ValueError(
                 "FP8 KV cache is currently only supported for flashinfer on CUDA"
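
For context, a standalone sketch of how the rewritten guard behaves is shown below. This is not the TGI module itself: ATTENTION and SYSTEM are module-level values in the real file and are approximated here as plain variables, and check_kv_cache_dtype is a hypothetical wrapper around the condition. It assumes a PyTorch build that exposes the float8 dtypes (2.1 or later).

# Standalone sketch of the new dtype guard; not the TGI source itself.
import torch

# In the real file these are module-level imports; plain stand-ins here.
ATTENTION = "paged"  # hypothetical value for illustration
SYSTEM = "cuda"      # hypothetical value for illustration


def check_kv_cache_dtype(dtype: torch.dtype) -> None:
    """Reject FP8 KV-cache dtypes unless running flashinfer on CUDA."""
    if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
        ATTENTION != "flashinfer" or SYSTEM != "cuda"
    ):
        raise ValueError(
            "FP8 KV cache is currently only supported for flashinfer on CUDA"
        )


# The two FP8 dtypes named in the new check are 1-byte floating-point
# dtypes, i.e. exactly what the old itemsize/is_floating_point test matched
# for the standard FP8 formats.
for fp8 in (torch.float8_e5m2, torch.float8_e4m3fn):
    assert fp8.itemsize == 1 and fp8.is_floating_point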
