
Commit

Make check more obvious
danieldk committed Oct 16, 2024
1 parent aa92e45 commit 751f1bb
Showing 1 changed file with 2 additions and 4 deletions.
server/text_generation_server/layers/attention/kv_cache.py
@@ -24,10 +24,8 @@ def __init__(
     ):
         """Construct the key-value cache for a layer."""

-        if (
-            dtype.itemsize == 1
-            and dtype.is_floating_point
-            and (ATTENTION != "flashinfer" or SYSTEM != "cuda")
+        if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
+            ATTENTION != "flashinfer" or SYSTEM != "cuda"
         ):
             raise ValueError(
                 "FP8 KV cache is currently only supported for flashinfer on CUDA"
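
For context, a standalone sketch of how the rewritten guard behaves is shown below. This is not the TGI module itself: ATTENTION and SYSTEM are module-level values in the real file and are approximated here as plain variables, and check_kv_cache_dtype is a hypothetical wrapper around the condition. It assumes a PyTorch build that exposes the float8 dtypes (2.1 or later).

# Standalone sketch of the new dtype guard; not the TGI source itself.
import torch

# In the real file these are module-level imports; plain stand-ins here.
ATTENTION = "paged"  # hypothetical value for illustration
SYSTEM = "cuda"      # hypothetical value for illustration


def check_kv_cache_dtype(dtype: torch.dtype) -> None:
    """Reject FP8 KV-cache dtypes unless running flashinfer on CUDA."""
    if dtype in {torch.float8_e5m2, torch.float8_e4m3fn} and (
        ATTENTION != "flashinfer" or SYSTEM != "cuda"
    ):
        raise ValueError(
            "FP8 KV cache is currently only supported for flashinfer on CUDA"
        )


# The two FP8 dtypes named in the new check are 1-byte floating-point
# dtypes, i.e. exactly what the old itemsize/is_floating_point test matched
# for the standard FP8 formats.
for fp8 in (torch.float8_e5m2, torch.float8_e4m3fn):
    assert fp8.itemsize == 1 and fp8.is_floating_point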
