19 changes: 14 additions & 5 deletions diffsynth_engine/utils/fp8_linear.py
@@ -2,6 +2,7 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from contextlib import contextmanager
+from diffsynth_engine.utils.platform import DTYPE_FP8
 
 
 def enable_fp8_linear(module: nn.Module):
@@ -12,7 +13,7 @@ def enable_fp8_linear(module: nn.Module):
 def _enable_fp8_linear(module: nn.Module):
     if isinstance(module, nn.Linear) and torch.is_floating_point(module.weight.data):
         # avoid conversion for int weights like GGUF
-        module.weight.data = module.weight.data.to(torch.float8_e4m3fn)
+        module.weight.data = module.weight.data.to(DTYPE_FP8)
     for submodule in module.children():
         _enable_fp8_linear(submodule)

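A quick usage sketch of the conversion above. This is hypothetical and based only on what the diff shows: it assumes enable_fp8_linear (whose body is collapsed here) applies _enable_fp8_linear to the module tree, as its name and the recursion suggest.

# Hypothetical usage sketch, not part of the PR.
import torch.nn as nn
from diffsynth_engine.utils.fp8_linear import enable_fp8_linear
from diffsynth_engine.utils.platform import DTYPE_FP8

model = nn.Sequential(nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 8))
enable_fp8_linear(model)
# Every floating-point nn.Linear weight should now be stored in the
# platform's FP8 dtype; non-Linear modules are left untouched.
assert model[0].weight.dtype == DTYPE_FP8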
@@ -32,16 +33,24 @@ def fp8_linear(
 ) -> torch.Tensor:
     device = input.device
     origin_dtype = input.dtype
-    input = input.to(torch.float8_e4m3fn)
-    weight = weight.to(torch.float8_e4m3fn)
+    scale_a = 1.0
+    # For float8_e4m3fnuz, the maximum representable value (240) is roughly half
+    # that of e4m3fn (448). To avoid overflow and ensure numerical compatibility
+    # during FP8 computation, we scale down the input by 2.0 in advance.
+    # This scaling is compensated later, via scale_a, in the final result.
+    if DTYPE_FP8 == torch.float8_e4m3fnuz:
+        scale_a = 2.0
+        input = input / scale_a
+    input = input.to(DTYPE_FP8)
+    weight = weight.to(DTYPE_FP8)
 
     if len(input.shape) > 2:
         origin_shape = input.shape
         input = input.reshape(-1, origin_shape[-1])
         result = torch._scaled_mm(
             input,
             weight.T,
-            scale_a=torch.tensor(1.0).to(device=device),
+            scale_a=torch.tensor(scale_a).to(device=device),
             scale_b=torch.tensor(1.0).to(device=device),
             bias=bias,
             out_dtype=origin_dtype,
@@ -52,7 +61,7 @@
         result = torch._scaled_mm(
             input,
             weight.T,
-            scale_a=torch.tensor(1.0).to(device=device),
+            scale_a=torch.tensor(scale_a).to(device=device),
             scale_b=torch.tensor(1.0).to(device=device),
             bias=bias,
             out_dtype=origin_dtype,
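Why the compensation works: torch._scaled_mm multiplies the FP8 product by scale_a * scale_b before casting to out_dtype, so pre-dividing the activations by 2.0 and passing 2.0 back as scale_a round-trips to the original values, up to FP8 rounding. A minimal sketch under stated assumptions: a GPU whose _scaled_mm kernel accepts the chosen FP8 dtype (float8_e4m3fnuz needs an AMD gfx94x card; on NVIDIA, substitute torch.float8_e4m3fn and scale_a = 1.0, which is exactly the PR's default path).

# Hedged sketch, not part of the PR: checks the scale-compensation identity.
import torch

device = "cuda"
x = torch.randn(16, 32, device=device, dtype=torch.bfloat16)
w = torch.randn(64, 32, device=device, dtype=torch.bfloat16)
ref = x @ w.T  # reference result in the original dtype

scale_a = 2.0  # pre-divide the activations, then hand the factor back as scale_a
x_fp8 = (x / scale_a).to(torch.float8_e4m3fnuz)  # assumes AMD gfx94x support
w_fp8 = w.to(torch.float8_e4m3fnuz)
out = torch._scaled_mm(
    x_fp8,
    w_fp8.T,
    scale_a=torch.tensor(scale_a, device=device),
    scale_b=torch.tensor(1.0, device=device),
    out_dtype=torch.bfloat16,
)
# (x / 2) @ w.T * 2 == x @ w.T, up to FP8 quantization error
print(torch.allclose(ref, out, atol=0.5, rtol=0.1))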
10 changes: 9 additions & 1 deletion diffsynth_engine/utils/platform.py
@@ -1,7 +1,15 @@
+# cross-platform definitions and utilities
 import torch
 import gc
 
-# stores cross-platform utility classes
+# data type
+# AMD only supports float8_e4m3fnuz
+# https://onnx.ai/onnx/technical/float8.html
+if torch.version.hip and "gfx94" in torch.cuda.get_device_properties(0).gcnArchName:
+    DTYPE_FP8 = torch.float8_e4m3fnuz
+else:
+    DTYPE_FP8 = torch.float8_e4m3fn
+
 
 
 def empty_cache():
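The range gap behind the scale_a = 2.0 workaround can be verified directly with torch.finfo; this sketch runs on CPU with any recent PyTorch build that defines both dtypes.

# Sketch, not part of the PR: the FP8 ranges referenced in fp8_linear.py.
# e4m3fnuz spends no encodings on inf or negative zero, but its shifted
# exponent bias roughly halves the maximum finite value.
import torch

print(torch.finfo(torch.float8_e4m3fn).max)    # 448.0 (OCP e4m3fn)
print(torch.finfo(torch.float8_e4m3fnuz).max)  # 240.0 (AMD variant)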