Commit 17a42e5
Add BF16 to GGUF (lllyasviel#2877)
1 parent: 0ced1d0

File tree: 2 files changed, +4 −0 lines

backend/operations_gguf.py (1 addition, 0 deletions)

@@ -13,6 +13,7 @@
     gguf.GGMLQuantizationType.Q5_K: gguf.Q5_K,
     gguf.GGMLQuantizationType.Q6_K: gguf.Q6_K,
     gguf.GGMLQuantizationType.Q8_0: gguf.Q8_0,
+    gguf.GGMLQuantizationType.BF16: gguf.BF16,
 }

packages_3rdparty/gguf/quants.py (3 additions, 0 deletions)

@@ -268,6 +268,9 @@ def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
     def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
         return (blocks.view(np.int16).astype(np.int32) << 16).view(np.float32)

+    @classmethod
+    def dequantize_blocks_pytorch(cls, blocks, block_size, type_size, parameter) -> torch.Tensor:
+        return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32)

 class Q4_0(__Quant, qtype=GGMLQuantizationType.Q4_0):
     @classmethod
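A note on why the shift-by-16 in both added methods works: a bfloat16 value is exactly the top 16 bits of an IEEE-754 float32, so dequantization only needs to move those 16 bits into the upper half of a 32-bit word and reinterpret the bit pattern as float32. The sketch below (not part of the commit; the helper name and round-trip check are illustrative) mirrors the NumPy version from `dequantize_blocks`:

```python
import numpy as np

def dequantize_bf16(raw: np.ndarray) -> np.ndarray:
    """Reinterpret a buffer of bfloat16 bit patterns as float32.

    Mirrors the commit's dequantize_blocks: view the raw bytes as
    int16, widen to int32 (sign extension is harmless because the
    shift pushes the extended bits out), shift into the high half,
    then reinterpret the 32-bit pattern as float32.
    """
    return (raw.view(np.int16).astype(np.int32) << 16).view(np.float32)

# Round-trip check: truncate float32 values that are exactly
# representable in bf16, then dequantize them back.
values = np.array([1.0, -2.5, 3.140625], dtype=np.float32)
bf16_bits = (values.view(np.int32) >> 16).astype(np.int16)  # keep top 16 bits
restored = dequantize_bf16(bf16_bits)
```

The PyTorch variant in the second hunk is the same trick with `torch.Tensor.view` and `.to(torch.int32)` in place of the NumPy calls.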
