From 1eccde6f153ccefb2a47182adf8f3fe9f1ee66bd Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Wed, 8 May 2024 11:58:54 -0400
Subject: [PATCH] convert-hf : support bfloat16 conversion

---
 convert-hf-to-gguf.py       | 37 ++++++++++++++++++++++++-------------
 gguf-py/gguf/gguf_writer.py | 17 +++++++++++++++--
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1dc18b2a577218..837cfff5f2aa6d 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -239,10 +239,7 @@ def write_tensors(self):
             data: np.ndarray = data  # type hint
             n_dims = len(data.shape)
             data_dtype = data.dtype
-
-            # if f32 desired, convert any float16 to float32
-            if self.ftype == 0 and data_dtype == np.float16:
-                data = data.astype(np.float32)
+            data_qtype: gguf.GGMLQuantizationType | None = None
 
             # when both are True, f32 should win
             extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
@@ -254,20 +251,33 @@
             # if f16 desired, convert any float32 2-dim weight tensors to float16
             extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
 
-            # when both extra_f32 and extra_f16 are False, convert to float32 by default
-            if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
-                data = data.astype(np.float32)
+            if self.ftype != gguf.GGMLQuantizationType.F32 and extra_f16 and not extra_f32:
+                if self.ftype == gguf.GGMLQuantizationType.F16:
+                    if data_dtype != np.float16:
+                        data = data.astype(np.float16)
+                    data_qtype = gguf.GGMLQuantizationType.F16
+
+                elif self.ftype == gguf.GGMLQuantizationType.BF16:
+                    if data_dtype != np.float32:
+                        data = data.astype(np.float32)
+                    data.dtype = np.int32
+                    data = (data >> 16).astype(np.int16)
+                    data_qtype = gguf.GGMLQuantizationType.BF16
+
+            else:  # by default, convert to float32
+                if data_dtype != np.float32:
+                    data = data.astype(np.float32)
+                data_qtype = gguf.GGMLQuantizationType.F32
 
-            if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
-                data = data.astype(np.float16)
+            assert data_qtype is not None
 
             # reverse shape to make it similar to the internal ggml dimension order
             shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
 
             # n_dims is implicit in the shape
-            logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
+            logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
 
-            self.gguf_writer.add_tensor(new_name, data)
+            self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
 
     def write(self):
         self.write_tensors()
@@ -2417,8 +2427,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16"], default="f16",
-        help="output format - use f32 for float32, f16 for float16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16"], default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16",
    )
     parser.add_argument(
         "--bigendian", action="store_true",
@@ -2475,6 +2485,7 @@ def main() -> None:
     ftype_map = {
         "f32": gguf.GGMLQuantizationType.F32,
         "f16": gguf.GGMLQuantizationType.F16,
+        "bf16": gguf.GGMLQuantizationType.BF16,
     }
 
     if args.outfile is not None:
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 8dcf9330b076fb..2b35fb6fdd2393 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -28,6 +28,7 @@
 logger = logging.getLogger(__name__)
 
 
+# TODO: generalize LazyTorchTensor to make the Numpy equivalent cleaner
 class LazyTensor:
     data: Callable[[], np.ndarray[Any, Any]]
     # to avoid too deep recursion
@@ -38,12 +39,24 @@ class LazyTensor:
     def __init__(self, data: Callable[[], np.ndarray[Any, Any]], *, dtype: type, shape: tuple[int, ...]):
         self.data = data
         self.functions = []
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         self.shape = shape
 
+    def __setattr__(self, __name: str, __value: Any) -> None:
+        if __name == "dtype":
+            def modify_attr(o: object):
+                setattr(o, __name, __value)
+                return o
+            self.functions.append(modify_attr)
+        self.__dict__[__name] = __value
+
+    def __rshift__(self, __n):
+        self.functions.append(lambda n: n >> __n)
+        return self
+
    def astype(self, dtype: type, **kwargs) -> LazyTensor:
         self.functions.append(lambda n: n.astype(dtype, **kwargs))
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         return self
 
     @property
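
Note: the BF16 branch in write_tensors() above converts float32 to bfloat16 by keeping only the upper 16 bits of each value (plain truncation, no rounding). For reference, a minimal standalone sketch of that conversion, not part of the patch; the helper name and sample values are illustrative only, and an unsigned view is used instead of the patch's signed int32 reinterpretation (the resulting bit patterns are the same):

import numpy as np

def f32_to_bf16_bits(a: np.ndarray) -> np.ndarray:
    # View the float32 bits as uint32 and keep the upper 16 bits,
    # which is the bfloat16 bit pattern of each element (truncated).
    a = np.ascontiguousarray(a, dtype=np.float32)
    return (a.view(np.uint32) >> 16).astype(np.uint16)

x = np.array([1.0, -2.5, 3.140625], dtype=np.float32)
print(f32_to_bf16_bits(x))  # 1.0 -> 0x3F80 == 16256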