From 1eccde6f153ccefb2a47182adf8f3fe9f1ee66bd Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Wed, 8 May 2024 11:58:54 -0400
Subject: [PATCH] convert-hf : support bfloat16 conversion

---
 convert-hf-to-gguf.py       | 37 ++++++++++++++++++++++++-------------
 gguf-py/gguf/gguf_writer.py | 17 +++++++++++++++--
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 1dc18b2a577218..837cfff5f2aa6d 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -239,10 +239,7 @@ def write_tensors(self):
             data: np.ndarray = data  # type hint
             n_dims = len(data.shape)
             data_dtype = data.dtype
-
-            # if f32 desired, convert any float16 to float32
-            if self.ftype == 0 and data_dtype == np.float16:
-                data = data.astype(np.float32)
+            data_qtype: gguf.GGMLQuantizationType | None = None
 
             # when both are True, f32 should win
             extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
@@ -254,20 +251,33 @@
             # if f16 desired, convert any float32 2-dim weight tensors to float16
             extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)
 
-            # when both extra_f32 and extra_f16 are False, convert to float32 by default
-            if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
-                data = data.astype(np.float32)
+            if self.ftype != gguf.GGMLQuantizationType.F32 and extra_f16 and not extra_f32:
+                if self.ftype == gguf.GGMLQuantizationType.F16:
+                    if data_dtype != np.float16:
+                        data = data.astype(np.float16)
+                    data_qtype = gguf.GGMLQuantizationType.F16
+
+                elif self.ftype == gguf.GGMLQuantizationType.BF16:
+                    if data_dtype != np.float32:
+                        data = data.astype(np.float32)
+                    data.dtype = np.int32
+                    data = (data >> 16).astype(np.int16)
+                    data_qtype = gguf.GGMLQuantizationType.BF16
+
+            else:  # by default, convert to float32
+                if data_dtype != np.float32:
+                    data = data.astype(np.float32)
+                data_qtype = gguf.GGMLQuantizationType.F32
 
-            if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
-                data = data.astype(np.float16)
+            assert data_qtype is not None
 
             # reverse shape to make it similar to the internal ggml dimension order
             shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
 
             # n_dims is implicit in the shape
-            logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
+            logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
 
-            self.gguf_writer.add_tensor(new_name, data)
+            self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)
 
     def write(self):
         self.write_tensors()
@@ -2417,8 +2427,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16"], default="f16",
-        help="output format - use f32 for float32, f16 for float16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16"], default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16",
    )
     parser.add_argument(
         "--bigendian", action="store_true",
@@ -2475,6 +2485,7 @@ def main() -> None:
     ftype_map = {
         "f32": gguf.GGMLQuantizationType.F32,
         "f16": gguf.GGMLQuantizationType.F16,
+        "bf16": gguf.GGMLQuantizationType.BF16,
     }
 
     if args.outfile is not None:
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 8dcf9330b076fb..2b35fb6fdd2393 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -28,6 +28,7 @@
 logger = logging.getLogger(__name__)
 
 
+# TODO: generalize LazyTorchTensor to make the Numpy equivalent cleaner
 class LazyTensor:
     data: Callable[[], np.ndarray[Any, Any]]
     # to avoid too deep recursion
@@ -38,12 +39,24 @@ class LazyTensor:
     def __init__(self, data: Callable[[], np.ndarray[Any, Any]], *, dtype: type, shape: tuple[int, ...]):
         self.data = data
         self.functions = []
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         self.shape = shape
 
+    def __setattr__(self, __name: str, __value: Any) -> None:
+        if __name == "dtype":
+            def modify_attr(o: object):
+                setattr(o, __name, __value)
+                return o
+            self.functions.append(modify_attr)
+        self.__dict__[__name] = __value
+
+    def __rshift__(self, __n):
+        self.functions.append(lambda n: n >> __n)
+        return self
+
    def astype(self, dtype: type, **kwargs) -> LazyTensor:
         self.functions.append(lambda n: n.astype(dtype, **kwargs))
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         return self
 
     @property
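
Note: the BF16 branch in write_tensors() above converts float32 to bfloat16 by keeping only the upper 16 bits of each value (plain truncation, no rounding). For reference, a minimal standalone sketch of that conversion, not part of the patch; the helper name and sample values are illustrative only, and an unsigned view is used instead of the patch's signed int32 reinterpretation (the resulting bit patterns are the same):

import numpy as np

def f32_to_bf16_bits(a: np.ndarray) -> np.ndarray:
    # View the float32 bits as uint32 and keep the upper 16 bits,
    # which is the bfloat16 bit pattern of each element (truncated).
    a = np.ascontiguousarray(a, dtype=np.float32)
    return (a.view(np.uint32) >> 16).astype(np.uint16)

x = np.array([1.0, -2.5, 3.140625], dtype=np.float32)
print(f32_to_bf16_bits(x))  # 1.0 -> 0x3F80 == 16256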