convert-hf : support bfloat16 conversion
compilade committed May 8, 2024
1 parent bffdaf4 commit 1eccde6
Showing 2 changed files with 39 additions and 15 deletions.
37 changes: 24 additions & 13 deletions convert-hf-to-gguf.py
@@ -239,10 +239,7 @@ def write_tensors(self):
                 data: np.ndarray = data  # type hint
                 n_dims = len(data.shape)
                 data_dtype = data.dtype
-
-                # if f32 desired, convert any float16 to float32
-                if self.ftype == 0 and data_dtype == np.float16:
-                    data = data.astype(np.float32)
+                data_qtype: gguf.GGMLQuantizationType | None = None

                 # when both are True, f32 should win
                 extra_f32 = self.extra_f32_tensors(name, new_name, bid, n_dims)
@@ -254,20 +254,33 @@ def write_tensors(self):
                 # if f16 desired, convert any float32 2-dim weight tensors to float16
                 extra_f16 = extra_f16 or (name.endswith(".weight") and n_dims >= 2)

-                # when both extra_f32 and extra_f16 are False, convert to float32 by default
-                if self.ftype == 1 and data_dtype == np.float16 and (extra_f32 or not extra_f16):
-                    data = data.astype(np.float32)
+                if self.ftype != gguf.GGMLQuantizationType.F32 and extra_f16 and not extra_f32:
+                    if self.ftype == gguf.GGMLQuantizationType.F16:
+                        if data_dtype != np.float16:
+                            data = data.astype(np.float16)
+                        data_qtype = gguf.GGMLQuantizationType.F16
+
+                    elif self.ftype == gguf.GGMLQuantizationType.BF16:
+                        if data_dtype != np.float32:
+                            data = data.astype(np.float32)
+                        data.dtype = np.int32
+                        data = (data >> 16).astype(np.int16)
+                        data_qtype = gguf.GGMLQuantizationType.BF16
+
+                else:  # by default, convert to float32
+                    if data_dtype != np.float32:
+                        data = data.astype(np.float32)
+                    data_qtype = gguf.GGMLQuantizationType.F32

-                if self.ftype == 1 and data_dtype == np.float32 and extra_f16 and not extra_f32:
-                    data = data.astype(np.float16)
+                assert data_qtype is not None

                 # reverse shape to make it similar to the internal ggml dimension order
                 shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"

                 # n_dims is implicit in the shape
-                logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data.dtype}, shape = {shape_str}")
+                logger.info(f"{f'%-{max_name_len}s' % f'{new_name},'} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")

-                self.gguf_writer.add_tensor(new_name, data)
+                self.gguf_writer.add_tensor(new_name, data, raw_dtype=data_qtype)

     def write(self):
         self.write_tensors()
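The BF16 branch above works because bfloat16 is exactly the upper half of an IEEE-754 float32: reinterpreting the float32 buffer as int32 and shifting right by 16 keeps the sign bit, the full 8-bit exponent, and the top 7 mantissa bits. A self-contained NumPy sketch of the same conversion (illustrative only; like the commit, it truncates rather than rounding to nearest-even):

import numpy as np

def f32_to_bf16_bits(data: np.ndarray) -> np.ndarray:
    # bfloat16 = sign (1) + exponent (8) + mantissa (7): the high 16 bits of a float32
    data = data.astype(np.float32)
    bits = data.view(np.int32)            # reinterpret the bytes; no value conversion
    return (bits >> 16).astype(np.int16)  # drop the low 16 mantissa bits

x = np.array([1.0, -2.5, 3.1415927], dtype=np.float32)
print(f32_to_bf16_bits(x))  # raw bfloat16 bit patterns, held in an int16 container

The int16 array is only a 16-bit container for the bit patterns; passing raw_dtype=data_qtype to add_tensor, as the diff does, is what labels the tensor as BF16 in the output file instead of letting the writer infer a type from the NumPy dtype.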
@@ -2417,8 +2427,8 @@ def parse_args() -> argparse.Namespace:
         help="path to write to; default: based on input",
     )
     parser.add_argument(
-        "--outtype", type=str, choices=["f32", "f16"], default="f16",
-        help="output format - use f32 for float32, f16 for float16",
+        "--outtype", type=str, choices=["f32", "f16", "bf16"], default="f16",
+        help="output format - use f32 for float32, f16 for float16, bf16 for bfloat16",
     )
     parser.add_argument(
         "--bigendian", action="store_true",
@@ -2475,6 +2485,7 @@ def main() -> None:
     ftype_map = {
         "f32": gguf.GGMLQuantizationType.F32,
         "f16": gguf.GGMLQuantizationType.F16,
+        "bf16": gguf.GGMLQuantizationType.BF16,
     }

     if args.outfile is not None:
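With "bf16" wired into both --outtype and ftype_map, bfloat16 output becomes a one-flag change on the command line; for example (the model path here is a placeholder):

python convert-hf-to-gguf.py ./my-hf-model --outtype bf16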
17 changes: 15 additions & 2 deletions gguf-py/gguf/gguf_writer.py
@@ -28,6 +28,7 @@
 logger = logging.getLogger(__name__)


+# TODO: generalize LazyTorchTensor to make the Numpy equivalent cleaner
 class LazyTensor:
     data: Callable[[], np.ndarray[Any, Any]]
     # to avoid too deep recursion
@@ -38,12 +39,24 @@ class LazyTensor:
     def __init__(self, data: Callable[[], np.ndarray[Any, Any]], *, dtype: type, shape: tuple[int, ...]):
         self.data = data
         self.functions = []
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         self.shape = shape

+    def __setattr__(self, __name: str, __value: Any) -> None:
+        if __name == "dtype":
+            def modify_attr(o: object):
+                setattr(o, __name, __value)
+                return o
+            self.functions.append(modify_attr)
+        self.__dict__[__name] = __value
+
+    def __rshift__(self, __n):
+        self.functions.append(lambda n: n >> __n)
+        return self
+
     def astype(self, dtype: type, **kwargs) -> LazyTensor:
         self.functions.append(lambda n: n.astype(dtype, **kwargs))
-        self.dtype = np.dtype(dtype)
+        self.__dict__["dtype"] = np.dtype(dtype)
         return self

     @property
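These LazyTensor hooks are what let write_tensors apply the bf16 bit manipulation to tensors that have not been loaded yet: __setattr__ turns a plain data.dtype = np.int32 assignment into a queued reinterpretation, __rshift__ queues the >> 16, and astype queues the final cast, all replayed only when the data is actually materialized. A minimal standalone sketch of the same deferred-execution idea (TinyLazy and its method names are invented for illustration and are not part of the gguf package):

from __future__ import annotations
from typing import Callable
import numpy as np

class TinyLazy:
    # Queue numpy operations, replaying them only when the data is requested.
    def __init__(self, fetch: Callable[[], np.ndarray]):
        self.fetch = fetch
        self.ops: list[Callable[[np.ndarray], np.ndarray]] = []

    def set_dtype(self, dtype: type) -> TinyLazy:
        # counterpart of the __setattr__ hook: reinterpret the bytes in place
        def op(a: np.ndarray) -> np.ndarray:
            a = a.copy()
            a.dtype = dtype
            return a
        self.ops.append(op)
        return self

    def __rshift__(self, n: int) -> TinyLazy:
        self.ops.append(lambda a: a >> n)
        return self

    def astype(self, dtype: type) -> TinyLazy:
        self.ops.append(lambda a: a.astype(dtype))
        return self

    def materialize(self) -> np.ndarray:
        a = self.fetch()
        for op in self.ops:
            a = op(a)
        return a

lazy = TinyLazy(lambda: np.ones((2, 2), dtype=np.float32))
bf16 = (lazy.set_dtype(np.int32) >> 16).astype(np.int16)
print(bf16.materialize())  # nothing runs until this call

Every queued operation returns the wrapper itself, so the bf16 pipeline from write_tensors composes on the lazy object without touching the underlying array until the writer finally needs the bytes.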
