Skip to content

Commit 3987a50

Browse files
committed
[fix] convert pt to gguf
1 parent 77e136f commit 3987a50

2 files changed

Lines changed: 20 additions & 221 deletions

File tree

utils/convert-ms-to-gguf-bitnet.py

Lines changed: 13 additions & 113 deletions
Original file line number | Diff line number | Diff line change
@@ -12,14 +12,12 @@
1212
import math
1313
import mmap
1414
import os
15-
import pickle
1615
import re
1716
import signal
1817
import struct
1918
import sys
2019
import textwrap
2120
import time
22-
import zipfile
2321
from abc import ABC, abstractmethod
2422
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
2523
from dataclasses import dataclass
@@ -945,7 +943,6 @@ def load() -> Tensor:
945943

946944
import torch
947945

948-
@torch.compile
949946
def forward_t(x):
950947
dtype = x.dtype
951948
x = x.float()
@@ -956,7 +953,8 @@ def forward_t(x):
956953
def weight_quant(weight):
957954
weight = torch.tensor(weight, dtype=torch.float32)
958955
weight = forward_t(weight)
959-
weight = weight.numpy().astype(np.float32)
956+
# Use tolist() then convert to numpy to avoid PyTorch-NumPy compatibility issues
957+
weight = np.array(weight.tolist(), dtype=np.float32)
960958
return weight
961959

962960
def part_lazy_q(lazy_tensor: LazyTensor, n_part: int) -> LazyTensor:
@@ -1028,98 +1026,6 @@ def load() -> Tensor:
10281026
return LazyTensor(load, s, lazy_tensors[0].data_type, 'pack_experts ' + ' | '.join(lt.description for lt in lazy_tensors))
10291027

10301028

1031-
# Functionality that simulates `torch.load` but where individual tensors are
1032-
# only loaded into memory on demand, not all at once.
1033-
# PyTorch can't do this natively as of time of writing:
1034-
# - https://github.com/pytorch/pytorch/issues/64327
1035-
# This allows us to de-shard without multiplying RAM usage, and also
1036-
# conveniently drops the PyTorch dependency (though we still need numpy).
1037-
1038-
1039-
@dataclass
1040-
class LazyStorageKind:
1041-
data_type: DataType
1042-
1043-
1044-
@dataclass
1045-
class LazyStorage:
1046-
load: Callable[[int, int], NDArray]
1047-
kind: LazyStorageKind
1048-
description: str
1049-
1050-
1051-
class LazyUnpickler(pickle.Unpickler):
1052-
def __init__(self, fp: IO[bytes], data_base_path: str, zip_file: zipfile.ZipFile):
1053-
super().__init__(fp)
1054-
self.data_base_path = data_base_path
1055-
self.zip_file = zip_file
1056-
1057-
def persistent_load(self, pid: Any) -> Any:
1058-
assert pid[0] == 'storage'
1059-
assert isinstance(pid[1], LazyStorageKind)
1060-
data_type = pid[1].data_type
1061-
filename_stem = pid[2]
1062-
filename = f'{self.data_base_path}/{filename_stem}'
1063-
info = self.zip_file.getinfo(filename)
1064-
1065-
def load(offset: int, elm_count: int) -> NDArray:
1066-
dtype = data_type.dtype
1067-
with self.zip_file.open(info) as fp:
1068-
fp.seek(offset * dtype.itemsize)
1069-
size = elm_count * dtype.itemsize
1070-
data = fp.read(size)
1071-
assert len(data) == size
1072-
return np.frombuffer(data, dtype)
1073-
description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}'
1074-
return LazyStorage(load=load, kind=pid[1], description=description)
1075-
1076-
@staticmethod
1077-
def lazy_rebuild_tensor_v2(storage: Any, storage_offset: Any, size: Any, stride: Any,
1078-
requires_grad: Any, backward_hooks: Any, metadata: Any = None) -> LazyTensor:
1079-
assert isinstance(storage, LazyStorage)
1080-
1081-
def load() -> UnquantizedTensor:
1082-
elm_count = stride[0] * size[0]
1083-
return UnquantizedTensor(storage.load(storage_offset, elm_count).reshape(size))
1084-
description = f'pickled storage_offset={storage_offset} in {storage.description}'
1085-
return LazyTensor(load, list(size), storage.kind.data_type, description)
1086-
1087-
@staticmethod
1088-
def rebuild_from_type_v2(func, new_type, args, state):
1089-
return func(*args)
1090-
1091-
CLASSES = {
1092-
# getattr used here as a workaround for mypy not being smart enough to determine
1093-
# the staticmethods have a __func__ attribute.
1094-
('torch._tensor', '_rebuild_from_type_v2'): getattr(rebuild_from_type_v2, '__func__'),
1095-
('torch._utils', '_rebuild_tensor_v2'): getattr(lazy_rebuild_tensor_v2, '__func__'),
1096-
('torch', 'BFloat16Storage'): LazyStorageKind(DT_BF16),
1097-
('torch', 'HalfStorage'): LazyStorageKind(DT_F16),
1098-
('torch', 'FloatStorage'): LazyStorageKind(DT_F32),
1099-
('torch', 'IntStorage'): LazyStorageKind(DT_I32),
1100-
('torch', 'Tensor'): LazyTensor,
1101-
}
1102-
1103-
def find_class(self, module: str, name: str) -> Any:
1104-
if not module.startswith('torch'):
1105-
return super().find_class(module, name)
1106-
return self.CLASSES[(module, name)]
1107-
1108-
1109-
def lazy_load_torch_file(outer_fp: IO[bytes], path: Path) -> ModelPlus:
1110-
zf = zipfile.ZipFile(outer_fp)
1111-
pickle_paths = [name for name in zf.namelist() if name.endswith('.pkl')]
1112-
assert len(pickle_paths) == 1, pickle_paths
1113-
pickle_fp = zf.open(pickle_paths[0], 'r')
1114-
unpickler = LazyUnpickler(pickle_fp,
1115-
data_base_path=pickle_paths[0][:-4],
1116-
zip_file=zf)
1117-
model = unpickler.load()
1118-
if 'model' in model: model = model['model']
1119-
as_dict = dict(model.items())
1120-
return ModelPlus(model=as_dict, paths=[path], format='torch', vocab=None)
1121-
1122-
11231029
def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
11241030
header_size, = struct.unpack('<Q', fp.read(8))
11251031
header: dict[str, dict[str, Any]] = json.loads(fp.read(header_size))
@@ -1156,14 +1062,11 @@ def lazy_load_file(path: Path) -> ModelPlus:
11561062
fp = open(path, 'rb')
11571063
first8 = fp.read(8)
11581064
fp.seek(0)
1159-
if first8[:2] == b'PK':
1160-
# A zip file, i.e. PyTorch format
1161-
return lazy_load_torch_file(fp, path)
1162-
elif struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024:
1163-
# Probably safetensors
1065+
if struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024:
1066+
# Safetensors format
11641067
return lazy_load_safetensors_file(fp, path)
11651068
else:
1166-
raise ValueError(f"unknown format: {path}")
1069+
raise ValueError(f"unknown format: {path}. Only safetensors format is supported.")
11671070

11681071

11691072
In = TypeVar('In')
@@ -1491,7 +1394,8 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
14911394
# tmp[f"layers.{i_l}.feed_forward.experts.w{w}.weight"] = pack_experts_lazy(experts)
14921395
# tmp[f"rope.freqs"] = part_lazy_rope(1.0 / (torch.tensor(500000) ** (torch.arange(0, 128, 2).float().to("cpu") / 128)))
14931396
# 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
1494-
rope_ndarray = (1.0 / (torch.tensor(500000.0) ** (torch.arange(0, 128, 2).float() / 128))).numpy().astype(np.float32)
1397+
# Use pure NumPy instead of torch to avoid NumPy compatibility issues
1398+
rope_ndarray = (1.0 / (np.float32(500000.0) ** (np.arange(0, 128, 2, dtype=np.float32) / 128))).astype(np.float32)
14951399
# print(rope_ndarray)
14961400

14971401

@@ -1583,7 +1487,7 @@ def load() -> UnquantizedTensor:
15831487

15841488
out: LazyModel = {}
15851489
for name, lazy_tensor in model.items():
1586-
tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
1490+
tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias", ".weight_scale")) or (None, None)
15871491
if name_new is None:
15881492
if skip_unknown:
15891493
logger.info(f"Unexpected tensor name: {name} - skipping")
@@ -1644,15 +1548,11 @@ def load_some_model(path: Path) -> ModelPlus:
16441548
'''Load a model of any supported format.'''
16451549
# Be extra-friendly and accept either a file or a directory:
16461550
if path.is_dir():
1647-
# Check if it's a set of safetensors files first
1648-
globs = ["model-00001-of-*.safetensors", "model.safetensors", "consolidated.safetensors", "model-int2.pth"]
1551+
# Check if it's a set of safetensors files
1552+
globs = ["model-00001-of-*.safetensors", "model.safetensors", "consolidated.safetensors"]
16491553
files = [file for glob in globs for file in path.glob(glob)]
16501554
if not files:
1651-
# Try the PyTorch patterns too, with lower priority
1652-
globs = ["consolidated.00.pth", "pytorch_model-00001-of-*.bin", "*.pt", "pytorch_model.bin"]
1653-
files = [file for glob in globs for file in path.glob(glob)]
1654-
if not files:
1655-
raise FileNotFoundError(f"Can't find model in directory {path}")
1555+
raise FileNotFoundError(f"Can't find safetensors model in directory {path}")
16561556
if len(files) > 1:
16571557
raise ValueError(f"Found multiple models in {path}, not sure which to pick: {files}")
16581558
path = files[0]
@@ -1744,7 +1644,7 @@ def do_dump_model(model_plus: ModelPlus) -> None:
17441644

17451645
def main(args_in: list[str] | None = None) -> None:
17461646
output_choices = ["f32", "f16", "i2"]
1747-
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
1647+
if sys.byteorder == "little":
17481648
# We currently only support Q8_0 output on little endian systems.
17491649
output_choices.append("q8_0")
17501650
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
@@ -1852,4 +1752,4 @@ def main(args_in: list[str] | None = None) -> None:
18521752

18531753

18541754
if __name__ == '__main__':
1855-
main()
1755+
main()

utils/convert.py

Lines changed: 7 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,12 @@
1212
import math
1313
import mmap
1414
import os
15-
import pickle
1615
import re
1716
import signal
1817
import struct
1918
import sys
2019
import textwrap
2120
import time
22-
import zipfile
2321
from abc import ABC, abstractmethod
2422
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
2523
from dataclasses import dataclass
@@ -954,98 +952,6 @@ def load() -> Tensor:
954952
return LazyTensor(load, s, lazy_tensors[0].data_type, 'pack_experts ' + ' | '.join(lt.description for lt in lazy_tensors))
955953

956954

957-
# Functionality that simulates `torch.load` but where individual tensors are
958-
# only loaded into memory on demand, not all at once.
959-
# PyTorch can't do this natively as of time of writing:
960-
# - https://github.com/pytorch/pytorch/issues/64327
961-
# This allows us to de-shard without multiplying RAM usage, and also
962-
# conveniently drops the PyTorch dependency (though we still need numpy).
963-
964-
965-
@dataclass
966-
class LazyStorageKind:
967-
data_type: DataType
968-
969-
970-
@dataclass
971-
class LazyStorage:
972-
load: Callable[[int, int], NDArray]
973-
kind: LazyStorageKind
974-
description: str
975-
976-
977-
class LazyUnpickler(pickle.Unpickler):
978-
def __init__(self, fp: IO[bytes], data_base_path: str, zip_file: zipfile.ZipFile):
979-
super().__init__(fp)
980-
self.data_base_path = data_base_path
981-
self.zip_file = zip_file
982-
983-
def persistent_load(self, pid: Any) -> Any:
984-
assert pid[0] == 'storage'
985-
assert isinstance(pid[1], LazyStorageKind)
986-
data_type = pid[1].data_type
987-
filename_stem = pid[2]
988-
filename = f'{self.data_base_path}/{filename_stem}'
989-
info = self.zip_file.getinfo(filename)
990-
991-
def load(offset: int, elm_count: int) -> NDArray:
992-
dtype = data_type.dtype
993-
with self.zip_file.open(info) as fp:
994-
fp.seek(offset * dtype.itemsize)
995-
size = elm_count * dtype.itemsize
996-
data = fp.read(size)
997-
assert len(data) == size
998-
return np.frombuffer(data, dtype)
999-
description = f'storage data_type={data_type} path-in-zip={filename} path={self.zip_file.filename}'
1000-
return LazyStorage(load=load, kind=pid[1], description=description)
1001-
1002-
@staticmethod
1003-
def lazy_rebuild_tensor_v2(storage: Any, storage_offset: Any, size: Any, stride: Any,
1004-
requires_grad: Any, backward_hooks: Any, metadata: Any = None) -> LazyTensor:
1005-
assert isinstance(storage, LazyStorage)
1006-
1007-
def load() -> UnquantizedTensor:
1008-
elm_count = stride[0] * size[0]
1009-
return UnquantizedTensor(storage.load(storage_offset, elm_count).reshape(size))
1010-
description = f'pickled storage_offset={storage_offset} in {storage.description}'
1011-
return LazyTensor(load, list(size), storage.kind.data_type, description)
1012-
1013-
@staticmethod
1014-
def rebuild_from_type_v2(func, new_type, args, state):
1015-
return func(*args)
1016-
1017-
CLASSES = {
1018-
# getattr used here as a workaround for mypy not being smart enough to determine
1019-
# the staticmethods have a __func__ attribute.
1020-
('torch._tensor', '_rebuild_from_type_v2'): getattr(rebuild_from_type_v2, '__func__'),
1021-
('torch._utils', '_rebuild_tensor_v2'): getattr(lazy_rebuild_tensor_v2, '__func__'),
1022-
('torch', 'BFloat16Storage'): LazyStorageKind(DT_BF16),
1023-
('torch', 'HalfStorage'): LazyStorageKind(DT_F16),
1024-
('torch', 'FloatStorage'): LazyStorageKind(DT_F32),
1025-
('torch', 'IntStorage'): LazyStorageKind(DT_I32),
1026-
('torch', 'Tensor'): LazyTensor,
1027-
}
1028-
1029-
def find_class(self, module: str, name: str) -> Any:
1030-
if not module.startswith('torch'):
1031-
return super().find_class(module, name)
1032-
return self.CLASSES[(module, name)]
1033-
1034-
1035-
def lazy_load_torch_file(outer_fp: IO[bytes], path: Path) -> ModelPlus:
1036-
zf = zipfile.ZipFile(outer_fp)
1037-
pickle_paths = [name for name in zf.namelist() if name.endswith('.pkl')]
1038-
assert len(pickle_paths) == 1, pickle_paths
1039-
pickle_fp = zf.open(pickle_paths[0], 'r')
1040-
unpickler = LazyUnpickler(pickle_fp,
1041-
data_base_path=pickle_paths[0][:-4],
1042-
zip_file=zf)
1043-
model = unpickler.load()
1044-
if 'model' in model: model = model['model']
1045-
as_dict = dict(model.items())
1046-
return ModelPlus(model=as_dict, paths=[path], format='torch', vocab=None)
1047-
1048-
1049955
def lazy_load_safetensors_file(fp: IO[bytes], path: Path) -> ModelPlus:
1050956
header_size, = struct.unpack('<Q', fp.read(8))
1051957
header: dict[str, dict[str, Any]] = json.loads(fp.read(header_size))
@@ -1082,14 +988,11 @@ def lazy_load_file(path: Path) -> ModelPlus:
1082988
fp = open(path, 'rb')
1083989
first8 = fp.read(8)
1084990
fp.seek(0)
1085-
if first8[:2] == b'PK':
1086-
# A zip file, i.e. PyTorch format
1087-
return lazy_load_torch_file(fp, path)
1088-
elif struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024:
1089-
# Probably safetensors
991+
if struct.unpack('<Q', first8)[0] < 16 * 1024 * 1024:
992+
# Safetensors format
1090993
return lazy_load_safetensors_file(fp, path)
1091994
else:
1092-
raise ValueError(f"unknown format: {path}")
995+
raise ValueError(f"unknown format: {path}. Only safetensors format is supported.")
1093996

1094997

1095998
In = TypeVar('In')
@@ -1500,15 +1403,11 @@ def load_some_model(path: Path) -> ModelPlus:
15001403
'''Load a model of any supported format.'''
15011404
# Be extra-friendly and accept either a file or a directory:
15021405
if path.is_dir():
1503-
# Check if it's a set of safetensors files first
1504-
globs = ["model-00001-of-*.safetensors", "model.safetensors", "consolidated.safetensors", "model-int2.pth"]
1406+
# Check if it's a set of safetensors files
1407+
globs = ["model-00001-of-*.safetensors", "model.safetensors", "consolidated.safetensors"]
15051408
files = [file for glob in globs for file in path.glob(glob)]
15061409
if not files:
1507-
# Try the PyTorch patterns too, with lower priority
1508-
globs = ["consolidated.00.pth", "pytorch_model-00001-of-*.bin", "*.pt", "pytorch_model.bin"]
1509-
files = [file for glob in globs for file in path.glob(glob)]
1510-
if not files:
1511-
raise FileNotFoundError(f"Can't find model in directory {path}")
1410+
raise FileNotFoundError(f"Can't find safetensors model in directory {path}")
15121411
if len(files) > 1:
15131412
raise ValueError(f"Found multiple models in {path}, not sure which to pick: {files}")
15141413
path = files[0]
@@ -1600,7 +1499,7 @@ def do_dump_model(model_plus: ModelPlus) -> None:
16001499

16011500
def main(args_in: list[str] | None = None) -> None:
16021501
output_choices = ["f32", "f16", "i2"]
1603-
if np.uint32(1) == np.uint32(1).newbyteorder("<"):
1502+
if sys.byteorder == "little":
16041503
# We currently only support Q8_0 output on little endian systems.
16051504
output_choices.append("q8_0")
16061505
parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")

0 commit comments

Comments
 (0)