1212import math
1313import mmap
1414import os
15- import pickle
1615import re
1716import signal
1817import struct
1918import sys
2019import textwrap
2120import time
22- import zipfile
2321from abc import ABC , abstractmethod
2422from concurrent .futures import ProcessPoolExecutor , ThreadPoolExecutor
2523from dataclasses import dataclass
@@ -945,7 +943,6 @@ def load() -> Tensor:
945943
946944import torch
947945
948- @torch .compile
949946def forward_t (x ):
950947 dtype = x .dtype
951948 x = x .float ()
@@ -956,7 +953,8 @@ def forward_t(x):
def weight_quant(weight):
    """Pass ``weight`` through ``forward_t`` and return it as a float32 NumPy array.

    Args:
        weight: Array-like data accepted by ``torch.tensor``.

    Returns:
        A ``np.float32`` ndarray built from the tensor returned by ``forward_t``.
    """
    weight = torch.tensor(weight, dtype=torch.float32)
    weight = forward_t(weight)
    # Go through tolist() instead of Tensor.numpy() to avoid PyTorch-NumPy
    # compatibility issues.
    weight = np.array(weight.tolist(), dtype=np.float32)
    return weight
961959
962960def part_lazy_q (lazy_tensor : LazyTensor , n_part : int ) -> LazyTensor :
@@ -1028,98 +1026,6 @@ def load() -> Tensor:
10281026 return LazyTensor (load , s , lazy_tensors [0 ].data_type , 'pack_experts ' + ' | ' .join (lt .description for lt in lazy_tensors ))
10291027
10301028
1031- # Functionality that simulates `torch.load` but where individual tensors are
1032- # only loaded into memory on demand, not all at once.
1033- # PyTorch can't do this natively as of time of writing:
1034- # - https://github.com/pytorch/pytorch/issues/64327
1035- # This allows us to de-shard without multiplying RAM usage, and also
1036- # conveniently drops the PyTorch dependency (though we still need numpy).
1037-
1038-
1039- @dataclass
1040- class LazyStorageKind :
1041- data_type : DataType
1042-
1043-
1044- @dataclass
1045- class LazyStorage :
1046- load : Callable [[int , int ], NDArray ]
1047- kind : LazyStorageKind
1048- description : str
1049-
1050-
1051- class LazyUnpickler (pickle .Unpickler ):
1052- def __init__ (self , fp : IO [bytes ], data_base_path : str , zip_file : zipfile .ZipFile ):
1053- super ().__init__ (fp )
1054- self .data_base_path = data_base_path
1055- self .zip_file = zip_file
1056-
1057- def persistent_load (self , pid : Any ) -> Any :
1058- assert pid [0 ] == 'storage'
1059- assert isinstance (pid [1 ], LazyStorageKind )
1060- data_type = pid [1 ].data_type
1061- filename_stem = pid [2 ]
1062- filename = f'{ self .data_base_path } /{ filename_stem } '
1063- info = self .zip_file .getinfo (filename )
1064-
1065- def load (offset : int , elm_count : int ) -> NDArray :
1066- dtype = data_type .dtype
1067- with self .zip_file .open (info ) as fp :
1068- fp .seek (offset * dtype .itemsize )
1069- size = elm_count * dtype .itemsize
1070- data = fp .read (size )
1071- assert len (data ) == size
1072- return np .frombuffer (data , dtype )
1073- description = f'storage data_type={ data_type } path-in-zip={ filename } path={ self .zip_file .filename } '
1074- return LazyStorage (load = load , kind = pid [1 ], description = description )
1075-
1076- @staticmethod
1077- def lazy_rebuild_tensor_v2 (storage : Any , storage_offset : Any , size : Any , stride : Any ,
1078- requires_grad : Any , backward_hooks : Any , metadata : Any = None ) -> LazyTensor :
1079- assert isinstance (storage , LazyStorage )
1080-
1081- def load () -> UnquantizedTensor :
1082- elm_count = stride [0 ] * size [0 ]
1083- return UnquantizedTensor (storage .load (storage_offset , elm_count ).reshape (size ))
1084- description = f'pickled storage_offset={ storage_offset } in { storage .description } '
1085- return LazyTensor (load , list (size ), storage .kind .data_type , description )
1086-
1087- @staticmethod
1088- def rebuild_from_type_v2 (func , new_type , args , state ):
1089- return func (* args )
1090-
1091- CLASSES = {
1092- # getattr used here as a workaround for mypy not being smart enough to determine
1093- # the staticmethods have a __func__ attribute.
1094- ('torch._tensor' , '_rebuild_from_type_v2' ): getattr (rebuild_from_type_v2 , '__func__' ),
1095- ('torch._utils' , '_rebuild_tensor_v2' ): getattr (lazy_rebuild_tensor_v2 , '__func__' ),
1096- ('torch' , 'BFloat16Storage' ): LazyStorageKind (DT_BF16 ),
1097- ('torch' , 'HalfStorage' ): LazyStorageKind (DT_F16 ),
1098- ('torch' , 'FloatStorage' ): LazyStorageKind (DT_F32 ),
1099- ('torch' , 'IntStorage' ): LazyStorageKind (DT_I32 ),
1100- ('torch' , 'Tensor' ): LazyTensor ,
1101- }
1102-
1103- def find_class (self , module : str , name : str ) -> Any :
1104- if not module .startswith ('torch' ):
1105- return super ().find_class (module , name )
1106- return self .CLASSES [(module , name )]
1107-
1108-
1109- def lazy_load_torch_file (outer_fp : IO [bytes ], path : Path ) -> ModelPlus :
1110- zf = zipfile .ZipFile (outer_fp )
1111- pickle_paths = [name for name in zf .namelist () if name .endswith ('.pkl' )]
1112- assert len (pickle_paths ) == 1 , pickle_paths
1113- pickle_fp = zf .open (pickle_paths [0 ], 'r' )
1114- unpickler = LazyUnpickler (pickle_fp ,
1115- data_base_path = pickle_paths [0 ][:- 4 ],
1116- zip_file = zf )
1117- model = unpickler .load ()
1118- if 'model' in model : model = model ['model' ]
1119- as_dict = dict (model .items ())
1120- return ModelPlus (model = as_dict , paths = [path ], format = 'torch' , vocab = None )
1121-
1122-
11231029def lazy_load_safetensors_file (fp : IO [bytes ], path : Path ) -> ModelPlus :
11241030 header_size , = struct .unpack ('<Q' , fp .read (8 ))
11251031 header : dict [str , dict [str , Any ]] = json .loads (fp .read (header_size ))
@@ -1156,14 +1062,11 @@ def lazy_load_file(path: Path) -> ModelPlus:
11561062 fp = open (path , 'rb' )
11571063 first8 = fp .read (8 )
11581064 fp .seek (0 )
1159- if first8 [:2 ] == b'PK' :
1160- # A zip file, i.e. PyTorch format
1161- return lazy_load_torch_file (fp , path )
1162- elif struct .unpack ('<Q' , first8 )[0 ] < 16 * 1024 * 1024 :
1163- # Probably safetensors
1065+ if struct .unpack ('<Q' , first8 )[0 ] < 16 * 1024 * 1024 :
1066+ # Safetensors format
11641067 return lazy_load_safetensors_file (fp , path )
11651068 else :
1166- raise ValueError (f"unknown format: { path } " )
1069+ raise ValueError (f"unknown format: { path } . Only safetensors format is supported. " )
11671070
11681071
11691072In = TypeVar ('In' )
@@ -1491,7 +1394,8 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
14911394 # tmp[f"layers.{i_l}.feed_forward.experts.w{w}.weight"] = pack_experts_lazy(experts)
14921395 # tmp[f"rope.freqs"] = part_lazy_rope(1.0 / (torch.tensor(500000) ** (torch.arange(0, 128, 2).float().to("cpu") / 128)))
14931396 # 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
1494- rope_ndarray = (1.0 / (torch .tensor (500000.0 ) ** (torch .arange (0 , 128 , 2 ).float () / 128 ))).numpy ().astype (np .float32 )
1397+ # Use pure NumPy instead of torch here to avoid PyTorch-NumPy compatibility issues
1398+ rope_ndarray = (1.0 / (np .float32 (500000.0 ) ** (np .arange (0 , 128 , 2 , dtype = np .float32 ) / 128 ))).astype (np .float32 )
14951399 # print(rope_ndarray)
14961400
14971401
@@ -1583,7 +1487,7 @@ def load() -> UnquantizedTensor:
15831487
15841488 out : LazyModel = {}
15851489 for name , lazy_tensor in model .items ():
1586- tensor_type , name_new = tmap .get_type_and_name (name , try_suffixes = (".weight" , ".bias" )) or (None , None )
1490+ tensor_type , name_new = tmap .get_type_and_name (name , try_suffixes = (".weight" , ".bias" , ".weight_scale" )) or (None , None )
15871491 if name_new is None :
15881492 if skip_unknown :
15891493 logger .info (f"Unexpected tensor name: { name } - skipping" )
@@ -1644,15 +1548,11 @@ def load_some_model(path: Path) -> ModelPlus:
16441548 '''Load a model of any supported format.'''
16451549 # Be extra-friendly and accept either a file or a directory:
16461550 if path .is_dir ():
1647- # Check if it's a set of safetensors files first
1648- globs = ["model-00001-of-*.safetensors" , "model.safetensors" , "consolidated.safetensors" , "model-int2.pth" ]
1551+ # Check if it's a set of safetensors files
1552+ globs = ["model-00001-of-*.safetensors" , "model.safetensors" , "consolidated.safetensors" ]
16491553 files = [file for glob in globs for file in path .glob (glob )]
16501554 if not files :
1651- # Try the PyTorch patterns too, with lower priority
1652- globs = ["consolidated.00.pth" , "pytorch_model-00001-of-*.bin" , "*.pt" , "pytorch_model.bin" ]
1653- files = [file for glob in globs for file in path .glob (glob )]
1654- if not files :
1655- raise FileNotFoundError (f"Can't find model in directory { path } " )
1555+ raise FileNotFoundError (f"Can't find safetensors model in directory { path } " )
16561556 if len (files ) > 1 :
16571557 raise ValueError (f"Found multiple models in { path } , not sure which to pick: { files } " )
16581558 path = files [0 ]
@@ -1744,7 +1644,7 @@ def do_dump_model(model_plus: ModelPlus) -> None:
17441644
17451645def main (args_in : list [str ] | None = None ) -> None :
17461646 output_choices = ["f32" , "f16" , "i2" ]
1747- if np . uint32 ( 1 ) == np . uint32 ( 1 ). newbyteorder ( "<" ) :
1647+ if sys . byteorder == "little" :
17481648 # We currently only support Q8_0 output on little endian systems.
17491649 output_choices .append ("q8_0" )
17501650 parser = argparse .ArgumentParser (description = "Convert a LLaMA model to a GGML compatible file" )
@@ -1852,4 +1752,4 @@ def main(args_in: list[str] | None = None) -> None:
18521752
18531753
18541754if __name__ == '__main__' :
1855- main ()
1755+ main ()
0 commit comments