@@ -702,6 +702,7 @@ class llama_model_params(ctypes.Structure):
     """Parameters for llama_model
 
     Attributes:
+        devices (ctypes.Array[ggml_backend_dev_t]): NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
         tensor_buft_overrides(llama_model_tensor_buft_override): NULL-terminated list of buffer types to use for tensors that match a pattern
         n_gpu_layers (int): number of layers to store in VRAM
         split_mode (int): how to split the model across multiple GPUs
@@ -716,6 +717,7 @@ class llama_model_params(ctypes.Structure):
         check_tensors (bool): validate model tensor data"""
 
     if TYPE_CHECKING:
+        devices: CtypesArray[ctypes.c_void_p]  # NOTE: unused
         tensor_buft_overrides: ctypes.POINTER(llama_model_tensor_buft_override)
         n_gpu_layers: int
         split_mode: int
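For context, a minimal sketch (not part of this diff) of how the new `devices` field behaves from the Python side, assuming this repository's `llama_cpp` module and its existing `llama_model_default_params()` wrapper; the annotation above is typing-only (NOTE: unused), and leaving the field NULL lets llama.cpp use all available devices:

    import llama_cpp

    params = llama_cpp.llama_model_default_params()
    # `devices` defaults to NULL, so llama.cpp considers every available
    # device for offloading; nothing needs to be populated from Python.
    params.n_gpu_layers = 32  # hypothetical value: store 32 layers in VRAM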
@@ -915,6 +917,7 @@ class llama_context_params(ctypes.Structure):
 #     bool keep_split;              // quantize to the same number of shards
 #     void * imatrix;               // pointer to importance matrix data
 #     void * kv_overrides;          // pointer to vector containing overrides
+#     void * tensor_types;          // pointer to vector containing tensor types
 # } llama_model_quantize_params;
 class llama_model_quantize_params(ctypes.Structure):
     """Parameters for llama_model_quantize
@@ -931,6 +934,7 @@ class llama_model_quantize_params(ctypes.Structure):
         keep_split (bool): quantize to the same number of shards
         imatrix (ctypes.c_void_p): pointer to importance matrix data
         kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
+        tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
     """
 
     if TYPE_CHECKING:
@@ -945,6 +949,7 @@ class llama_model_quantize_params(ctypes.Structure):
         keep_split: bool
         imatrix: ctypes.c_void_p
         kv_overrides: ctypes.c_void_p
+        tensor_types: ctypes.c_void_p
 
     _fields_ = [
         ("nthread", ctypes.c_int32),
@@ -958,6 +963,7 @@ class llama_model_quantize_params(ctypes.Structure):
         ("keep_split", ctypes.c_bool),
         ("imatrix", ctypes.c_void_p),
         ("kv_overrides", ctypes.c_void_p),
+        ("tensor_types", ctypes.c_void_p),
     ]
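To illustrate where the new `tensor_types` pointer sits in practice, a hedged sketch of a quantization call through these bindings, assuming the existing `llama_model_quantize_default_params()` and `llama_model_quantize()` wrappers; the file paths and ftype choice are hypothetical, and `tensor_types` is left NULL so no per-tensor type overrides apply:

    import ctypes
    import llama_cpp

    qparams = llama_cpp.llama_model_quantize_default_params()
    qparams.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_Q4_K_M  # hypothetical target type
    qparams.tensor_types = None  # NULL c_void_p: no vector of tensor-type overrides

    # Quantize the input model file to the output path; returns 0 on success.
    ret = llama_cpp.llama_model_quantize(
        b"model-f16.gguf",     # hypothetical input path
        b"model-q4_k_m.gguf",  # hypothetical output path
        ctypes.byref(qparams),
    )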