Skip to content

Commit c6b54d8

Browse files
committed
Sync quantize: Handle user-defined quantization levels for additional tensors (#12511)
1 parent 56e7783 commit c6b54d8

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

llama_cpp/llama_cpp.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,7 @@ class llama_model_params(ctypes.Structure):
702702
"""Parameters for llama_model
703703
704704
Attributes:
705+
devices (ctypes.Array[ggml_backend_dev_t]): NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
705706
tensor_buft_overrides (llama_model_tensor_buft_override): NULL-terminated list of buffer types to use for tensors that match a pattern
706707
n_gpu_layers (int): number of layers to store in VRAM
707708
split_mode (int): how to split the model across multiple GPUs
@@ -716,6 +717,7 @@ class llama_model_params(ctypes.Structure):
716717
check_tensors (bool): validate model tensor data"""
717718

718719
if TYPE_CHECKING:
720+
devices: CtypesArray[ctypes.c_void_p] # NOTE: unused
719721
tensor_buft_overrides: ctypes.POINTER(llama_model_tensor_buft_override)
720722
n_gpu_layers: int
721723
split_mode: int
@@ -915,6 +917,7 @@ class llama_context_params(ctypes.Structure):
915917
# bool keep_split; // quantize to the same number of shards
916918
# void * imatrix; // pointer to importance matrix data
917919
# void * kv_overrides; // pointer to vector containing overrides
920+
# void * tensor_types; // pointer to vector containing tensor types
918921
# } llama_model_quantize_params;
919922
class llama_model_quantize_params(ctypes.Structure):
920923
"""Parameters for llama_model_quantize
@@ -931,6 +934,7 @@ class llama_model_quantize_params(ctypes.Structure):
931934
keep_split (bool): quantize to the same number of shards
932935
imatrix (ctypes.c_void_p): pointer to importance matrix data
933936
kv_overrides (ctypes.c_void_p): pointer to vector containing overrides
937+
tensor_types (ctypes.c_void_p): pointer to vector containing tensor types
934938
"""
935939

936940
if TYPE_CHECKING:
@@ -945,6 +949,7 @@ class llama_model_quantize_params(ctypes.Structure):
945949
keep_split: bool
946950
imatrix: ctypes.c_void_p
947951
kv_overrides: ctypes.c_void_p
952+
tensor_types: ctypes.c_void_p
948953

949954
_fields_ = [
950955
("nthread", ctypes.c_int32),
@@ -958,6 +963,7 @@ class llama_model_quantize_params(ctypes.Structure):
958963
("keep_split", ctypes.c_bool),
959964
("imatrix", ctypes.c_void_p),
960965
("kv_overrides", ctypes.c_void_p),
966+
("tensor_types", ctypes.c_void_p),
961967
]
962968

963969

0 commit comments

Comments
 (0)