Skip to content

Commit 77a0036

Browse files
committed
add docstrings
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
1 parent 16ae486 commit 77a0036

File tree

4 files changed

+42
-20
lines changed

4 files changed

+42
-20
lines changed

src/transformers/quantizers/quantizer_compressed_tensors.py

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -127,24 +127,46 @@ def is_serializable(self) -> bool:
127127
return True
128128

129129
def _update_transforms_tied_weights(self, model: "PreTrainedModel"):
130+
"""
131+
This function updates the `_tied_weights_keys` and `all_tied_weights_keys`
132+
attributes of the given model with transform weights.
133+
134+
This function is needed because transformers only knows which weights are shared
135+
via the `_tied_weights_keys` attributes. These attributes are used to tie
136+
weights after the model has loaded.
137+
138+
CompressedTensors does not enforce that a particular weight is the source weight.
139+
We rely on correctness of the following mapping in PreTrainedModel.tie_weights():
140+
```
141+
B -> A
142+
C -> A
143+
D -> A
144+
145+
Where any of A,B,C,D might be the loaded source weight
146+
```
147+
This functionality is tested by `test_modeling_utils::BaseModelWithMultipleTiedWeights`
148+
149+
In the future, this function will be folded into `apply_transform_config`
150+
"""
130151
from compressed_tensors.transform import TransformBase
131152

132-
# create mapping: tied_ptr -> key
133-
weight_to_keys = defaultdict(list)
153+
# 1. find which transform weights are shared
154+
# create mapping: tensor_ptr -> key
155+
weight_to_keys: dict[int, str] = defaultdict(list)
134156
for name, module in model.named_modules():
135157
if isinstance(module, TransformBase):
136158
for param_name, param in module.named_parameters(recurse=False):
137159
param_fqn = f"{name}.{param_name}" if name else param_name
138-
weight_to_keys[id(param)].append(param_fqn)
160+
weight_to_keys[id(param)].append(param_fqn) # id is used to identify meta tensors
139161

162+
# 2. assign each group of shared weights to the same value
140163
# create tied weights: key -> tied_keys[0]
141-
# PreTrainedModel.tie_weights will tie keys with the same value (tied_keys[0])
142164
transform_tied_weights_keys = {}
143165
for keys in weight_to_keys.values():
144166
keys = list(keys)
145-
for key in keys[1:]:
167+
for key in keys[1:]: # skip A -> A
146168
transform_tied_weights_keys[key] = keys[0]
147169

148-
# update tied weights attributes
170+
# 3. update tied weights attributes
149171
model._tied_weights_keys.update(transform_tied_weights_keys)
150172
model.all_tied_weights_keys = model._tied_weights_keys

src/transformers/utils/import_utils.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def is_env_variable_false(env_variable: str) -> bool:
105105
AUTOROUND_MIN_VERSION = "0.5.0"
106106
TRITON_MIN_VERSION = "1.0.0"
107107
KERNELS_MIN_VERSION = "0.9.0"
108+
COMPRESSED_TENSORS_MIN_VERSION = "0.11.0"
108109

109110

110111
@lru_cache
@@ -1027,9 +1028,9 @@ def is_qutlass_available():
10271028

10281029

10291030
@lru_cache
1030-
def is_compressed_tensors_available() -> bool:
1031+
def is_compressed_tensors_available(min_version: str = COMPRESSED_TENSORS_MIN_VERSION) -> bool:
10311032
is_available, ct_version = _is_package_available("compressed_tensors", return_version=True)
1032-
return is_available and version.parse(ct_version) >= version.parse("0.11.0")
1033+
return is_available and version.parse(ct_version) >= version.parse(min_version)
10331034

10341035

10351036
@lru_cache

src/transformers/utils/quantization_config.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,16 +1094,16 @@ class CompressedTensorsConfig(QuantizationConfigMixin):
10941094
This is a wrapper class that handles compressed-tensors quantization config options.
10951095
It is a wrapper around `compressed_tensors.QuantizationConfig`
10961096
Args:
1097-
config_groups (`dict[str, typing.Union[ForwardRef('QuantizationScheme'), list[str]]] | None`, *optional*): <fill_docstring>
1098-
format (`str`, *optional*, defaults to `"dense"`): <fill_docstring>
1099-
quantization_status (`QuantizationStatus`, *optional*, defaults to `"initialized"`): <fill_docstring>
1100-
kv_cache_scheme (`Optional`, *optional*): <fill_docstring>
1101-
global_compression_ratio (`float | None`, *optional*): <fill_docstring>
1102-
ignore (`list[str] | None`, *optional*): <fill_docstring>
1103-
sparsity_config (`dict[str, typing.Any] | None`, *optional*): <fill_docstring>
1104-
transform_config (`Optional`, *optional*): <fill_docstring>
1105-
quant_method (`str`, *optional*, defaults to `"compressed-tensors"`): <fill_docstring>
1106-
run_compressed (`bool`, *optional*, defaults to `True`): <fill_docstring>
1097+
config_groups (`dict[str, typing.Union[ForwardRef('QuantizationScheme'), list[str]]] | None`, *optional*): dictionary mapping group name to a quantization scheme definition
1098+
format (`str`, *optional*, defaults to `"dense"`): format the model is represented as. Set `run_compressed` True to execute the model as the compressed format.
1099+
quantization_status (`QuantizationStatus`, *optional*, defaults to `"initialized"`): status of the model in the quantization lifecycle, e.g. 'initialized', 'calibration', 'frozen'
1100+
kv_cache_scheme (`Optional`, *optional*): specifies quantization of the kv cache. If None, kv cache is not quantized.
1101+
global_compression_ratio (`float | None`, *optional*): 0-1 float percentage of model compression
1102+
ignore (`list[str] | None`, *optional*): layer names or types to not quantize, supports regex prefixed by 're:'
1103+
sparsity_config (`dict[str, typing.Any] | None`, *optional*): configuration for sparsity compression
1104+
transform_config (`Optional`, *optional*): configuration for (hadamard) transforms
1105+
quant_method (`str`, *optional*, defaults to `"compressed-tensors"`): do not override, should be compressed-tensors
1106+
run_compressed (`bool`, *optional*, defaults to `True`): alter submodules (usually linear) in order to emulate compressed model execution if True, otherwise use default submodule
11071107
"""
11081108

11091109
def __init__(
@@ -1115,7 +1115,7 @@ def __init__(
11151115
global_compression_ratio: float | None = None,
11161116
ignore: list[str] | None = None,
11171117
sparsity_config: dict[str, Any] | None = None,
1118-
transform_config: Optional[dict[str, Any]] = None,
1118+
transform_config: dict[str, Any] | None = None,
11191119
quant_method: str = "compressed-tensors",
11201120
run_compressed: bool = True,
11211121
**kwargs,

utils/check_docstrings.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,6 @@ class DecoratedItem:
189189
"CanineTokenizer",
190190
"ChineseCLIPTextModel",
191191
"ClapTextConfig",
192-
"CompressedTensorsConfig",
193192
"ConditionalDetrConfig",
194193
"ConditionalDetrImageProcessor",
195194
"ConvBertConfig",

0 commit comments

Comments
 (0)