add docstrings

kylesayrs · kylesayrs · commit 77a0036d6464 · 2025-12-17T17:58:47.000Z
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
diff --git a/src/transformers/quantizers/quantizer_compressed_tensors.py b/src/transformers/quantizers/quantizer_compressed_tensors.py
@@ -127,24 +127,46 @@ def is_serializable(self) -> bool:
         return True
 
     def _update_transforms_tied_weights(self, model: "PreTrainedModel"):
+        """
+        This function updates the `_tied_weights_keys` and `all_tied_weights_keys`
+        attributes of the given model with transform weights.
+
+        This function is needed because transformers only knows which weights are shared
+        via the `_tied_weights_keys` attributes. These attributes are used to tie
+        weights after the model has loaded.
+
+        CompressedTensors does not enforce that a particular weight is the source weight.
+        We rely on correctness of the following mapping in PreTrainedModel.tie_weights():
+        ```
+        B -> A
+        C -> A
+        D -> A
+
+        Where any of A,B,C,D might be the loaded source weight
+        ```
+        This functionality is tested by `test_modeling_utils::BaseModelWithMultipleTiedWeights`
+
+        In the future, this function will be folded into `apply_transform_config`
+        """
         from compressed_tensors.transform import TransformBase
 
-        # create mapping: tied_ptr -> key
-        weight_to_keys = defaultdict(list)
+        # 1. find which transform weights are shared
+        # create mapping: tensor_ptr -> key
+        weight_to_keys: dict[int, str] = defaultdict(list)
         for name, module in model.named_modules():
             if isinstance(module, TransformBase):
                 for param_name, param in module.named_parameters(recurse=False):
                     param_fqn = f"{name}.{param_name}" if name else param_name
-                    weight_to_keys[id(param)].append(param_fqn)
+                    weight_to_keys[id(param)].append(param_fqn)  # id is used to identify meta tensors
 
+        # 2. assign each group of shared weights to the same value
         # create tied weights: key -> tied_keys[0]
-        # PreTrainedModel.tie_weights will tie keys with the same value (tied_keys[0])
         transform_tied_weights_keys = {}
         for keys in weight_to_keys.values():
             keys = list(keys)
-            for key in keys[1:]:
+            for key in keys[1:]:  # skip A -> A
                 transform_tied_weights_keys[key] = keys[0]
 
-        # update tied weights attributes
+        # 3. update tied weights attributes
         model._tied_weights_keys.update(transform_tied_weights_keys)
         model.all_tied_weights_keys = model._tied_weights_keys
diff --git a/src/transformers/utils/import_utils.py b/src/transformers/utils/import_utils.py
@@ -105,6 +105,7 @@ def is_env_variable_false(env_variable: str) -> bool:
 AUTOROUND_MIN_VERSION = "0.5.0"
 TRITON_MIN_VERSION = "1.0.0"
 KERNELS_MIN_VERSION = "0.9.0"
+COMPRESSED_TENSORS_MIN_VERSION = "0.11.0"
 
 
 @lru_cache
@@ -1027,9 +1028,9 @@ def is_qutlass_available():
 
 
 @lru_cache
-def is_compressed_tensors_available() -> bool:
+def is_compressed_tensors_available(min_version: str = COMPRESSED_TENSORS_MIN_VERSION) -> bool:
     is_available, ct_version = _is_package_available("compressed_tensors", return_version=True)
-    return is_available and version.parse(ct_version) >= version.parse("0.11.0")
+    return is_available and version.parse(ct_version) >= version.parse(min_version)
 
 
 @lru_cache
diff --git a/src/transformers/utils/quantization_config.py b/src/transformers/utils/quantization_config.py
@@ -1094,16 +1094,16 @@ class CompressedTensorsConfig(QuantizationConfigMixin):
     This is a wrapper class that handles compressed-tensors quantization config options.
     It is a wrapper around `compressed_tensors.QuantizationConfig`
     Args:
-            config_groups (`dict[str, typing.Union[ForwardRef('QuantizationScheme'), list[str]]] | None`, *optional*): <fill_docstring>
-            format (`str`, *optional*, defaults to `"dense"`): <fill_docstring>
-            quantization_status (`QuantizationStatus`, *optional*, defaults to `"initialized"`): <fill_docstring>
-            kv_cache_scheme (`Optional`, *optional*): <fill_docstring>
-            global_compression_ratio (`float | None`, *optional*): <fill_docstring>
-            ignore (`list[str] | None`, *optional*): <fill_docstring>
-            sparsity_config (`dict[str, typing.Any] | None`, *optional*): <fill_docstring>
-            transform_config (`Optional`, *optional*): <fill_docstring>
-            quant_method (`str`, *optional*, defaults to `"compressed-tensors"`): <fill_docstring>
-            run_compressed (`bool`, *optional*, defaults to `True`): <fill_docstring>
+            config_groups (`dict[str, typing.Union[ForwardRef('QuantizationScheme'), list[str]]] | None`, *optional*): dictionary mapping group name to a quantization scheme definition
+            format (`str`, *optional*, defaults to `"dense"`): format the model is represented as. Set `run_compressed` to True to execute the model in the compressed format if not `dense`
+            quantization_status (`QuantizationStatus`, *optional*, defaults to `"initialized"`): status of model in the quantization lifecycle, e.g. 'initialized', 'calibration', 'frozen'
+            kv_cache_scheme (`Optional`, *optional*): specifies quantization of the kv cache. If None, kv cache is not quantized.
+            global_compression_ratio (`float | None`, *optional*): 0-1 float percentage of model compression
+            ignore (`list[str] | None`, *optional*): layer names or types to not quantize, supports regex prefixed by 're:'
+            sparsity_config (`dict[str, typing.Any] | None`, *optional*): configuration for sparsity compression
+            transform_config (`Optional`, *optional*): configuration for (hadamard) transforms
+            quant_method (`str`, *optional*, defaults to `"compressed-tensors"`): do not override, should be compressed-tensors
+            run_compressed (`bool`, *optional*, defaults to `True`): alter submodules (usually linear) in order to emulate compressed model execution if True, otherwise use default submodule
     """
 
     def __init__(
@@ -1115,7 +1115,7 @@ def __init__(
         global_compression_ratio: float | None = None,
         ignore: list[str] | None = None,
         sparsity_config: dict[str, Any] | None = None,
-        transform_config: Optional[dict[str, Any]] = None,
+        transform_config: dict[str, Any] | None = None,
         quant_method: str = "compressed-tensors",
         run_compressed: bool = True,
         **kwargs,
diff --git a/utils/check_docstrings.py b/utils/check_docstrings.py
@@ -189,7 +189,6 @@ class DecoratedItem:
     "CanineTokenizer",
     "ChineseCLIPTextModel",
     "ClapTextConfig",
-    "CompressedTensorsConfig",
     "ConditionalDetrConfig",
     "ConditionalDetrImageProcessor",
     "ConvBertConfig",