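format: re-wrap over-long lines in torchao/core/config.py and torchao/quantization/quant_primitives.py (whitespace and line-wrapping only in the hunks shown)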

pytorch · jerryzh168 · Apr 8, 2025 · Apr 1, 2025 · Apr 4, 2025 · Apr 6, 2025
commit d4bb04da10f52b6f1cfe8b1efe76573e7b6091bc
diff --git a/torchao/core/config.py b/torchao/core/config.py
@@ -171,7 +171,11 @@ def config_to_dict(config: AOBaseConfig) -> Dict[str, Any]:
     return json.loads(json.dumps(config, cls=ConfigJSONEncoder))
 
 
-ALLOWED_AO_MODULES = {"torchao.quantization", "torchao.sparsity.sparse_api", "torchao.prototype.quantization"}
+ALLOWED_AO_MODULES = {
+    "torchao.quantization",
+    "torchao.sparsity.sparse_api",
+    "torchao.prototype.quantization",
+}
 
 
 def config_from_dict(data: Dict[str, Any]) -> AOBaseConfig:

diff --git a/torchao/quantization/quant_primitives.py b/torchao/quantization/quant_primitives.py
@@ -1148,21 +1148,28 @@ def quantize_gguf(
         block_qparam_shape_after_reduction[i] = 1
     original_shape = input.shape
     input = input.view(input_shape_for_reduction)
-    quantized_block_scale = quantized_block_scale.view(block_qparam_shape_after_reduction)
+    quantized_block_scale = quantized_block_scale.view(
+        block_qparam_shape_after_reduction
+    )
     quantized_block_min = quantized_block_min.view(block_qparam_shape_after_reduction)
 
-
     # step 2: second order quantization, recover unquantized block_scale and block_min
     super_block_size = (1, _GGUF_QK_K // block_size[-1], 1)
     super_block_input_shape_for_reduction, reduction_dims = _get_reduction_params(
         super_block_size, quantized_block_scale.size()
     )
-    super_block_qparam_shape_after_reduction = super_block_input_shape_for_reduction.copy()
+    super_block_qparam_shape_after_reduction = (
+        super_block_input_shape_for_reduction.copy()
+    )
     for i in reduction_dims:
         super_block_qparam_shape_after_reduction[i] = 1
 
-    quantized_block_scale = quantized_block_scale.view(super_block_input_shape_for_reduction)
-    quantized_block_min = quantized_block_min.view(super_block_input_shape_for_reduction)
+    quantized_block_scale = quantized_block_scale.view(
+        super_block_input_shape_for_reduction
+    )
+    quantized_block_min = quantized_block_min.view(
+        super_block_input_shape_for_reduction
+    )
     super_block_scale_scale = super_block_scale_scale.view(
         super_block_qparam_shape_after_reduction
     )
@@ -1203,19 +1210,27 @@ def dequantize_gguf(
 
     original_shape = input.shape
     input = input.view(input_shape_for_reduction)
-    quantized_block_scale = quantized_block_scale.view(block_qparam_shape_after_reduction)
+    quantized_block_scale = quantized_block_scale.view(
+        block_qparam_shape_after_reduction
+    )
     quantized_block_min = quantized_block_min.view(block_qparam_shape_after_reduction)
 
     # step 2. calculate and reshape block_qparams for second quantization step
     super_block_size = (1, _GGUF_QK_K // block_size[-1], 1)
     super_block_input_shape_for_reduction, reduction_dims = _get_reduction_params(
         super_block_size, quantized_block_scale.size()
     )
-    super_block_qparam_shape_after_reduction = super_block_input_shape_for_reduction.copy()
+    super_block_qparam_shape_after_reduction = (
+        super_block_input_shape_for_reduction.copy()
+    )
     for i in reduction_dims:
         super_block_qparam_shape_after_reduction[i] = 1
-    quantized_block_scale = quantized_block_scale.view(super_block_input_shape_for_reduction)
-    quantized_block_min = quantized_block_min.view(super_block_input_shape_for_reduction)
+    quantized_block_scale = quantized_block_scale.view(
+        super_block_input_shape_for_reduction
+    )
+    quantized_block_min = quantized_block_min.view(
+        super_block_input_shape_for_reduction
+    )
     super_block_scale_scale = super_block_scale_scale.view(
         super_block_qparam_shape_after_reduction
     )