openvinotoolkit · ljaljushkin · Nov 4, 2025 · Oct 29, 2025 · Oct 29, 2025 · Oct 29, 2025
@@ -398,7 +398,7 @@ class AdvancedCompressionParameters:
     :type lora_adapter_rank: int
     :param group_size_fallback_mode: Specifies how to handle nodes that do not support the given group size.
     :type group_size_fallback_mode: GroupSizeFallbackMode
-    :param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 16. The reason
+    :param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 32. The reason
         behind this argument is to avoid too small group size values, which may lead to performance issues.
     :type min_adjusted_group_size: int
     :param awq_params: Advanced parameters for AWQ algorithm.
@@ -418,8 +418,8 @@ class AdvancedCompressionParameters:
 
     statistics_path: Optional[str] = None
     lora_adapter_rank: int = 256
-    group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.IGNORE
-    min_adjusted_group_size: int = 16
+    group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.ERROR
+    min_adjusted_group_size: int = 32
     awq_params: AdvancedAWQParameters = field(default_factory=AdvancedAWQParameters)
     scale_estimation_params: AdvancedScaleEstimationParameters = field(
         default_factory=AdvancedScaleEstimationParameters

@@ -58,16 +58,8 @@
 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
 
-INT8_MODES = [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM]
-NON_INT8_MODES = [
-    CompressWeightsMode.INT4_SYM,
-    CompressWeightsMode.INT4_ASYM,
-    CompressWeightsMode.NF4,
-    CompressWeightsMode.MXFP4,
-    CompressWeightsMode.MXFP8_E4M3,
-    CompressWeightsMode.FP8_E4M3,
-    CompressWeightsMode.FP4,
-]
+INT8_MODES = [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8]
+NON_INT8_MODES = [mode for mode in CompressWeightsMode if mode not in INT8_MODES]
 SUPPORTED_DATA_TYPES = [
     TensorDataType.float16,
     TensorDataType.bfloat16,
@@ -101,6 +93,8 @@ def get_weight_compression_configuration(
     elif group_size is None and mode in NON_INT8_MODES:
         if mode in [CompressWeightsMode.MXFP4, CompressWeightsMode.MXFP8_E4M3]:
             group_size = 32
+        elif mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]:
+            group_size = -1
         else:
             group_size = 128
 
@@ -599,16 +593,12 @@ def _handle_ignore_group_size_fallback(
                 if w_params.node_with_weight.node_name not in nodes_to_exclude
             ]
 
-            log_lines = [
-                f"{node_name} (weight shape: {weight_shape})" for node_name, weight_shape in nodes_to_exclude.items()
-            ]
-            log_message = (
+            nncf_logger.warning(
                 f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. "
                 "They will be ignored and kept with original precision.\n"
                 "Consider changing group size value or setting group size fallback parameter to ADJUST, which enables "
                 "automatic adjustment to smaller group size values."
             )
-            nncf_logger.warning(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
 
         return all_weight_params, ratio_defining_params, skipped_weight_params
 
@@ -648,25 +638,17 @@ def _handle_adjust_group_size_fallback(
 
         if adjusted_weight_params:
             # Adjusted group size value for some nodes
-            log_lines = [
-                f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})"
-                for w, adjusted_gs in adjusted_weight_params
-            ]
             nncf_logger.info(
                 f"Some nodes can't be quantized with the specified group size of {self._group_size}. "
-                "Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines)
+                "Adjusted group size values will be used."
             )
 
         if invalid_weight_params:
             # Valid adjusted group size wasn't found
-            log_lines = [
-                f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params
-            ]
-            log_message = (
+            nncf_logger.info(
                 "A valid adjusted group size value can't be found for some nodes. They will be quantized using the "
                 f"{self._backup_mode.value} backup mode."
             )
-            nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))
 
         return valid_weight_params, group_size_values
 
@@ -691,6 +673,7 @@ def _get_bitwidth_distribution_str(
     ) -> str:
         """
         Generates a table that shows the ratio of weights quantized to different number of bits.
+        Additionally, splits modes into sub-rows by `group_size` (e.g., "int4_asym, group size 64").
 
         :param all_params: Information about each weight node.
         :param ratio_defining_params: Information about weights that are used for calculating ratio between primary and
@@ -701,31 +684,47 @@ def _get_bitwidth_distribution_str(
         dtype_vs_num_weights_map = {}
         ratio_defining_weight_names = set(wp.weight_name for wp in ratio_defining_params)
         for data in all_params:
-            dtype = data.compression_config.mode if data.compression_config is not None else "float"
-            n_total, n_ratio_defining = dtype_vs_num_weights_map.get(dtype, ([], []))
+            if data.compression_config is None:
+                label, n_bits = "float", 32
+            else:
+                n_bits = data.compression_config.num_bits
+                gs = data.compression_config.group_size
+                gs_label = f"group size {gs}" if gs != -1 else "per-channel"
+                label = f"{data.compression_config.mode}, {gs_label}"
+            dtype_key = (label, n_bits)
+
+            n_total, n_ratio_defining = dtype_vs_num_weights_map.get(dtype_key, ([], []))
             if data.weight_name in ratio_defining_weight_names:
                 n_ratio_defining.append(data.num_weights)
             n_total.append(data.num_weights)
-            dtype_vs_num_weights_map[dtype] = (n_total, n_ratio_defining)
+            dtype_vs_num_weights_map[dtype_key] = (n_total, n_ratio_defining)
 
         n_skipped_float = [ws.num_weights for ws in skipped_weight_params if ws.weight_dtype.is_float()]
         if n_skipped_float:
-            n_total, n_ratio_defining = dtype_vs_num_weights_map.get("float", ([], []))
-            dtype_vs_num_weights_map["float"] = (n_total + n_skipped_float, n_ratio_defining)
+            n_total, n_ratio_defining = dtype_vs_num_weights_map.get(("float", 32), ([], []))
+            dtype_vs_num_weights_map[("float", 32)] = (n_total + n_skipped_float, n_ratio_defining)
 
         num_total_skipped_weights = sum(ws.num_weights for ws in skipped_weight_params)
         num_ratio_defining_weights = sum(ws.num_weights for ws in ratio_defining_params)
         num_ratio_defining_params = len(ratio_defining_params)
         num_total_weights = sum(ws.num_weights for ws in all_params) + num_total_skipped_weights
         num_params = len(all_params) + len(n_skipped_float)
-        dtype_vs_num_weights_map = OrderedDict(sorted(dtype_vs_num_weights_map.items(), reverse=True))
-        # Table creation
+
+        def _sort_dtype(dtype_label: str, dtype_bits: int):
+            if ", group size " in dtype_label:
+                base, gs_str = dtype_label.rsplit(", group size ", 1)
+                return -dtype_bits, base, int(gs_str)
+            return -dtype_bits, dtype_label, -1
+
+        dtype_vs_num_weights_map = OrderedDict(
+            sorted(dtype_vs_num_weights_map.items(), key=lambda kv: _sort_dtype(*kv[0]))
+        )
         header = ["Weight compression mode", "% all parameters (layers)", "% ratio-defining parameters (layers)"]
         rows = []
-        for bitwidth, (n_total, n_ratio_defining) in dtype_vs_num_weights_map.items():
+        for (label, _), (n_total, n_ratio_defining) in dtype_vs_num_weights_map.items():
             rows.append(
                 [
-                    bitwidth,
+                    label,
                     self._proportion_str(n_total, num_total_weights, num_params),
                     self._proportion_str(n_ratio_defining, num_ratio_defining_weights, num_ratio_defining_params),
                 ]

@@ -451,20 +451,18 @@ def test_awq_scale_reference(self, monkeypatch, mocker):
         for node_name, scales in spy_instance._scale_per_target_node.items():
             assert fns.allclose(scales, self.get_reference_for_test_awq_scale_reference()[node_name])
 
-    @pytest.mark.parametrize("algorithm", (None, "awq", "scale_estimation", "gptq", "lora_correction"))
     @pytest.mark.parametrize(
         ["group_size", "fallback_mode", "min_adjusted_group_size", "expected_outcome"],
         [
             (32, nncf.GroupSizeFallbackMode.ERROR, None, "exception"),
             (32, nncf.GroupSizeFallbackMode.IGNORE, 16, "warn_ignored"),
             (32, nncf.GroupSizeFallbackMode.ADJUST, 16, "info_cant_adjust"),
             (32, nncf.GroupSizeFallbackMode.ADJUST, 8, "info_adjusted_group_size"),
-            (32, None, None, "warn_ignored"),
+            (32, None, None, "exception"),
         ],
     )
     def test_error_message_for_invalid_group_size(
         self,
-        algorithm,
         group_size,
         fallback_mode,
         min_adjusted_group_size,
@@ -477,21 +475,16 @@ def test_error_message_for_invalid_group_size(
             - an info message is logged when an adjustable group size value cannot be found
             - an info message is logged when the group size is adjusted to a valid value
         """
-        if algorithm in self.get_not_supported_algorithms():
-            pytest.skip("Skipping test for not supported algorithms")
 
-        model = self.get_awq_model()
-        hidden_dim = 8
-        input_example = self.to_tensor(np.ones([1, 4, hidden_dim], dtype=np.float32))
+        model = self.get_different_channel_size_model([8, 8, 8, 8, 8, 8, 8, 16, 32])
+        input_example = self.to_tensor(np.ones([1, 8, 8], dtype=np.float32))
         dataset = Dataset([input_example], self.get_transform_func())
-        algorithm_dict = {algorithm: True} if algorithm else {}
         kwargs = dict(
             model=model,
             mode=CompressWeightsMode.INT4_ASYM,
-            ratio=1.0,
+            ratio=0.9,
             group_size=group_size,
             all_layers=True,
-            **algorithm_dict,
             dataset=dataset,
         )
         if fallback_mode is not None or min_adjusted_group_size is not None:
@@ -516,11 +509,19 @@ def test_error_message_for_invalid_group_size(
                 compress_weights(**kwargs)
             info_messages = [args[0] for args, _ in mock_info.call_args_list]
             info_msg = (
-                "Adjusted group size values will be used:"
+                "Adjusted group size values will be used"
                 if expected_outcome == "info_adjusted_group_size"
                 else "A valid adjusted group size value can't be found for some nodes."
             )
             assert any(info_msg in msg for msg in info_messages)
+            if expected_outcome == "info_adjusted_group_size":
+                table_rows = [
+                    "int8_asym, per-channel    │ 50% (1 / 9)                 │ 50% (1 / 9)",
+                    "int4_asym, group size 8   │ 25% (7 / 9)                 │ 25% (7 / 9)",
+                    "int4_asym, group size 16  │ 25% (1 / 9)                 │ 25% (1 / 9)",
+                ]
+                for row in table_rows:
+                    assert any(row in msg for msg in info_messages)
 
     @pytest.mark.parametrize(
         [
@@ -532,7 +533,6 @@ def test_error_message_for_invalid_group_size(
             "ref_num_group_sizes",
         ],
         [
-            ([8, 8, 16, 16, 16, 32], 1.0, 32, None, None, {32: 1}),
             ([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.IGNORE, None, {32: 1}),
             ([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.ADJUST, 16, {16: 3, 32: 1}),
             ([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.ADJUST, 32, {32: 1}),