Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/nncf/quantization/advanced_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ class AdvancedCompressionParameters:
:type lora_adapter_rank: int
:param group_size_fallback_mode: Specifies how to handle nodes that do not support the given group size.
:type group_size_fallback_mode: GroupSizeFallbackMode
:param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 16. The reason
:param min_adjusted_group_size: Minimum group size for adjustable group size searching. Defaults to 32. The reason
behind this argument is to avoid too small group size values, which may lead to performance issues.
:type min_adjusted_group_size: int
:param awq_params: Advanced parameters for AWQ algorithm.
Expand All @@ -418,8 +418,8 @@ class AdvancedCompressionParameters:

statistics_path: Optional[str] = None
lora_adapter_rank: int = 256
group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.IGNORE
min_adjusted_group_size: int = 16
group_size_fallback_mode: GroupSizeFallbackMode = GroupSizeFallbackMode.ERROR
min_adjusted_group_size: int = 32
awq_params: AdvancedAWQParameters = field(default_factory=AdvancedAWQParameters)
scale_estimation_params: AdvancedScaleEstimationParameters = field(
default_factory=AdvancedScaleEstimationParameters
Expand Down
67 changes: 33 additions & 34 deletions src/nncf/quantization/algorithms/weight_compression/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,8 @@
TModel = TypeVar("TModel")
TTensor = TypeVar("TTensor")

INT8_MODES = [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM]
NON_INT8_MODES = [
CompressWeightsMode.INT4_SYM,
CompressWeightsMode.INT4_ASYM,
CompressWeightsMode.NF4,
CompressWeightsMode.MXFP4,
CompressWeightsMode.MXFP8_E4M3,
CompressWeightsMode.FP8_E4M3,
CompressWeightsMode.FP4,
]
INT8_MODES = [CompressWeightsMode.INT8_ASYM, CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8]
NON_INT8_MODES = [mode for mode in CompressWeightsMode if mode not in INT8_MODES]
SUPPORTED_DATA_TYPES = [
TensorDataType.float16,
TensorDataType.bfloat16,
Expand Down Expand Up @@ -101,6 +93,8 @@ def get_weight_compression_configuration(
elif group_size is None and mode in NON_INT8_MODES:
if mode in [CompressWeightsMode.MXFP4, CompressWeightsMode.MXFP8_E4M3]:
group_size = 32
elif mode in [CompressWeightsMode.CODEBOOK, CompressWeightsMode.CB4_F8E4M3]:
group_size = -1
else:
group_size = 128

Expand Down Expand Up @@ -599,16 +593,12 @@ def _handle_ignore_group_size_fallback(
if w_params.node_with_weight.node_name not in nodes_to_exclude
]

log_lines = [
f"{node_name} (weight shape: {weight_shape})" for node_name, weight_shape in nodes_to_exclude.items()
]
log_message = (
nncf_logger.warning(
f"Group-wise quantization with group size {self._group_size} can't be applied to some nodes. "
"They will be ignored and kept with original precision.\n"
"Consider changing group size value or setting group size fallback parameter to ADJUST, which enables "
"automatic adjustment to smaller group size values."
)
nncf_logger.warning(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))

return all_weight_params, ratio_defining_params, skipped_weight_params

Expand Down Expand Up @@ -648,25 +638,17 @@ def _handle_adjust_group_size_fallback(

if adjusted_weight_params:
# Adjusted group size value for some nodes
log_lines = [
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape}, adjusted group size: {adjusted_gs})"
for w, adjusted_gs in adjusted_weight_params
]
nncf_logger.info(
f"Some nodes can't be quantized with the specified group size of {self._group_size}. "
"Adjusted group size values will be used:\n\t" + "\n\t".join(log_lines)
"Adjusted group size values will be used."
)

if invalid_weight_params:
# Valid adjusted group size wasn't found
log_lines = [
f"{w.node_with_weight.node_name} (weight shape: {w.weight_shape})" for w in invalid_weight_params
]
log_message = (
nncf_logger.info(
"A valid adjusted group size value can't be found for some nodes. They will be quantized using the "
f"{self._backup_mode.value} backup mode."
)
nncf_logger.info(f"{log_message} Nodes:\n\t" + "\n\t".join(log_lines))

return valid_weight_params, group_size_values

Expand All @@ -691,6 +673,7 @@ def _get_bitwidth_distribution_str(
) -> str:
"""
Generates a table that shows the ratio of weights quantized to different number of bits.
Additionally, splits modes into sub-rows by `group_size` (e.g., "int4_asym, group size 64").

:param all_params: Information about each weight node.
:param ratio_defining_params: Information about weights that are used for calculating ratio between primary and
Expand All @@ -701,31 +684,47 @@ def _get_bitwidth_distribution_str(
dtype_vs_num_weights_map = {}
ratio_defining_weight_names = set(wp.weight_name for wp in ratio_defining_params)
for data in all_params:
dtype = data.compression_config.mode if data.compression_config is not None else "float"
n_total, n_ratio_defining = dtype_vs_num_weights_map.get(dtype, ([], []))
if data.compression_config is None:
label, n_bits = "float", 32
else:
n_bits = data.compression_config.num_bits
gs = data.compression_config.group_size
gs_label = f"group size {gs}" if gs != -1 else "per-channel"
label = f"{data.compression_config.mode}, {gs_label}"
dtype_key = (label, n_bits)

n_total, n_ratio_defining = dtype_vs_num_weights_map.get(dtype_key, ([], []))
if data.weight_name in ratio_defining_weight_names:
n_ratio_defining.append(data.num_weights)
n_total.append(data.num_weights)
dtype_vs_num_weights_map[dtype] = (n_total, n_ratio_defining)
dtype_vs_num_weights_map[dtype_key] = (n_total, n_ratio_defining)

n_skipped_float = [ws.num_weights for ws in skipped_weight_params if ws.weight_dtype.is_float()]
if n_skipped_float:
n_total, n_ratio_defining = dtype_vs_num_weights_map.get("float", ([], []))
dtype_vs_num_weights_map["float"] = (n_total + n_skipped_float, n_ratio_defining)
n_total, n_ratio_defining = dtype_vs_num_weights_map.get(("float", 32), ([], []))
dtype_vs_num_weights_map[("float", 32)] = (n_total + n_skipped_float, n_ratio_defining)

num_total_skipped_weights = sum(ws.num_weights for ws in skipped_weight_params)
num_ratio_defining_weights = sum(ws.num_weights for ws in ratio_defining_params)
num_ratio_defining_params = len(ratio_defining_params)
num_total_weights = sum(ws.num_weights for ws in all_params) + num_total_skipped_weights
num_params = len(all_params) + len(n_skipped_float)
dtype_vs_num_weights_map = OrderedDict(sorted(dtype_vs_num_weights_map.items(), reverse=True))
# Table creation

def _sort_dtype(dtype_label: str, dtype_bits: int):
    """
    Builds the sort key for a (label, number of bits) pair.

    :param dtype_label: Row label, optionally ending with ", group size <N>".
    :param dtype_bits: Number of bits associated with the label.
    :return: Tuple of the negated number of bits, the label without the group size suffix and the group size
        parsed from the label (-1 when the label carries no group size suffix).
    """
    separator = ", group size "
    base, found, gs_str = dtype_label.rpartition(separator)
    if not found:
        # No group size suffix in the label: keep the label whole and use -1 as the group size.
        return -dtype_bits, dtype_label, -1
    return -dtype_bits, base, int(gs_str)

dtype_vs_num_weights_map = OrderedDict(
sorted(dtype_vs_num_weights_map.items(), key=lambda kv: _sort_dtype(*kv[0]))
)
header = ["Weight compression mode", "% all parameters (layers)", "% ratio-defining parameters (layers)"]
rows = []
for bitwidth, (n_total, n_ratio_defining) in dtype_vs_num_weights_map.items():
for (label, _), (n_total, n_ratio_defining) in dtype_vs_num_weights_map.items():
rows.append(
[
bitwidth,
label,
self._proportion_str(n_total, num_total_weights, num_params),
self._proportion_str(n_ratio_defining, num_ratio_defining_weights, num_ratio_defining_params),
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,20 +451,18 @@ def test_awq_scale_reference(self, monkeypatch, mocker):
for node_name, scales in spy_instance._scale_per_target_node.items():
assert fns.allclose(scales, self.get_reference_for_test_awq_scale_reference()[node_name])

@pytest.mark.parametrize("algorithm", (None, "awq", "scale_estimation", "gptq", "lora_correction"))
@pytest.mark.parametrize(
["group_size", "fallback_mode", "min_adjusted_group_size", "expected_outcome"],
[
(32, nncf.GroupSizeFallbackMode.ERROR, None, "exception"),
(32, nncf.GroupSizeFallbackMode.IGNORE, 16, "warn_ignored"),
(32, nncf.GroupSizeFallbackMode.ADJUST, 16, "info_cant_adjust"),
(32, nncf.GroupSizeFallbackMode.ADJUST, 8, "info_adjusted_group_size"),
(32, None, None, "warn_ignored"),
(32, None, None, "exception"),
],
)
def test_error_message_for_invalid_group_size(
self,
algorithm,
group_size,
fallback_mode,
min_adjusted_group_size,
Expand All @@ -477,21 +475,16 @@ def test_error_message_for_invalid_group_size(
- an info message is logged when an adjustable group size value cannot be found
- an info message is logged when the group size is adjusted to a valid value
"""
if algorithm in self.get_not_supported_algorithms():
pytest.skip("Skipping test for not supported algorithms")

model = self.get_awq_model()
hidden_dim = 8
input_example = self.to_tensor(np.ones([1, 4, hidden_dim], dtype=np.float32))
model = self.get_different_channel_size_model([8, 8, 8, 8, 8, 8, 8, 16, 32])
input_example = self.to_tensor(np.ones([1, 8, 8], dtype=np.float32))
dataset = Dataset([input_example], self.get_transform_func())
algorithm_dict = {algorithm: True} if algorithm else {}
kwargs = dict(
model=model,
mode=CompressWeightsMode.INT4_ASYM,
ratio=1.0,
ratio=0.9,
group_size=group_size,
all_layers=True,
**algorithm_dict,
dataset=dataset,
)
if fallback_mode is not None or min_adjusted_group_size is not None:
Expand All @@ -516,11 +509,19 @@ def test_error_message_for_invalid_group_size(
compress_weights(**kwargs)
info_messages = [args[0] for args, _ in mock_info.call_args_list]
info_msg = (
"Adjusted group size values will be used:"
"Adjusted group size values will be used"
if expected_outcome == "info_adjusted_group_size"
else "A valid adjusted group size value can't be found for some nodes."
)
assert any(info_msg in msg for msg in info_messages)
if expected_outcome == "info_adjusted_group_size":
table_rows = [
"int8_asym, per-channel │ 50% (1 / 9) │ 50% (1 / 9)",
"int4_asym, group size 8 │ 25% (7 / 9) │ 25% (7 / 9)",
"int4_asym, group size 16 │ 25% (1 / 9) │ 25% (1 / 9)",
]
for row in table_rows:
assert any(row in msg for msg in info_messages)

@pytest.mark.parametrize(
[
Expand All @@ -532,7 +533,6 @@ def test_error_message_for_invalid_group_size(
"ref_num_group_sizes",
],
[
([8, 8, 16, 16, 16, 32], 1.0, 32, None, None, {32: 1}),
([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.IGNORE, None, {32: 1}),
([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.ADJUST, 16, {16: 3, 32: 1}),
([8, 8, 16, 16, 16, 32], 1.0, 32, nncf.GroupSizeFallbackMode.ADJUST, 32, {32: 1}),
Expand Down