+import os
 import shutil
 import sys
 import unittest
 from auto_round import AutoRound
 
 
+def _get_folder_size(path: str) -> float:
+    """Return folder size in GB."""
+    total_size = 0
+    for dirpath, _, filenames in os.walk(path):
+        for f in filenames:
+            fp = os.path.join(dirpath, f)
+            if os.path.isfile(fp):
+                total_size += os.path.getsize(fp)
+    return total_size / (1024**3)  # convert to GB
+
+
 class LLMDataLoader:
     def __init__(self):
         self.batch_size = 1
@@ -25,7 +37,7 @@ class TestAutoRound(unittest.TestCase):
     def setUpClass(self):
         model_name = "facebook/opt-125m"
         self.save_dir = "./saved"
-        self.model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", trust_remote_code=True)
+        self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
         self.llm_dataloader = LLMDataLoader()
 
@@ -268,10 +280,7 @@ def test_mxfp4_llmcompressor_format(self):
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
         from transformers import AutoConfig
 
-        bits = 4
-        data_type = "mx_fp"
-        group_size = 32
-        sym = True
+        scheme = "MXFP4"
         layer_config = {}
         fp_layers_str = "k_proj"
         from auto_round.utils import get_fp_layer_names
@@ -282,12 +291,58 @@ def test_mxfp4_llmcompressor_format(self):
         autoround = AutoRound(
             model,
             self.tokenizer,
-            bits=bits,
-            group_size=group_size,
-            sym=sym,
+            scheme=scheme,
             iters=2,
             seqlen=2,
-            data_type=data_type,
+            layer_config=layer_config,
+            dataset=self.llm_dataloader,
+        )
+        quantized_model_path = self.save_dir
+        autoround.quantize()
+        compressed_model = autoround.save_quantized(
+            output_dir=quantized_model_path, inplace=True, format="llm_compressor"
+        )
+        tmp_layer = compressed_model.model.decoder.layers[3].self_attn.q_proj
+        skip_layer = compressed_model.model.decoder.layers[3].self_attn.k_proj
+        assert (
+            hasattr(tmp_layer, "weight_scale")
+            and hasattr(tmp_layer, "weight_packed")
+            and tmp_layer.weight_scale.dtype is torch.uint8
+            and tmp_layer.weight_scale.shape[0] == 768
+        ), "Illegal MXFP4 packing name or data_type or shape"
+        assert not hasattr(skip_layer, "weight_scale") and not hasattr(  ## check skipped layers
+            skip_layer, "weight_packed"
+        ), "Illegal MXFP4 quantization for fp_layers"
+        quantization_config = AutoConfig.from_pretrained(
+            quantized_model_path, trust_remote_code=True
+        ).quantization_config
+        assert (
+            quantization_config["format"] == "float-quantized"
+            and quantization_config["config_groups"]["group_0"]["weights"]["is_mx"] is True
+            and quantization_config["config_groups"]["group_0"]["weights"]["num_bits"] == 4
+        ), f"Invalid MXFP4 quantization configuration: {quantization_config}"
+
+        shutil.rmtree("./saved", ignore_errors=True)
+
+    def test_rtn_mxfp4_llmcompressor_format(self):
+        model_name = "facebook/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
+        from transformers import AutoConfig
+
+        scheme = "MXFP4"
+        layer_config = {}
+        fp_layers_str = "k_proj"
+        from auto_round.utils import get_fp_layer_names
+
+        not_quantize_layer_names = get_fp_layer_names(model, fp_layers_str)
+        for name in not_quantize_layer_names:
+            layer_config[name] = {"bits": 16, "act_bits": 16, "data_type": "float"}
+        autoround = AutoRound(
+            model,
+            self.tokenizer,
+            scheme=scheme,
+            iters=0,
+            seqlen=2,
             layer_config=layer_config,
             dataset=self.llm_dataloader,
         )
@@ -322,19 +377,13 @@ def test_mxfp8_llmcompressor_format(self):
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
         from transformers import AutoConfig
 
-        bits = 8
-        data_type = "mx_fp_rceil"
-        group_size = 32
-        sym = True
+        scheme = "MXFP8"
         autoround = AutoRound(
             model,
             self.tokenizer,
-            bits=bits,
-            group_size=group_size,
-            sym=sym,
+            scheme=scheme,
             iters=2,
             seqlen=2,
-            data_type=data_type,
             dataset=self.llm_dataloader,
         )
         quantized_model_path = self.save_dir
@@ -355,28 +404,23 @@ def test_mxfp8_llmcompressor_format(self):
             and quantization_config["config_groups"]["group_0"]["weights"]["is_mx"] is True
             and quantization_config["config_groups"]["group_0"]["weights"]["num_bits"] == 8
         ), f"Invalid MXFP8 quantization configuration: {quantization_config}"
+        folder_size_gb = _get_folder_size(quantized_model_path)
+        # Original opt-125m is < 0.5GB -> quantized mxfp8 model should be smaller but not empty
+        assert (
+            0.15 < folder_size_gb < 0.2
+        ), f"Quantized model folder size {folder_size_gb:.2f} GB is outside the expected range (0.15~0.2 GB)"
         shutil.rmtree("./saved", ignore_errors=True)
 
     def test_nvfp4_llmcompressor_format(self):
         model_name = "facebook/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
         from transformers import AutoConfig
 
-        bits = 4
-        act_bits = 4
-        data_type = "nv_fp"
-        act_data_type = "nv_fp4_with_static_gs"
-        group_size = 16
-        sym = True
+        scheme = "NVFP4"
         autoround = AutoRound(
             model,
             self.tokenizer,
-            bits=bits,
-            act_bits=act_bits,
-            data_type=data_type,
-            act_data_type=act_data_type,
-            group_size=group_size,
-            sym=sym,
+            scheme=scheme,
             iters=2,
             seqlen=2,
             dataset=self.llm_dataloader,
@@ -399,28 +443,23 @@ def test_nvfp4_llmcompressor_format(self):
             quantization_config["format"] == "nvfp4-pack-quantized"
             and quantization_config["config_groups"]["group_0"]["input_activations"]["num_bits"] == 4
         ), f"Invalid NVFP4 quantization configuration: {quantization_config}"
+        folder_size_gb = _get_folder_size(quantized_model_path)
+        # Original opt-125m is < 0.5GB -> quantized nvfp4 model should be smaller but not empty
+        assert (
+            0.1 < folder_size_gb < 0.15
+        ), f"Quantized model folder size {folder_size_gb:.2f} GB is outside the expected range (0.1~0.15 GB)"
         shutil.rmtree("./saved", ignore_errors=True)
 
     def test_nvfp4_autoround_format(self):
         model_name = "facebook/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
         from transformers import AutoConfig
 
-        bits = 4
-        act_bits = 4
-        data_type = "nv_fp"
-        act_data_type = "nv_fp4_with_static_gs"
-        group_size = 16
-        sym = True
+        scheme = "NVFP4"
         autoround = AutoRound(
             model,
             self.tokenizer,
-            bits=bits,
-            act_bits=act_bits,
-            data_type=data_type,
-            act_data_type=act_data_type,
-            group_size=group_size,
-            sym=sym,
+            scheme="NVFP4",
             iters=2,
             seqlen=2,
             dataset=self.llm_dataloader,
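
For reference, a minimal sketch of the explicit settings that the scheme shorthands replace in these tests, read off the removed lines in the hunks above. The dict name and layout below are illustrative only, not AutoRound API; the values each scheme string actually expands to inside AutoRound are assumed to match what the removed lines passed.

# Sketch only: settings the removed lines passed explicitly, now implied by the
# scheme strings used in the updated tests. SCHEME_SETTINGS is a hypothetical name.
SCHEME_SETTINGS = {
    "MXFP4": {"bits": 4, "group_size": 32, "sym": True, "data_type": "mx_fp"},
    "MXFP8": {"bits": 8, "group_size": 32, "sym": True, "data_type": "mx_fp_rceil"},
    "NVFP4": {
        "bits": 4,
        "act_bits": 4,
        "group_size": 16,
        "sym": True,
        "data_type": "nv_fp",
        "act_data_type": "nv_fp4_with_static_gs",
    },
}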