@@ -84,7 +84,7 @@ def test_shared_layers(self):
8484 @multi_card
8585 def test_multi_card (self ):
8686 model_name = "/models/Qwen3-0.6B"
87- target_bits = 5.265
87+ target_bits = 4.5
8888 for device_map in ["auto" , "0,1" , "0" , None ]:
8989 scheme = AutoScheme (avg_bits = target_bits , options = ("NVFP4" ))
9090 ar = AutoRound (model = model_name , scheme = scheme , iters = 0 , nsamples = 1 , device_map = device_map )
@@ -96,7 +96,7 @@ def test_multi_card(self):
9696 @multi_card
9797 def test_multi_card_1 (self ):
9898 model_name = "/models/Qwen3-0.6B"
99- target_bits = 5.265
99+ target_bits = 4.5
100100 from transformers import AutoModelForCausalLM , AutoTokenizer
101101
102102 tokenizer = AutoTokenizer .from_pretrained (model_name )
@@ -110,7 +110,7 @@ def test_multi_card_1(self):
110110
111111 def test_non_low_gpu_mem_usage (self ):
112112 model_name = "/models/Qwen3-0.6B"
113- target_bits = 5.265
113+ target_bits = 4.5
114114 # for device_map in ["auto", "0,1", "0", None]:
115115 scheme = AutoScheme (avg_bits = target_bits , options = ("NVFP4" ), low_gpu_mem_usage = False , device_map = "auto" )
116116
@@ -123,7 +123,7 @@ def test_non_low_gpu_mem_usage(self):
123123 @multi_card
124124 def test_dict_device_map (self ):
125125 model_name = "/models/Qwen3-8B"
126- target_bits = 8.755
126+ target_bits = 8.25
127127 device_map = {"up_proj" : 0 , "down_proj" : 1 }
128128
129129 scheme = AutoScheme (avg_bits = target_bits , options = ("MXFP8" ))