Skip to content

Commit 4afbe0a

Browse files
authored
update bits (#986)
Signed-off-by: He, Xin3 <xin3.he@intel.com>
1 parent b9245b5 commit 4afbe0a

File tree

1 file changed: 4 additions and 4 deletions.

test/test_cuda/test_auto_scheme.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ def test_shared_layers(self):
8484
@multi_card
8585
def test_multi_card(self):
8686
model_name = "/models/Qwen3-0.6B"
87-
target_bits = 5.265
87+
target_bits = 4.5
8888
for device_map in ["auto", "0,1", "0", None]:
8989
scheme = AutoScheme(avg_bits=target_bits, options=("NVFP4"))
9090
ar = AutoRound(model=model_name, scheme=scheme, iters=0, nsamples=1, device_map=device_map)
@@ -96,7 +96,7 @@ def test_multi_card(self):
9696
@multi_card
9797
def test_multi_card_1(self):
9898
model_name = "/models/Qwen3-0.6B"
99-
target_bits = 5.265
99+
target_bits = 4.5
100100
from transformers import AutoModelForCausalLM, AutoTokenizer
101101

102102
tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -110,7 +110,7 @@ def test_multi_card_1(self):
110110

111111
def test_non_low_gpu_mem_usage(self):
112112
model_name = "/models/Qwen3-0.6B"
113-
target_bits = 5.265
113+
target_bits = 4.5
114114
# for device_map in ["auto", "0,1", "0", None]:
115115
scheme = AutoScheme(avg_bits=target_bits, options=("NVFP4"), low_gpu_mem_usage=False, device_map="auto")
116116

@@ -123,7 +123,7 @@ def test_non_low_gpu_mem_usage(self):
123123
@multi_card
124124
def test_dict_device_map(self):
125125
model_name = "/models/Qwen3-8B"
126-
target_bits = 8.755
126+
target_bits = 8.25
127127
device_map = {"up_proj": 0, "down_proj": 1}
128128

129129
scheme = AutoScheme(avg_bits=target_bits, options=("MXFP8"))

0 commit comments

Comments (0)