Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions test/test_cuda/test_auto_scheme.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_shared_layers(self):
@multi_card
def test_multi_card(self):
model_name = "/models/Qwen3-0.6B"
target_bits = 5.265
target_bits = 4.5
for device_map in ["auto", "0,1", "0", None]:
scheme = AutoScheme(avg_bits=target_bits, options=("NVFP4"))
ar = AutoRound(model=model_name, scheme=scheme, iters=0, nsamples=1, device_map=device_map)
Expand All @@ -96,7 +96,7 @@ def test_multi_card(self):
@multi_card
def test_multi_card_1(self):
model_name = "/models/Qwen3-0.6B"
target_bits = 5.265
target_bits = 4.5
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)
Expand All @@ -110,7 +110,7 @@ def test_multi_card_1(self):

def test_non_low_gpu_mem_usage(self):
model_name = "/models/Qwen3-0.6B"
target_bits = 5.265
target_bits = 4.5
# for device_map in ["auto", "0,1", "0", None]:
scheme = AutoScheme(avg_bits=target_bits, options=("NVFP4"), low_gpu_mem_usage=False, device_map="auto")

Expand All @@ -123,7 +123,7 @@ def test_non_low_gpu_mem_usage(self):
@multi_card
def test_dict_device_map(self):
model_name = "/models/Qwen3-8B"
target_bits = 8.755
target_bits = 8.25
device_map = {"up_proj": 0, "down_proj": 1}

scheme = AutoScheme(avg_bits=target_bits, options=("MXFP8"))
Expand Down