@@ -9,10 +9,11 @@
 import transformers
 from lm_eval.utils import make_table  # pylint: disable=E0401
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers.utils.versions import require_version

 from auto_round import AutoRound, AutoRoundAdam
 from auto_round.eval.evaluation import simple_evaluate
-from auto_round.testing_utils import require_awq, require_gptqmodel, require_optimum
+from auto_round.testing_utils import require_awq, require_gptqmodel, require_optimum, require_package_version_ut


 def get_accuracy(data):
@@ -38,7 +39,6 @@ def tearDownClass(self):

     @require_gptqmodel
     @require_optimum
-    @require_awq
     def test_backend(self):
         model_name = "/models/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
@@ -64,6 +64,16 @@ def test_backend(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)

+    @require_optimum
+    @require_awq
+    @require_package_version_ut("transformers", "<4.57.0")
+    def test_backend_awq(self):
+        model_name = "/models/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        autoround = AutoRound(model, tokenizer, bits=4, group_size=128)
+        autoround.quantize()
+
         ##test auto_awq format
         autoround.save_quantized(self.save_dir, format="auto_awq", inplace=False)
         model_args = f"pretrained={self.save_dir}"
@@ -73,9 +83,9 @@ def test_backend(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)

+
     @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
     @require_gptqmodel
-    @require_awq
     def test_fp_layers(self):
         model_name = "/models/opt-125m"
         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
@@ -98,6 +108,23 @@ def test_fp_layers(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)

+
+    @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
+    @require_awq
+    @require_package_version_ut("transformers", "<4.57.0")
+    def test_fp_layers_awq(self):
+        model_name = "/models/opt-125m"
+        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        from auto_round.utils import get_fp_layer_names
+
+        layer_names = get_fp_layer_names(model, "model.decoder.layers.0,model.decoder.layers.1")
+        layer_configs = {}
+        for name in layer_names:
+            layer_configs[name] = {"bits": 16}
+        autoround = AutoRound(model, tokenizer, bits=4, group_size=128, layer_config=layer_configs)
+        autoround.quantize()
+
         ##test auto_awq format
         autoround.save_quantized(self.save_dir, format="auto_awq", inplace=False)
         model_args = f"pretrained={self.save_dir}"
@@ -107,6 +134,7 @@ def test_fp_layers(self):
         assert accuracy > 0.35
         shutil.rmtree("./saved", ignore_errors=True)

+
     @unittest.skipIf(torch.cuda.is_available() is False, "Skipping because no cuda")
     def test_undivided_group_size_tuning(self):
         model_name = "/models/opt-125m"
@@ -157,3 +185,4 @@ def test_autoround_asym(self): ##need to install false

 if __name__ == "__main__":
     unittest.main()
+
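
Note: the new tests gate AWQ coverage behind `require_package_version_ut("transformers", "<4.57.0")`. The actual decorator lives in `auto_round.testing_utils` and is not shown in this diff; the snippet below is only a minimal sketch, under the assumption that it behaves like a `packaging`-style version-gated skip.

```python
# Hypothetical sketch of a version-gated unittest skip decorator.
# The real require_package_version_ut in auto_round.testing_utils may differ.
import importlib.metadata
import unittest

from packaging.specifiers import SpecifierSet
from packaging.version import Version


def require_package_version_ut(package_name, version_spec):
    """Skip the decorated test unless `package_name` is installed and its
    version satisfies `version_spec`, e.g. ("transformers", "<4.57.0")."""
    try:
        installed = Version(importlib.metadata.version(package_name))
    except importlib.metadata.PackageNotFoundError:
        return unittest.skip(f"test requires {package_name}{version_spec}, but it is not installed")
    if installed not in SpecifierSet(version_spec):
        return unittest.skip(f"test requires {package_name}{version_spec}, found {installed}")
    return lambda test_item: test_item  # version satisfied: run the test unchanged
```

Applied to `test_backend_awq` and `test_fp_layers_awq`, a decorator like this keeps the AWQ paths exercised on older transformers releases while cleanly skipping them on 4.57.0 and later, where this AWQ flow is presumably no longer compatible.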