@@ -38,11 +38,11 @@
     PerGroup,
 )
 from torchao.quantization.quant_api import (
-    AOPerModuleConfig,
     Int4WeightOnlyConfig,
     Int8DynamicActivationInt4WeightConfig,
     Int8WeightOnlyConfig,
     IntxWeightOnlyConfig,
+    ModuleFqnToConfig,
     Quantizer,
     TwoStepQuantizer,
     _replace_with_custom_fn_if_matches_filter,
@@ -946,10 +946,10 @@ def test_workflow_e2e_numerics(self, config):
         assert sqnr >= 16.5, f"SQNR {sqnr} is too low"

     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    def test_ao_per_module_config_default(self):
+    def test_module_fqn_to_config_default(self):
         config1 = Int4WeightOnlyConfig(group_size=32)
         config2 = Int8WeightOnlyConfig()
-        config = AOPerModuleConfig({"_default": config1, "linear2": config2})
+        config = ModuleFqnToConfig({"_default": config1, "linear2": config2})
         model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
         example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
         quantize_(model, config)
@@ -960,10 +960,10 @@ def test_ao_per_module_config_default(self):
         assert isinstance(model.linear2.weight._layout, PlainLayout)

     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    def test_ao_per_module_config_module_name(self):
+    def test_module_fqn_to_config_module_name(self):
         config1 = Int4WeightOnlyConfig(group_size=32)
         config2 = Int8WeightOnlyConfig()
-        config = AOPerModuleConfig({"linear1": config1, "linear2": config2})
+        config = ModuleFqnToConfig({"linear1": config1, "linear2": config2})
         model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
         example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
         quantize_(model, config)
@@ -974,7 +974,7 @@ def test_ao_per_module_config_module_name(self):
         assert isinstance(model.linear2.weight._layout, PlainLayout)

     @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_6, "Need torch 2.6+")
-    def test_ao_per_module_config_embedding_linear(self):
+    def test_module_fqn_to_config_embedding_linear(self):
         weight_dtype = torch.int8
         granularity = PerGroup(8)
         mapping_type = MappingType.SYMMETRIC
@@ -987,7 +987,7 @@ def test_ao_per_module_config_embedding_linear(self):
         # example model linear is Linear(16, 8)
         linear_config = Int8DynamicActivationInt4WeightConfig(group_size=16)

-        config = AOPerModuleConfig({"emb": embedding_config, "linear": linear_config})
+        config = ModuleFqnToConfig({"emb": embedding_config, "linear": linear_config})
         indices = torch.randint(0, 10, (32,))
         indices = indices.unsqueeze(0)
         example_inputs = (indices,)
@@ -1006,9 +1006,9 @@ def test_ao_per_module_config_embedding_linear(self):
         assert isinstance(model.linear.weight, LinearActivationQuantizedTensor)

     @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available")
-    def test_ao_per_module_config_skip(self):
+    def test_module_fqn_to_config_skip(self):
         config1 = Int4WeightOnlyConfig(group_size=32)
-        config = AOPerModuleConfig({"_default": config1, "linear2": None})
+        config = ModuleFqnToConfig({"_default": config1, "linear2": None})
         model = ToyLinearModel().cuda().to(dtype=torch.bfloat16)
         example_inputs = model.example_inputs(device="cuda", dtype=torch.bfloat16)
         quantize_(model, config)
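Taken together, the renamed tests exercise one API: ModuleFqnToConfig maps module fully-qualified names to quantization configs, with "_default" as a catch-all and a None entry skipping a module (as in test_module_fqn_to_config_skip). A minimal usage sketch follows; the two-linear model below is illustrative and not the test suite's ToyLinearModel, while the config classes and quantize_ call are the ones shown in the diff:

import torch
from torchao.quantization import quantize_
from torchao.quantization.quant_api import (
    Int4WeightOnlyConfig,
    Int8WeightOnlyConfig,
    ModuleFqnToConfig,
)

# Illustrative stand-in for the test suite's ToyLinearModel.
class TwoLinear(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1 = torch.nn.Linear(128, 64, bias=False)
        self.linear2 = torch.nn.Linear(64, 32, bias=False)

    def forward(self, x):
        return self.linear2(self.linear1(x))

# Int4 weight-only quantization needs CUDA and bfloat16, matching the
# @unittest.skipIf guards on these tests.
model = TwoLinear().cuda().to(dtype=torch.bfloat16)

# "_default" applies to every module without an explicit entry, so
# linear1 gets int4 weight-only while linear2 is overridden to int8.
config = ModuleFqnToConfig(
    {
        "_default": Int4WeightOnlyConfig(group_size=32),
        "linear2": Int8WeightOnlyConfig(),
    }
)
quantize_(model, config)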