File tree 4 files changed +25
-10
lines changed 4 files changed +25
-10
lines changed Original file line number Diff line number Diff line change @@ -601,5 +601,18 @@ def test_quantized_tensor_subclass_int8_dyn_quant(self):
601
601
# make sure it compiles
602
602
torch ._export .aot_compile (m_unwrapped , example_inputs )
603
603
604
+ def test_register_apply_tensor_subclass (self ):
605
+ from torchao .quantization import register_apply_tensor_subclass
606
+ def apply_my_dtype (weight ):
607
+ return weight * 2
608
+
609
+ m = ToyLinearModel ().eval ().to (torch .bfloat16 ).to ("cuda" )
610
+ with self .assertRaisesWithRegex ("not supported" ):
611
+ quantize (m , "my_dtype" )
612
+
613
+ register_apply_tensor_subclass ("my_dtype" , apply_my_dtype )
614
+ # make sure it runs
615
+ quantize (m , "my_dtype" )
616
+
604
617
if __name__ == "__main__" :
605
618
unittest .main ()
Original file line number Diff line number Diff line change 25
25
26
26
from torchao .quantization import (
27
27
autoquant ,
28
+ quantize ,
29
+ register_apply_tensor_subclass ,
28
30
)
29
31
from . import dtypes
30
32
31
33
__all__ = [
32
34
"dtypes" ,
33
35
"autoquant" ,
36
+ "quantize" ,
37
+ "register_apply_tensor_subclass" ,
34
38
]
Original file line number Diff line number Diff line change 14
14
from .autoquant import *
15
15
16
16
__all__ = [
17
- "DynamicallyPerAxisQuantizedLinear" ,
18
- "apply_weight_only_int8_quant" ,
19
- "apply_dynamic_quant" ,
20
- "change_linear_weights_to_int8_dqtensors" ,
21
- "change_linear_weights_to_int8_woqtensors" ,
22
- "change_linear_weights_to_int4_woqtensors" ,
23
17
"swap_conv2d_1x1_to_linear"
24
18
"safe_int_mm" ,
25
19
"autoquant" ,
31
25
"swap_linear_with_smooth_fq_linear" ,
32
26
"smooth_fq_linear_to_inference" ,
33
27
"set_smooth_fq_attribute" ,
34
- "Int8DynamicallyQuantizedLinearWeight" ,
35
- "Int8WeightOnlyQuantizedLinearWeight" ,
36
- "Int4WeightOnlyQuantizedLinearWeight" ,
37
28
"compute_error" ,
38
- "WeightOnlyInt8QuantLinear" ,
39
29
"Int4WeightOnlyGPTQQuantizer" ,
40
30
"Int4WeightOnlyQuantizer" ,
41
31
"quantize_affine" ,
42
32
"dequantize_affine" ,
43
33
"choose_qprams_affine" ,  # NOTE(review): probable typo for "choose_qparams_affine" — verify against the definition before renaming this export
34
+ "quantize" ,
35
+ "register_apply_tensor_subclass" ,
44
36
]
Original file line number Diff line number Diff line change 41
41
Int4WeightOnlyGPTQQuantizer ,
42
42
Int4WeightOnlyQuantizer ,
43
43
)
44
+ import logging
44
45
from .autoquant import autoquant , AutoQuantizableLinearWeight
45
46
46
47
@@ -438,3 +439,8 @@ def get_per_token_block_size(x):
438
439
"int8_weight_only" : int8wo (),
439
440
"int8_dynamic" : int8da_int8w (),
440
441
}
442
+
443
def register_apply_tensor_subclass(name: str, apply_tensor_subclass: Callable) -> None:
    """Register *apply_tensor_subclass* under the shortcut string *name*.

    After registration, ``quantize(model, name)`` dispatches to the given
    callable. Registering an existing name overwrites the previous entry
    (a warning is logged).

    Args:
        name: shortcut string used to look up the transform.
        apply_tensor_subclass: callable applied to a weight tensor to
            produce the tensor-subclass-wrapped replacement.
    """
    if name in _APPLY_TS_TABLE:
        # Lazy %-style args so the message is only formatted when the
        # warning is actually emitted (stdlib logging convention).
        logging.warning("shortcut string %s already exists, overwriting", name)
    _APPLY_TS_TABLE[name] = apply_tensor_subclass
You can’t perform that action at this time.
0 commit comments