Skip to content

Commit af2f41a

Browse files
tarun292 authored and facebook-github-bot committed
Move get_quantizer_and_quant_params to quantizer_lib (#11056)
Summary: Move get_quantizer_and_quant_params to quantizer_lib in extension/llm/export/ so that it's easier to create recipes. Differential Revision: D75179679
1 parent 5268b24 commit af2f41a

File tree

4 files changed

+29
-39
lines changed

4 files changed

+29
-39
lines changed

examples/models/llama/eval_llama_lib.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
import torch
1313

1414
from datasets import load_dataset
15-
from executorch.examples.models.llama.export_llama_lib import (
16-
get_quantizer_and_quant_params,
17-
)
15+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
1816

1917
from executorch.extension.llm.export.builder import LLMEdgeManager
2018
from lm_eval.evaluator import simple_evaluate

examples/models/llama/export_llama_lib.py

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,7 @@
4343
get_xnnpack_partitioner,
4444
)
4545

46-
from executorch.extension.llm.export.quantizer_lib import (
47-
get_coreml_quantizer,
48-
get_pt2e_quantization_params,
49-
get_pt2e_quantizers,
50-
get_qnn_quantizer,
51-
get_vulkan_quantizer,
52-
)
46+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
5347
from executorch.util.activation_memory_profiler import generate_memory_trace
5448

5549
from ..model_factory import EagerModelFactory
@@ -724,34 +718,6 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
724718
)
725719

726720
return edge_manager
727-
728-
729-
def get_quantizer_and_quant_params(args):
730-
pt2e_quant_params = get_pt2e_quantization_params(
731-
args.pt2e_quantize, args.quantization_mode
732-
)
733-
quantizers = get_pt2e_quantizers(pt2e_quant_params, args.so_library)
734-
quant_dtype = None
735-
if args.qnn and args.pt2e_quantize:
736-
assert len(quantizers) == 0, "Should not enable both xnnpack and qnn"
737-
qnn_quantizer, quant_dtype = get_qnn_quantizer(
738-
args.pt2e_quantize, args.quantization_mode
739-
)
740-
quantizers.append(qnn_quantizer)
741-
if args.coreml and args.pt2e_quantize:
742-
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
743-
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
744-
quantizers.append(coreml_quantizer)
745-
if args.vulkan and args.pt2e_quantize:
746-
assert (
747-
len(quantizers) == 0
748-
), "Should not enable both vulkan and other quantizers"
749-
vulkan_quantizer = get_vulkan_quantizer(args.pt2e_quantize)
750-
quantizers.append(vulkan_quantizer)
751-
logging.info(f"Applying quantizers: {quantizers}")
752-
return pt2e_quant_params, quantizers, quant_dtype
753-
754-
755721
def _qmode_type(value):
756722
choices = ["int8", "8da4w", "8da4w-gptq", "vulkan_4w"]
757723
patterns = [r"torchao:8da(\d+)w", r"torchao:fpa(\d+)w"]

examples/models/llava/export_llava.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
)
1919
from executorch.examples.models.llama.export_llama_lib import (
2020
build_args_parser,
21-
get_quantizer_and_quant_params,
2221
)
22+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
2323
from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
2424
replace_kv_cache_with_custom_kv_cache,
2525
)

extension/llm/export/quantizer_lib.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,3 +277,29 @@ def get_vulkan_quantizer(pt2e_quantize: str):
277277

278278
quantizer = VulkanQuantizer().set_global(config)
279279
return quantizer
280+
281+
282+
def get_quantizer_and_quant_params(args):
283+
pt2e_quant_params = get_pt2e_quantization_params(
284+
args.pt2e_quantize, args.quantization_mode
285+
)
286+
quantizers = get_pt2e_quantizers(pt2e_quant_params, args.so_library)
287+
quant_dtype = None
288+
if args.qnn and args.pt2e_quantize:
289+
assert len(quantizers) == 0, "Should not enable both xnnpack and qnn"
290+
qnn_quantizer, quant_dtype = get_qnn_quantizer(
291+
args.pt2e_quantize, args.quantization_mode
292+
)
293+
quantizers.append(qnn_quantizer)
294+
if args.coreml and args.pt2e_quantize:
295+
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
296+
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
297+
quantizers.append(coreml_quantizer)
298+
if args.vulkan and args.pt2e_quantize:
299+
assert (
300+
len(quantizers) == 0
301+
), "Should not enable both vulkan and other quantizers"
302+
vulkan_quantizer = get_vulkan_quantizer(args.pt2e_quantize)
303+
quantizers.append(vulkan_quantizer)
304+
logging.info(f"Applying quantizers: {quantizers}")
305+
return pt2e_quant_params, quantizers, quant_dtype

0 commit comments

Comments
 (0)