Skip to content

Commit af2f41a

Browse files
tarun292 authored and facebook-github-bot committed
Move get_quantizer_and_quant_params to quantizer_lib (#11056)
Summary: Move get_quantizer_and_quant_params to quantizer_lib in extension/llm/export/ so that it's easier to create recipes. Differential Revision: D75179679
1 parent 5268b24 commit af2f41a

File tree

4 files changed

+29
-39
lines changed

4 files changed

+29
-39
lines changed

examples/models/llama/eval_llama_lib.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@
1212
import torch
1313

1414
from datasets import load_dataset
15-
from executorch.examples.models.llama.export_llama_lib import (
16-
get_quantizer_and_quant_params,
17-
)
15+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
1816

1917
from executorch.extension.llm.export.builder import LLMEdgeManager
2018
from lm_eval.evaluator import simple_evaluate

examples/models/llama/export_llama_lib.py

Lines changed: 1 addition & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,7 @@
4343
get_xnnpack_partitioner,
4444
)
4545

46-
from executorch.extension.llm.export.quantizer_lib import (
47-
get_coreml_quantizer,
48-
get_pt2e_quantization_params,
49-
get_pt2e_quantizers,
50-
get_qnn_quantizer,
51-
get_vulkan_quantizer,
52-
)
46+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
5347
from executorch.util.activation_memory_profiler import generate_memory_trace
5448

5549
from ..model_factory import EagerModelFactory
@@ -724,34 +718,6 @@ def _prepare_for_llama_export(args) -> LLMEdgeManager:
724718
)
725719

726720
return edge_manager
727-
728-
729-
def get_quantizer_and_quant_params(args):
730-
pt2e_quant_params = get_pt2e_quantization_params(
731-
args.pt2e_quantize, args.quantization_mode
732-
)
733-
quantizers = get_pt2e_quantizers(pt2e_quant_params, args.so_library)
734-
quant_dtype = None
735-
if args.qnn and args.pt2e_quantize:
736-
assert len(quantizers) == 0, "Should not enable both xnnpack and qnn"
737-
qnn_quantizer, quant_dtype = get_qnn_quantizer(
738-
args.pt2e_quantize, args.quantization_mode
739-
)
740-
quantizers.append(qnn_quantizer)
741-
if args.coreml and args.pt2e_quantize:
742-
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
743-
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
744-
quantizers.append(coreml_quantizer)
745-
if args.vulkan and args.pt2e_quantize:
746-
assert (
747-
len(quantizers) == 0
748-
), "Should not enable both vulkan and other quantizers"
749-
vulkan_quantizer = get_vulkan_quantizer(args.pt2e_quantize)
750-
quantizers.append(vulkan_quantizer)
751-
logging.info(f"Applying quantizers: {quantizers}")
752-
return pt2e_quant_params, quantizers, quant_dtype
753-
754-
755721
def _qmode_type(value):
756722
choices = ["int8", "8da4w", "8da4w-gptq", "vulkan_4w"]
757723
patterns = [r"torchao:8da(\d+)w", r"torchao:fpa(\d+)w"]

examples/models/llava/export_llava.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
)
1919
from executorch.examples.models.llama.export_llama_lib import (
2020
build_args_parser,
21-
get_quantizer_and_quant_params,
2221
)
22+
from executorch.extension.llm.export.quantizer_lib import get_quantizer_and_quant_params
2323
from executorch.examples.models.llama.source_transformation.custom_kv_cache import (
2424
replace_kv_cache_with_custom_kv_cache,
2525
)

extension/llm/export/quantizer_lib.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,3 +277,29 @@ def get_vulkan_quantizer(pt2e_quantize: str):
277277

278278
quantizer = VulkanQuantizer().set_global(config)
279279
return quantizer
280+
281+
282+
def get_quantizer_and_quant_params(args):
283+
pt2e_quant_params = get_pt2e_quantization_params(
284+
args.pt2e_quantize, args.quantization_mode
285+
)
286+
quantizers = get_pt2e_quantizers(pt2e_quant_params, args.so_library)
287+
quant_dtype = None
288+
if args.qnn and args.pt2e_quantize:
289+
assert len(quantizers) == 0, "Should not enable both xnnpack and qnn"
290+
qnn_quantizer, quant_dtype = get_qnn_quantizer(
291+
args.pt2e_quantize, args.quantization_mode
292+
)
293+
quantizers.append(qnn_quantizer)
294+
if args.coreml and args.pt2e_quantize:
295+
assert len(quantizers) == 0, "Should not enable both xnnpack / qnn and coreml"
296+
coreml_quantizer = get_coreml_quantizer(args.pt2e_quantize)
297+
quantizers.append(coreml_quantizer)
298+
if args.vulkan and args.pt2e_quantize:
299+
assert (
300+
len(quantizers) == 0
301+
), "Should not enable both vulkan and other quantizers"
302+
vulkan_quantizer = get_vulkan_quantizer(args.pt2e_quantize)
303+
quantizers.append(vulkan_quantizer)
304+
logging.info(f"Applying quantizers: {quantizers}")
305+
return pt2e_quant_params, quantizers, quant_dtype

0 commit comments

Comments
 (0)