
Aot compiler fix #9634


Merged
merged 2 commits into from Mar 26, 2025
4 changes: 2 additions & 2 deletions .ci/scripts/gather_test_models.py
@@ -14,7 +14,7 @@
from typing import Any

from examples.models import MODEL_NAME_TO_MODEL
from examples.xnnpack import MODEL_NAME_TO_OPTIONS
from examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType

DEFAULT_RUNNERS = {
"linux": "linux.2xlarge",
@@ -154,7 +154,7 @@ def export_models_for_ci() -> dict[str, dict]:
if backend == "xnnpack":
if name not in MODEL_NAME_TO_OPTIONS:
continue
if MODEL_NAME_TO_OPTIONS[name].quantization:
if MODEL_NAME_TO_OPTIONS[name].quantization != QuantType.NONE:
backend += "-quantization"

if MODEL_NAME_TO_OPTIONS[name].delegation:
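For reference, a minimal sketch of the new labeling rule for the CI matrix: a model now counts as quantized whenever its QuantType is anything other than NONE. The helper name ci_backend_label and the example model name are hypothetical; only MODEL_NAME_TO_OPTIONS and QuantType come from this diff.

```python
from examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType


def ci_backend_label(name: str, backend: str = "xnnpack") -> str:
    # A model is tagged as quantized whenever its QuantType is not NONE,
    # mirroring the updated check in gather_test_models.py.
    if MODEL_NAME_TO_OPTIONS[name].quantization != QuantType.NONE:
        backend += "-quantization"
    return backend


print(ci_backend_label("mv2"))  # hypothetical model name; prints "xnnpack-quantization"
```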
13 changes: 10 additions & 3 deletions examples/models/llama/model.py
@@ -259,15 +259,22 @@ def __init__(self, **kwargs):
assign=True,
) # self.model_ = Transformer(gptconf)
else:
print("Checkpoint not provided, defaulting to uninitialized weights.")
print("Checkpoint not provided, defaulting weights to zeros.")
self.model_.to_empty(device="cpu")
for p in self.model_.parameters():
p.data.fill_(0)
for b in self.model_.buffers():
b.data.fill_(0)
except RuntimeError as e:
print(
f"Could not load checkpoint into mode and will default to uninitialized weights due to error: {e}."
f"Could not load checkpoint into mode and will defaulting weights to zeros due to error: {e}."
)
# Need to provide concrete (empty) values for meta-initialized tensors for quantization.
self.model_.to_empty(device="cpu")

for p in self.model_.parameters():
p.data.fill_(0)
for b in self.model_.buffers():
b.data.fill_(0)
if missing:
missing_weights = [fqn for fqn in missing if fqn.endswith(".weight")]
if missing_weights:
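The materialize-and-zero pattern added above can be exercised in isolation. Below is a minimal sketch with a toy module (not the actual Llama model), assuming a recent PyTorch that supports meta-device construction and Module.to_empty:

```python
import torch
import torch.nn as nn

# Build a module on the meta device, as happens when no checkpoint is provided.
with torch.device("meta"):
    model = nn.Linear(4, 4)

# to_empty() allocates real (uninitialized) CPU storage for every parameter
# and buffer that currently lives on the meta device.
model.to_empty(device="cpu")

# Fill parameters and buffers with zeros so downstream passes (e.g.
# quantization) see concrete values instead of uninitialized memory.
for p in model.parameters():
    p.data.fill_(0)
for b in model.buffers():
    b.data.fill_(0)

print(model.weight.sum())  # tensor(0.)
```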
49 changes: 30 additions & 19 deletions examples/xnnpack/__init__.py
@@ -7,33 +7,44 @@
# pyre-unsafe

from dataclasses import dataclass
from enum import Enum


class QuantType(Enum):
NONE = 1
# Used for operations that don't have weights
STATIC_PER_TENSOR = 2
# Best suited for CNN/RNN models with conv layers
STATIC_PER_CHANNEL = 3
# Used for linear layers and transformer-based models
DYNAMIC_PER_CHANNEL = 4


@dataclass
class XNNPACKOptions(object):
quantization: bool
quantization: QuantType
delegation: bool


MODEL_NAME_TO_OPTIONS = {
"linear": XNNPACKOptions(True, True),
"add": XNNPACKOptions(True, True),
"add_mul": XNNPACKOptions(True, True),
"dl3": XNNPACKOptions(True, True),
"ic3": XNNPACKOptions(True, True),
"ic4": XNNPACKOptions(True, True),
"mv2": XNNPACKOptions(True, True),
"mv3": XNNPACKOptions(True, True),
"resnet18": XNNPACKOptions(True, True),
"resnet50": XNNPACKOptions(True, True),
"vit": XNNPACKOptions(True, True),
"w2l": XNNPACKOptions(True, True),
"edsr": XNNPACKOptions(True, True),
"mobilebert": XNNPACKOptions(True, True),
"llama2": XNNPACKOptions(False, True),
"emformer_join": XNNPACKOptions(True, True),
"emformer_predict": XNNPACKOptions(True, True),
"emformer_transcribe": XNNPACKOptions(True, True),
"linear": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"add": XNNPACKOptions(QuantType.STATIC_PER_TENSOR, True),
"add_mul": XNNPACKOptions(QuantType.STATIC_PER_TENSOR, True),
"dl3": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"ic3": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"ic4": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"mv2": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"mv3": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"resnet18": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"resnet50": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"vit": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
"w2l": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
"edsr": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
"mobilebert": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
"llama2": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
"emformer_join": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
"emformer_predict": XNNPACKOptions(QuantType.DYNAMIC_PER_CHANNEL, True),
"emformer_transcribe": XNNPACKOptions(QuantType.STATIC_PER_CHANNEL, True),
}


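A short sketch of how the new table might be consumed; MODEL_NAME_TO_OPTIONS and QuantType come from this file, while the model name and the branching below are purely illustrative:

```python
from examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType

options = MODEL_NAME_TO_OPTIONS["mobilebert"]
print(options.quantization)  # QuantType.DYNAMIC_PER_CHANNEL
print(options.delegation)    # True

# The enum makes the quantization recipe explicit instead of a plain boolean.
if options.quantization is QuantType.NONE:
    print("export fp32 only")
elif options.quantization is QuantType.DYNAMIC_PER_CHANNEL:
    print("dynamic per-channel symmetric quantization")
else:
    print("static quantization")
```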
6 changes: 4 additions & 2 deletions examples/xnnpack/aot_compiler.py
@@ -66,7 +66,7 @@

args = parser.parse_args()

if not args.delegate:
if not args.delegate and args.quantize:
raise NotImplementedError(
"T161880157: Quantization-only without delegation is not supported yet"
)
@@ -79,6 +79,8 @@
f"Available models are {list(MODEL_NAME_TO_OPTIONS.keys())}."
)

quant_type = MODEL_NAME_TO_OPTIONS[args.model_name].quantization

model, example_inputs, _, _ = EagerModelFactory.create_model(
*MODEL_NAME_TO_MODEL[args.model_name]
)
@@ -91,7 +93,7 @@
if args.quantize:
logging.info("Quantizing Model...")
# TODO(T165162973): This pass shall eventually be folded into quantizer
model = quantize(model, example_inputs)
model = quantize(model, example_inputs, quant_type)
ep = torch.export.export_for_training(model, example_inputs)

edge = to_edge_transform_and_lower(
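A condensed, illustrative view of the updated control flow in aot_compiler.py; the variables below stand in for the parsed CLI arguments and are not the script's actual names beyond what the diff shows:

```python
from examples.xnnpack import MODEL_NAME_TO_OPTIONS, QuantType

# Stand-ins for args.delegate, args.quantize and args.model_name.
delegate, quantize_requested, model_name = True, True, "mv2"

# The guard now only rejects quantization *without* delegation; a plain
# fp32 export without the XNNPACK delegate is no longer blocked.
if not delegate and quantize_requested:
    raise NotImplementedError(
        "T161880157: Quantization-only without delegation is not supported yet"
    )

# The per-model quantization recipe is looked up once and later threaded
# into quantize(model, example_inputs, quant_type).
quant_type = MODEL_NAME_TO_OPTIONS[model_name].quantization
print(quant_type)  # QuantType.STATIC_PER_CHANNEL for "mv2"
```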
16 changes: 14 additions & 2 deletions examples/xnnpack/quantization/utils.py
@@ -13,13 +13,25 @@

from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from .. import QuantType

def quantize(model, example_inputs):

def quantize(
model, example_inputs, quant_type: QuantType = QuantType.STATIC_PER_TENSOR
):
"""This is the official recommended flow for quantization in pytorch 2.0 export"""
logging.info(f"Original model: {model}")
quantizer = XNNPACKQuantizer()
# if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
operator_config = get_symmetric_quantization_config(is_per_channel=False)
is_per_channel = (
quant_type == QuantType.STATIC_PER_CHANNEL
or quant_type == QuantType.DYNAMIC_PER_CHANNEL
)
is_dynamic = quant_type == QuantType.DYNAMIC_PER_CHANNEL
operator_config = get_symmetric_quantization_config(
is_per_channel=is_per_channel,
is_dynamic=is_dynamic,
)
quantizer.set_global(operator_config)
m = prepare_pt2e(model, quantizer)
# calibration
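To show the effect of the new quant_type parameter, here is a standalone sketch of the QuantType-to-quantizer-config mapping. The import path for XNNPACKQuantizer is an assumption based on the torch.ao quantizer module (the actual imports are elided from the diff), and make_quantizer is a hypothetical helper:

```python
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

from examples.xnnpack import QuantType


def make_quantizer(quant_type: QuantType) -> XNNPACKQuantizer:
    # STATIC_PER_CHANNEL and DYNAMIC_PER_CHANNEL both enable per-channel scales;
    # only DYNAMIC_PER_CHANNEL enables dynamic activation quantization.
    is_per_channel = quant_type in (
        QuantType.STATIC_PER_CHANNEL,
        QuantType.DYNAMIC_PER_CHANNEL,
    )
    is_dynamic = quant_type == QuantType.DYNAMIC_PER_CHANNEL
    quantizer = XNNPACKQuantizer()
    quantizer.set_global(
        get_symmetric_quantization_config(
            is_per_channel=is_per_channel,
            is_dynamic=is_dynamic,
        )
    )
    return quantizer


quantizer = make_quantizer(QuantType.DYNAMIC_PER_CHANNEL)
```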