Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions olive/passes/onnx/hqq_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def _run_for_config(
return model
output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name)
ir_model = model.load_ir_model()
ir.external_data.load_to_model(ir_model)
ir_model.graph.opset_imports[MSFT_DOMAIN] = 1
self._quantize_model(
ir_model,
Expand Down
1 change: 1 addition & 0 deletions olive/passes/onnx/rtn_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def _run_for_config(
) -> ONNXModelHandler:
output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name)
ir_model = model.load_ir_model()
ir.external_data.load_to_model(ir_model)
ir_model.graph.opset_imports[MSFT_DOMAIN] = 1
self._quantize_model(
ir_model,
Expand Down
72 changes: 72 additions & 0 deletions test/passes/onnx/test_hqq_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,47 @@ def matmul_model_path(self, tmp_path):
onnx.save(model_def, str(model_path))
return model_path

@pytest.fixture
def matmul_model_with_external_data_path(self, tmp_path):
    """Create an ONNX MatMul model whose weight is stored as external data.

    Saves a [1, 64] x [64, 128] float32 MatMul model to ``tmp_path`` with
    ``save_as_external_data=True`` and ``size_threshold=0`` so every tensor
    (including the weight) lands in the sibling ``matmul_model_ext.onnx.data``
    file. Returns the Path to the ``.onnx`` file.
    """
    input_shape = [1, 64]
    weight_shape = [64, 128]
    weight_tensor = np.random.randn(*weight_shape).astype(np.float32)

    input_name = "input"
    output_name = "output"
    weight_name = "weight"

    input_tensor_proto = onnx.helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape)
    output_tensor_proto = onnx.helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, [1, 128])
    weight_tensor_proto = onnx.numpy_helper.from_array(weight_tensor, name=weight_name)

    matmul_node = onnx.helper.make_node(
        str(OpType.MatMul), inputs=[input_name, weight_name], outputs=[output_name], name="MatMul_Node"
    )

    graph_def = onnx.helper.make_graph(
        nodes=[matmul_node],
        name="test-model",
        inputs=[input_tensor_proto],
        outputs=[output_tensor_proto],
        initializer=[weight_tensor_proto],
    )

    model_def = onnx.helper.make_model(graph_def, producer_name="olive-test")
    model_def.opset_import[0].version = 13

    # Build the path exactly once; the original constructed the same path
    # twice (once as str for saving, once as Path for the return value),
    # which lets the two spellings drift apart.
    model_path = tmp_path / "matmul_model_ext.onnx"
    onnx.save(
        model_def,
        str(model_path),
        save_as_external_data=True,
        all_tensors_to_one_file=True,
        location="matmul_model_ext.onnx.data",
        size_threshold=0,  # force even tiny tensors out to the .data file
    )
    return model_path

def test_hqq_quantization_pass(self, matmul_model_path, tmp_path):
# Setup
olive_model = ONNXModelHandler(model_path=str(matmul_model_path))
Expand Down Expand Up @@ -91,3 +132,34 @@ def test_hqq_quantization_pass(self, matmul_model_path, tmp_path):
break

assert found_matmul_nbits, "No MatMulNBits node found in quantized model"

def test_hqq_quantization_pass_produces_valid_output_when_model_has_external_data(
    self, matmul_model_with_external_data_path, tmp_path
):
    """Quantizing a model with external data should produce a valid ONNX model."""
    # Arrange: wrap the external-data fixture model and build an HQQ pass for CPU.
    input_model = ONNXModelHandler(model_path=str(matmul_model_with_external_data_path))
    spec = AcceleratorSpec(
        accelerator_type="CPU",
        execution_provider="CPUExecutionProvider",
    )
    hqq_pass = create_pass_from_dict(
        OnnxHqqQuantization, {"block_size": 128}, disable_search=True, accelerator_spec=spec
    )

    # Act: run the pass end to end.
    quantized_model = hqq_pass.run(input_model, tmp_path / "quantized_ext_data.onnx")

    assert os.path.exists(quantized_model.model_path)

    # The output model must pass ONNX validation (regression test for #2223)
    onnx.checker.check_model(quantized_model.model_path)

    # The MatMul must have been rewritten to a MatMulNBits node.
    quantized_ir = ir.load(quantized_model.model_path)
    found_matmul_nbits = any(
        node.op_type == OpType.MatMulNBits for node in quantized_ir.graph.all_nodes()
    )
    assert found_matmul_nbits, "No MatMulNBits node found in quantized model"
72 changes: 72 additions & 0 deletions test/passes/onnx/test_rtn_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,47 @@ def matmul_model_path(self, tmp_path):
onnx.save(model_def, str(model_path))
return model_path

@pytest.fixture
def matmul_model_with_external_data_path(self, tmp_path):
    """Create an ONNX MatMul model whose weight is stored as external data.

    Saves a [1, 64] x [64, 128] float32 MatMul model to ``tmp_path`` with
    ``save_as_external_data=True`` and ``size_threshold=0`` so every tensor
    (including the weight) lands in the sibling ``matmul_model_ext.onnx.data``
    file. Returns the Path to the ``.onnx`` file.
    """
    input_shape = [1, 64]
    weight_shape = [64, 128]
    weight_tensor = np.random.randn(*weight_shape).astype(np.float32)

    input_name = "input"
    output_name = "output"
    weight_name = "weight"

    input_tensor_proto = onnx.helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape)
    output_tensor_proto = onnx.helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, [1, 128])
    weight_tensor_proto = onnx.numpy_helper.from_array(weight_tensor, name=weight_name)

    matmul_node = onnx.helper.make_node(
        str(OpType.MatMul), inputs=[input_name, weight_name], outputs=[output_name], name="MatMul_Node"
    )

    graph_def = onnx.helper.make_graph(
        nodes=[matmul_node],
        name="test-model",
        inputs=[input_tensor_proto],
        outputs=[output_tensor_proto],
        initializer=[weight_tensor_proto],
    )

    model_def = onnx.helper.make_model(graph_def, producer_name="olive-test")
    model_def.opset_import[0].version = 13

    # Build the path exactly once; the original constructed the same path
    # twice (once as str for saving, once as Path for the return value),
    # which lets the two spellings drift apart.
    model_path = tmp_path / "matmul_model_ext.onnx"
    onnx.save(
        model_def,
        str(model_path),
        save_as_external_data=True,
        all_tensors_to_one_file=True,
        location="matmul_model_ext.onnx.data",
        size_threshold=0,  # force even tiny tensors out to the .data file
    )
    return model_path

@pytest.fixture
def gather_model_path(self, tmp_path):
"""Create a simple ONNX model with a Gather op and save it to a temporary file."""
Expand Down Expand Up @@ -170,6 +211,37 @@ def test_rtn_quantization_pass_gather(self, gather_model_path, tmp_path, is_symm

assert found_gather_block_quantized, "No GatherBlockQuantized node found in quantized model"

def test_rtn_quantization_pass_produces_valid_output_when_model_has_external_data(
    self, matmul_model_with_external_data_path, tmp_path
):
    """Quantizing a model with external data should produce a valid ONNX model."""
    # Arrange: wrap the external-data fixture model and build a block-wise RTN pass for CPU.
    input_model = ONNXModelHandler(model_path=str(matmul_model_with_external_data_path))
    spec = AcceleratorSpec(
        accelerator_type="CPU",
        execution_provider="CPUExecutionProvider",
    )
    rtn_pass = create_pass_from_dict(
        OnnxBlockWiseRtnQuantization,
        {"bits": 4, "block_size": 128, "axis": 0, "is_symmetric": True},
        disable_search=True,
        accelerator_spec=spec,
    )

    # Act: run the pass end to end.
    quantized_model = rtn_pass.run(input_model, tmp_path / "quantized_ext_data.onnx")

    assert os.path.exists(quantized_model.model_path)

    # The output model must pass ONNX validation
    onnx.checker.check_model(quantized_model.model_path)

    # The MatMul must have been rewritten to a MatMulNBits node.
    quantized_ir = ir.load(quantized_model.model_path)
    found_matmul_nbits = any(
        node.op_type == OpType.MatMulNBits for node in quantized_ir.graph.all_nodes()
    )
    assert found_matmul_nbits, "No MatMulNBits node found in quantized model"

def test_rtn_quantization_with_exclusion(self, matmul_model_path, tmp_path):
# Setup
olive_model = ONNXModelHandler(model_path=str(matmul_model_path))
Expand Down
Loading