Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions olive/passes/onnx/hqq_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def _run_for_config(
return model
output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name)
ir_model = model.load_ir_model()
ir.external_data.load_to_model(ir_model)
ir_model.graph.opset_imports[MSFT_DOMAIN] = 1
self._quantize_model(
ir_model,
Expand Down
1 change: 1 addition & 0 deletions olive/passes/onnx/rtn_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def _run_for_config(
) -> ONNXModelHandler:
output_model_path = resolve_onnx_path(output_model_path, Path(model.model_path).name)
ir_model = model.load_ir_model()
ir.external_data.load_to_model(ir_model)
ir_model.graph.opset_imports[MSFT_DOMAIN] = 1
self._quantize_model(
ir_model,
Expand Down
72 changes: 72 additions & 0 deletions test/passes/onnx/test_hqq_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,47 @@ def matmul_model_path(self, tmp_path):
onnx.save(model_def, str(model_path))
return model_path

@pytest.fixture
def matmul_model_with_external_data_path(self, tmp_path):
    """Create an ONNX MatMul model whose weight is stored as external data.

    Saves a [1, 64] x [64, 128] float32 MatMul model to ``tmp_path`` with
    ``save_as_external_data=True`` and ``size_threshold=0`` so every tensor
    (including the weight) lands in the sibling ``matmul_model_ext.onnx.data``
    file. Returns the Path to the ``.onnx`` file.
    """
    input_shape = [1, 64]
    weight_shape = [64, 128]
    weight_tensor = np.random.randn(*weight_shape).astype(np.float32)

    input_name = "input"
    output_name = "output"
    weight_name = "weight"

    input_tensor_proto = onnx.helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape)
    output_tensor_proto = onnx.helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, [1, 128])
    weight_tensor_proto = onnx.numpy_helper.from_array(weight_tensor, name=weight_name)

    matmul_node = onnx.helper.make_node(
        str(OpType.MatMul), inputs=[input_name, weight_name], outputs=[output_name], name="MatMul_Node"
    )

    graph_def = onnx.helper.make_graph(
        nodes=[matmul_node],
        name="test-model",
        inputs=[input_tensor_proto],
        outputs=[output_tensor_proto],
        initializer=[weight_tensor_proto],
    )

    model_def = onnx.helper.make_model(graph_def, producer_name="olive-test")
    model_def.opset_import[0].version = 13

    # Build the path exactly once; the original constructed the same path
    # twice (once as str for saving, once as Path for the return value),
    # which lets the two spellings drift apart.
    model_path = tmp_path / "matmul_model_ext.onnx"
    onnx.save(
        model_def,
        str(model_path),
        save_as_external_data=True,
        all_tensors_to_one_file=True,
        location="matmul_model_ext.onnx.data",
        size_threshold=0,  # force even tiny tensors out to the .data file
    )
    return model_path

def test_hqq_quantization_pass(self, matmul_model_path, tmp_path):
# Setup
olive_model = ONNXModelHandler(model_path=str(matmul_model_path))
Expand Down Expand Up @@ -91,3 +132,34 @@ def test_hqq_quantization_pass(self, matmul_model_path, tmp_path):
break

assert found_matmul_nbits, "No MatMulNBits node found in quantized model"

def test_hqq_quantization_pass_produces_valid_output_when_model_has_external_data(
    self, matmul_model_with_external_data_path, tmp_path
):
    """Quantizing a model with external data should produce a valid ONNX model."""
    # Arrange: wrap the external-data fixture model and build an HQQ pass for CPU.
    input_model = ONNXModelHandler(model_path=str(matmul_model_with_external_data_path))
    spec = AcceleratorSpec(
        accelerator_type="CPU",
        execution_provider="CPUExecutionProvider",
    )
    hqq_pass = create_pass_from_dict(
        OnnxHqqQuantization, {"block_size": 128}, disable_search=True, accelerator_spec=spec
    )

    # Act: run the pass end to end.
    quantized_model = hqq_pass.run(input_model, tmp_path / "quantized_ext_data.onnx")

    assert os.path.exists(quantized_model.model_path)

    # The output model must pass ONNX validation (regression test for #2223)
    onnx.checker.check_model(quantized_model.model_path)

    # The MatMul must have been rewritten to a MatMulNBits node.
    quantized_ir = ir.load(quantized_model.model_path)
    found_matmul_nbits = any(
        node.op_type == OpType.MatMulNBits for node in quantized_ir.graph.all_nodes()
    )
    assert found_matmul_nbits, "No MatMulNBits node found in quantized model"
72 changes: 72 additions & 0 deletions test/passes/onnx/test_rtn_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,47 @@ def matmul_model_path(self, tmp_path):
onnx.save(model_def, str(model_path))
return model_path

@pytest.fixture
def matmul_model_with_external_data_path(self, tmp_path):
    """Create an ONNX MatMul model whose weight is stored as external data.

    Saves a [1, 64] x [64, 128] float32 MatMul model to ``tmp_path`` with
    ``save_as_external_data=True`` and ``size_threshold=0`` so every tensor
    (including the weight) lands in the sibling ``matmul_model_ext.onnx.data``
    file. Returns the Path to the ``.onnx`` file.
    """
    input_shape = [1, 64]
    weight_shape = [64, 128]
    weight_tensor = np.random.randn(*weight_shape).astype(np.float32)

    input_name = "input"
    output_name = "output"
    weight_name = "weight"

    input_tensor_proto = onnx.helper.make_tensor_value_info(input_name, onnx.TensorProto.FLOAT, input_shape)
    output_tensor_proto = onnx.helper.make_tensor_value_info(output_name, onnx.TensorProto.FLOAT, [1, 128])
    weight_tensor_proto = onnx.numpy_helper.from_array(weight_tensor, name=weight_name)

    matmul_node = onnx.helper.make_node(
        str(OpType.MatMul), inputs=[input_name, weight_name], outputs=[output_name], name="MatMul_Node"
    )

    graph_def = onnx.helper.make_graph(
        nodes=[matmul_node],
        name="test-model",
        inputs=[input_tensor_proto],
        outputs=[output_tensor_proto],
        initializer=[weight_tensor_proto],
    )

    model_def = onnx.helper.make_model(graph_def, producer_name="olive-test")
    model_def.opset_import[0].version = 13

    # Build the path exactly once; the original constructed the same path
    # twice (once as str for saving, once as Path for the return value),
    # which lets the two spellings drift apart.
    model_path = tmp_path / "matmul_model_ext.onnx"
    onnx.save(
        model_def,
        str(model_path),
        save_as_external_data=True,
        all_tensors_to_one_file=True,
        location="matmul_model_ext.onnx.data",
        size_threshold=0,  # force even tiny tensors out to the .data file
    )
    return model_path

@pytest.fixture
def gather_model_path(self, tmp_path):
"""Create a simple ONNX model with a Gather op and save it to a temporary file."""
Expand Down Expand Up @@ -170,6 +211,37 @@ def test_rtn_quantization_pass_gather(self, gather_model_path, tmp_path, is_symm

assert found_gather_block_quantized, "No GatherBlockQuantized node found in quantized model"

def test_rtn_quantization_pass_produces_valid_output_when_model_has_external_data(
    self, matmul_model_with_external_data_path, tmp_path
):
    """Quantizing a model with external data should produce a valid ONNX model."""
    # Arrange: wrap the external-data fixture model and build a block-wise RTN pass for CPU.
    input_model = ONNXModelHandler(model_path=str(matmul_model_with_external_data_path))
    spec = AcceleratorSpec(
        accelerator_type="CPU",
        execution_provider="CPUExecutionProvider",
    )
    rtn_pass = create_pass_from_dict(
        OnnxBlockWiseRtnQuantization,
        {"bits": 4, "block_size": 128, "axis": 0, "is_symmetric": True},
        disable_search=True,
        accelerator_spec=spec,
    )

    # Act: run the pass end to end.
    quantized_model = rtn_pass.run(input_model, tmp_path / "quantized_ext_data.onnx")

    assert os.path.exists(quantized_model.model_path)

    # The output model must pass ONNX validation
    onnx.checker.check_model(quantized_model.model_path)

    # The MatMul must have been rewritten to a MatMulNBits node.
    quantized_ir = ir.load(quantized_model.model_path)
    found_matmul_nbits = any(
        node.op_type == OpType.MatMulNBits for node in quantized_ir.graph.all_nodes()
    )
    assert found_matmul_nbits, "No MatMulNBits node found in quantized model"

def test_rtn_quantization_with_exclusion(self, matmul_model_path, tmp_path):
# Setup
olive_model = ONNXModelHandler(model_path=str(matmul_model_path))
Expand Down
Loading