PaddlePaddle · wanghaoshuang · Dec 1, 2023 · Nov 28, 2023 · Nov 29, 2023 · Nov 30, 2023
diff --git a/python/paddle/nn/quant/format.py b/python/paddle/nn/quant/format.py
@@ -48,7 +48,16 @@ def from_quanter(quanter):
 class LinearQuanter(Layer):
     def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
         super().__init__()
-        self._scales = paddle.to_tensor(scales, dtype="float32")
+        scales = paddle.to_tensor(scales, dtype="float32")
+        scale_attr = paddle.framework.ParamAttr(
+            name=paddle.utils.unique_name.generate('quant_dequant.scale'),
+            initializer=paddle.nn.initializer.Constant(1.0),
+            trainable=False,
+        )
+        self._scales = self.create_parameter(
+            shape=scales.shape, attr=scale_attr, dtype="float32"
+        )
+        self._scales.set_value(scales)
         self._zero_point = (
             paddle.zeros([1], dtype="float32")
             if zero_point is None
@@ -98,7 +107,16 @@ def from_quanter(quanter):
 class LinearDequanter(Layer):
     def __init__(self, scales, zero_point=None, quant_axis=None, bit_length=8):
         super().__init__()
-        self._scales = paddle.to_tensor(scales, dtype="float32")
+        scales = paddle.to_tensor(scales, dtype="float32")
+        scale_attr = paddle.framework.ParamAttr(
+            name=paddle.utils.unique_name.generate('quant_dequant.scale'),
+            initializer=paddle.nn.initializer.Constant(1.0),
+            trainable=False,
+        )
+        self._scales = self.create_parameter(
+            shape=scales.shape, attr=scale_attr, dtype="float32"
+        )
+        self._scales.set_value(scales)
         self._zero_point = (
             paddle.zeros([1], dtype="float32")
             if zero_point is None
@@ -224,12 +242,12 @@ def _quant_weights(self, weight_name, quanter):
         qweight = quanter(weight)
         weight.set_value(qweight)
 
-    def _convert(self):
+    def _convert(self, remain_weight=False):
         r"""Convert current layer to onnx style for inference."""
         assert not self.converted, "The model should be converted only once."
         for weight_name, quanter_name in self.weights_to_quanters():
             qdq = self._convert_quanter_to_qdq(quanter_name)
-            if qdq is not None:
+            if qdq is not None and remain_weight is False:
                 self._quant_weights(weight_name, qdq._quanter)
                 qdq._quanter = None
                 qdq._sub_layers['_quanter'] = None

diff --git a/python/paddle/quantization/quantize.py b/python/paddle/quantization/quantize.py
@@ -40,12 +40,13 @@ def quantize(self, model: Layer, inplace=False):
         r"""Create a model for quantization-aware training or post-training quantization."""
         pass
 
-    def convert(self, model: Layer, inplace=False):
+    def convert(self, model: Layer, inplace=False, remain_weight=False):
         r"""Convert the quantization model to ONNX style. And the converted
         model can be saved as inference model by calling paddle.jit.save.
         Args:
             model(Layer) - The quantized model to be converted.
-            inplace(bool) - Whether to modify the model in-place.
+            inplace(bool, optional) - Whether to modify the model in-place, default is False.
+            remain_weight(bool, optional) - Whether to keep the weights as floats, i.e. skip quantizing the weights during conversion, default is False.
 
         Return: The converted model
 
@@ -71,11 +72,13 @@ def convert(self, model: Layer, inplace=False):
         for name, child in _model.named_children():
             quant_dequant = None
             if isinstance(child, ConvertibleQuantedLayer):
-                child._convert()
+                if child.weight_quanter.scales() is None:
+                    continue
+                child._convert(remain_weight=remain_weight)
             elif isinstance(child, BaseQuanter):
                 quant_dequant = LinearQuanterDequanter.from_quanter(child)
             else:
-                self.convert(child, inplace=True)
+                self.convert(child, inplace=True, remain_weight=remain_weight)
             if quant_dequant is not None:
                 replaced[name] = quant_dequant
         for key, value in replaced.items():