Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ jobs:
testFloatTranspose
testFloatMul
Quant
Dequant


generic-models:
Expand Down Expand Up @@ -238,6 +239,7 @@ jobs:
testFloatTranspose
testFloatMul
Quant
Dequant
num-cores: 8

siracusa-models:
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,4 +196,14 @@ Change main.c to use OUTPUTTYPE instead of float
- Custom `QuantPatternPass` class to replace matched patterns with a single `Quant` operator
- Parser implementation in `Parsers.py` to extract quantization parameters
- C template implementation in `QuantTemplate.py` for efficient quantization
- Type checker implementation in `TypeCheckers.py` to handle bit-width and signedness

## Implemented Dequant Layer for Generic and Siracusa

### Added
- New `Dequant` operation to handle the dequantization pattern in ONNX models
- Implementation for both Generic and Siracusa targets in the Deeploy framework
- Custom `DequantPatternPass` class to replace matched patterns with a single `Dequant` operator
- Parser implementation in `Parsers.py` to extract dequantization parameters
- C template implementation in `DequantTemplate.py` for efficient dequantization
- Type checker implementation in `TypeCheckers.py` to handle bit-width and signedness
26 changes: 16 additions & 10 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,18 @@
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \
DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatConvTemplate, FloatDivTemplate, FloatGELUTemplate, \
FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, FloatMulTemplate, \
FloatPadTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, \
ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, QuantTemplate, \
ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, \
RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, \
iSoftmaxTemplate
DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatConvTemplate, FloatDivTemplate, \
FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \
FloatMulTemplate, FloatPadTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, GemmTemplate, \
IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \
PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, \
RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, \
iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \
DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, LayerNormChecker, MatMulChecker, \
MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, ReduceSumChecker, ReluChecker, \
RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker
DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, LayerNormChecker, \
MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, ReduceSumChecker, \
ReluChecker, RequantShiftChecker, ReshapeChecker, RQIntegerDivChecker, SliceChecker, SoftmaxChecker, \
TransposeChecker

BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])

Expand Down Expand Up @@ -266,3 +267,8 @@
NodeBinding(QuantChecker([PointerClass(float32_t)], [PointerClass(int8_t)]), QuantTemplate.referenceTemplate,
BasicTransformer),
]

# Bindings for the `Dequant` operator on the Generic target: a single binding that
# type-checks an int8 input pointer against a float32 output pointer and pairs
# DequantTemplate.referenceTemplate with BasicTransformer.
BasicDequantBindings = [
NodeBinding(DequantChecker([PointerClass(int8_t)], [PointerClass(float32_t)]), DequantTemplate.referenceTemplate,
BasicTransformer),
]
6 changes: 6 additions & 0 deletions Deeploy/Targets/Generic/Layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,3 +564,9 @@ class QuantLayer(ONNXLayer):

def __init__(self, maps: List[NodeMapper]):
super().__init__(maps)


class DequantLayer(ONNXLayer):
    """Layer wrapper for the `Dequant` operator.

    Adds no behaviour of its own: construction is forwarded unchanged to
    `ONNXLayer`.
    """

    def __init__(self, maps: List[NodeMapper]) -> None:
        # maps: the node mappers handed straight to the base-class constructor.
        super().__init__(maps)
36 changes: 36 additions & 0 deletions Deeploy/Targets/Generic/Parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2286,3 +2286,39 @@ def parseNodeCtxt(self,
self.operatorRepresentation['size'] = np.prod(data_in.shape)

return ctxt, True


class DequantParser(NodeParser):
    """Parser for `Dequant` nodes.

    Accepts a node that carries the `scale`, `zero_point` and `bit_width`
    attributes and has exactly one input and one output, and copies the
    dequantization parameters into `operatorRepresentation`.
    """

    def __init__(self):
        super().__init__()

    def parseNode(self, node: gs.Node) -> bool:
        """Validate `node` and extract its dequantization attributes.

        Returns True when the node has the required attributes and exactly one
        input and one output; in that case `scale`, `zero_point`, `bit_width`
        and `signed` are stored in `operatorRepresentation`. Returns False
        otherwise, leaving `operatorRepresentation` untouched.
        """
        required_attrs = ('scale', 'zero_point', 'bit_width')
        ret = (all(attr in node.attrs for attr in required_attrs) and len(node.inputs) == 1 and
               len(node.outputs) == 1)

        if ret:
            self.operatorRepresentation['scale'] = float(node.attrs['scale'])
            self.operatorRepresentation['zero_point'] = float(node.attrs['zero_point'])
            self.operatorRepresentation['bit_width'] = int(node.attrs['bit_width'])

            # `signed` is not part of the validity check above, so it must not be
            # indexed unconditionally: a node lacking it would raise KeyError.
            # Default to signed when the attribute is absent.
            self.operatorRepresentation['signed'] = bool(node.attrs.get('signed', True))

        return ret

    def parseNodeCtxt(self,
                      ctxt: NetworkContext,
                      node: gs.Node,
                      channels_first: bool = True) -> Tuple[NetworkContext, bool]:
        """Resolve the node's input and output buffers in `ctxt`.

        Stores the buffer names as `data_in` / `data_out` and the number of
        input elements (product of the input shape) as `size`. Always returns
        the unmodified context together with True.
        """
        data_in = ctxt.lookup(node.inputs[0].name)
        data_out = ctxt.lookup(node.outputs[0].name)

        self.operatorRepresentation['data_in'] = data_in.name
        self.operatorRepresentation['data_out'] = data_out.name
        self.operatorRepresentation['size'] = np.prod(data_in.shape)

        return ctxt, True
27 changes: 15 additions & 12 deletions Deeploy/Targets/Generic/Platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,24 @@
from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \
StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer
from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \
BasicDebugPrintBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, \
BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \
BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, \
BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \
BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, \
BasicPad2DBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \
BasicReshapeBindings, BasicRQIntegerDivBinding, BasicRQSBindings, BasicRQSGELUBinding, BasicSliceBindings, \
BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding
from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DivLayer, GatherLayer, GELULayer, \
GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \
from Deeploy.Targets.Generic.Layers import AddLayer, ConvLayer, DebugPrintLayer, DequantLayer, DivLayer, GatherLayer, \
GELULayer, GEMMLayer, ITAMaxLayer, LayerNormLayer, MatMulLayer, MaxPoolLayer, MulLayer, PadLayer, QuantLayer, \
ReduceMeanLayer, ReduceSumLayer, ReluLayer, RequantShiftLayer, ReshapeLayer, RQIntegerDivLayer, RQSiGELULayer, \
SliceLayer, SoftmaxLayer, TransposeLayer
from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DivParser, DummyParser, FlattenParser, \
GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, GenericDWConv2DParser, \
GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, ITAPartialMaxParser, LayerNormParser, \
MatMulParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, ReduceMeanParser, ReduceSumParser, ReluParser, \
RequantShiftParser, ReshapeParser, RQIntegerDivParser, RQSiGELUParser, SliceParser, SoftmaxParser, \
TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser
from Deeploy.Targets.Generic.Parsers import AddParser, DebugParser, DequantParser, DivParser, DummyParser, \
FlattenParser, GatherParser, GELUParser, GenericConv1DParser, GenericConv2DParser, GenericDWConv1DParser, \
GenericDWConv2DParser, GenericGEMMParser, GenericMaxPool2DParser, IntegerDivParser, ITAMaxParser, \
ITAPartialMaxParser, LayerNormParser, MatMulParser, MulParser, Pad1DParser, Pad2DParser, QuantParser, \
ReduceMeanParser, ReduceSumParser, ReluParser, RequantShiftParser, ReshapeParser, RQIntegerDivParser, \
RQSiGELUParser, SliceParser, SoftmaxParser, TransposeParser, UnsqueezeParser, iLayerNormParser, iSoftmaxParser
from Deeploy.Targets.Generic.Templates import AllocateTemplate, FreeTemplate
from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import ExtractPaddingFromConvPass, \
from Deeploy.Targets.Generic.TopologyOptimizationPasses.Passes import DequantPatternPass, ExtractPaddingFromConvPass, \
ExtractPaddingFromPoolPass, MatMulAddMergePass, MergeConstAddAndRequantPass, QuantPatternPass, \
iGELURequantMergePass

Expand Down Expand Up @@ -82,6 +82,7 @@
TransposeMapper = NodeMapper(TransposeParser(), BasicTransposeBindings)
UnsqueezeMapper = NodeMapper(UnsqueezeParser(), BasicReshapeBindings)
QuantMapper = NodeMapper(QuantParser(), BasicQuantBindings)
DequantMapper = NodeMapper(DequantParser(), BasicDequantBindings)

SliceMapper = NodeMapper(SliceParser(), BasicSliceBindings)

Expand Down Expand Up @@ -123,7 +124,8 @@
'Transpose': TransposeLayer([TransposeMapper]),
'Unsqueeze': ReshapeLayer([UnsqueezeMapper]),
'Slice': SliceLayer([SliceMapper]),
'Quant': QuantLayer([QuantMapper])
'Quant': QuantLayer([QuantMapper]),
'Dequant': DequantLayer([DequantMapper])
# # For example, you can use the DummpyMapper, in case you want to test
# # deployment or optimizations with GlobalAveragePool nodes but did not yet
# # implement the corresponding kernel
Expand Down Expand Up @@ -161,6 +163,7 @@ class GenericStructBuffer(StructBuffer):

GenericOptimizer = TopologyOptimizer([
QuantPatternPass(),
DequantPatternPass(),
iGELURequantMergePass(),
MatMulAddMergePass(),
MergeConstAddAndRequantPass(),
Expand Down
48 changes: 48 additions & 0 deletions Deeploy/Targets/Generic/Templates/DequantTemplate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# ----------------------------------------------------------------------

# File: DequantTemplate.py

# Last edited: 17.03.2025

# Copyright (C) 2025, ETH Zurich and University of Bologna.

# Author: Federico Brancasi, ETH Zurich

# ----------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0

# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at

# www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from Deeploy.DeeployTypes import NodeTemplate


class _DequantTemplate(NodeTemplate):
    """Node template for the `Dequant` operator; adds nothing on top of `NodeTemplate`."""

    def __init__(self, templateStr):
        # templateStr: the template source text, forwarded unchanged to NodeTemplate.
        super().__init__(templateStr)


# Reference C kernel for `Dequant`. For each of `size` elements it reads the
# input as int32, subtracts `zero_point`, multiplies by `scale`, and stores the
# result cast to the element type of `data_out`.
# NOTE(review): the ${...} placeholders are presumably substituted from the
# operator representation at code-generation time, and BEGIN_SINGLE_CORE /
# END_SINGLE_CORE are macros defined outside this file - confirm.
referenceTemplate = _DequantTemplate("""
// Dequantization (Name: ${nodeName}, Op: ${nodeOp})
BEGIN_SINGLE_CORE

for (uint32_t i=0; i<${size}; i++) {
int32_t quantized = (int32_t)${data_in}[i];
float32_t shifted_val = quantized - ${zero_point};
float32_t dequantized = shifted_val * ${scale};

${data_out}[i] = (${data_out_type.referencedType.typeName})dequantized;
}

END_SINGLE_CORE
""")
52 changes: 52 additions & 0 deletions Deeploy/Targets/Generic/TopologyOptimizationPasses/Passes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,3 +1149,55 @@ def __init__(self):

name = "_QUANT_PATTERN_PASS"
super().__init__(graph, _quant_pattern_fun, name)


def _recognize_dequant_fun(graph: gs.Graph, match: Match, name: str):
    """Replace a matched `Sub -> Mul` pair with a single `Dequant` node.

    The second operand of the Sub is taken as the zero point and the Mul
    operand that is not the Sub result is taken as the scale. Both must be
    single-element constants; otherwise the match is not a scalar
    dequantization and the graph is returned unchanged.

    Args:
        graph: Graph being rewritten.
        match: Pattern match whose `nodes_map` holds the Sub node followed by
            the Mul node.
        name: Name given to the inserted `Dequant` node.

    Returns:
        The graph, rewritten when the match qualifies and untouched otherwise.
    """
    matched_nodes = list(match.nodes_map.values())

    sub_node = matched_nodes[0]
    mul_node = matched_nodes[1]

    # Mul is commutative: find which operand carries the Sub result, the other
    # one is the scale.
    mul_input_idx = 0 if mul_node.inputs[0] == sub_node.outputs[0] else 1
    const_input_idx = 1 - mul_input_idx

    zero_point_tensor = sub_node.inputs[1]
    scale_tensor = mul_node.inputs[const_input_idx]

    # The pattern matches any Sub -> Mul chain. Only fold it when both parameters
    # are scalar constants: variables have no `.values`, and `.item()` fails on
    # multi-element (e.g. per-channel) constants.
    for tensor in (zero_point_tensor, scale_tensor):
        if not isinstance(tensor, gs.Constant) or tensor.values.size != 1:
            return graph

    zero_point = float(zero_point_tensor.values.item())
    scale = float(scale_tensor.values.item())

    # Derive the bit width from the input dtype when it is known; fall back to 8.
    # Compared with `==` (not a dict lookup) because a dtype instance and the
    # scalar type compare equal but are not interchangeable as dict keys.
    bit_width = 8
    input_dtype = getattr(sub_node.inputs[0], 'dtype', None)
    for candidate, width in ((np.int8, 8), (np.int16, 16), (np.int32, 32)):
        if input_dtype == candidate:
            bit_width = width
            break

    dequant_attrs = {'scale': scale, 'zero_point': zero_point, 'bit_width': bit_width, 'signed': True}

    _inputs = [sub_node.inputs[0]]
    _outputs = mul_node.outputs

    dequant_node = gs.Node(op = 'Dequant', name = name, attrs = dequant_attrs)
    graph.replaceInsertNode(_inputs, _outputs, dequant_node)

    return graph


@contextagnostic
class DequantPatternPass(ReplaceSequentialPatternPass):
    """Pattern pass that hands every `Sub -> Mul` sequence to `_recognize_dequant_fun`."""

    def __init__(self):
        # Build the two-node pattern graph: input -> Sub -> Mul.
        pattern = gs.Graph()
        pattern_input = gs.Variable(name = 'input_1')

        sub_result = pattern.layer(inputs = [pattern_input], outputs = ['sub_out'], op = 'Sub', name = 'sub')
        mul_result = pattern.layer(inputs = sub_result, outputs = ['mul_out'], op = 'Mul', name = 'mul')

        pattern.inputs.append(pattern_input)
        # The value returned by `layer` is appended as-is.
        pattern.outputs.append(mul_result)

        super().__init__(pattern, _recognize_dequant_fun, "_RECOGNIZE_DEQUANT_PASS")
14 changes: 14 additions & 0 deletions Deeploy/Targets/Generic/TypeCheckers.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,3 +576,17 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
operatorRepresentation: OperatorRepresentation) -> List[bool]:
# Return signedness from the operation attributes
return [bool(operatorRepresentation['signed'])]


class DequantChecker(SignPropTypeChecker):
    """Type checker for the `Dequant` operator.

    Reports a single output that is always signed, with a level count derived
    from the width of the bound output element type.
    """

    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
        super().__init__(input_types, output_types)

    def _inferNumLevels(self, inputs: List[VariableBuffer],
                        operatorRepresentation: OperatorRepresentation) -> List[int]:
        # Two to the power of the output element type's bit width.
        output_width = self.output_types[0].referencedType.typeWidth
        num_levels = 2**output_width
        return [num_levels]

    def _inferSignedness(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
        # The output is reported as signed regardless of the inputs.
        is_signed = True
        return [is_signed]
13 changes: 9 additions & 4 deletions Deeploy/Targets/PULPOpen/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeTemplate
from Deeploy.FutureExtension.Bindings.AutoFutureBinding import AutoFutureBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import ConcatTemplate, FloatGELUTemplate, FloatGemmTemplate, \
from Deeploy.Targets.Generic.Templates import ConcatTemplate, DequantTemplate, FloatGELUTemplate, FloatGemmTemplate, \
FloatLayernormTemplate, FloatMatMulTemplate, FloatMulTemplate, FloatReluTemplate, FloatSoftmaxTemplate, \
GatherTemplate, QuantTemplate, RQSiGELUTemplate, iHardswishTemplate
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, ConvChecker, GatherChecker, GELUChecker, GEMMChecker, \
HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, QuantChecker, ReduceMeanChecker, ReluChecker, \
RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker
from Deeploy.Targets.Generic.TypeCheckers import ConcatChecker, ConvChecker, DequantChecker, GatherChecker, \
GELUChecker, GEMMChecker, HardswishChecker, LayerNormChecker, MatMulChecker, MulChecker, QuantChecker, \
ReduceMeanChecker, ReluChecker, RQAddChecker, RQHardswishChecker, SliceChecker, SoftmaxChecker, TransposeChecker
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterSynch import PULPSynchCoresPass
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling
from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling
Expand Down Expand Up @@ -368,3 +368,8 @@
NodeBinding(QuantChecker([PointerClass(float32_t)], [PointerClass(int8_t)]), QuantTemplate.referenceTemplate,
ForkTransformer),
]

# Bindings for the `Dequant` operator in the PULPOpen bindings module: same
# int8 -> float32 checker and generic reference template as the Generic target,
# but paired with ForkTransformer.
BasicDequantBindings = [
NodeBinding(DequantChecker([PointerClass(int8_t)], [PointerClass(float32_t)]), DequantTemplate.referenceTemplate,
ForkTransformer),
]
Loading
Loading