pulp-platform · diaconuccalin · May 22, 2025 · May 5, 2025 · May 5, 2025 · May 6, 2025
@@ -74,8 +74,13 @@ jobs:
         testFloatAdder
         testFloatGEMM
         testFloat2DConvolution
+        testFloat2DConvolutionBias
+        testFloat2DConvolutionZeroBias
         testFloatLayerNorm
         testFloatDiv
+        testFloat2DDWConvolution
+        testFloat2DDWConvolutionBias
+        testFloat2DDWConvolutionZeroBias
         testFloatRelu
         testFloatMaxPool
         testFloatMatmul

@@ -348,8 +348,15 @@ Change main.c to use OUTPUTTYPE instead of float
 - HOTFIX: Deeploy subdirectories installed when installing Deeploy with pip install
 - HOTFIX: Linking TEST_RECENT on MacOS
 
-
 ## Add RV32IMF Picolibc support for Siracusa platform
 
 ### Added
 - Adds RV32IMF Picolibc to the toolchain
+
+## Add Generic Float DW Conv2D Kernel
+
+### Added
+- Generic float DW Conv2D kernel and bindings.
+- Bias handling and computation for regular and DW Conv2D.
+- Empty bias handling for generic regular and DW Conv2D.
+- Tests for regular and DW Conv2D, with and without bias, all included in the CI pipeline.
@@ -635,3 +635,33 @@ def __init__(self):
         graph.inputs.append(_input)
 
         super().__init__(graph, _remove_global_output_reshape_fun, "_REMOVE_GLOBAL_OUTPUT_RESHAPE_PASS")
+
+
+def _remove_empty_conv_bias_fun(graph: gs.Graph, match: Match, name: str):
+    """Drop the bias input of the matched Conv node when it is a constant of all zeros.
+
+    Args:
+        graph: Graph being optimized; the matched node is edited in place.
+        match: Pattern match; the first node in ``match.nodes_map`` is used as the Conv node.
+        name: Not used by this function (presumably part of the replacement-function signature).
+
+    Returns:
+        The same ``graph`` object that was passed in.
+    """
+    # Extract matched convolution
+    matched_nodes = list(match.nodes_map.values())
+    opNode = matched_nodes[0]
+
+    # Check if the Conv node has a bias input
+    if len(opNode.inputs) > 2:
+        bias = opNode.inputs[2]
+
+        # Only constant tensors expose `.values`. A bias that is a gs.Variable
+        # (e.g. produced by another node) cannot be proven to be zero here, so
+        # it is left untouched instead of raising an AttributeError.
+        if isinstance(bias, gs.Constant) and np.all(bias.values == 0):
+            del opNode.inputs[2]
+
+    # Return updated graph
+    return graph
+
+
+@contextagnostic
+class RemoveEmptyConvBiasPass(ReplaceSequentialPatternPass):
+    """Pattern pass that registers `_remove_empty_conv_bias_fun` for a single-Conv pattern graph."""
+
+    def __init__(self):
+        # Build the pattern graph: one Conv node fed by a single variable
+        pattern = gs.Graph()
+        conv_input = gs.Variable(name = 'input_1')
+        conv_output = pattern.layer(inputs = [conv_input], outputs = ['convOut'], op = 'Conv', name = 'conv')
+        pattern.inputs.append(conv_input)
+        pattern.outputs.append(conv_output)
+
+        # Hand the pattern, the replacement function and the pass name to the base class
+        super().__init__(pattern, _remove_empty_conv_bias_fun, "_REMOVE_EMPTY_CONV_BIAS_PASS")
@@ -2,13 +2,14 @@
 #
 # File: BasicBindings.py
 #
-# Last edited: 17.12.2022
+# Last edited: 05.05.2025
 #
-# Copyright (C) 2022, ETH Zurich and University of Bologna.
+# Copyright (C) 2025, ETH Zurich and University of Bologna.
 #
-# Author:
+# Authors:
 # - Moritz Scherer, ETH Zurich
 # - Philip Wiese, ETH Zurich
+# - Calin Diaconu, University of Bologna
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
@@ -36,12 +37,12 @@
 from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
 from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \
     DequantTemplate, DummyTemplate, DWConvTemplate, FloatAddTemplate, FloatConvTemplate, FloatDivTemplate, \
-    FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, FloatMaxPoolTemplate, \
-    FloatMulTemplate, FloatPadTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, GemmTemplate, \
-    IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, \
-    PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, \
-    RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, iLayernormTemplate, \
-    iRMSNormTemplate, iSoftmaxTemplate
+    FloatDWConvTemplate, FloatGELUTemplate, FloatGemmTemplate, FloatLayernormTemplate, FloatMatMulTemplate, \
+    FloatMaxPoolTemplate, FloatMulTemplate, FloatPadTemplate, FloatReluTemplate, FloatSoftmaxTemplate, GatherTemplate, \
+    GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, \
+    MulTemplate, PadTemplate, QuantTemplate, ReduceMeanTemplate, ReduceSumTemplate, RequantShiftTemplate, \
+    ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, TransposeTemplate, iGELUTemplate, \
+    iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
 from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \
     DequantChecker, DivChecker, DummyChecker, GatherChecker, GELUChecker, GEMMChecker, LayerNormChecker, \
     MatMulChecker, MaxPoolChecker, MulChecker, PadChecker, QuantChecker, ReduceMeanChecker, ReduceSumChecker, \
@@ -91,8 +92,15 @@
         BasicTransformer)
 ]
 
-BasicDWConv2DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
-                                   DWConvTemplate.reference2DTemplate, BasicTransformer)
+# Two DW Conv2D bindings: an int8 one with two input pointers and an int32 output,
+# and a float32 one with three input pointers (presumably input, weight and bias).
+BasicDWConv2DBindings = [
+    NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
+                DWConvTemplate.reference2DTemplate, BasicTransformer),
+    NodeBinding(
+        ConvChecker([PointerClass(float32_t), PointerClass(float32_t),
+                     PointerClass(float32_t)], [PointerClass(float32_t)]), FloatDWConvTemplate.reference2DTemplate,
+        BasicTransformer),
+]
 
 BasicDebugPrintBindings = [
     NodeBinding(DebugPrintChecker([PointerClass(type)], [PointerClass(type)]), DebugPrintTemplate.referenceTemplate,

@@ -2,13 +2,14 @@
 #
 # File: BasicParsers.py
 #
-# Last edited: 15.12.2021
+# Last edited: 12.05.2025
 #
-# Copyright (C) 2021, ETH Zurich and University of Bologna.
+# Copyright (C) 2025, ETH Zurich and University of Bologna.
 #
 # Authors:
 # - Moritz Scherer, ETH Zurich
 # - Victor Jung, ETH Zurich
+# - Calin Diaconu, University of Bologna
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
@@ -2138,7 +2139,22 @@ def parseNodeCtxt(self,
 
         newCtxt, ret = super().parseNodeCtxt(ctxt, node, channels_first)
 
-        if not ret:
+        if ret:
+            inputs = ['data_in', 'weight']
+
+            # Handle bias, if present
+            if len(node.inputs) > 2:
+                inputs.append("bias")
+                self.operatorRepresentation["has_bias"] = "true"
+            else:
+                self.operatorRepresentation["has_bias"] = "false"
+                self.operatorRepresentation["bias"] = "NULL"
+
+            for idx, inputNode in enumerate(node.inputs):
+                self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name
+
+            return newCtxt, True
+        else:
             return ctxt, False
 
         assert len(node.inputs
@@ -2178,8 +2194,18 @@ def parseNodeCtxt(self,
 
         if ret:
             inputs = ['data_in', 'weight']
+
+            # Handle bias, if present
+            if len(node.inputs) > 2:
+                inputs.append("bias")
+                self.operatorRepresentation["has_bias"] = "true"
+            else:
+                self.operatorRepresentation["has_bias"] = "false"
+                self.operatorRepresentation["bias"] = "NULL"
+
             for idx, inputNode in enumerate(node.inputs):
                 self.operatorRepresentation[inputs[idx]] = ctxt.lookup(inputNode.name).name
+
             if self.operatorRepresentation['group'] == self.operatorRepresentation['ch_im_in']:
                 return newCtxt, True
 

@@ -2,13 +2,14 @@
 #
 # File: GenericPlatform.py
 #
-# Last edited: 17.12.2022
+# Last edited: 05.05.2025
 #
-# Copyright (C) 2022, ETH Zurich and University of Bologna.
+# Copyright (C) 2025, ETH Zurich and University of Bologna.
 #
 # Authors:
 # - Moritz Scherer, ETH Zurich
 # - Philip Wiese, ETH Zurich
+# - Calin Diaconu, University of Bologna
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
@@ -25,10 +26,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from Deeploy.CommonExtensions.OptimizationPasses.TopologyOptimizationPasses.LoweringOptimizationPasses import \
+    RemoveEmptyConvBiasPass
 from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \
     StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer
 from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \
-    BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, \
+    BasicDebugPrintBindings, BasicDequantBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, \
     BasicGatherBindings, BasicGELUBindings, BasicGEMMBindings, BasicITAPartialSoftmaxBinding, BasicITASoftmaxBinding, \
     BasicLayerNormBindings, BasicMatMulBindings, BasicMaxPool2DBindings, BasicMulBindings, BasicPad1DBindings, \
     BasicPad2DBindings, BasicQuantBindings, BasicReduceMeanBindings, BasicReduceSumBindings, BasicReluBinding, \
@@ -54,7 +57,7 @@
 Conv2DMapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings)
 DebugMapper = NodeMapper(DebugParser(), BasicDebugPrintBindings)
 DWConv1DMapper = NodeMapper(GenericDWConv1DParser(), [BasicDWConv1DBinding])
-DWConv2DMapper = NodeMapper(GenericDWConv2DParser(), [BasicDWConv2DBinding])
+DWConv2DMapper = NodeMapper(GenericDWConv2DParser(), BasicDWConv2DBindings)
 FlattenMapper = NodeMapper(FlattenParser(), BasicReshapeBindings)
 GatherMapper = NodeMapper(GatherParser(), BasicGatherBindings)
 GELUMapper = NodeMapper(GELUParser(), BasicGELUBindings)
@@ -169,6 +172,7 @@ class GenericStructBuffer(StructBuffer):
     MergeConstAddAndRequantPass(),
     ExtractPaddingFromConvPass(),
     ExtractPaddingFromPoolPass(),
+    RemoveEmptyConvBiasPass(),
     # DebugPrintPass(r'.*[Mm]at[Mm]ul.*', position = 'after'),
 ])
 

@@ -2,11 +2,13 @@
 #
 # File: FloatConvTemplate.py
 #
-# Last edited: 23.01.2025
+# Last edited: 12.05.2025
 #
 # Copyright (C) 2023, ETH Zurich and University of Bologna.
 #
-# Author: Run Wang, ETH Zurich
+# Authors:
+# - Run Wang, ETH Zurich
+# - Calin Diaconu, University of Bologna
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
@@ -41,6 +43,8 @@
             ref_${data_out}_${data_in}, ${ch_im_in}, ${dim_im_in_x}, ${dim_im_in_y},
             ${weight}, ${ch_im_out}, ${dim_kernel_x}, ${dim_kernel_y},
             ${stride_x}, ${stride_y},
+            ${bias},
+            ${has_bias},
             ref_${data_out}_${data_out}
         );
         ref_${data_out}_${data_in} += ${batchOffsetIn};

@@ -0,0 +1,53 @@
+# ----------------------------------------------------------------------
+#
+# File: FloatDWConvTemplate.py
+#
+# Last edited: 12.05.2025
+#
+# Copyright (C) 2025, ETH Zurich and University of Bologna.
+#
+# Author:
+# - Calin Diaconu, University of Bologna
+#
+# ----------------------------------------------------------------------
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the License); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an AS IS BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from Deeploy.DeeployTypes import NodeTemplate
+
+# Code template for the 2D floating-point depth-wise convolution.
+# The generated code loops over `batch`, calls the
+# DWConv2d_fp<in>_fp<weight>_fp<out>_NCHW kernel once per batch element
+# (passing `bias` and `has_bias` through), and then advances the input and
+# output reference pointers by ch * dim_x * dim_y elements.
+reference2DTemplate = NodeTemplate("""
+<%
+batchOffsetIn = ch_im_in * dim_im_in_x * dim_im_in_y
+batchOffsetOut = ch_im_out * dim_im_out_x * dim_im_out_y
+%>
+// 2D FP Depth-wise Conv (Name: ${nodeName}, Op: ${nodeOp})
+BEGIN_SINGLE_CORE
+    ${data_in_type.typeName} ref_${data_out}_${data_in} = ${data_in};
+    ${data_out_type.typeName} ref_${data_out}_${data_out} = ${data_out};
+    for (uint32_t n=0; n<${batch}; ++n) {
+        DWConv2d_fp${data_in_type.referencedType.typeWidth}_fp${weight_type.referencedType.typeWidth}_fp${data_out_type.referencedType.typeWidth}_NCHW(
+            ref_${data_out}_${data_in},
+            ${ch_im_in}, ${dim_im_in_x}, ${dim_im_in_y},
+            ${weight},
+            ${ch_im_out}, ${dim_kernel_x}, ${dim_kernel_y},
+            ${stride_x}, ${stride_y},
+            ${bias},
+            ${has_bias},
+            ref_${data_out}_${data_out}
+        );
+        ref_${data_out}_${data_in} += ${batchOffsetIn};
+        ref_${data_out}_${data_out} += ${batchOffsetOut};
+    }
+END_SINGLE_CORE
+""")
@@ -2,12 +2,13 @@
 #
 # File: MemPoolPlatform.py
 #
-# Last edited: 17.12.2022
+# Last edited: 05.05.2025
 #
-# Copyright (C) 2022, ETH Zurich and University of Bologna.
+# Copyright (C) 2025, ETH Zurich and University of Bologna.
 #
-# Author:
+# Authors:
 # - Philip Wiese, ETH Zurich
+# - Calin Diaconu, University of Bologna
 #
 # ----------------------------------------------------------------------
 # SPDX-License-Identifier: Apache-2.0
@@ -31,7 +32,7 @@
 from Deeploy.DeeployTypes import ConstantBuffer, DeploymentEngine, DeploymentPlatform, NodeMapper, NodeTemplate, \
     StructBuffer, TopologyOptimizer, TransientBuffer, VariableBuffer
 from Deeploy.Targets.Generic.Bindings import BasicAddBindings, BasicConv1DBinding, BasicConv2DBindings, \
-    BasicDebugPrintBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBinding, BasicGatherBindings, \
+    BasicDebugPrintBindings, BasicDivBindings, BasicDWConv1DBinding, BasicDWConv2DBindings, BasicGatherBindings, \
     BasicGELUBindings, BasicLayerNormBindings, BasicMulBindings, BasicPad1DBindings, BasicPad2DBindings, \
     BasicReduceMeanBindings, BasicReduceSumBindings, BasicReshapeBindings, BasicRQIntegerDivBinding, \
     BasicRQSGELUBinding, BasicSliceBindings, BasicSoftmaxBindings, BasicTransposeBindings, DummyBinding
@@ -62,7 +63,7 @@
 GenericConv1D_Mapper = NodeMapper(GenericConv1DParser(), [BasicConv1DBinding])
 GenericDWConv1D_Mapper = NodeMapper(GenericDWConv1DParser(), [BasicDWConv1DBinding])
 GenericConv2D_Mapper = NodeMapper(GenericConv2DParser(), BasicConv2DBindings)
-GenericDWConv2D_Mapper = NodeMapper(GenericDWConv2DParser(), [BasicDWConv2DBinding])
+GenericDWConv2D_Mapper = NodeMapper(GenericDWConv2DParser(), BasicDWConv2DBindings)
 
 GenericConv_Mappers = [GenericConv2D_Mapper, GenericDWConv2D_Mapper, GenericConv1D_Mapper, GenericDWConv1D_Mapper]
 

@@ -2,15 +2,16 @@
  * Title:        Convolution.h
  * Description:
  *
- * Date:         04.01.2023
+ * Date:         12.05.2025
  *
  * ===================================================================== */
 
 /*
- * Copyright (C) 2023 ETH Zurich and University of Bologna.
+ * Copyright (C) 2025 ETH Zurich and University of Bologna.
  *
  * Authors:
  * - Philip Wiese, ETH Zurich
+ * - Calin Diaconu, University of Bologna
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -62,7 +63,8 @@ void Conv2d_s8_s8_s32_NCHW(int8_t const *__restrict__ pSrcA, uint32_t C,
 void Conv2d_fp32_fp32_fp32_NCHW(const float *__restrict__ pSrcA, uint32_t C,
                                 uint32_t H_padded, uint32_t W_padded,
                                 const float *__restrict__ pSrcB, uint32_t F,
-                                uint32_t P, uint32_t Q, uint32_t SP, uint32_t SQ,
-                                float *__restrict__ pDstC);
+                                uint32_t P, uint32_t Q, uint32_t SP,
+                                uint32_t SQ, const float *__restrict__ pSrcBias,
+                                const bool has_bias, float *__restrict__ pDstC);
 
 #endif //__DEEPLOY_BASIC_MATH_CONVOLUTION_KERNEL_HEADER_