
Commit 4f21985

[Paddle TensorRT] add pd_op.split_with_num and pd_op.split converter (#68608)

* pd_op.split_with_num
* split
* adapt several converters to add_elementwise_layer
* add an extra helper, get_shape_with_dynamic_shape
* fix
* pd_op.split
* add unit tests
* required CI checks not showing up; re-submitting the commit
* revise split_with_num
* CI still stuck

1 parent eb514a6 commit 4f21985

File tree

7 files changed: +504 -59 lines changed

paddle/fluid/pir/transforms/tensorrt/trt_op_marker_pass.cc

Lines changed: 56 additions & 52 deletions
@@ -89,6 +89,13 @@ class Pool2dOpPattern
         op->attribute<pir::BoolAttribute>(kCanRunTrtAttr).data()) {
       return false;
     }
+    paddle::dialect::FullIntArrayOp full_int_array_op =
+        pir::GetDefiningOpForInput(op, 1)
+            ->dyn_cast<paddle::dialect::FullIntArrayOp>();
+    if (!full_int_array_op) {
+      VLOG(3) << "Cannot find FullIntArrayOp";
+      return false;
+    }
     auto padding_attr = op->attribute<pir::ArrayAttribute>("paddings");
     std::vector<int32_t> paddings;
     for (const auto &attr : padding_attr.AsVector()) {
@@ -122,28 +129,19 @@ class Pool2dOpPattern
     if (!op->attribute<pir::BoolAttribute>("global_pooling").data()) {
       if (op->HasAttribute("exclusive")) {
         if (op->attribute<pir::BoolAttribute>("exclusive").data()) {
-          paddle::dialect::FullIntArrayOp full_int_array_op =
-              pir::GetDefiningOpForInput(op, 1)
-                  ->dyn_cast<paddle::dialect::FullIntArrayOp>();
-          if (!full_int_array_op) {
-            VLOG(3) << "Cannot find FullIntArrayOp";
-            return false;
-          } else {
-            auto attr_value =
-                full_int_array_op->attribute<pir::ArrayAttribute>(
-                    "value");
-            std::vector<int64_t> kernel_size;
-            for (const auto &attr : attr_value.AsVector()) {
-              kernel_size.push_back(
-                  attr.dyn_cast<pir::Int64Attribute>().data());
-            }
-            for (size_t i = 0; i < kernel_size.size(); ++i) {
-              if (kernel_size[i] <= paddings[i]) {
-                VLOG(3) << "the padding size should be less than the "
-                           "filter size "
-                           "for exclusive-counting pooling.";
-                return false;
-              }
+          auto attr_value =
+              full_int_array_op->attribute<pir::ArrayAttribute>("value");
+          std::vector<int64_t> kernel_size;
+          for (const auto &attr : attr_value.AsVector()) {
+            kernel_size.push_back(
+                attr.dyn_cast<pir::Int64Attribute>().data());
+          }
+          for (size_t i = 0; i < kernel_size.size(); ++i) {
+            if (kernel_size[i] <= paddings[i]) {
+              VLOG(3) << "the padding size should be less than the "
+                         "filter size "
+                         "for exclusive-counting pooling.";
+              return false;
             }
           }
         }
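A side note on the check that was hoisted above: a small standalone illustration (not part of the commit) of the rule that exclusive-counting pooling is rejected once any padding is at least as large as the kernel in that dimension.

```python
# Illustration only: mirrors the marker-pass rule "padding must be smaller
# than the kernel size for exclusive-counting pooling". With padding >= kernel,
# a border window can contain only padded values, so the exclusive average
# would have nothing to count.
kernel_size = [3, 3]
paddings = [3, 1]

can_run_trt = all(k > p for k, p in zip(kernel_size, paddings))
print(can_run_trt)  # False, because kernel_size[0] <= paddings[0]
```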
@@ -796,42 +794,42 @@ class SplitOpPattern : public pir::OpRewritePattern<paddle::dialect::SplitOp> {
       return false;
     }

-    paddle::dialect::FullOp full_op =
-        pir::GetDefiningOpForInput(op, 2)->dyn_cast<paddle::dialect::FullOp>();
-    if (!full_op) {
-      VLOG(3) << "Can not find full op";
+    pir::Value axis_tensor = op.operand_source(2);
+    if (!axis_tensor) {
+      VLOG(3) << "pd_op.split can not find axis input";
       return false;
-    } else {
+    }
+    auto out_vector_type = op.result(0).type().dyn_cast<pir::VectorType>();
+    if (pir::GetDefiningOpForInput(op, 2)->isa<paddle::dialect::FullOp>()) {
+      paddle::dialect::FullOp full_op =
+          pir::GetDefiningOpForInput(op, 2)
+              ->dyn_cast<paddle::dialect::FullOp>();
       auto axis = full_op->attribute<paddle::dialect::ScalarAttribute>("value")
                       .data()
                       .to<int>();
       auto x_shape = op.operand_source(0)
                          .type()
                          .dyn_cast<paddle::dialect::DenseTensorType>()
                          .dims();
-      auto out_vector_type = op.result(0).type().dyn_cast<pir::VectorType>();

-      paddle::dialect::FullIntArrayOp full_sections_op =
-          pir::GetDefiningOpForInput(op, 1)
-              ->dyn_cast<paddle::dialect::FullIntArrayOp>();
-      if (!full_sections_op) {
-        VLOG(3) << "Can not find FullIntArrayOp";
+      axis += (axis < 0) ? x_shape.size() : 0;
+
+      if (x_shape[axis] == -1) {
+        VLOG(3) << "The (" << axis << ") dim of input should not be -1";
         return false;
       }
+    }

+    if (pir::GetDefiningOpForInput(op, 1)
+            ->isa<paddle::dialect::FullIntArrayOp>()) {
+      paddle::dialect::FullIntArrayOp full_sections_op =
+          pir::GetDefiningOpForInput(op, 1)
+              ->dyn_cast<paddle::dialect::FullIntArrayOp>();
       auto sections = full_sections_op->attribute<pir::ArrayAttribute>("value");
-
       std::vector<int64_t> output_lengths;
       for (const auto &attr : sections.AsVector()) {
         output_lengths.push_back(attr.dyn_cast<pir::Int64Attribute>().data());
       }
-      axis += (axis < 0) ? x_shape.size() : 0;
-
-      if (x_shape[axis] == -1) {
-        VLOG(3) << "The (" << axis << ") dim of input should not be -1";
-        return false;
-      }
-
       if (output_lengths.size() != out_vector_type.size()) {
         VLOG(3) << "The output_length should be equal to the output size.";
         return false;
@@ -853,33 +851,38 @@ class SplitWithNumOpPattern
         op->attribute<pir::BoolAttribute>(kCanRunTrtAttr).data()) {
       return false;
     }
-    paddle::dialect::FullOp full_op =
-        pir::GetDefiningOpForInput(op, 1)->dyn_cast<paddle::dialect::FullOp>();
-    if (!full_op) {
-      VLOG(3) << "Can not find full op";
+
+    pir::Value axis_tensor = op.operand_source(1);
+    if (!axis_tensor) {
+      VLOG(3) << "pd_op.split_with_num can not find axis input";
       return false;
-    } else {
-      auto axis = full_op->attribute<paddle::dialect::ScalarAttribute>("value")
+    }
+    if (pir::GetDefiningOpForInput(op, 1)
+            ->isa<paddle::dialect::FullIntArrayOp>()) {
+      paddle::dialect::FullIntArrayOp full_int_array_op =
+          pir::GetDefiningOpForInput(op, 1)
+              ->dyn_cast<paddle::dialect::FullIntArrayOp>();
+      auto axis = full_int_array_op
+                      ->attribute<paddle::dialect::ScalarAttribute>("value")
                       .data()
                       .to<int>();
       auto x_shape = op.operand_source(0)
                          .type()
                          .dyn_cast<paddle::dialect::DenseTensorType>()
                          .dims();
-      auto out_vector_type = op.result(0).type().dyn_cast<pir::VectorType>();

       axis += (axis < 0) ? x_shape.size() : 0;
       if (x_shape[axis] == -1) {
         VLOG(3) << "The (" << axis << ") dim of input should not be -1";
         return false;
       }
-
       if (!op->HasAttribute("num")) {
         VLOG(3) << "split_with_num op must has num attributes";
         return false;
       }
       int num = op->attribute<pir::Int32Attribute>("num").data();
       std::vector<int64_t> output_lengths;
+
       if (num > 0) {
         int64_t in_axis_dim = x_shape[axis];
         if (in_axis_dim % num != 0) {
@@ -893,14 +896,15 @@ class SplitWithNumOpPattern
           output_lengths.push_back(out_axis_dim);
         }
       }
-
+      auto out_vector_type = op.result(0).type().dyn_cast<pir::VectorType>();
       if (out_vector_type.size() != output_lengths.size()) {
         VLOG(3) << "The output_length should be equal to the output size.";
         return false;
       }
-      op->set_attribute(kCanRunTrtAttr, rewriter.bool_attr(true));
-      return true;
     }
+
+    op->set_attribute(kCanRunTrtAttr, rewriter.bool_attr(true));
+    return true;
   }
 };
 class GreaterEqualOpPattern
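For readers who want the gist of the new marker logic without reading the PIR C++: below is a minimal Python sketch (illustrative, not code from the commit) of the static checks that pd_op.split and pd_op.split_with_num must now pass before being handed to TensorRT, assuming axis, sections, and num were read from constant Full / FullIntArray producers.

```python
def can_convert_split(x_shape, axis, sections, num_outputs):
    # SplitOpPattern, simplified: the split dimension must be static and the
    # number of sections must match the number of outputs.
    axis += len(x_shape) if axis < 0 else 0
    if x_shape[axis] == -1:
        return False
    return len(sections) == num_outputs


def can_convert_split_with_num(x_shape, axis, num, num_outputs):
    # SplitWithNumOpPattern, simplified: num must be positive, evenly divide
    # the split dimension, and match the number of outputs.
    axis += len(x_shape) if axis < 0 else 0
    if x_shape[axis] == -1 or num <= 0:
        return False
    if x_shape[axis] % num != 0:
        return False
    return num == num_outputs


print(can_convert_split([3, 8, 5], -2, [2, 3, 3], 3))    # True
print(can_convert_split_with_num([3, 8, 5], 1, 3, 3))    # False: 8 % 3 != 0
```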

python/paddle/tensorrt/converter.py

Lines changed: 16 additions & 6 deletions
@@ -173,6 +173,9 @@ def convert_subgraph_to_trt(self, program, group_op):
             value_to_trt_tensor[value.id] = input_tensor

         for op in operations:
+            # Adding marker labels to builtin ops facilitates convert processing, but they ultimately do not enter the TensorRT subgraph.
+            if op.name() == "builtin.split":
+                continue
             operands = []
             for operand in op.operands():
                 source = operand.source()
@@ -205,7 +208,18 @@ def convert_subgraph_to_trt(self, program, group_op):

             trt_outs = self.convert(network, op, operands)

+            results = []
+
             for idx, result in enumerate(op.results()):
+                if result.is_combine():
+                    used_ops = result.all_used_ops()
+                    for use_op in used_ops:
+                        if use_op.name() == "builtin.split":
+                            split_outputs = use_op.results()
+                            results.extend(split_outputs)
+                else:
+                    results.append(result)
+            for idx, result in enumerate(results):
                 if idx < len(trt_outs):
                     value_to_trt_tensor[result.id] = trt_outs[idx]
                 else:
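To make the intent of the new result-mapping loop explicit, here is a self-contained sketch (the Fake* classes are hypothetical stand-ins for pir values and ops, not Paddle APIs): a combined result that feeds a builtin.split is replaced by that split's individual outputs, so the TRT output tensors can be paired with them one-to-one.

```python
class FakeResult:
    # Hypothetical stand-in for a pir result value.
    def __init__(self, rid, combine=False, users=()):
        self.id, self._combine, self._users = rid, combine, list(users)

    def is_combine(self):
        return self._combine

    def all_used_ops(self):
        return self._users


class FakeSplitOp:
    # Hypothetical stand-in for a builtin.split consumer.
    def name(self):
        return "builtin.split"

    def results(self):
        return [FakeResult("split_out_0"), FakeResult("split_out_1")]


def flatten_results(op_results):
    # Same rule as the diff: expand a combined result into the outputs of the
    # builtin.split that consumes it, otherwise keep the result as-is.
    results = []
    for result in op_results:
        if result.is_combine():
            for use_op in result.all_used_ops():
                if use_op.name() == "builtin.split":
                    results.extend(use_op.results())
        else:
            results.append(result)
    return results


outs = flatten_results(
    [FakeResult("combined", combine=True, users=[FakeSplitOp()])]
)
print([r.id for r in outs])  # ['split_out_0', 'split_out_1']
```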
@@ -409,14 +423,10 @@ def convert(self, network, paddle_op, inputs):
                 f"Converter for {op_name} not implemented."
             )
         outs = converter_func(network, paddle_op, inputs)
-        if isinstance(outs, tuple):
-            return outs
-        elif isinstance(outs, trt.ITensor):
+        if isinstance(outs, trt.ITensor):
             return (outs,)
         else:
-            raise TypeError(
-                f"Expected outputs to be a tuple or ITensor, but got {type(outs)}"
-            )
+            return outs

     def convert_program_to_trt(self):
         for op in self.program.global_block().ops:
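The effect of the relaxed return handling, as a small sketch (assumes a TensorRT installation; not code from the repository): a converter that returns a single ITensor is normalized to a 1-tuple, while converters that already return a tuple or list of tensors, such as the split converters added in this PR, are passed through unchanged.

```python
import tensorrt as trt  # assumption: TensorRT Python bindings are available


def normalize_converter_outputs(outs):
    # Sketch of Converter.convert's output contract after this change.
    if isinstance(outs, trt.ITensor):
        return (outs,)
    return outs  # tuples/lists from multi-output converters pass through
```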

python/paddle/tensorrt/converter_utils.py

Lines changed: 91 additions & 0 deletions
@@ -213,6 +213,11 @@ def get_shape_tensor_element(network, x, index):
     return gather_layer.get_output(0)


+def trt_less(network, a, b):
+    layer = network.add_elementwise(a, b, trt.ElementWiseOperation.LESS)
+    return layer.get_output(0)
+
+
 def trt_sum(network, a, b):
     layer = network.add_elementwise(a, b, trt.ElementWiseOperation.SUM)
     return layer.get_output(0)
@@ -231,3 +236,89 @@ def trt_sub(network, a, b):
 def trt_min(network, a, b):
     layer = network.add_elementwise(a, b, trt.ElementWiseOperation.MIN)
     return layer.get_output(0)
+
+
+def trt_mul(network, a, b):
+    layer = network.add_elementwise(a, b, trt.ElementWiseOperation.PROD)
+    return layer.get_output(0)
+
+
+def trt_div(network, a, b):
+    layer = network.add_elementwise(a, b, trt.ElementWiseOperation.DIV)
+    return layer.get_output(0)
+
+
+def trt_floor_div(network, a, b):
+    layer = network.add_elementwise(a, b, trt.ElementWiseOperation.FLOOR_DIV)
+    return layer.get_output(0)
+
+
+def trt_equal(network, a, b):
+    layer = network.add_elementwise(a, b, trt.ElementWiseOperation.EQUAL)
+    return layer.get_output(0)
+
+
+def cast_tensor(network, input_tensor, dtype):
+    layer = network.add_identity(input_tensor)
+    layer.set_output_type(0, dtype)
+    return layer.get_output(0)
+
+
+def build_start_tensor(network, rank, axis_tensor, offset):
+    # Create indices_tensor [0, 1, ..., rank-1]
+    indices = np.arange(rank, dtype=np.int32)
+    indices_tensor = network.add_constant([rank], indices).get_output(0)
+
+    # Create mask: mask = (indices == axis_tensor)
+    mask = network.add_elementwise(
+        indices_tensor, axis_tensor, trt.ElementWiseOperation.EQUAL
+    ).get_output(0)
+    mask_int = cast_tensor(network, mask, trt.int32)
+
+    # Calculate start_tensor = mask_int * offset
+    start_tensor = network.add_elementwise(
+        mask_int, offset, trt.ElementWiseOperation.PROD
+    ).get_output(0)
+
+    return start_tensor
+
+
+def build_size_tensor(
+    network, rank, axis_tensor, size_value, input_shape_tensor
+):
+    # Create indices_tensor [0, 1, ..., rank-1]
+    indices = np.arange(rank, dtype=np.int32)
+    indices_tensor = network.add_constant([rank], indices).get_output(0)
+
+    # Create mask: mask = (indices == axis_tensor)
+    mask = network.add_elementwise(
+        indices_tensor, axis_tensor, trt.ElementWiseOperation.EQUAL
+    ).get_output(0)
+    mask_int = cast_tensor(network, mask, trt.int32)
+
+    # Create ones_tensor
+    ones_tensor = network.add_constant(
+        [rank], np.ones([rank], dtype=np.int32)
+    ).get_output(0)
+
+    # Calculate inverse_mask = ones_tensor - mask_int
+    inverse_mask = network.add_elementwise(
+        ones_tensor, mask_int, trt.ElementWiseOperation.SUB
+    ).get_output(0)
+
+    # Calculate size_tensor = mask_int * size_value + inverse_mask * input_shape_tensor
+    size_value_broadcast = network.add_elementwise(
+        mask_int, size_value, trt.ElementWiseOperation.PROD
+    ).get_output(0)
+
+    input_shape_broadcast = network.add_elementwise(
+        inverse_mask, input_shape_tensor, trt.ElementWiseOperation.PROD
+    ).get_output(0)
+
+    size_tensor = network.add_elementwise(
+        size_value_broadcast,
+        input_shape_broadcast,
+        trt.ElementWiseOperation.SUM,
+    ).get_output(0)
+
+    return size_tensor
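build_start_tensor and build_size_tensor are building blocks for slicing along an axis that is only available as a tensor at network-construction time. The following usage sketch is illustrative only (slice_along_dynamic_axis is a hypothetical helper, not part of the commit); it assumes the two helpers are importable from paddle.tensorrt.converter_utils and wires them into a TensorRT slice layer whose start and shape are supplied via set_input, which is roughly what a split converter needs per output.

```python
import tensorrt as trt

from paddle.tensorrt.converter_utils import (  # assumed import path
    build_size_tensor,
    build_start_tensor,
)


def slice_along_dynamic_axis(network, x, axis_tensor, offset, size_value):
    # axis_tensor, offset and size_value are int32 ITensors of shape [1];
    # all dimensions other than the selected axis keep their input size.
    rank = len(x.shape)
    input_shape_tensor = network.add_shape(x).get_output(0)

    start_tensor = build_start_tensor(network, rank, axis_tensor, offset)
    size_tensor = build_size_tensor(
        network, rank, axis_tensor, size_value, input_shape_tensor
    )

    # Static start/shape/stride are placeholders; the dynamic start and shape
    # are provided through set_input on inputs 1 and 2 of the slice layer.
    slice_layer = network.add_slice(x, [0] * rank, [1] * rank, [1] * rank)
    slice_layer.set_input(1, start_tensor)
    slice_layer.set_input(2, size_tensor)
    return slice_layer.get_output(0)
```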
