Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion paddle/fluid/framework/new_executor/collect_shape_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,41 @@ void CollectShapeManager::CollectShapeInfo(
framework::ValueExecutionInfo *value_exe_info,
framework::Scope *scope) {
std::lock_guard<std::mutex> lock(info_mutex_);
VLOG(3) << "collect shape in instruction:" << instr->Name();
is_shape_range_info_ready_ = false;
for (auto &input : instr->Inputs()) {
VLOG(3) << "input id:" << input.first.impl();
if (!op_value2instr_id_.count(input.first)) {
// Because the input value maybe same between different ops.
// To prevent duplicate shape collection, we only select one op for
// getting shape of value
op_value2instr_id_[input.first] = instr->Id();
}
if (op_value2instr_id_[input.first] != instr->Id()) {
VLOG(3) << "input shape has been collected in same instruction, jump it, "
"and input id:"
<< input.first.impl();
continue;
}
auto var_name = value_exe_info->GetVarName(input.first);
auto *var = scope->FindVar(var_name);
if (!var || !var->IsType<phi::DenseTensor>()) continue;
if (!var || !var->IsType<phi::DenseTensor>()) {
VLOG(3) << "input var is null : " << (var == nullptr);
VLOG(3) << "input var is dense_tensor : "
<< (var->IsType<phi::DenseTensor>());
VLOG(3) << "input is null or not dense_tensor, jump it, and input id:"
<< input.first.impl();
continue;
}

auto tensor = var->Get<phi::DenseTensor>();
if (!tensor.initialized() && !instr->NoNeedBuffer().count(input.first)) {
VLOG(3) << "input tensor is initialized: " << (tensor.initialized());
VLOG(3) << "input tensor is no need buffer:"
<< instr->NoNeedBuffer().count(input.first);
VLOG(3) << "input tensor is not initialized and not no need buffer, jump "
"it, and input id:"
<< input.first.impl();
continue;
}
paddle::platform::DeviceContextPool &pool =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ namespace paddle {
namespace framework {

using TensorRTEngine = paddle::platform::TensorRTEngine;
static const int kMaxDim = 1000;

TensorRTEngineInstruction::TensorRTEngineInstruction(
size_t id,
Expand Down Expand Up @@ -621,7 +622,7 @@ void TensorRTEngineInstruction::BindOutputTensor(
binding_offset;
#endif
std::vector<int> ddim;

phi::DenseTensor *fluid_t = nullptr;
#if IS_TRT_VERSION_GE(8500)
auto x_name = trt_engine_->engine()->getIOTensorName(bind_index);
auto dims = trt_context->getTensorShape(x_name);
Expand All @@ -631,17 +632,36 @@ void TensorRTEngineInstruction::BindOutputTensor(
if (dims.d[nb_dims - 1] != 1 || nb_dims == outputs_rank_[output_index])
break;
}
bool has_unknown_dim =
false; // not dynamic shape, some shape is unknown before run trt engine.
for (int i = 0; i < nb_dims; i++) {
ddim.push_back(dims.d[i]);
if (dims.d[i] == -1) {
has_unknown_dim = true;
ddim.push_back(kMaxDim);
} else {
ddim.push_back(dims.d[i]);
}
}

if (has_unknown_dim) {
const paddle::framework::Scope &scope = *(value_exec_info_->GetScope());
std::string tmp_output = output_name + "_tmp";
if (scope.FindVar(tmp_output) == nullptr) {
const_cast<framework::Scope *>(&scope)->Var(tmp_output);
}
fluid_t = scope.FindVar(tmp_output)->GetMutable<phi::DenseTensor>();
} else {
fluid_t = output_tensor;
}

#else
PADDLE_THROW(
common::errors::Unimplemented("PIR-TRT only support TensorRT "
"version that is >= 8.5,"
"Please check your TensorRT "
"in your env."));
#endif
auto *fluid_t = output_tensor;

fluid_t->Resize(common::make_ddim(ddim));
PADDLE_ENFORCE_LT(bind_index,
num_bindings,
Expand Down Expand Up @@ -734,11 +754,67 @@ void TensorRTEngineInstruction::RunTrt() {
VLOG(4) << "Start running trt engine...";
// Execute the engine.
trt_engine_->Execute(runtime_batch, &buffers, stream);

VLOG(4) << "End running trt engine and deal with output";
for (const auto &index_name_pair : output_names_) {
size_t i = index_name_pair.first;
auto type = outputs_dtype_[i];

#if IS_TRT_VERSION_GE(8500)
// deal with output that has unknown shape
std::string output_name = index_name_pair.second;
int bind_index = -1;
int binding_offset = 0;
binding_offset = trt_engine_->GetBindingsOffset();
for (int i = 0; i < trt_engine_->engine()->getNbIOTensors(); ++i) {
if (std::string(output_name.c_str()) ==
std::string(trt_engine_->engine()->getIOTensorName(i))) {
bind_index = i + binding_offset;
break;
}
}

auto trt_output_name = trt_engine_->engine()->getIOTensorName(bind_index);
auto trt_dims = trt_engine_->context()->getTensorShape(trt_output_name);
// find the tmp tensor(Allocated extra memory space for unknown dim) and
// copy its element to actual output tensor(Allocated appropriate memory
// space)
std::string tmp_output = output_name + "_tmp";
if (scope.FindVar(tmp_output) != nullptr) {
auto *output_tensor_tmp =
scope.FindVar(tmp_output)->GetMutable<phi::DenseTensor>();
auto *output_tensor = const_cast<phi::DenseTensor *>(
&(out_variable_array->at(i)->Get<phi::DenseTensor>()));
std::vector<int> ddim;
for (int i = 0; i < trt_dims.nbDims; i++) {
ddim.push_back(trt_dims.d[i]);
}
output_tensor->Resize(common::make_ddim(ddim));
dev_ctx_->Alloc(output_tensor, type);
if (type == phi::DataType::FLOAT32) {
auto *mutable_output = output_tensor->data<float>();
phi::memory_utils::Copy(phi::GPUPlace(),
mutable_output,
phi::GPUPlace(),
output_tensor_tmp->data<float>(),
sizeof(float) * output_tensor->numel(),
nullptr);
} else if (type == phi::DataType::INT64 || type == phi::DataType::INT32) {
auto *mutable_output = output_tensor->data<int32_t>();
phi::memory_utils::Copy(phi::GPUPlace(),
mutable_output,
phi::GPUPlace(),
output_tensor_tmp->data<int32_t>(),
sizeof(int32_t) * output_tensor->numel(),
nullptr);
} else {
PADDLE_THROW(common::errors::Unimplemented(
"Unsupported data type: %d when deal with output", type));
}
}
#endif

// Type transformation for INT64 and FLOAT64
if (type == phi::DataType::INT64) {
auto y = index_name_pair.second;
auto *fluid_v = out_variable_array->at(i);
Expand Down
3 changes: 1 addition & 2 deletions paddle/fluid/pir/transforms/pd_op_to_kernel_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2412,11 +2412,10 @@ void HandleForTensorRTOp(
std::vector<pir::Type> op_output_types;

for (size_t i = 0; i < op_item->num_results(); ++i) {
phi::Place out_place = phi::TransToPhiPlace(kernel_key.backend());
PushBackOutputTypes(ctx,
op_item,
op_item->result(i).type(),
out_place,
place,
kernel_key,
&op_output_types);
}
Expand Down
14 changes: 13 additions & 1 deletion paddle/fluid/pir/transforms/tensorrt/trt_op_marker_pass.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ class SqueezeOpPattern
int64_t s = input_var_name_shape[i];
if (s == -1) {
VLOG(3) << "The necessary attributes of the squeeze operator axis is "
"missing. ss =====-1";
"missing. ss == -1";
return false;
} else if (s == 1) {
axes.push_back(s);
Expand All @@ -1035,6 +1035,18 @@ class SqueezeOpPattern
"missing.";
return false;
}
} else {
pir::Value x = op.operand_source(0);
auto x_shape = pir::GetShapeFromValue(x);
for (auto axis : axes) {
if (axis < 0) axis += x_shape.size();
if (x_shape[axis] != 1) {
VLOG(3) << "Cannot squeeze dimension " << axis << " with size "
<< x_shape[axis]
<< ". Only dimensions with size 1 can be squeezed.";
return false;
}
}
}

op->set_attribute(kCanRunTrtAttr, rewriter.bool_attr(true));
Expand Down
33 changes: 14 additions & 19 deletions python/paddle/tensorrt/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,25 +385,20 @@ def convert_subgraph_to_trt(self, program, group_op):
if group_op.result(out_index).use_empty():
# if result value is not used, it doesn't need get shape, continue
continue
if not is_shape_tensor(result_value):
if len(result_value.shape) == 0:
min_shape = []
opt_shape = []
max_shape = []
else:
min_shape = get_value_shape_range_info(
result_value, False, paddle.base.core.ShapeMode.kMIN
)
opt_shape = get_value_shape_range_info(
result_value, False, paddle.base.core.ShapeMode.kOPT
)
max_shape = get_value_shape_range_info(
result_value, False, paddle.base.core.ShapeMode.kMAX
)
else:
min_shape = []
opt_shape = []
max_shape = []
min_shape = []
opt_shape = []
max_shape = []
if len(result_value.shape) != 0:
min_shape = get_value_shape_range_info(
result_value, False, paddle.base.core.ShapeMode.kMIN
)
opt_shape = get_value_shape_range_info(
result_value, False, paddle.base.core.ShapeMode.kOPT
)
max_shape = get_value_shape_range_info(
result_value, False, paddle.base.core.ShapeMode.kMAX
)

min_value = []
opt_value = []
max_value = []
Expand Down
54 changes: 51 additions & 3 deletions python/paddle/tensorrt/impls/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ def bilinear_interp_converter(network, paddle_op, inputs):
use_scales = True
if outsize_tensor is not None:
use_scales = False
elif out_h > 0 and out_w > 0 and scale_attr is not None:
use_scales = True
if outsize_tensor is None and len(scale_attr) == 0:
use_scales = False

if use_scales:
scale_h = -1.0
Expand Down Expand Up @@ -225,7 +225,55 @@ def bilinear_interp_converter(network, paddle_op, inputs):
set_layer_name(output_size_tensor, paddle_op)
output_size_tensor = output_size_tensor.get_output(0)
resize_layer.set_input(1, output_size_tensor)

else:
if data_format == "NCHW":
shape_layer = network.add_shape(input_tensor)
shape_output = shape_layer.get_output(0)
# Get N and C from slice_layer output
slice_layer = network.add_slice(
shape_output, start=[0], shape=[2], stride=[1]
)
# Create H and W
hw_constant = network.add_constant(
shape=(2,),
weights=trt.Weights(
np.array([out_h, out_w], dtype=np.int32)
),
).get_output(0)
# Create output shape(NCHW)
concat_layer = network.add_concatenation(
[slice_layer.get_output(0), hw_constant]
)
concat_layer.axis = 0
resize_layer.set_input(1, concat_layer.get_output(0))
elif data_format == "NHWC":
shape_layer = network.add_shape(input_tensor)
shape_output = shape_layer.get_output(0)
# Get N and C from slice_layer output
n_layer = network.add_slice(
shape_output, start=[0], shape=[1], stride=[1]
)
c_layer = network.add_slice(
shape_output, start=[3], shape=[1], stride=[1]
)
# Create H and W
hw_constant = network.add_constant(
shape=(2,),
weights=trt.Weights(
np.array([out_h, out_w], dtype=np.int32)
),
).get_output(0)
# Create output shape(NHWC)
concat_layer = network.add_concatenation(
[n_layer.get_output(0), hw_constant, c_layer.get_output(0)]
)
concat_layer.axis = 0
resize_layer.set_input(1, concat_layer.get_output(0))
else:
raise NotImplementedError(
"Converter for bilinear_interp not support data_format {}.",
data_format,
)
return resize_layer.get_output(0)


Expand Down
9 changes: 8 additions & 1 deletion python/paddle/tensorrt/impls/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,16 @@ def non_zero_converter(network, paddle_op, inputs):
input_tensor = inputs[0]
cast_layer = network.add_cast(input_tensor, trt.float32)
set_layer_name(cast_layer, paddle_op)

non_zero_layer = network.add_non_zero(cast_layer.get_output(0))
nonzero_output = non_zero_layer.get_output(0)
set_layer_name(non_zero_layer, paddle_op)
return non_zero_layer.get_output(0)

shuffle_layer = network.add_shuffle(input=nonzero_output)
shuffle_layer.first_transpose = (1, 0)
transposed_output = shuffle_layer.get_output(0)
set_layer_name(shuffle_layer, paddle_op)
return transposed_output


@converter_registry.register("pd_op.argmax", trt_version="trt_version_ge=8.0")
Expand Down