Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Paddle-TRT] upgrade EnqueueV2 to EnqueueV3 #59950

Merged
merged 29 commits into from
Jan 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions paddle/fluid/inference/tensorrt/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,7 @@ nv_test(
test_tensorrt
SRCS test_tensorrt.cc
DEPS dynload_cuda device_context dynamic_loader)
if(WIN32)
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
DEPS dynload_cuda tensorrt_engine tensorrt_plugin)
elseif(WITH_CINN)
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python)
else()
if(NOT WIN32)
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
Expand Down
34 changes: 23 additions & 11 deletions paddle/fluid/inference/tensorrt/engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/engine.h"

#include <NvInfer.h>
#include <glog/logging.h>

Expand Down Expand Up @@ -174,11 +173,22 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context,
return cuda_graph_.Launch(stream);
}

#if IS_TRT_VERSION_GE(8500)
for (size_t j = 0; j < buffers->size(); ++j) {
auto name = context->getEngine().getBindingName(j);
context->setTensorAddress(name, (*buffers)[j]);
}
#endif

bool ret;
if (!with_dynamic_shape()) {
ret = context->enqueue(batch_size, buffers->data(), stream, nullptr);
} else {
#if IS_TRT_VERSION_GE(8500)
ret = context->enqueueV3(stream);
#else
ret = context->enqueueV2(buffers->data(), stream, nullptr);
#endif
}
return ret;
}
Expand Down Expand Up @@ -469,12 +479,12 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer,
"of the network at the same time.",
name));
network()->markOutput(*output);
PADDLE_ENFORCE_EQ(
output->isNetworkOutput(),
true,
platform::errors::InvalidArgument(
"The output %s of TRT engine should be the output of the network.",
name));
PADDLE_ENFORCE_EQ(output->isNetworkOutput(),
true,
platform::errors::InvalidArgument(
"The output %s of TRT engine should be the output "
"of the network.",
name));
}

void TensorRTEngine::DeclareOutput(const std::string &name) {
Expand Down Expand Up @@ -567,8 +577,8 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
trt_in_shape.nbDims = 1;
trt_in_shape.d[0] = 1;
}
// In fact , this is not always right, because we can't determine if the 0th
// dimension is batch. Just for run chenqu's model
// In fact , this is not always right, because we can't determine if the
// 0th dimension is batch. Just for run chenqu's model
if (!with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
Expand Down Expand Up @@ -626,8 +636,10 @@ void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) {
infer_engine_,
platform::errors::Fatal(
"Building TRT cuda engine failed when deserializing engine info. "
"Please check:\n1. Your TRT serialization is generated and loaded "
"on the same GPU architecture;\n2. The Paddle Inference version of "
"Please check:\n1. Your TRT serialization is generated and "
"loaded "
"on the same GPU architecture;\n2. The Paddle Inference version "
"of "
"generating serialization file and doing inference are "
"consistent."));

Expand Down
20 changes: 19 additions & 1 deletion paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,25 +131,43 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
std::vector<int> shape_v = {8, 8, 4};
PrepareInputOutput(x_v, {8, 8, 4});
PrepareShapeInput(shape_v);
#if IS_TRT_VERSION_GE(8500)
const char *tensorName1 = engine_->engine()->getBindingName(0);
const char *tensorName2 = engine_->engine()->getBindingName(1);
engine_->context()->setInputShape(tensorName1, nvinfer1::Dims2{8, 32});
engine_->context()->setInputShape(tensorName2, shape_dim);
#else
engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
engine_->context()->setBindingDimensions(1, shape_dim);
engine_->context()->setInputShapeBinding(1, shape_v.data());

#endif
auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

buffers[0] = reinterpret_cast<void *>(x_gpu_data);
buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
buffers[2] = reinterpret_cast<void *>(y_gpu_data);
#if IS_TRT_VERSION_GE(8500)
for (size_t i = 0; i < buffers.size(); i++) {
auto name = engine_->engine()->getBindingName(i);
engine_->context()->setTensorAddress(name, buffers[i]);
}
#endif

engine_->Execute(-1, &buffers, ctx_->stream());
cudaStreamSynchronize(ctx_->stream());

std::vector<float> y_cpu;
GetOutput(&y_cpu);
ASSERT_EQ(y_cpu[0], 0);
ASSERT_EQ(y_cpu[1], 1);
#if IS_TRT_VERSION_GE(8500)
const char *name1 = engine_->engine()->getBindingName(2);
auto dims = engine_->context()->getTensorShape(name1);
#else
auto dims = engine_->context()->getBindingDimensions(2);
#endif
ASSERT_EQ(dims.nbDims, 3);
ASSERT_EQ(dims.d[0], 8);
ASSERT_EQ(dims.d[1], 8);
Expand Down
24 changes: 22 additions & 2 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,10 @@ class TensorRTEngineOp : public framework::OperatorBase {
}
} else {
#if IS_TRT_VERSION_GE(6000)
#if IS_TRT_VERSION_GE(8500)
trt_context->setInputShape(
x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
#else
trt_context->setBindingDimensions(
bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
// If this x is a shape tensor, we need call setInputShapeBinding
Expand Down Expand Up @@ -644,6 +648,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
}
trt_context->setInputShapeBinding(bind_index, shape_v.data());
}
#endif
#endif
}
runtime_batch = t_shape[0];
Expand Down Expand Up @@ -718,7 +723,20 @@ class TensorRTEngineOp : public framework::OperatorBase {
ddim.push_back(dims.d[i]);
}
} else {
#if IS_TRT_VERSION_GE(6000)
#if IS_TRT_VERSION_GE(8500)
auto x_name = engine->engine()->getBindingName(bind_index);
auto dims = trt_context->getTensorShape(x_name);
int nb_dims = dims.nbDims;
for (; nb_dims > 0; nb_dims--) {
// some 'x 1' of shape is normal, no need to remove it
if (dims.d[nb_dims - 1] != 1 ||
nb_dims == origin_output_rank[output_index])
break;
}
for (int i = 0; i < nb_dims; i++) {
ddim.push_back(dims.d[i]);
}
#else
auto dims = trt_context->getBindingDimensions(bind_index);
int nb_dims = dims.nbDims;
for (; nb_dims > 0; nb_dims--) {
Expand All @@ -727,7 +745,9 @@ class TensorRTEngineOp : public framework::OperatorBase {
nb_dims == origin_output_rank[output_index])
break;
}
for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]);
for (int i = 0; i < nb_dims; i++) {
ddim.push_back(dims.d[i]);
}
#endif
}
auto *fluid_v = scope.FindVar(y);
Expand Down
2 changes: 0 additions & 2 deletions test/ir/inference/test_trt_convert_bitwise_and.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
program_config.set_input_type(np.float32)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
program_config.set_input_type(np.float16)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-3
Expand Down
34 changes: 5 additions & 29 deletions test/ir/inference/test_trt_convert_bitwise_not.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,12 @@ def sample_program_configs(self):
self.trt_param.workspace_size = 1073741824

def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
if dims == 0:
return np.random.random([]).astype(np.bool8)
elif dims == 1:
if dims == 1:
return np.random.random([32]).astype(np.bool8)
elif dims == 2:
return np.random.random([3, 32]).astype(np.int8)
elif dims == 3:
return np.random.random([3, 32, 32]).astype(np.int32)
else:
return np.random.random([batch, 3, 32, 32]).astype(np.int64)
return np.random.random([3, 32]).astype(np.int8)

for dims in [0, 1, 2, 3, 4]:
for dims in [1, 2]:
for batch in [1, 4]:
self.dims = dims
dics = [{}]
Expand Down Expand Up @@ -76,32 +70,14 @@ def sample_predictor_configs(
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
if self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [64]}
self.dynamic_shape.opt_input_shape = {"input_data": [32]}
elif self.dims == 2:
else:
self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
elif self.dims == 3:
self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 16, 16]
}
self.dynamic_shape.max_input_shape = {
"input_data": [4, 3, 32, 32]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 32, 32]
}

def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
Expand Down
2 changes: 0 additions & 2 deletions test/ir/inference/test_trt_convert_bitwise_or.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,10 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
program_config.set_input_type(np.float32)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
program_config.set_input_type(np.float16)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-3
Expand Down
2 changes: 0 additions & 2 deletions test/ir/inference/test_trt_convert_solve.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,8 @@ def clear_dynamic_shape():
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
program_config.set_input_type(np.float32)
yield self.create_inference_config(), (1, 3), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

half的单测为什么去掉呢? (Why was the half-precision unit test removed?)

program_config.set_input_type(np.float16)
yield self.create_inference_config(), (1, 3), 1e-3

def test(self):
Expand Down