Revert "[Paddle-TRT] upgrade EnqueueV2 to EnqueueV3" (PaddlePaddle#60797
Browse files Browse the repository at this point in the history
)
  • Loading branch information
lizexu123 authored Jan 16, 2024
1 parent 27c4227 commit 1615510
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 70 deletions.
12 changes: 11 additions & 1 deletion paddle/fluid/inference/tensorrt/CMakeLists.txt
@@ -28,7 +28,17 @@ nv_test(
test_tensorrt
SRCS test_tensorrt.cc
DEPS dynload_cuda device_context dynamic_loader)
if(NOT WIN32)
if(WIN32)
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
DEPS dynload_cuda tensorrt_engine tensorrt_plugin)
elseif(WITH_CINN)
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
DEPS fleet_executor dynload_cuda tensorrt_engine tensorrt_plugin python)
else()
nv_test(
test_tensorrt_engine
SRCS test_engine.cc test_dynamic_engine.cc
34 changes: 11 additions & 23 deletions paddle/fluid/inference/tensorrt/engine.cc
Expand Up @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/tensorrt/engine.h"

#include <NvInfer.h>
#include <glog/logging.h>

@@ -173,22 +174,11 @@ bool TensorRTEngine::Enqueue(nvinfer1::IExecutionContext *context,
return cuda_graph_.Launch(stream);
}

#if IS_TRT_VERSION_GE(8500)
for (size_t j = 0; j < buffers->size(); ++j) {
auto name = context->getEngine().getBindingName(j);
context->setTensorAddress(name, (*buffers)[j]);
}
#endif

bool ret;
if (!with_dynamic_shape()) {
ret = context->enqueue(batch_size, buffers->data(), stream, nullptr);
} else {
#if IS_TRT_VERSION_GE(8500)
ret = context->enqueueV3(stream);
#else
ret = context->enqueueV2(buffers->data(), stream, nullptr);
#endif
}
return ret;
}
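
Note: the hunk above restores the pre-TensorRT-8.5 execution path. enqueueV2 takes the buffer array positionally, while the removed 8.5+ path first registers each buffer against its binding name via setTensorAddress and then calls enqueueV3 with only the stream. A minimal sketch of the two call styles, assuming an already-built context and device buffers (this is illustrative, not Paddle's actual helper):

#include <NvInfer.h>
#include <cuda_runtime_api.h>
#include <vector>

// Sketch only: contrasts the two TensorRT enqueue APIs this commit
// switches between. `context`, `buffers`, and `stream` come from elsewhere.
bool EnqueueSketch(nvinfer1::IExecutionContext *context,
                   std::vector<void *> *buffers,
                   cudaStream_t stream) {
#if NV_TENSORRT_MAJOR > 8 || (NV_TENSORRT_MAJOR == 8 && NV_TENSORRT_MINOR >= 5)
  // 8.5+ style (removed by this revert): bind each buffer to its binding
  // name once, then enqueue with only the stream.
  for (size_t j = 0; j < buffers->size(); ++j) {
    const char *name = context->getEngine().getBindingName(static_cast<int>(j));
    context->setTensorAddress(name, (*buffers)[j]);
  }
  return context->enqueueV3(stream);
#else
  // Pre-8.5 style (restored by this revert): buffers are passed
  // positionally, indexed by binding slot.
  return context->enqueueV2(buffers->data(), stream, nullptr);
#endif
}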
@@ -479,12 +469,12 @@ void TensorRTEngine::DeclareOutput(const nvinfer1::ILayer *layer,
"of the network at the same time.",
name));
network()->markOutput(*output);
PADDLE_ENFORCE_EQ(output->isNetworkOutput(),
true,
platform::errors::InvalidArgument(
"The output %s of TRT engine should be the output "
"of the network.",
name));
PADDLE_ENFORCE_EQ(
output->isNetworkOutput(),
true,
platform::errors::InvalidArgument(
"The output %s of TRT engine should be the output of the network.",
name));
}

void TensorRTEngine::DeclareOutput(const std::string &name) {
@@ -577,8 +567,8 @@ nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
trt_in_shape.nbDims = 1;
trt_in_shape.d[0] = 1;
}
// In fact , this is not always right, because we can't determine if the
// 0th dimension is batch. Just for run chenqu's model
// In fact , this is not always right, because we can't determine if the 0th
// dimension is batch. Just for run chenqu's model
if (!with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
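
Note: the truncated loop above (static-shape branch) drops what is assumed to be the batch dimension at index 0 and shifts the remaining dimensions left; as the in-code comment concedes, that assumption is not always valid. A hedged standalone sketch, with the elided loop body filled in as an assumption rather than copied from the file:

#include <NvInfer.h>

// Assumed reconstruction of the dimension-dropping logic for static-shape
// mode; the loop body is inferred, not taken verbatim from engine.cc.
nvinfer1::Dims DropAssumedBatchDim(nvinfer1::Dims shape) {
  if (shape.nbDims > 0) {
    shape.nbDims--;
    for (int i = 0; i < shape.nbDims; i++) {
      shape.d[i] = shape.d[i + 1];  // shift left over the dropped dim 0
    }
  }
  return shape;
}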
@@ -636,10 +626,8 @@ void TensorRTEngine::Deserialize(const std::string &engine_serialized_data) {
infer_engine_,
platform::errors::Fatal(
"Building TRT cuda engine failed when deserializing engine info. "
"Please check:\n1. Your TRT serialization is generated and "
"loaded "
"on the same GPU architecture;\n2. The Paddle Inference version "
"of "
"Please check:\n1. Your TRT serialization is generated and loaded "
"on the same GPU architecture;\n2. The Paddle Inference version of "
"generating serialization file and doing inference are "
"consistent."));

20 changes: 1 addition & 19 deletions paddle/fluid/inference/tensorrt/test_dynamic_engine.cc
@@ -131,43 +131,25 @@ TEST_F(TensorRTDynamicShapeValueEngineTest, test_trt_dynamic_shape_value) {
std::vector<int> shape_v = {8, 8, 4};
PrepareInputOutput(x_v, {8, 8, 4});
PrepareShapeInput(shape_v);
#if IS_TRT_VERSION_GE(8500)
const char *tensorName1 = engine_->engine()->getBindingName(0);
const char *tensorName2 = engine_->engine()->getBindingName(1);
engine_->context()->setInputShape(tensorName1, nvinfer1::Dims2{8, 32});
engine_->context()->setInputShape(tensorName2, shape_dim);
#else
engine_->context()->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
engine_->context()->setBindingDimensions(1, shape_dim);
engine_->context()->setInputShapeBinding(1, shape_v.data());
#endif

auto *x_gpu_data = input_.mutable_data<float>(ctx_->GetPlace());
auto *shape_gpu_data = shape_.mutable_data<int>(ctx_->GetPlace());
auto *y_gpu_data = output_.mutable_data<float>(ctx_->GetPlace());

buffers[0] = reinterpret_cast<void *>(x_gpu_data);
buffers[1] = reinterpret_cast<void *>(shape_gpu_data);
buffers[2] = reinterpret_cast<void *>(y_gpu_data);
#if IS_TRT_VERSION_GE(8500)
for (size_t i = 0; i < buffers.size(); i++) {
auto name = engine_->engine()->getBindingName(i);
engine_->context()->setTensorAddress(name, buffers[i]);
}
#endif

engine_->Execute(-1, &buffers, ctx_->stream());
cudaStreamSynchronize(ctx_->stream());

std::vector<float> y_cpu;
GetOutput(&y_cpu);
ASSERT_EQ(y_cpu[0], 0);
ASSERT_EQ(y_cpu[1], 1);
#if IS_TRT_VERSION_GE(8500)
const char *name1 = engine_->engine()->getBindingName(2);
auto dims = engine_->context()->getTensorShape(name1);
#else
auto dims = engine_->context()->getBindingDimensions(2);
#endif
ASSERT_EQ(dims.nbDims, 3);
ASSERT_EQ(dims.d[0], 8);
ASSERT_EQ(dims.d[1], 8);
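
Note: the restored test drives the engine entirely through the pre-8.5 binding-index API: setBindingDimensions supplies an execution tensor's shape, and setInputShapeBinding supplies the host-side values of a shape tensor. A condensed sketch of that flow (binding 0 = data input, 1 = shape tensor, 2 = output, as in the test; buffer setup and the enqueueV2 launch are elided):

#include <NvInfer.h>
#include <vector>

// Condensed sketch, not the actual test body.
void SetDynamicBindings(nvinfer1::IExecutionContext *context) {
  std::vector<int> shape_v = {8, 8, 4};  // values held by the shape tensor
  nvinfer1::Dims shape_dim;              // dims of the shape tensor itself
  shape_dim.nbDims = 1;
  shape_dim.d[0] = 3;                    // it holds three values
  context->setBindingDimensions(0, nvinfer1::Dims2{8, 32});
  context->setBindingDimensions(1, shape_dim);
  context->setInputShapeBinding(1, shape_v.data());
  // After enqueueV2, the output shape can be read back by binding index:
  nvinfer1::Dims out_dims = context->getBindingDimensions(2);  // expect {8, 8, 4}
  (void)out_dims;
}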
24 changes: 2 additions & 22 deletions paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -611,10 +611,6 @@ class TensorRTEngineOp : public framework::OperatorBase {
}
} else {
#if IS_TRT_VERSION_GE(6000)
#if IS_TRT_VERSION_GE(8500)
trt_context->setInputShape(
x.c_str(), inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
#else
trt_context->setBindingDimensions(
bind_index, inference::tensorrt::Vec2TRT_Dims(t_shape, x, true));
// If this x is a shape tensor, we need call setInputShapeBinding
@@ -648,7 +644,6 @@
}
trt_context->setInputShapeBinding(bind_index, shape_v.data());
}
#endif
#endif
}
runtime_batch = t_shape[0];
@@ -723,20 +718,7 @@
ddim.push_back(dims.d[i]);
}
} else {
#if IS_TRT_VERSION_GE(8500)
auto x_name = engine->engine()->getBindingName(bind_index);
auto dims = trt_context->getTensorShape(x_name);
int nb_dims = dims.nbDims;
for (; nb_dims > 0; nb_dims--) {
// some 'x 1' of shape is normal, no need to remove it
if (dims.d[nb_dims - 1] != 1 ||
nb_dims == origin_output_rank[output_index])
break;
}
for (int i = 0; i < nb_dims; i++) {
ddim.push_back(dims.d[i]);
}
#else
#if IS_TRT_VERSION_GE(6000)
auto dims = trt_context->getBindingDimensions(bind_index);
int nb_dims = dims.nbDims;
for (; nb_dims > 0; nb_dims--) {
@@ -745,9 +727,7 @@
nb_dims == origin_output_rank[output_index])
break;
}
for (int i = 0; i < nb_dims; i++) {
ddim.push_back(dims.d[i]);
}
for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]);
#endif
}
auto *fluid_v = scope.FindVar(y);
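
Note: the restored block trims trailing dimensions of size 1 from the bound output shape, but never below the rank the framework originally recorded for that output, since a genuine "x 1" dimension must survive. An equivalent standalone restatement of that rule (`origin_rank` stands in for origin_output_rank[output_index]):

#include <NvInfer.h>
#include <cstdint>
#include <vector>

// Standalone restatement of the trailing-1 trimming in the hunk above.
std::vector<int64_t> TrimTrailingOnes(const nvinfer1::Dims &dims,
                                      int origin_rank) {
  int nb_dims = dims.nbDims;
  for (; nb_dims > 0; nb_dims--) {
    // Some "x 1" dims are legitimate; stop once we are back at the rank
    // the original output had.
    if (dims.d[nb_dims - 1] != 1 || nb_dims == origin_rank) break;
  }
  std::vector<int64_t> ddim;
  for (int i = 0; i < nb_dims; i++) ddim.push_back(dims.d[i]);
  return ddim;
}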
2 changes: 2 additions & 0 deletions test/ir/inference/test_trt_convert_bitwise_and.py
@@ -135,10 +135,12 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
program_config.set_input_type(np.float32)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
program_config.set_input_type(np.float16)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-3
34 changes: 29 additions & 5 deletions test/ir/inference/test_trt_convert_bitwise_not.py
@@ -31,12 +31,18 @@ def sample_program_configs(self):
self.trt_param.workspace_size = 1073741824

def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
if dims == 1:
if dims == 0:
return np.random.random([]).astype(np.bool8)
elif dims == 1:
return np.random.random([32]).astype(np.bool8)
else:
elif dims == 2:
return np.random.random([3, 32]).astype(np.int8)
elif dims == 3:
return np.random.random([3, 32, 32]).astype(np.int32)
else:
return np.random.random([batch, 3, 32, 32]).astype(np.int64)

for dims in [1, 2]:
for dims in [0, 1, 2, 3, 4]:
for batch in [1, 4]:
self.dims = dims
dics = [{}]
@@ -70,14 +76,32 @@ def sample_predictor_configs(
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
if self.dims == 1:
if self.dims == 0:
self.dynamic_shape.min_input_shape = {"input_data": []}
self.dynamic_shape.max_input_shape = {"input_data": []}
self.dynamic_shape.opt_input_shape = {"input_data": []}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"input_data": [1]}
self.dynamic_shape.max_input_shape = {"input_data": [64]}
self.dynamic_shape.opt_input_shape = {"input_data": [32]}
else:
elif self.dims == 2:
self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
elif self.dims == 3:
self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
else:
self.dynamic_shape.min_input_shape = {
"input_data": [1, 3, 16, 16]
}
self.dynamic_shape.max_input_shape = {
"input_data": [4, 3, 32, 32]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 32, 32]
}

def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
2 changes: 2 additions & 0 deletions test/ir/inference/test_trt_convert_bitwise_or.py
@@ -135,10 +135,12 @@ def generate_trt_nodes_num(attrs, dynamic_shape):
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
program_config.set_input_type(np.float32)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
program_config.set_input_type(np.float16)
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-3
2 changes: 2 additions & 0 deletions test/ir/inference/test_trt_convert_solve.py
@@ -87,8 +87,10 @@ def clear_dynamic_shape():
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
program_config.set_input_type(np.float32)
yield self.create_inference_config(), (1, 3), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
program_config.set_input_type(np.float16)
yield self.create_inference_config(), (1, 3), 1e-3

def test(self):