Improve TensorRT GetCapability to Enable More Models #1012

Merged · 12 commits · May 24, 2019
1 change: 1 addition & 0 deletions .gitmodules
@@ -28,6 +28,7 @@
[submodule "cmake/external/onnx-tensorrt"]
path = cmake/external/onnx-tensorrt
url = https://github.com/onnx/onnx-tensorrt.git
+branch = v5.0
[submodule "cmake/external/eigen"]
path = cmake/external/eigen
url = https://github.com/eigenteam/eigen-git-mirror.git
2 changes: 1 addition & 1 deletion cmake/onnxruntime_providers.cmake
@@ -123,7 +123,7 @@ if (onnxruntime_USE_TENSORRT)
set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
if (WIN32)
set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd2220")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4701 /wd4805")
endif()
367 changes: 256 additions & 111 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

Large diffs are not rendered by default.

59 changes: 35 additions & 24 deletions onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -12,25 +12,22 @@
namespace onnxruntime {

class TensorrtLogger : public nvinfer1::ILogger {
-  nvinfer1::ILogger::Severity verbosity_;
- public:
-  TensorrtLogger(Severity verbosity=Severity::kWARNING)
-      : verbosity_(verbosity) {}
-  void log(Severity severity, const char* msg) override {
-    if( severity <= verbosity_ ) {
-      time_t rawtime = std::time(0);
-      char buf[256];
-      strftime(&buf[0], 256,
-               "%Y-%m-%d %H:%M:%S",
-               std::gmtime(&rawtime));
-      const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" :
-                            severity == Severity::kERROR ? " ERROR" :
-                            severity == Severity::kWARNING ? "WARNING" :
-                            severity == Severity::kINFO ? " INFO" :
-                            "UNKNOWN");
-      LOGS_DEFAULT(WARNING) << "[" << buf << " " << sevstr << "] " << msg;
-    }
+  nvinfer1::ILogger::Severity verbosity_;
+
+ public:
+  TensorrtLogger(Severity verbosity = Severity::kWARNING)
+      : verbosity_(verbosity) {}
+  void log(Severity severity, const char* msg) override {
+    if (severity <= verbosity_) {
+      time_t rawtime = std::time(0);
+      char buf[256];
+      strftime(&buf[0], 256,
+               "%Y-%m-%d %H:%M:%S",
+               std::gmtime(&rawtime));
+      const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? " BUG" : severity == Severity::kERROR ? " ERROR" : severity == Severity::kWARNING ? "WARNING" : severity == Severity::kINFO ? " INFO" : "UNKNOWN");
+      LOGS_DEFAULT(WARNING) << "[" << buf << " " << sevstr << "] " << msg;
+    }
+  }
};
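For orientation: a logger like this is what gets handed to TensorRT's entry points. A minimal usage sketch, assuming the TensorRT 5.x headers and the class above (this snippet is illustrative, not part of the diff):

    TensorrtLogger logger(nvinfer1::ILogger::Severity::kWARNING);
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);  // builder reports through log()
    // ... define and build the network ...
    builder->destroy();  // TensorRT 5.x cleanup style

Because log() filters on severity <= verbosity_, anything less severe than the configured level (here kWARNING) is dropped.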

// Information needed to construct trt execution providers.
@@ -74,16 +71,17 @@ class TensorrtExecutionProvider : public IExecutionProvider {
std::shared_ptr<KernelRegistry> GetKernelRegistry() const override;

void SetMaxBatchSize(const int batch_size) {
-    max_batch_size_ = batch_size;
+    max_batch_size_ = batch_size;
}

void SetMaxWorkspaceSize(const size_t workspace_size) {
-    max_workspace_size_ = workspace_size;
+    max_workspace_size_ = workspace_size;
}

private:
-  int max_batch_size_ = 1;
-  size_t max_workspace_size_ = 1 << 30;  // 1GB
+  int max_batch_size_ = 1;
+  size_t max_workspace_size_ = 1 << 30;  // 1GB
+  int max_parser_iterations_ = 6;

struct InferDeleter {
template <typename T>
@@ -105,7 +103,20 @@ class TensorrtExecutionProvider : public IExecutionProvider {
std::unordered_map<std::string, std::vector<std::vector<int>>> input_info_;
std::unordered_map<std::string, std::vector<std::vector<int>>> output_info_;
std::unordered_map<std::string, std::vector<std::vector<int64_t>>> output_shapes_;

+  /** Build an IndexedSubGraph from the node list of a subgraph. */
+  std::unique_ptr<IndexedSubGraph> GetSubGraph(SubGraph_t graph_nodes_index, int& kernels_index,
+                                               const onnxruntime::GraphViewer& graph) const;
+
+  /**
+  Get the lists of TensorRT-supported nodes by running the ONNX-TensorRT parser iteratively. Each parser
+  pass can only detect the first unsupported node, so ONNX Runtime has to partition the graph at that
+  point and run the parser again on the remaining pieces. If too many iterations are needed, meaning that
+  many nodes in the graph are not supported by TensorRT, the process terminates early and the whole graph
+  is simply left to the other execution providers.
+  */
+  SubGraphCollection_t GetSupportedList(SubGraphCollection_t supported_nodes_list, int iterations, const int max_iterations,
+                                        const onnxruntime::GraphViewer& graph, bool* early_termination) const;
};

} // namespace onnxruntime
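The split-and-retry idea behind GetSupportedList can be shown standalone. A minimal sketch, with a toy FirstUnsupported() standing in for a full ONNX-TensorRT parser pass over a candidate subgraph (all names below are illustrative, not the real implementation):

    #include <cstdio>
    #include <utility>
    #include <vector>

    using SubGraph = std::vector<int>;                 // node indices in execution order
    using SubGraphCollection = std::vector<SubGraph>;

    // Toy stand-in for one parser pass: index of the first node that fails to
    // convert, or -1 if the whole subgraph is supported.
    static int FirstUnsupported(const SubGraph& sg) {
      for (size_t i = 0; i < sg.size(); ++i)
        if (sg[i] % 4 == 3) return static_cast<int>(i);  // pretend every 4th node fails
      return -1;
    }

    // Split each candidate at its first failure, re-parse the remainders, and
    // stop once everything converts or the iteration budget runs out.
    static SubGraphCollection GetSupportedList(SubGraphCollection candidates,
                                               int max_iterations, bool* early_termination) {
      for (int iter = 0; iter < max_iterations; ++iter) {
        SubGraphCollection next;
        bool changed = false;
        for (const SubGraph& sg : candidates) {
          int pos = FirstUnsupported(sg);
          if (pos < 0) {
            next.push_back(sg);  // fully supported, keep as-is
            continue;
          }
          changed = true;
          if (pos > 0)
            next.emplace_back(sg.begin(), sg.begin() + pos);  // prefix converts
          if (pos + 1 < static_cast<int>(sg.size()))
            next.emplace_back(sg.begin() + pos + 1, sg.end());  // tail re-parsed next pass
        }
        candidates = std::move(next);
        if (!changed) return candidates;  // no failures left
      }
      *early_termination = true;  // too many unsupported nodes: leave graph to other EPs
      return {};
    }

    int main() {
      bool early = false;
      for (const SubGraph& sg : GetSupportedList({{0, 1, 2, 3, 4, 5, 6, 7, 8}}, 6, &early)) {
        std::printf("supported run:");
        for (int n : sg) std::printf(" %d", n);
        std::printf("\n");
      }
      std::printf("early termination: %s\n", early ? "yes" : "no");
    }

The real method also has to rebuild and re-serialize each candidate subgraph before the parser can look at it (see GetSubGraph above); the sketch keeps only the partitioning loop.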

40 changes: 20 additions & 20 deletions onnxruntime/test/providers/cpu/math/element_wise_ops_test.cc
@@ -23,7 +23,7 @@ TEST(MathOpTest, Add_int64) {
test.AddInput<int64_t>("A", {3}, {1, 2, 3});
test.AddInput<int64_t>("B", {3}, {4, 5, 6});
test.AddOutput<int64_t>("C", {3}, {5, 7, 9});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
+  test.Run();
}
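This same edit repeats through the rest of the file: tests that used to exclude the TensorRT execution provider outright can now use the plain test.Run() overload, because the improved GetCapability lets the TensorRT provider decline nodes it cannot handle (INT64 math, for example) so they presumably fall back to other providers. A before/after sketch of the pattern, using the Add_int64 test above:

    OpTester test("Add");
    test.AddInput<int64_t>("A", {3}, {1, 2, 3});
    test.AddInput<int64_t>("B", {3}, {4, 5, 6});
    test.AddOutput<int64_t>("C", {3}, {5, 7, 9});
    // Before: exclude TensorRT explicitly, since handing it the graph would fail.
    // test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
    // After: run on all registered providers; unsupported nodes stay off TensorRT.
    test.Run();

Exclusions remain only where TensorRT genuinely produces different results or cannot split the graph (see the batch-size and output-shape comments below).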

TEST(MathOpTest, Add) {
@@ -69,7 +69,7 @@ TEST(MathOpTest, Add_Broadcast_0x0) {
test.AddInput<float>("A", {}, {10.0f});
test.AddInput<float>("B", {}, {2.0f});
test.AddOutput<float>("C", {}, {12.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
+  test.Run();
}

TEST(MathOpTest, Add_Broadcast_0x1) {
@@ -78,7 +78,7 @@ TEST(MathOpTest, Add_Broadcast_0x1) {
test.AddInput<float>("A", {}, {10.0f});
test.AddInput<float>("B", {1}, {2.0f});
test.AddOutput<float>("C", {1}, {12.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
+  test.Run();
}

TEST(MathOpTest, Add_Broadcast_1x0) {
@@ -87,7 +87,7 @@ TEST(MathOpTest, Add_Broadcast_1x0) {
test.AddInput<float>("A", {1}, {10.0f});
test.AddInput<float>("B", {}, {2.0f});
test.AddOutput<float>("C", {1}, {12.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
+  test.Run();
}

TEST(MathOpTest, Add_Broadcast_1x1) {
@@ -134,7 +134,7 @@ TEST(MathOpTest, Add_Broadcast_2x1x4_1x3x1) {
211.0f, 212.0f, 213.0f, 214.0f,
221.0f, 222.0f, 223.0f, 224.0f,
231.0f, 232.0f, 233.0f, 234.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) {
@@ -154,7 +154,7 @@ TEST(MathOpTest, Add_Broadcast_2x1x1_3x4) {
211.0f, 212.0f, 213.0f, 214.0f,
221.0f, 222.0f, 223.0f, 224.0f,
231.0f, 232.0f, 233.0f, 234.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Sub_int32) {
@@ -170,7 +170,7 @@ TEST(MathOpTest, Sub_int64) {
test.AddInput<int64_t>("A", {3}, {1, 5, 6});
test.AddInput<int64_t>("B", {3}, {4, 5, 3});
test.AddOutput<int64_t>("C", {3}, {-3, 0, 3});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
+  test.Run();
}

TEST(MathOpTest, Sub) {
@@ -203,7 +203,7 @@ TEST(MathOpTest, Sub_Broadcast_Scalar) {
{-4.0f, -3.0f, -6.0f,
-5.0f, -3.5f, -105.0f,
-10.4f, 4.3f, -10'005.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
+  test.Run();
}

TEST(MathOpTest, Mul_int32) {
@@ -219,7 +219,7 @@ TEST(MathOpTest, Mul_int64) {
test.AddInput<int64_t>("A", {3}, {3, 6, -3});
test.AddInput<int64_t>("B", {3}, {4, -3, -2});
test.AddOutput<int64_t>("C", {3}, {12, -18, 6});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
+  test.Run();
}

TEST(MathOpTest, Mul) {
@@ -253,7 +253,7 @@ TEST(MathOpTest, Div_int64) {
test.AddInput<int64_t>("A", {3}, {4, 8, 8});
test.AddInput<int64_t>("B", {3}, {2, 3, 4});
test.AddOutput<int64_t>("C", {3}, {2, 2, 2});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: INT64 is not supported
+  test.Run();
}

TEST(MathOpTest, Div) {
@@ -284,7 +284,7 @@ TEST(MathOpTest, Abs_int8) {
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("X", dims, {1, 2, -1, -5});
test.AddOutput<int8_t>("Y", dims, {1, 2, 1, 5});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(MathOpTest, Abs_int32) {
@@ -312,7 +312,7 @@ TEST(MathOpTest, Neg_int8) {
std::vector<int64_t> dims{4};
test.AddInput<int8_t>("X", dims, {1, -2, 0, -10});
test.AddOutput<int8_t>("Y", dims, {-1, 2, 0, 10});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(MathOpTest, Neg_int32) {
@@ -393,7 +393,7 @@ TEST(MathOpTest, Pow_Broadcast_Scalar0) {
test.AddInput<float>("X", {}, {2.0f});
test.AddInput<float>("Y", dims, {1.0f, 2.0f, 3.0f});
test.AddOutput<float>("Z", dims, {2.0f, 4.0f, 8.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
+  test.Run();
}

TEST(MathOpTest, Pow_Broadcast_Scalar1) {
@@ -403,7 +403,7 @@ TEST(MathOpTest, Pow_Broadcast_Scalar1) {
test.AddInput<float>("X", dims, {1.0f, 2.0f, 3.0f});
test.AddInput<float>("Y", {}, {2.0f});
test.AddOutput<float>("Z", dims, {1.0f, 4.0f, 9.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: dynamic shape is not supported
+  test.Run();
}

TEST(MathOpTest, Exp) {
@@ -416,7 +416,7 @@ TEST(MathOpTest, Exp) {
{1.0f, std::exp(1.0f),
std::exp(2.0f), std::exp(10.0f)});
test.SetOutputRelErr("Y", 1e-7f);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: result differs
}

TEST(MathOpTest, Log) {
@@ -470,7 +470,7 @@ TEST(MathOpTest, Sum_8_Test1) {
311.0f, 312.0f, 313.0f,
321.0f, 322.0f, 323.0f,
331.0f, 332.0f, 333.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); // TensorRT parser failed on this test
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Expected output shape [{3,3,3}] did not match run output shape [{3,1,1}] for sum
}

TEST(MathOpTest, Sum_8_Test2) {
@@ -499,7 +499,7 @@ TEST(MathOpTest, Sum_8_Test2) {
3.3f, 4.4f, -94.7f,
59.6f, 64.01f, -8.0f});

-  test.Run(OpTester::ExpectResult::kExpectSuccess, "Sum is not correct", {kTensorrtExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "Sum is not correct", {kTensorrtExecutionProvider}); //TensorRT: result differs
}

TEST(MathOpTest, Min_6) {
@@ -582,7 +582,7 @@ TEST(MathOpTest, Max_8) {
{10.0f, 20.0f, 30.0f,
40.0f, 50.0f, 60.0f,
300.0f, 300.0f, 300.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Max_8_2inputbroadcast) {
@@ -597,7 +597,7 @@ TEST(MathOpTest, Max_8_2inputbroadcast) {
{10.0f, 20.0f, 30.0f,
40.0f, 50.0f, 60.0f,
70.0f, 80.0f, 90.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

TEST(MathOpTest, Not) {
@@ -773,7 +773,7 @@ TEST(MathOpTest, Mean_8) {
{12.0f / 3.0f, 22.0f / 3.0f, 32.0f / 3.0f,
43.0f / 3.0f, 53.0f / 3.0f, 63.0f / 3.0f,
74.0f / 3.0f, 84.0f / 3.0f, 94.0f / 3.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //Input batch size is inconsistent
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider}); //TensorRT: Input batch size is inconsistent
}

#ifndef DISABLE_CONTRIB_OPS
22 changes: 10 additions & 12 deletions onnxruntime/test/providers/cpu/math/gemm_test.cc
@@ -7,8 +7,6 @@
namespace onnxruntime {
namespace test {

-// Disable TensorRT on some of the tests because TensorRT only support FLOAT, INT8, FLOAT16 and INT32 for now
-
TEST(GemmOpTest, GemmNoTrans) {
OpTester test("Gemm");

@@ -25,7 +23,7 @@ TEST(GemmOpTest, GemmNoTrans) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

// Only CUDA kernel has float 16 support
@@ -58,7 +56,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {4, 3}, f_B);
test.AddInput<MLFloat16>("C", {2, 3}, f_C);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}
#endif

@@ -78,7 +76,7 @@ TEST(GemmOpTest, GemmBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 12.0f, 13.0f,
-9.0f, -8.0f, -7.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(GemmOpTest, GemmTrans) {
@@ -99,7 +97,7 @@ TEST(GemmOpTest, GemmTrans) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(GemmOpTest, GemmAlphaBeta) {
@@ -118,7 +116,7 @@ TEST(GemmOpTest, GemmAlphaBeta) {
test.AddOutput<float>("Y", {2, 3},
{7.0f, 7.0f, 7.0f,
-3.0f, -3.0f, -3.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(GemmOpTest, GemmNaN) {
@@ -137,7 +135,7 @@ TEST(GemmOpTest, GemmNaN) {
test.AddOutput<float>("Y", {2, 3},
{10.0f, 10.0f, 10.0f,
-10.0f, -10.0f, -10.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(GemmOpTest, GemmScalarBroadcast) {
@@ -156,7 +154,7 @@ TEST(GemmOpTest, GemmScalarBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-9.0f, -9.0f, -9.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(MathOpTest, Gemm2DBroadcast) {
@@ -175,7 +173,7 @@ TEST(MathOpTest, Gemm2DBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-8.0f, -8.0f, -8.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(GemmOpTest, GemmFalseBroadcast) {
@@ -194,7 +192,7 @@ TEST(GemmOpTest, GemmFalseBroadcast) {
test.AddOutput<float>("Y", {2, 3},
{11.0f, 11.0f, 11.0f,
-8.0f, -8.0f, -8.0f});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

TEST(GemmOpTest, GemmEmptyTensor) {
@@ -211,7 +209,7 @@ TEST(GemmOpTest, GemmEmptyTensor) {
test.AddInput<float>("C", {3}, std::vector<float>(3, 1.0f));
test.AddOutput<float>("Y", {0, 3},
{});
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kTensorrtExecutionProvider});
+  test.Run();
}

} // namespace test