
Refactor TensorRT EP code to better handle dynamic shape subgraphs #4504

Merged: 9 commits, Jul 15, 2020
4 changes: 4 additions & 0 deletions cmake/CMakeLists.txt
@@ -914,6 +914,10 @@ if (onnxruntime_USE_TENSORRT)
 set(onnxruntime_DELAYLOAD_FLAGS "${onnxruntime_DELAYLOAD_FLAGS} /DELAYLOAD:nvinfer.dll /DELAYLOAD:nvinfer_plugin.dll")
 else()
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations")
+# needs to link with stdc++fs in Linux
+if (NOT APPLE)
+  list(APPEND onnxruntime_EXTERNAL_LIBRARIES stdc++fs)
+endif()
 endif()
endif()

1 change: 1 addition & 0 deletions cmake/onnxruntime_providers.cmake
@@ -329,6 +329,7 @@ if (onnxruntime_USE_TENSORRT)
 include_directories(${ONNXRUNTIME_ROOT}/../cmake/external/onnx)
 set(OLD_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
 if (WIN32)
+add_definitions(-D_SILENCE_EXPERIMENTAL_FILESYSTEM_DEPRECATION_WARNING=1)
 set(OLD_CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS})
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4996 /wd4244 /wd4267 /wd4099 /wd4551 /wd4505 /wd4515 /wd4706 /wd4456 /wd4324 /wd4701 /wd4804 /wd4702")
 if (CMAKE_BUILD_TYPE STREQUAL "Debug")
14 changes: 12 additions & 2 deletions docs/execution_providers/TensorRT-ExecutionProvider.md
@@ -67,9 +67,13 @@ ORT_TENSORRT_MIN_SUBGRAPH_SIZE: minimum node size in a subgraph after partitioning

 ORT_TENSORRT_FP16_ENABLE: Enable FP16 mode in TensorRT
 
-By default TensorRT execution provider builds an ICudaEngine with max workspace size = 1 GB, max partition iterations = 1000, min subgraph size = 1 and FP16 mode is disabled.
+ORT_TENSORRT_ENGINE_CACHE_ENABLE: Enable TensorRT engine caching

Review comment (Member):
I think we need some more documentation on the engine caching: why it is needed, how it works, when you would use it, and what some of the pitfalls and limitations are. Examples might be:
- if you enabled FP16 and serialized engines, you need to enable FP16 when deploying/running them.
- engines are built specifically for the underlying hardware and aren't portable.
- caveats about input shape changes.

Reply (Contributor Author):
Good point. I've added more explanations in the doc.

-One can override these defaults by setting environment variables ORT_TENSORRT_MAX_WORKSPACE_SIZE, ORT_TENSORRT_MAX_PARTITION_ITERATIONS, ORT_TENSORRT_MIN_SUBGRAPH_SIZE and ORT_TENSORRT_FP16_ENABLE.
+ORT_TENSORRT_ENGINE_CACHE_PATH: Specify path for TensorRT engine files if ORT_TENSORRT_ENGINE_CACHE_ENABLE is 1
 
+By default TensorRT execution provider builds an ICudaEngine with max workspace size = 1 GB, max partition iterations = 1000, min subgraph size = 1, FP16 mode is disabled and TensorRT engine caching is disabled.
+
+One can override these defaults by setting environment variables ORT_TENSORRT_MAX_WORKSPACE_SIZE, ORT_TENSORRT_MAX_PARTITION_ITERATIONS, ORT_TENSORRT_MIN_SUBGRAPH_SIZE, ORT_TENSORRT_FP16_ENABLE, ORT_TENSORRT_ENGINE_CACHE_ENABLE and ORT_TENSORRT_ENGINE_CACHE_PATH.
e.g. on Linux

### override default max workspace size to 2GB
@@ -83,3 +87,9 @@ export ORT_TENSORRT_MIN_SUBGRAPH_SIZE=5
 
 ### Enable FP16 mode in TensorRT
 export ORT_TENSORRT_FP16_ENABLE=1
+
+### Enable TensorRT engine caching
+export ORT_TENSORRT_ENGINE_CACHE_ENABLE=1
+
+### Specify TensorRT engine cache path
+export ORT_TENSORRT_ENGINE_CACHE_PATH="cache"
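Since the TensorRT execution provider reads these ORT_TENSORRT_* settings from the process environment, they can also be set from Python before the session is created. The snippet below is a minimal sketch, not part of this PR: `make_trt_session` and the `model.onnx` path are illustrative names, and actually running the session requires an onnxruntime-gpu build with TensorRT support.

```python
import os

# The TensorRT EP picks up its ORT_TENSORRT_* settings from the process
# environment when it builds an engine, so set them before creating a session.
os.environ["ORT_TENSORRT_MAX_WORKSPACE_SIZE"] = str(2 * 1024**3)  # 2 GB
os.environ["ORT_TENSORRT_FP16_ENABLE"] = "1"
os.environ["ORT_TENSORRT_ENGINE_CACHE_ENABLE"] = "1"
os.environ["ORT_TENSORRT_ENGINE_CACHE_PATH"] = "cache"

def make_trt_session(model_path):
    """Create an ORT session that prefers TensorRT, falling back to CUDA/CPU."""
    # Imported here so the configuration above is in place first; requires an
    # onnxruntime-gpu build compiled with TensorRT support.
    import onnxruntime as ort
    return ort.InferenceSession(
        model_path,  # e.g. "model.onnx" (placeholder path)
        providers=["TensorrtExecutionProvider",
                   "CUDAExecutionProvider",
                   "CPUExecutionProvider"],
    )
```

Note that the cached engine is tied to the build-time settings: an engine serialized with FP16 enabled must also be loaded with ORT_TENSORRT_FP16_ENABLE set.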