Skip to content

Commit

Permalink
Merge branch 'main' into lluo/save_remove_inputs
Browse files Browse the repository at this point in the history
  • Loading branch information
lanluo-nvidia committed Oct 29, 2024
2 parents 076f47a + e2a27a0 commit 8250179
Show file tree
Hide file tree
Showing 183 changed files with 2,945 additions and 722 deletions.
38 changes: 38 additions & 0 deletions core/runtime/TRTEngine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ TRTEngine::TRTEngine(
cuda_engine = make_trt(rt->deserializeCudaEngine(serialized_engine.c_str(), serialized_engine.size()));
TORCHTRT_CHECK((cuda_engine.get() != nullptr), "Unable to deserialize the TensorRT engine");

if (get_streamable_device_memory_budget() > 0) {
int64_t budget_bytes = get_automatic_device_memory_budget();
LOG_DEBUG("Weight streaming budget set to " << budget_bytes << "B");
cuda_engine->setWeightStreamingBudgetV2(budget_bytes);
}

exec_ctx = make_trt(cuda_engine->createExecutionContext());
TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");

Expand Down Expand Up @@ -258,6 +264,38 @@ void TRTEngine::set_profiling_paths() {
cuda_graph_debug_path = std::filesystem::path{profile_path_prefix + "/" + name + "_cudagraph.dot"}.string();
}

// Reports the weight streaming budget currently configured on the engine,
// exactly as returned by cuda_engine->getWeightStreamingBudgetV2().
int64_t TRTEngine::get_device_memory_budget() {
  const int64_t current_budget = cuda_engine->getWeightStreamingBudgetV2();
  return current_budget;
}

// Applies a new weight streaming budget to the engine and rebuilds the execution context.
//
// budget: value forwarded unchanged to cuda_engine->setWeightStreamingBudgetV2().
// Returns whatever setWeightStreamingBudgetV2() reports. A failure to recreate the
// execution context afterwards is raised through TORCHTRT_CHECK.
bool TRTEngine::set_device_memory_budget(int64_t budget) {
  // The weight streaming budget cannot be modified while an execution context is
  // active, so the current context is released before the budget is touched.
  const bool has_live_context = exec_ctx.get() != nullptr;
  if (has_live_context) {
    exec_ctx.reset();
  }

  // The profiler is dropped together with the context; it is re-enabled further
  // down once the replacement context exists.
  if (profile_execution) {
    trt_engine_profiler.reset();
  }

  const bool budget_applied = cuda_engine->setWeightStreamingBudgetV2(budget);

  // A fresh context is created regardless of whether the budget call succeeded.
  exec_ctx = make_trt(cuda_engine->createExecutionContext());
  TORCHTRT_CHECK(
      (exec_ctx.get() != nullptr),
      "Unable to recreate TensorRT execution context after setting new device memory budget");

  if (profile_execution) {
    enable_profiling();
  }

  return budget_applied;
}

// Reports the size of the engine's streamable weights, as returned by
// cuda_engine->getStreamableWeightsSize().
// The result is 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building.
int64_t TRTEngine::get_streamable_device_memory_budget() {
  const int64_t streamable_size = cuda_engine->getStreamableWeightsSize();
  return streamable_size;
}

// Reports the weight streaming budget that TensorRT selects automatically,
// as returned by cuda_engine->getWeightStreamingAutomaticBudget().
int64_t TRTEngine::get_automatic_device_memory_budget() {
  const int64_t automatic_budget = cuda_engine->getWeightStreamingAutomaticBudget();
  return automatic_budget;
}

std::string TRTEngine::to_str() const {
// clang-format off
std::stringstream ss;
Expand Down
4 changes: 4 additions & 0 deletions core/runtime/TRTEngine.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ struct TRTEngine : torch::CustomClassHolder {
std::string get_engine_layer_info();
void dump_engine_layer_info_to_file(const std::string& path);
void dump_engine_layer_info();
int64_t get_device_memory_budget();
bool set_device_memory_budget(int64_t budget);
int64_t get_streamable_device_memory_budget();
int64_t get_automatic_device_memory_budget();
friend std::ostream& operator<<(std::ostream& os, const TRTEngine& engine);
static const char BINDING_DELIM = '%';

Expand Down
6 changes: 6 additions & 0 deletions core/runtime/register_jit_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
.def("dump_engine_layer_info_to_file", &TRTEngine::dump_engine_layer_info_to_file)
.def("dump_engine_layer_info", &TRTEngine::dump_engine_layer_info)
.def("get_engine_layer_info", &TRTEngine::get_engine_layer_info)
.def_property(
"device_memory_budget",
&TRTEngine::get_device_memory_budget,
&TRTEngine::set_device_memory_budget)
.def_property("streamable_device_memory_budget", &TRTEngine::get_streamable_device_memory_budget)
.def_property("automatic_device_memory_budget", &TRTEngine::get_automatic_device_memory_budget)
.def_pickle(
[](const c10::intrusive_ptr<TRTEngine>& self) -> std::vector<std::string> {
// Serialize TensorRT engine
Expand Down
5 changes: 3 additions & 2 deletions docs/_cpp_api/classtorch__tensorrt_1_1DataType.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Class DataType &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Class DataType &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Class Device::DeviceType &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Class Device::DeviceType &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
5 changes: 3 additions & 2 deletions docs/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Class TensorFormat &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Class TensorFormat &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Template Class Int8CacheCalibrator &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Template Class Int8CacheCalibrator &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Template Class Int8Calibrator &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Template Class Int8Calibrator &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define STR &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define STR &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define TORCH_TENSORRT_PATCH_VERSION &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define TORCH_TENSORRT_PATCH_VERSION &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define TORCH_TENSORRT_MAJOR_VERSION &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define TORCH_TENSORRT_MAJOR_VERSION &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define TORCH_TENSORRT_MINOR_VERSION &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define TORCH_TENSORRT_MINOR_VERSION &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define TORCHTRT_API &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define TORCHTRT_API &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define XSTR &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define XSTR &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

<title>Define TORCHTRT_HIDDEN &mdash; Torch-TensorRT v2.6.0.dev0+b99d080 documentation</title>
<title>Define TORCHTRT_HIDDEN &mdash; Torch-TensorRT v2.6.0.dev0+2840531 documentation</title>



Expand Down Expand Up @@ -275,7 +275,7 @@


<div class="version">
v2.6.0.dev0+b99d080
v2.6.0.dev0+2840531
</div>


Expand Down Expand Up @@ -330,6 +330,7 @@
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/converter_overloading.html">Overloading Torch-TensorRT Converters with Custom Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/custom_kernel_plugins.html">Using Custom Kernels within TensorRT Engines with Torch-TensorRT</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/mutable_torchtrt_module_example.html">Mutable Torch TensorRT Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/_rendered_examples/dynamo/weight_streaming_example.html">Weight Streaming</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Dynamo Frontend</span></p>
<ul>
Expand Down
Loading

0 comments on commit 8250179

Please sign in to comment.