Commit c82408c

cherry-pick: Port most changes from main
- Excluded all changes to `docs` and `.github` directories; did include documentation changes and all other commits, with the exception of #2451 and #2445 for reasons discussed
- Made necessary changes to switch over to Torch 2.2.0 rc builds, including updating imports
1 parent b6dd22b commit c82408c

File tree: 88 files changed, +4220 / -685 lines. (Large commits hide some content by default; only a subset of the changed files is shown below.)

.github/scripts/install-torch-tensorrt.sh (+1 -1)

@@ -2,7 +2,7 @@
 set -eou pipefail
 # Source conda so it's available to the script environment
 source ${BUILD_ENV_FILE}
-${CONDA_RUN} ${PIP_INSTALL_TORCH} torch==2.1.2 torchvision==0.16.2 pyyaml
+${CONDA_RUN} ${PIP_INSTALL_TORCH} torch==2.2.0 torchvision==0.17.0 pyyaml
 export TRT_VERSION=$(${CONDA_RUN} python -c "import versions; versions.tensorrt_version()")
 ${CONDA_RUN} python -m pip install /opt/torch-tensorrt-builds/torch_tensorrt*+${CU_VERSION}*.whl tensorrt~=${TRT_VERSION} tensorrt-bindings~=${TRT_VERSION} --extra-index-url=https://pypi.ngc.nvidia.com

README.md (+1 -1)

@@ -116,7 +116,7 @@ torch.jit.save(trt_ts_module, "trt_torchscript_module.ts") # save the TRT embedd
 These are the following dependencies used to verify the testcases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.

 - Bazel 6.2.1
-- Libtorch 2.1.1
+- Libtorch 2.2.0
 - CUDA 12.1
 - cuDNN 8.9.5
 - TensorRT 8.6.1

WORKSPACE (+2 -2)

@@ -54,14 +54,14 @@ http_archive(
     name = "libtorch",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.1.1%2Bcu121.zip"],
+    urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.0%2Bcu121.zip"],
 )

 http_archive(
     name = "libtorch_pre_cxx11_abi",
     build_file = "@//third_party/libtorch:BUILD",
     strip_prefix = "libtorch",
-    urls = ["https://download.pytorch.org/libtorch/cu121/libtorch-shared-with-deps-2.1.1%2Bcu121.zip"],
+    urls = ["https://download.pytorch.org/libtorch/test/cu121/libtorch-shared-with-deps-2.2.0%2Bcu121.zip"],
 )

 # Download these tarballs manually from the NVIDIA website

core/conversion/conversionctx/ConversionCtx.cpp (+1 -1)

@@ -164,7 +164,7 @@ void ConversionCtx::RecordNewITensor(const torch::jit::Value* value, nvinfer1::I

 std::string ConversionCtx::SerializeEngine() {
 #if NV_TENSORRT_MAJOR > 7
-  auto serialized_network = builder->buildSerializedNetwork(*net, *cfg);
+  auto serialized_network = make_trt(builder->buildSerializedNetwork(*net, *cfg));
   if (!serialized_network) {
     TORCHTRT_THROW_ERROR("Building serialized network failed in TensorRT");
   }
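The change above routes the raw pointer returned by `buildSerializedNetwork` through `make_trt`, so the serialized blob is freed automatically instead of leaking on the error path. The helper's definition is not part of this diff; below is a minimal sketch of what a make_trt-style wrapper can look like, assuming TensorRT 8+ semantics where TRT objects are destroyed with plain `delete` (the name reuse is deliberate, but the real signature may differ):

#include <memory>

// Illustrative make_trt-style helper (the real definition lives elsewhere in
// the repository and may differ): take ownership of a raw pointer returned by
// a TensorRT factory call so the object is released even if a later check
// throws. Assumes TensorRT 8+, where TRT objects are destroyed with `delete`.
template <typename T>
std::unique_ptr<T> make_trt(T* ptr) {
  return std::unique_ptr<T>(ptr);
}

A `unique_ptr`-style wrapper keeps `serialized_network->data()`-style accesses working unchanged through `operator->` while removing the manual cleanup burden.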

core/conversion/converters/impl/conv_deconv.cpp (+86 -61)

@@ -10,6 +10,74 @@ namespace converters {
 namespace impl {
 namespace {

+void add_output_padding(nvinfer1::Dims& padding, nvinfer1::Dims& out_padding, bool& has_output_padding) {
+  int nbSpatialDims = out_padding.nbDims;
+  // When there is out_padding, if padding is larger than out_padding, just adjust padding Or reduce out_padding as
+  // minimum as possible.
+  for (int i = 0; i < nbSpatialDims; ++i) {
+    if (padding.d[i] - out_padding.d[i] >= 0) {
+      padding.d[i] -= out_padding.d[i];
+      out_padding.d[i] = 0;
+    } else {
+      // Reduce out_padding as possible.
+      out_padding.d[i] -= padding.d[i];
+      padding.d[i] = 0;
+      has_output_padding = true;
+    }
+  }
+}
+
+nvinfer1::ILayer* add_bias_layer(
+    ConversionCtx* ctx,
+    nvinfer1::ITensor* input_tensor,
+    nvinfer1::Dims& input_dims,
+    nvinfer1::Dims& output_padding,
+    Weights& bias) {
+  nvinfer1::ITensor* input_shape = ctx->net->addShape(*input_tensor)->getOutput(0);
+  // Add padding layer
+  nvinfer1::ITensor* start;
+  nvinfer1::ITensor* totalPadding;
+  auto in_nbDims = input_dims.nbDims;
+  std::vector<int32_t> startVec(in_nbDims, 0);
+  std::vector<int32_t> totalPaddingVec(in_nbDims, 0);
+  int32_t diff = in_nbDims - output_padding.nbDims;
+  for (int32_t i = diff; i < in_nbDims; i++) {
+    int32_t idx = i - diff;
+    startVec[i] = 0; // Don't need begin padding, only post padding
+    totalPaddingVec[i] = output_padding.d[idx];
+  }
+  start = tensor_to_const(ctx, torch::tensor(startVec, torch::kInt32));
+  totalPadding = tensor_to_const(ctx, torch::tensor(totalPaddingVec, torch::kInt32));
+
+  const auto size =
+      ctx->net->addElementWise(*input_shape, *totalPadding, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
+
+  nvinfer1::Dims stride;
+  stride.nbDims = in_nbDims;
+  for (int64_t i = 0; i < in_nbDims; i++) {
+    stride.d[i] = 1;
+  }
+  const auto& dummy = stride;
+  auto* sliceLayer = ctx->net->addSlice(*input_tensor, dummy, dummy, stride);
+  sliceLayer->setInput(1, *start);
+  sliceLayer->setInput(2, *size);
+  sliceLayer->setMode(nvinfer1::SliceMode::kFILL);
+  nvinfer1::ITensor* slice_output = sliceLayer->getOutput(0);
+
+  nvinfer1::Dims constantDims;
+  constantDims.nbDims = in_nbDims;
+  for (int64_t i = 0; i < in_nbDims; i++) {
+    constantDims.d[i] = 1;
+  }
+  constantDims.d[diff - 1] =
+      bias.shape.d[0]; // Set C dimension to bias dim and other dimensions to 1 to enable broadcast
+  auto const_layer = ctx->net->addConstant(constantDims, bias.data);
+  auto bias_layer =
+      ctx->net->addElementWise(*slice_output, *const_layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
+
+  return bias_layer;
+}
+
 bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args) {
   // Input to conv/deconv
   auto in = args[0].ITensor();

@@ -76,16 +144,29 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)

   nvinfer1::ILayer* layer = nullptr;
   if (transposed) {
-    nvinfer1::IDeconvolutionLayer* deconvLayer =
-        ctx->net->addDeconvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);
+    // Fix padding based on output_padding provided
+    nvinfer1::Dims begPadding = padding;
+    bool hasOutputPadding = false;
+    add_output_padding(padding, out_padding, hasOutputPadding);
+
+    nvinfer1::IDeconvolutionLayer* deconvLayer = ctx->net->addDeconvolutionNd(
+        *in, kernel_dims.d[0], filter_dim, kernel_weights, hasOutputPadding ? nvinfer1::Weights{} : bias.data);
     deconvLayer->setStrideNd(stride);
     deconvLayer->setDilationNd(dilation);
     deconvLayer->setNbGroups(groups);
-    deconvLayer->setPaddingNd(padding);
+    deconvLayer->setPrePadding(begPadding);
+    deconvLayer->setPostPadding(padding);
+
     // Set deconv kernel weights
     deconvLayer->setInput(1, *kernel);
     TORCHTRT_CHECK(deconvLayer, "Unable to create deconv layer with non-const weights from node: " << *n);
     layer = deconvLayer;
+    if (hasOutputPadding) {
+      LOG_DEBUG("Padding output deconvolution tensor with:" << out_padding);
+      nvinfer1::ITensor* tensorPtr = deconvLayer->getOutput(0);
+      auto dims = in->getDimensions();
+      layer = add_bias_layer(ctx, tensorPtr, dims, out_padding, bias);
+    }
   } else {
     nvinfer1::IConvolutionLayer* convLayer =
         ctx->net->addConvolutionNd(*in, kernel_dims.d[0], filter_dim, kernel_weights, bias.data);

@@ -155,20 +236,7 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
     // https://github.com/onnx/onnx-tensorrt/blob/c3cfcbc8248c6bd007e6630af2085df5e4834b42/builtin_op_importers.cpp#L734
     nvinfer1::Dims begPadding = padding;
     bool hasOutputPadding = false;
-    int nbSpatialDims = out_padding.nbDims;
-    // When there is out_padding, if padding is larger than out_padding, just adjust padding Or reduce out_padding as
-    // minimum as possible.
-    for (int i = 0; i < nbSpatialDims; ++i) {
-      if (padding.d[i] - out_padding.d[i] >= 0) {
-        padding.d[i] -= out_padding.d[i];
-        out_padding.d[i] = 0;
-      } else {
-        // Reduce out_padding as possible.
-        out_padding.d[i] -= padding.d[i];
-        padding.d[i] = 0;
-        hasOutputPadding = true;
-      }
-    }
+    add_output_padding(padding, out_padding, hasOutputPadding);

     // shape of deconvolution's weight: [in, out/groups, ...]
     // If there is still output padding, remove the bias. Bias will be added below.

@@ -190,51 +258,8 @@ bool add_conv_deconv(ConversionCtx* ctx, const torch::jit::Node* n, args& args)
 #endif
     if (hasOutputPadding) {
       LOG_DEBUG("Padding output deconvolution tensor with:" << out_padding);
-
-      // Add padding layer
-      nvinfer1::ITensor* start;
-      nvinfer1::ITensor* totalPadding;
-      auto in_nbDims = orig_dims.nbDims;
-      std::vector<int32_t> startVec(in_nbDims, 0);
-      std::vector<int32_t> totalPaddingVec(in_nbDims, 0);
-      int32_t diff = in_nbDims - out_padding.nbDims;
-      for (int32_t i = diff; i < in_nbDims; i++) {
-        int32_t idx = i - diff;
-        startVec[i] = 0; // Don't need begin padding, only post padding
-        totalPaddingVec[i] = out_padding.d[idx];
-      }
-      start = tensor_to_const(ctx, torch::tensor(startVec, torch::kInt32));
-      totalPadding = tensor_to_const(ctx, torch::tensor(totalPaddingVec, torch::kInt32));
-
       nvinfer1::ITensor* tensorPtr = deconv->getOutput(0);
-      nvinfer1::ITensor* deconvOutShape = ctx->net->addShape(*tensorPtr)->getOutput(0);
-      const auto size =
-          ctx->net->addElementWise(*deconvOutShape, *totalPadding, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
-
-      nvinfer1::Dims stride;
-      stride.nbDims = in_nbDims;
-      for (int64_t i = 0; i < in_nbDims; i++) {
-        stride.d[i] = 1;
-      }
-      const auto& dummy = stride;
-      auto* sliceLayer = ctx->net->addSlice(*tensorPtr, dummy, dummy, stride);
-      sliceLayer->setInput(1, *start);
-      sliceLayer->setInput(2, *size);
-      sliceLayer->setMode(nvinfer1::SliceMode::kFILL);
-      tensorPtr = sliceLayer->getOutput(0);
-
-      nvinfer1::Dims constantDims;
-      constantDims.nbDims = in_nbDims;
-      for (int64_t i = 0; i < in_nbDims; i++) {
-        constantDims.d[i] = 1;
-      }
-      constantDims.d[diff - 1] =
-          bias.shape.d[0]; // Set C dimension to bias dim and other dimensions to 1 to enable broadcast
-      auto const_layer = ctx->net->addConstant(constantDims, bias.data);
-      auto add_bias_layer =
-          ctx->net->addElementWise(*tensorPtr, *const_layer->getOutput(0), nvinfer1::ElementWiseOperation::kSUM);
-
-      new_layer = add_bias_layer;
+      new_layer = add_bias_layer(ctx, tensorPtr, orig_dims, out_padding, bias);
     } else {
       new_layer = deconv;
     }
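To see what the extracted `add_output_padding` helper does with concrete numbers: with padding = [1, 2] and out_padding = [2, 1], the second dimension's output padding is fully absorbed into the pre/post padding split, but the first leaves a residual of 1 that must be appended to the deconvolution output. A standalone sketch of the same arithmetic on plain vectors (illustrative values, not library code):

#include <iostream>
#include <vector>

// Standalone illustration of the add_output_padding rule: fold as much of
// out_padding into padding as possible; whatever cannot be absorbed is flagged
// so it can be appended to the deconv output afterwards.
void adjust(std::vector<int>& padding, std::vector<int>& out_padding, bool& has_output_padding) {
  for (size_t i = 0; i < out_padding.size(); ++i) {
    if (padding[i] >= out_padding[i]) {
      padding[i] -= out_padding[i];  // padding fully absorbs out_padding
      out_padding[i] = 0;
    } else {
      out_padding[i] -= padding[i];  // residual must be padded onto the output
      padding[i] = 0;
      has_output_padding = true;
    }
  }
}

int main() {
  std::vector<int> padding{1, 2};
  std::vector<int> out_padding{2, 1};
  bool has_output_padding = false;
  adjust(padding, out_padding, has_output_padding);
  // Prints: 0 1 | 1 0 | true
  std::cout << padding[0] << " " << padding[1] << " | "
            << out_padding[0] << " " << out_padding[1] << " | "
            << std::boolalpha << has_output_padding << "\n";
  return 0;
}

The residual is what forces the bias handling seen in the diff: when it exists, the deconv layer is built without a bias, and `add_bias_layer` pads the output via a kFILL slice before adding the bias back with a broadcast elementwise sum.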

core/conversion/converters/impl/matrix_multiply.cpp (+34 -5)

@@ -16,20 +16,49 @@ auto mm_registrations TORCHTRT_UNUSED =
         [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
           auto self = args[0].ITensorOrFreeze(ctx);
           auto other = args[1].ITensorOrFreeze(ctx);
+
+          auto selfDims = self->getDimensions().nbDims;
+          auto otherDims = other->getDimensions().nbDims;
+
+          bool squeezeFront = false;
+          bool squeezeBack = false;
+
+          if (selfDims == 1 && selfDims < otherDims) {
+            squeezeFront = true;
+          } else if (otherDims == 1 && otherDims < selfDims) {
+            // Append a 1 to the end of the shape before padding front to match self
+            other = addPadding(ctx, n, other, 2, true, false);
+            otherDims = other->getDimensions().nbDims;
+            squeezeBack = true;
+          }
+
           // Ensure self and other tensors have same nbDims by expanding the dimensions (from 0 axis) if
           // necessary.
-          if (self->getDimensions().nbDims < other->getDimensions().nbDims) {
-            self = addPadding(ctx, n, self, other->getDimensions().nbDims, false, false);
-          } else {
-            other = addPadding(ctx, n, other, self->getDimensions().nbDims, false, false);
+          if (selfDims < otherDims) {
+            self = addPadding(ctx, n, self, otherDims, false, false);
+          } else if (otherDims < selfDims) {
+            other = addPadding(ctx, n, other, selfDims, false, false);
           }

           auto mm_layer = ctx->net->addMatrixMultiply(
               *self, nvinfer1::MatrixOperation::kNONE, *other, nvinfer1::MatrixOperation::kNONE);

           TORCHTRT_CHECK(mm_layer, "Unable to create matrix multiplication node: " << *n);
           mm_layer->setName(util::node_info(n).c_str());
-          auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], mm_layer->getOutput(0));
+          auto out = mm_layer->getOutput(0);
+
+          if (squeezeFront || squeezeBack) {
+            auto squeezeDimOffset = squeezeFront ? 2 : 1;
+            auto reshapeDims =
+                util::squeezeDims(out->getDimensions(), out->getDimensions().nbDims - squeezeDimOffset);
+            auto shuffle_layer = ctx->net->addShuffle(*out);
+            LOG_DEBUG("Squeezing matmul output for 1d correction: " << reshapeDims);
+            TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+            shuffle_layer->setReshapeDimensions(reshapeDims);
+            shuffle_layer->setName((util::node_info(n) + "_squeeze").c_str());
+            out = shuffle_layer->getOutput(0);
+          }
+          auto out_tensor = ctx->AssociateValueAndTensor(n->outputs()[0], out);

           LOG_DEBUG("Output tensor shape: " << out_tensor->getDimensions());
           return true;
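The new `squeezeFront`/`squeezeBack` branches mirror `torch.matmul`'s treatment of 1-D operands: a 1-D left operand is promoted to a row ([K] becomes [1, K]) and the extra dimension is squeezed off the front of the result, while a 1-D right operand is promoted to a column ([K] becomes [K, 1]) and squeezed off the back. Below is a standalone, shape-only sketch of that bookkeeping on plain vectors rather than TensorRT tensors (the helper name `matmul_shape` is illustrative):

#include <algorithm>
#include <cassert>
#include <vector>

// Shape-only illustration of torch.matmul's 1-D promotion rules as implemented
// in the converter above: returns the result shape of a @ b.
std::vector<int> matmul_shape(std::vector<int> a, std::vector<int> b) {
  bool squeeze_front = false, squeeze_back = false;
  if (a.size() == 1 && a.size() < b.size()) {
    a.insert(a.begin(), 1);  // promote vector to [1, K] row; squeeze later
    squeeze_front = true;
  } else if (b.size() == 1 && b.size() < a.size()) {
    b.push_back(1);  // promote vector to [K, 1] column; squeeze later
    squeeze_back = true;
  }
  while (a.size() < b.size()) a.insert(a.begin(), 1);  // pad ranks from axis 0
  while (b.size() < a.size()) b.insert(b.begin(), 1);
  std::vector<int> out;
  for (size_t i = 0; i + 2 < a.size(); ++i)
    out.push_back(std::max(a[i], b[i]));  // broadcast batch dims
  out.push_back(a[a.size() - 2]);         // M from the left operand
  out.push_back(b[b.size() - 1]);         // N from the right operand
  if (squeeze_front) out.erase(out.end() - 2);  // drop the promoted M == 1
  if (squeeze_back) out.pop_back();             // drop the promoted N == 1
  return out;
}

int main() {
  // [4] @ [2, 4, 3] -> [2, 3];  [2, 4, 3] @ [3] -> [2, 4]
  assert((matmul_shape({4}, {2, 4, 3}) == std::vector<int>{2, 3}));
  assert((matmul_shape({2, 4, 3}, {3}) == std::vector<int>{2, 4}));
  return 0;
}

In the converter itself the same effect is achieved in-graph: `addPadding` adjusts ranks before the matrix multiply, and a shuffle layer with `util::squeezeDims` removes the promoted dimension afterwards.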

core/runtime/register_jit_hooks.cpp (+1 -1)

@@ -87,7 +87,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
     .def_pickle(
         [](const c10::intrusive_ptr<TRTEngine>& self) -> std::vector<std::string> {
           // Serialize TensorRT engine
-          auto serialized_trt_engine = self->cuda_engine->serialize();
+          auto serialized_trt_engine = make_trt(self->cuda_engine->serialize());

           // Adding device info related meta data to the serialized file
           auto trt_engine = std::string((const char*)serialized_trt_engine->data(), serialized_trt_engine->size());

cpp/include/torch_tensorrt/macros.h (+1 -1)

@@ -24,7 +24,7 @@
 #define STR(x) XSTR(x)

 #define TORCH_TENSORRT_MAJOR_VERSION 2
-#define TORCH_TENSORRT_MINOR_VERSION 1
+#define TORCH_TENSORRT_MINOR_VERSION 2
 #define TORCH_TENSORRT_PATCH_VERSION 0
 #define TORCH_TENSORRT_VERSION \
   STR(TORCH_TENSORRT_MAJOR_VERSION) \
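For context, `STR`/`XSTR` is the standard two-step preprocessor stringification idiom: the extra level of indirection forces the argument to be macro-expanded before `#` is applied, so bumping the minor version macro changes the version string. A self-contained sketch of how the bumped string is assembled (the `XSTR` definition and the continuation of `TORCH_TENSORRT_VERSION` are not shown in this hunk, so the reconstruction below follows the conventional pattern):

#include <cstdio>

// Two-level stringification: STR(MINOR) first expands MINOR to the token 2,
// then XSTR turns that token into "2". Applying # directly would instead
// yield the literal string "TORCH_TENSORRT_MINOR_VERSION".
#define XSTR(x) #x
#define STR(x) XSTR(x)

#define TORCH_TENSORRT_MAJOR_VERSION 2
#define TORCH_TENSORRT_MINOR_VERSION 2
#define TORCH_TENSORRT_PATCH_VERSION 0

// Adjacent string literals concatenate, producing "2.2.0".
#define TORCH_TENSORRT_VERSION      \
  STR(TORCH_TENSORRT_MAJOR_VERSION) \
  "." STR(TORCH_TENSORRT_MINOR_VERSION) "." STR(TORCH_TENSORRT_PATCH_VERSION)

int main() {
  std::printf("%s\n", TORCH_TENSORRT_VERSION);  // prints 2.2.0
  return 0;
}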

dev_dep_versions.yml (+1)

@@ -1,3 +1,4 @@
+__version__: "2.2.0"
 __cuda_version__: "12.1"
 __cudnn_version__: "8.9"
 __tensorrt_version__: "8.6"

docker/dist-build.sh (+2 -2)

@@ -3,9 +3,9 @@
 TOP_DIR=$(cd $(dirname $0); pwd)/..

 if [[ -z "${USE_CXX11}" ]]; then
-  BUILD_CMD="python -m pip wheel . --extra-index-url https://download.pytorch.org/whl/cu121 -w dist"
+  BUILD_CMD="python -m pip wheel . --extra-index-url https://download.pytorch.org/whl/test/cu121 -w dist"
 else
-  BUILD_CMD="python -m pip wheel . --config-setting="--build-option=--use-cxx11-abi" --extra-index-url https://download.pytorch.org/whl/cu121 -w dist"
+  BUILD_CMD="python -m pip wheel . --config-setting="--build-option=--use-cxx11-abi" --extra-index-url https://download.pytorch.org/whl/test/cu121 -w dist"
 fi

 # TensorRT restricts our pip version

docsrc/index.rst (+2 -1)

@@ -87,6 +87,7 @@ User Guide
    :maxdepth: 1
    :hidden:

+
    user_guide/dynamic_shapes
    user_guide/ptq
    user_guide/saving_models

@@ -206,4 +207,4 @@ Legacy Further Information (TorchScript)
 * `GTC 2021 Fall Talk <https://www.nvidia.com/en-us/on-demand/session/gtcfall21-a31107/>`_
 * `PyTorch Ecosystem Day 2021 <https://assets.pytorch.org/pted2021/posters/I6.png>`_
 * `PyTorch Developer Conference 2021 <https://s3.amazonaws.com/assets.pytorch.org/ptdd2021/posters/D2.png>`_
-* `PyTorch Developer Conference 2022 <https://pytorch.s3.amazonaws.com/posters/ptc2022/C04.pdf>`_
+* `PyTorch Developer Conference 2022 <https://pytorch.s3.amazonaws.com/posters/ptc2022/C04.pdf>`_

docsrc/user_guide/saving_models.rst (+8 -9)

@@ -8,10 +8,10 @@ Saving models compiled with Torch-TensorRT
    :members:
    :undoc-members:
    :show-inheritance:
-
+
 Saving models compiled with Torch-TensorRT varies slightly with the `ir` that has been used for compilation.

-Dynamo IR
+Dynamo IR
 -------------

 Starting with 2.1 release of Torch-TensorRT, we are switching the default compilation to be dynamo based.

@@ -41,7 +41,7 @@ The following code illustrates this approach.
 b) ExportedProgram
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-`torch.export.ExportedProgram` is a new format introduced in Pytorch 2.1. After we compile a Pytorch module using Torch-TensorRT, the resultant
+`torch.export.ExportedProgram` is a new format introduced in Pytorch 2.1. After we compile a Pytorch module using Torch-TensorRT, the resultant
 `torch.fx.GraphModule` along with additional metadata can be used to create `ExportedProgram` which can be saved and loaded from disk.

 .. code-block:: python

@@ -51,18 +51,17 @@ b) ExportedProgram

     model = MyModel().eval().cuda()
     inputs = [torch.randn((1, 3, 224, 224)).cuda()]
-    exp_program = torch_tensorrt.dynamo.trace(model, inputs)
-    trt_gm = torch_tensorrt.dynamo.compile(exp_program, inputs) # Output is a torch.fx.GraphModule
+    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs) # Output is a torch.fx.GraphModule
     # Transform and create an exported program
-    trt_exp_program = torch_tensorrt.dynamo.export(trt_gm, inputs, exp_program.call_spec, ir="exported_program")
+    trt_exp_program = torch_tensorrt.dynamo.export(trt_gm, inputs)
     torch.export.save(trt_exp_program, "trt_model.ep")

-    # Later, you can load it and run inference
+    # Later, you can load it and run inference
     model = torch.export.load("trt_model.ep")
     model(*inputs)

-`torch_tensorrt.dynamo.export` inlines the submodules within a GraphModule to their corresponding nodes, stitches all the nodes together and creates an ExportedProgram.
-This is needed as `torch.export` serialization cannot handle serializing and deserializing of submodules (`call_module` nodes).
+`torch_tensorrt.dynamo.export` inlines the submodules within a GraphModule to their corresponding nodes and stitches all the nodes together.
+This is needed as `torch._export` serialization cannot handle serializing and deserializing of submodules (`call_module` nodes).

 .. note:: This way of saving the models using `ExportedProgram` is experimental. Here is a known issue : https://github.com/pytorch/TensorRT/issues/2341
(file name not shown) (+2 -2)

@@ -1,6 +1,6 @@
 tensorboard>=1.14.0
 protobuf==3.20.*
 nvidia-pyindex
---extra-index-url https://pypi.ngc.nvidia.com
-pytorch-quantization>=2.1.2
+--extra-index-url https://pypi.nvidia.com
+pytorch-quantization
 tqdm
