[PyTorch] Avoid heap allocations in inferUnsqueezeGeometry (pytorch#49497)

Summary:
Pull Request resolved: pytorch#49497

Noticed in a perf profile that this function spends a relatively large share of
its time in malloc. Optimize for typical tensor sizes.
ghstack-source-id: 119318388

Test Plan:
perf profile of an internal benchmark; inferUnsqueezeGeometry went from
0.30% exclusive / 0.47% inclusive to 0.11% exclusive / 0.16% inclusive.

Differential Revision: D25596549

fbshipit-source-id: 3bbd2031645a4b9fe6f49a77d41db46826d0f632
swolchok authored and facebook-github-bot committed Jan 5, 2021
1 parent 093aca0 commit a272a7e
Showing 1 changed file with 21 additions and 10 deletions.
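
A note on the "typical tensor sizes" remark in the summary: c10::SmallVector<int64_t, 5> stores up to five elements inline, so the sizes and strides of a tensor with at most four dimensions never touch the heap, even after unsqueeze inserts one extra dimension. The standalone sketch below illustrates that behaviour; it is not part of the commit and assumes only that the c10 headers are on the include path.

// Sketch only: shows the small-buffer behaviour this commit relies on.
#include <c10/util/SmallVector.h>

#include <cstdint>
#include <iostream>

int main() {
  // Sizes and strides of a contiguous 2x3x4x5 tensor; both fit in the
  // inline capacity of 5 before and after the unsqueeze below.
  c10::SmallVector<int64_t, 5> sizes = {2, 3, 4, 5};
  c10::SmallVector<int64_t, 5> strides = {60, 20, 5, 1};

  // Unsqueeze at dim 1: insert a size-1 dimension whose stride is
  // sizes[dim] * strides[dim], mirroring the logic in the diff below.
  const int64_t dim = 1;
  const int64_t new_stride = sizes[dim] * strides[dim];
  sizes.insert(sizes.begin() + dim, 1);
  strides.insert(strides.begin() + dim, new_stride);

  for (size_t i = 0; i < sizes.size(); ++i) {
    std::cout << sizes[i] << ":" << strides[i] << ' ';
  }
  std::cout << '\n';  // prints: 2:60 1:60 3:20 4:5 5:1
}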
aten/src/ATen/native/TensorShape.cpp (31 changes: 21 additions & 10 deletions)

@@ -8,6 +8,7 @@
 #include <ATen/WrapDimUtils.h>
 #include <c10/util/Exception.h>
 #include <c10/util/Optional.h>
+#include <c10/util/SmallVector.h>
 #include <ATen/native/Resize.h>
 #include <ATen/native/TypeProperties.h>
 #include <ATen/SparseTensorUtils.h>
@@ -1467,15 +1468,25 @@ inferSqueezeGeometry(const Tensor& tensor, int64_t dim) {
   return std::make_tuple(sizes, strides);
 }
 
-std::tuple<std::vector<int64_t>, std::vector<int64_t> >
+namespace {
+// Named type instead of a pair/tuple so that we can be sure to
+// construct the vectors in place and get NRVO.
+struct InferUnsqueezeGeometryResult {
+  c10::SmallVector<int64_t, 5> sizes;
+  c10::SmallVector<int64_t, 5> strides;
+  InferUnsqueezeGeometryResult(IntArrayRef tensor_sizes, IntArrayRef tensor_strides)
+      : sizes(tensor_sizes.begin(), tensor_sizes.end())
+      , strides(tensor_strides.begin(), tensor_strides.end()) {}
+};
+}
+InferUnsqueezeGeometryResult
 inferUnsqueezeGeometry(const Tensor& tensor, int64_t dim) {
-  auto sizes = tensor.sizes().vec();
-  auto strides = tensor.strides().vec();
-  int64_t new_stride = dim >= tensor.dim() ? 1 : sizes[dim] * strides[dim];
-  sizes.insert(sizes.begin() + dim, 1);
-  strides.insert(strides.begin() + dim, new_stride);
+  InferUnsqueezeGeometryResult result(tensor.sizes(), tensor.strides());
+  int64_t new_stride = dim >= tensor.dim() ? 1 : result.sizes[dim] * result.strides[dim];
+  result.sizes.insert(result.sizes.begin() + dim, 1);
+  result.strides.insert(result.strides.begin() + dim, new_stride);
 
-  return std::make_tuple(sizes, strides);
+  return result;
 }
 
 Tensor squeeze_qtensor(const Tensor& self) {
@@ -1624,7 +1635,7 @@ Tensor unsqueeze_qtensor(const Tensor& self, int64_t dim) {
         axis,
         quantizer->scalar_type());
   }
-  return make_qtensor(self, std::get<0>(g), std::get<1>(g), quantizer);
+  return make_qtensor(self, g.sizes, g.strides, quantizer);
 }
 
 Tensor unsqueeze(const Tensor& self, int64_t dim) {
@@ -1636,15 +1647,15 @@ Tensor unsqueeze(const Tensor& self, int64_t dim) {
     return unsqueeze_qtensor(self, dim);
   } else {
     auto g = inferUnsqueezeGeometry(self, dim);
-    return self.as_strided(std::get<0>(g), std::get<1>(g));
+    return self.as_strided(g.sizes, g.strides);
   }
 }
 
 Tensor & unsqueeze_(Tensor& self, int64_t dim) {
   dim = maybe_wrap_dim(dim, self.dim() + 1);
 
   auto g = inferUnsqueezeGeometry(self, dim);
-  return self.as_strided_(std::get<0>(g), std::get<1>(g));
+  return self.as_strided_(g.sizes, g.strides);
 }
 
 Tensor flatten(const Tensor& self, int64_t start_dim, int64_t end_dim) {
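
On the NRVO comment in the diff above: returning a single named local lets the compiler construct the result directly in the caller's storage, whereas the old std::make_tuple(sizes, strides) copied the two local vectors into the tuple before returning it. The standalone sketch below shows that pattern; the Geometry type and make_geometry function are hypothetical names used only for illustration, not PyTorch API.

// Sketch only: the named-return-value pattern adopted in the diff above.
#include <cstdint>
#include <vector>

struct Geometry {
  std::vector<int64_t> sizes;
  std::vector<int64_t> strides;
};

// A single named local is returned on every path, so NRVO constructs
// `result` directly in the caller's storage. Building a std::tuple from
// the two member vectors would instead copy (or, with std::move, move)
// them into the tuple.
Geometry make_geometry(const std::vector<int64_t>& in_sizes,
                       const std::vector<int64_t>& in_strides) {
  Geometry result{in_sizes, in_strides};
  result.sizes.push_back(1);   // stand-in for the real insert logic
  result.strides.push_back(1);
  return result;
}

int main() {
  Geometry g = make_geometry({2, 3, 4}, {12, 4, 1});
  return g.sizes.size() == 4 ? 0 : 1;
}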
