Revert D34034847: DispatchKeySet perf improvements
Test Plan: revert-hammer

Differential Revision: D34034847 (pytorch@8aa3620)

Original commit changeset: a930e44513a7

Original Phabricator Diff: D34034847 (pytorch@8aa3620)

fbshipit-source-id: 57b8b7dee252bb8d10316189a034517a28c42199
(cherry picked from commit c3151d4)
bdhirsh authored and pytorchmergebot committed Feb 14, 2022
1 parent f1a9650 commit 7f560fb
Showing 4 changed files with 38 additions and 70 deletions.
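For context on what is being reverted: the original diff sped up hot TensorImpl predicates by replacing repeated single-key queries on the tensor's DispatchKeySet with whole-set queries against precomputed constexpr masks, so each predicate compiles down to roughly one bitwise AND. Below is a deliberately simplified, self-contained sketch of that idea (one bit per key; the real c10::DispatchKeySet also packs backend-component bits and is considerably more involved). It is a toy model for illustration, not the actual c10 implementation; the short snippets after each file's diff extend it.

```cpp
// Toy model of the DispatchKeySet idea; compile with -std=c++14 or later.
#include <cstdint>

// Stand-in keys; the real DispatchKey enum is far larger.
enum class DispatchKey : uint8_t {
  Python,
  Sparse,
  SparseCsrCPU,
  SparseCsrCUDA,
  MkldnnCPU,
  QuantizedCPU,
  Conjugate,
  Negative,
  ZeroTensor,
};

struct DispatchKeySet {
  uint64_t repr = 0;

  constexpr DispatchKeySet() = default;
  constexpr explicit DispatchKeySet(DispatchKey k)
      : repr(uint64_t{1} << static_cast<uint8_t>(k)) {}

  constexpr DispatchKeySet operator|(DispatchKeySet rhs) const {
    DispatchKeySet out;
    out.repr = repr | rhs.repr;
    return out;
  }
  constexpr DispatchKeySet operator&(DispatchKeySet rhs) const {
    DispatchKeySet out;
    out.repr = repr & rhs.repr;
    return out;
  }
  // Set difference; the reverted code spelled Python-key removal this way.
  constexpr DispatchKeySet operator-(DispatchKeySet rhs) const {
    DispatchKeySet out;
    out.repr = repr & ~rhs.repr;
    return out;
  }
  constexpr DispatchKeySet add(DispatchKey k) const {
    return *this | DispatchKeySet(k);
  }
  constexpr DispatchKeySet remove(DispatchKey k) const {
    return *this - DispatchKeySet(k);
  }
  constexpr bool empty() const {
    return repr == 0;
  }

  // Single-key membership test: the style this revert restores.
  constexpr bool has(DispatchKey k) const {
    return (repr & DispatchKeySet(k).repr) != 0;
  }
  // Whole-set tests against a precomputed mask: the style being reverted.
  // One AND answers the query no matter how many keys the mask covers.
  constexpr bool has_any(DispatchKeySet ks) const {
    return (repr & ks.repr) != 0;
  }
  constexpr bool has_all(DispatchKeySet ks) const {
    return (repr & ks.repr) == ks.repr;
  }
};

// Reverted style: hoist the union of keys into one constexpr mask...
constexpr DispatchKeySet sparse_csr_ks =
    DispatchKeySet(DispatchKey::SparseCsrCPU) |
    DispatchKeySet(DispatchKey::SparseCsrCUDA);

// ...so the query is a single AND:
constexpr bool is_sparse_csr_fast(DispatchKeySet key_set) {
  return key_set.has_any(sparse_csr_ks);
}

// Restored style: one test (and potential branch) per key.
constexpr bool is_sparse_csr_restored(DispatchKeySet key_set) {
  return key_set.has(DispatchKey::SparseCsrCPU) ||
      key_set.has(DispatchKey::SparseCsrCUDA);
}
```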
aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp (3 changes: 1 addition & 2 deletions)
@@ -160,10 +160,9 @@ Tensor MakeStridedQTensorCPU(
       allocator->allocate(size_bytes),
       allocator,
       /* resizable = */ true);
-  constexpr auto quantized_cpu_ks = at::DispatchKeySet(at::DispatchKey::QuantizedCPU);
   auto tensor = detail::make_tensor<QTensorImpl>(
       storage,
-      quantized_cpu_ks,
+      at::DispatchKeySet(at::DispatchKey::QuantizedCPU),
       dtype,
       quantizer);
   get_qtensorimpl(tensor)->set_sizes_and_strides(sizes, strides);
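This change is the smallest instance of the pattern: the reverted commit had merely hoisted the keyset into a named constexpr local, and the revert inlines the temporary again. Either spelling denotes the same compile-time constant, which the toy model above can check (names here are illustrative only):

```cpp
// Continuing the toy model from above.
constexpr auto quantized_cpu_ks = DispatchKeySet(DispatchKey::QuantizedCPU);
static_assert(
    quantized_cpu_ks.repr == DispatchKeySet(DispatchKey::QuantizedCPU).repr,
    "a hoisted constexpr set and an inline temporary are the same constant");
```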
c10/core/DispatchKeySet.h (9 changes: 0 additions & 9 deletions)
@@ -606,15 +606,6 @@ constexpr DispatchKeySet default_excluded_set = DispatchKeySet({
 constexpr DispatchKeySet autograd_dispatch_keyset_with_ADInplaceOrView =
     autograd_dispatch_keyset | DispatchKeySet(DispatchKey::ADInplaceOrView);
 
-constexpr DispatchKeySet python_ks = DispatchKeySet(DispatchKey::Python);
-
-constexpr DispatchKeySet sparse_ks = DispatchKeySet(DispatchKey::Sparse);
-
-constexpr DispatchKeySet sparse_csr_ks =
-    DispatchKeySet({DispatchKey::SparseCsrCPU, DispatchKey::SparseCsrCUDA});
-
-constexpr DispatchKeySet mkldnn_ks = DispatchKeySet(DispatchKey::MkldnnCPU);
-
 // backend dispatch keys that map to DispatchKey::AutogradOther
 // NB: keys in this set also get associated with CompositeImplicitAutograd
 constexpr DispatchKeySet autogradother_backends =
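The nine deleted lines are the constexpr convenience sets themselves. Their construction, unions included, happens entirely at compile time, so membership in them is a constant-expression fact. Rebuilt in the toy model above (same names, simplified types):

```cpp
// Continuing the toy model: the deleted globals were one-key sets plus a
// two-key union (sparse_csr_ks was defined in the first sketch).
constexpr DispatchKeySet python_ks = DispatchKeySet(DispatchKey::Python);
constexpr DispatchKeySet sparse_ks = DispatchKeySet(DispatchKey::Sparse);
constexpr DispatchKeySet mkldnn_ks = DispatchKeySet(DispatchKey::MkldnnCPU);

static_assert(sparse_csr_ks.has(DispatchKey::SparseCsrCPU), "in the union");
static_assert(!sparse_csr_ks.has(DispatchKey::Sparse), "COO is not in the CSR set");
```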
c10/core/TensorImpl.cpp (7 changes: 4 additions & 3 deletions)
@@ -148,7 +148,8 @@ TensorImpl::TensorImpl(
       numel_(0),
       data_type_(data_type),
       device_opt_(storage_.device()),
-      key_set_(key_set - c10::python_ks) { // See [Note: Python key removal]
+      key_set_(key_set.remove(
+          DispatchKey::Python)) { // See [Note: Python key removal]
   init_bitfields();
   // Inference tensor doesn't have version counter.
   if (!is_inference()) {
@@ -193,8 +194,8 @@ TensorImpl::TensorImpl(
 
   key_set = key_set | getAutocastRelatedKeySetFromBackend(k);
 
-  // See [Note: Python key removal]
-  key_set = key_set - c10::python_ks;
+  key_set =
+      key_set.remove(DispatchKey::Python); // See [Note: Python key removal]
 
   // Inference tensor doesn't have autograd related keys.
   if (inference_mode) {
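In TensorImpl's constructors the revert swaps set difference (key_set - c10::python_ks) back to the single-key remove(). In the toy model both reduce to the same AND-NOT; the real class has additional subtleties (for example around backend bits) that the model deliberately ignores:

```cpp
// Continuing the toy model: subtracting a one-key set and removing that key
// agree here.
constexpr DispatchKeySet with_python =
    DispatchKeySet(DispatchKey::Sparse).add(DispatchKey::Python);
static_assert(
    (with_python - DispatchKeySet(DispatchKey::Python)).repr ==
        with_python.remove(DispatchKey::Python).repr,
    "set difference and remove() agree for a single key");
```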
c10/core/TensorImpl.h (89 changes: 33 additions & 56 deletions)
@@ -838,103 +838,91 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   bool is_sparse() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    return key_set_.has_all(c10::sparse_ks);
+    return key_set_.has(DispatchKey::Sparse);
   }
 
   // Whether a tensor is sparse COO or not. Use is_sparse_csr for checking CSR
   // format.
   bool is_sparse_csr() const {
-    return key_set_.has_any(c10::sparse_csr_ks);
+    return key_set_.has(DispatchKey::SparseCsrCPU) ||
+        key_set_.has(DispatchKey::SparseCsrCUDA);
   }
 
   bool is_quantized() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto quantized_ks = DispatchKeySet(DispatchKey::Quantized);
-    return key_set_.has_all(quantized_ks);
+    return key_set_.has(DispatchKey::Quantized);
   }
 
   bool is_meta() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto meta_ks = DispatchKeySet(DispatchKey::Meta);
-    return key_set_.has_all(meta_ks);
+    return key_set_.has(DispatchKey::Meta);
   }
 
   bool is_cpu() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto cpu_bits_ks = DispatchKeySet(BackendComponent::CPUBit) |
-        DispatchKeySet({DispatchKey::SparseCsrCPU, DispatchKey::MkldnnCPU});
-    return key_set_.has_any(cpu_bits_ks);
+    return key_set_.has_backend(BackendComponent::CPUBit) ||
+        key_set_.has(DispatchKey::SparseCsrCPU) ||
+        key_set_.has(DispatchKey::MkldnnCPU);
   }
 
   bool is_cuda() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto cuda_bits_ks = DispatchKeySet(BackendComponent::CUDABit) |
-        DispatchKeySet(DispatchKey::SparseCsrCUDA);
-    return key_set_.has_any(cuda_bits_ks);
+    return key_set_.has_backend(BackendComponent::CUDABit) ||
+        key_set_.has(DispatchKey::SparseCsrCUDA);
   }
 
   bool is_xpu() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto xpu_ks = DispatchKeySet(BackendComponent::XPUBit);
-    return key_set_.has_all(xpu_ks);
+    return key_set_.has_backend(BackendComponent::XPUBit);
   }
 
   bool is_xla() const {
-    constexpr auto xla_ks = DispatchKeySet(BackendComponent::XLABit);
-    return key_set_.has_all(xla_ks);
+    return key_set_.has_backend(BackendComponent::XLABit);
   }
 
   bool is_hpu() const {
-    constexpr auto hpu_ks = DispatchKeySet(BackendComponent::HPUBit);
-    return key_set_.has_all(hpu_ks);
+    return key_set_.has_backend(BackendComponent::HPUBit);
  }
 
   bool is_lazy() const {
-    constexpr auto lazy_ks = DispatchKeySet(BackendComponent::LazyBit);
-    return key_set_.has_all(lazy_ks);
+    return key_set_.has_backend(BackendComponent::LazyBit);
   }
 
   bool is_hip() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto hip_ks = DispatchKeySet(BackendComponent::HIPBit);
-    return key_set_.has_all(hip_ks);
+    return key_set_.has_backend(BackendComponent::HIPBit);
   }
 
   bool is_ve() const {
     // NB: This method is not virtual and avoid dispatches for performance
     // reasons.
-    constexpr auto ve_ks = DispatchKeySet(BackendComponent::VEBit);
-    return key_set_.has_all(ve_ks);
+    return key_set_.has_backend(BackendComponent::VEBit);
   }
 
   bool is_mkldnn() const {
-    return key_set_.has_all(c10::mkldnn_ks);
+    return key_set_.has(DispatchKey::MkldnnCPU);
   }
 
   bool is_vulkan() const {
-    constexpr auto vulkan_ks = DispatchKeySet(DispatchKey::Vulkan);
-    return key_set_.has_all(vulkan_ks);
+    return key_set_.has(DispatchKey::Vulkan);
   }
 
   bool is_metal() const {
-    constexpr auto metal_ks = DispatchKeySet(DispatchKey::Metal);
-    return key_set_.has_all(metal_ks);
+    return key_set_.has(DispatchKey::Metal);
   }
 
   bool is_mlc() const {
-    constexpr auto mls_ks = DispatchKeySet(DispatchKey::MLC);
-    return key_set_.has_all(mls_ks);
+    return key_set_.has(DispatchKey::MLC);
   }
 
   bool is_ort() const {
-    constexpr auto ort_ks = DispatchKeySet(DispatchKey::ORT);
-    return key_set_.has_all(ort_ks);
+    return key_set_.has(DispatchKey::ORT);
   }
 
   // TODO: remove this once we don't automatically enabled Autograd dispatch
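The bulk of the revert is in these is_*() predicates. The reverted style folded every key a predicate cares about, including a backend bit for is_cpu()/is_cuda(), into one constexpr mask queried once with has_any/has_all; the restored style issues one has()/has_backend() call per key. A sketch of the difference for an is_cpu()-shaped query (the toy model has no backend bits, so an ordinary key stands in for CPUBit):

```cpp
// Continuing the toy model. Reverted style: one precomputed mask, one AND.
constexpr DispatchKeySet cpu_bits_ks =
    DispatchKeySet(DispatchKey::QuantizedCPU) | // stand-in for the CPU backend bit
    DispatchKeySet(DispatchKey::SparseCsrCPU) |
    DispatchKeySet(DispatchKey::MkldnnCPU);

constexpr bool is_cpu_fast(DispatchKeySet key_set) {
  return key_set.has_any(cpu_bits_ks);
}

// Restored style: up to three separate tests, short-circuiting left to right.
constexpr bool is_cpu_restored(DispatchKeySet key_set) {
  return key_set.has(DispatchKey::QuantizedCPU) ||
      key_set.has(DispatchKey::SparseCsrCPU) ||
      key_set.has(DispatchKey::MkldnnCPU);
}
```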
@@ -950,8 +938,8 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
   // Invariant:
   //   Inference tensor has version_counter_.enabled() == false
   bool is_inference() {
-    bool no_ADInplaceOrView = !key_set_.has_any(c10::inplace_or_view_ks);
-    bool no_Autograd = !key_set_.has_any(c10::autograd_dispatch_keyset);
+    bool no_ADInplaceOrView = !key_set_.has(c10::DispatchKey::ADInplaceOrView);
+    bool no_Autograd = (key_set_ & c10::autograd_dispatch_keyset).empty();
     TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
         no_ADInplaceOrView == no_Autograd,
         "ADInplaceOrView and Autograd keys must be on/off at the same time.");
@@ -972,22 +960,14 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
 
   Layout layout() const {
     // NB: This method is not virtual and avoid dispatches for perf.
-    // strided is also the most common layout type, so we check for
-    // strided case first.
-    // This keyset must also be kept in sync with the logic in
-    // is_sparse() / is_sparse_csr() / is_mkldnn()
-    constexpr auto sparse_and_sparsecsr_and_mkldnn_ks =
-        c10::sparse_ks | c10::sparse_csr_ks | c10::mkldnn_ks;
-    if (!key_set_.has_any(sparse_and_sparsecsr_and_mkldnn_ks)) {
-      return kStrided;
-    } else if (is_sparse()) {
+    if (is_sparse()) {
       return kSparse;
     } else if (is_sparse_csr()) {
       return kSparseCsr;
-    } else {
-      TORCH_INTERNAL_ASSERT(
-          is_mkldnn(), "There is an error in the layout calculation logic.");
+    } else if (is_mkldnn()) {
       return kMkldnn;
+    } else {
+      return kStrided;
     }
   }
 
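layout() is the one spot where the revert changes branch structure rather than just spelling: the reverted version decided the overwhelmingly common strided case with a single has_any against a combined mask before taking any per-format branch, while the restored version walks the sparse/CSR/mkldnn branches and falls through to kStrided last. A sketch of the removed fast path, which also demonstrates the is_inference() equivalence from the previous hunk (intersection-then-empty versus has_any):

```cpp
// Continuing the toy model (layout constants are stand-ins).
enum ToyLayout { kStrided, kSparse, kSparseCsr, kMkldnn };

constexpr DispatchKeySet non_strided_ks = DispatchKeySet(DispatchKey::Sparse) |
    sparse_csr_ks | DispatchKeySet(DispatchKey::MkldnnCPU);

// Removed fast path: the common strided case is decided by one AND before
// any per-format branch runs.
constexpr ToyLayout layout_fast(DispatchKeySet key_set) {
  if (!key_set.has_any(non_strided_ks)) {
    return kStrided;
  }
  if (key_set.has(DispatchKey::Sparse)) {
    return kSparse;
  }
  if (key_set.has_any(sparse_csr_ks)) {
    return kSparseCsr;
  }
  return kMkldnn;
}
static_assert(layout_fast(DispatchKeySet()) == kStrided, "empty set is strided");

// Restored is_inference() spelling: an intersection followed by an emptiness
// test is the same predicate as has_any.
constexpr DispatchKeySet probe = DispatchKeySet(DispatchKey::Python);
static_assert(
    (probe & non_strided_ks).empty() == !probe.has_any(non_strided_ks),
    "intersection-then-empty equals !has_any");
```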
@@ -1073,8 +1053,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
    * Whether or not the imaginary part of the tensor should be negated
    */
   inline bool is_conj() const {
-    constexpr auto conjugate_ks = DispatchKeySet(DispatchKey::Conjugate);
-    return key_set_.has_all(conjugate_ks);
+    return key_set_.has(DispatchKey::Conjugate);
   }
 
   /**
@@ -1094,8 +1073,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
    * Whether or not the tensor is a zerotensor
    */
   inline bool _is_zerotensor() const {
-    constexpr auto zerotensor_ks = DispatchKeySet(DispatchKey::ZeroTensor);
-    return key_set_.has_all(zerotensor_ks);
+    return key_set_.has(DispatchKey::ZeroTensor);
   }
 
   /**
@@ -1115,8 +1093,7 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
    * Whether or not the tensor should be negated
    */
   inline bool is_neg() const {
-    constexpr auto negative_ks = DispatchKeySet(DispatchKey::Negative);
-    return key_set_.has_all(negative_ks);
+    return key_set_.has(DispatchKey::Negative);
   }
 
   /**
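is_conj(), _is_zerotensor(), and is_neg() each track a single key, so here the reverted has_all(one_key_set) and the restored has(key) are the same predicate; only the multi-key masks above could actually merge work. In the toy model:

```cpp
// Continuing the toy model: for a one-key mask, has_all == has.
constexpr DispatchKeySet conjugate_ks = DispatchKeySet(DispatchKey::Conjugate);
constexpr DispatchKeySet flags =
    DispatchKeySet(DispatchKey::Conjugate).add(DispatchKey::Sparse);
static_assert(
    flags.has_all(conjugate_ks) == flags.has(DispatchKey::Conjugate),
    "one-key has_all and has are equivalent");
```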
@@ -1487,14 +1464,14 @@ struct C10_API TensorImpl : public c10::intrusive_ptr_target {
 
   void set_python_dispatch(bool k) {
     if (k) {
-      key_set_ = key_set_.add(c10::python_ks);
+      key_set_ = key_set_.add(DispatchKey::Python);
     } else {
-      key_set_ = key_set_ - c10::python_ks;
+      key_set_ = key_set_.remove(DispatchKey::Python);
     }
   }
 
   bool is_python_dispatch() const {
-    return key_set_.has_all(c10::python_ks);
+    return key_set_.has(DispatchKey::Python);
   }
 
   /**
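set_python_dispatch() is a plain toggle over one key, and add()/remove() round-trip whichever spelling is used. In the toy model:

```cpp
// Continuing the toy model: adding then removing Python restores the set.
constexpr DispatchKeySet base = DispatchKeySet(DispatchKey::Sparse);
static_assert(base.add(DispatchKey::Python).has(DispatchKey::Python), "on");
static_assert(
    base.add(DispatchKey::Python).remove(DispatchKey::Python).repr == base.repr,
    "off restores the original key set");
```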
