Skip to content

Commit

Permalink
Upgrade oneDNN to v2.5.2 (#71546)
Browse files Browse the repository at this point in the history
Summary:
This PR upgrades oneDNN to v2.5.2, and includes some building support for oneDNN v2.5.2.

v2.4 changes:
- Improved performance for future Intel Xeon Scalable processor (code name Sapphire Rapids). The functionality is disabled by default and should be enabled via CPU dispatcher control.
- Improved binary primitive performance for cases when one of the tensors is broadcasted.
- Improved performance of reduction, reorder, and shuffle primitives.
- Improved performance of depthwise convolution forward propagation for processors with Intel AVX-512 support.
- Improved performance of forward inner product primitive for the shapes with minibatch equal to 1 for processors with Intel AVX-512 support
- Improved performance of int8 matmul and inner product primitives for processors with Intel AVX2 and Intel DL Boost support

v2.5 changes:
- Improved performance for future Intel Xeon Scalable processors (code name Sapphire Rapids). The functionality is now enabled by default and requires Linux kernel 5.16.
- Improved performance of matmul primitive for processors with Intel AVX-512 support.

v2.5.2 changes:
- Fixed performance regression in binary primitive with broadcast
- Fixed segmentation fault in depthwise convolution primitive for shapes with huge spatial size for processors with Intel AVX-512 support

Pull Request resolved: pytorch/pytorch#71546

Reviewed By: george-qi

Differential Revision: D33827108

Pulled By: VitalyFedyunin

fbshipit-source-id: 8f5a19b331c82af5b0783f081e061e1034a93952
(cherry picked from commit 9705212)
  • Loading branch information
yanbing-j authored and cyyever committed Feb 3, 2022
1 parent b888d60 commit a5870fa
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 14 deletions.
4 changes: 2 additions & 2 deletions aten/src/ATen/Version.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#endif

#if AT_MKLDNN_ENABLED()
#include <mkldnn.hpp>
#include <dnnl.hpp>
#include <ideep.hpp>
#endif

Expand Down Expand Up @@ -42,7 +42,7 @@ std::string get_mkldnn_version() {
// Apparently no way to get ideep version?
// https://github.com/intel/ideep/issues/29
{
const mkldnn_version_t* ver = mkldnn_version();
const dnnl_version_t* ver = dnnl_version();
ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
<< " (Git Hash " << ver->hash << ")";
}
Expand Down
17 changes: 9 additions & 8 deletions cmake/Modules/FindMKLDNN.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ SET(MKLDNN_ROOT "${IDEEP_ROOT}/mkl-dnn/third_party/oneDNN")

FIND_PACKAGE(BLAS)
FIND_PATH(IDEEP_INCLUDE_DIR ideep.hpp PATHS ${IDEEP_ROOT} PATH_SUFFIXES include)
FIND_PATH(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
FIND_PATH(MKLDNN_INCLUDE_DIR dnnl.hpp dnnl.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
IF (NOT MKLDNN_INCLUDE_DIR)
EXECUTE_PROCESS(COMMAND git${CMAKE_EXECUTABLE_SUFFIX} submodule update --init --jobs 0 mkl-dnn WORKING_DIRECTORY ${IDEEP_ROOT})
FIND_PATH(MKLDNN_INCLUDE_DIR mkldnn.hpp mkldnn.h PATHS ${MKLDNN_ROOT} PATH_SUFFIXES include)
Expand Down Expand Up @@ -53,27 +53,28 @@ ELSEIF (MKLDNN_CPU_RUNTIME STREQUAL "TBB")
SET(Threading_cmake_included TRUE)

SET(DNNL_CPU_THREADING_RUNTIME ${MKLDNN_CPU_RUNTIME})
INCLUDE_DIRECTORIES(${TBB_INCLUDE_DIRS})
INCLUDE_DIRECTORIES(${TBB_INCLUDE_DIR})
LIST(APPEND EXTRA_SHARED_LIBS TBB::tbb)
ELSE()
MESSAGE(FATAL_ERROR "MKLDNN_CPU_RUNTIME is set to TBB but TBB is not used")
ENDIF()
ENDIF()
MESSAGE(STATUS "MKLDNN_CPU_RUNTIME = ${MKLDNN_CPU_RUNTIME}")

SET(MKLDNN_BUILD_TESTS FALSE CACHE BOOL "" FORCE)
SET(MKLDNN_BUILD_EXAMPLES FALSE CACHE BOOL "" FORCE)
SET(MKLDNN_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)
SET(MKLDNN_CPU_RUNTIME ${MKLDNN_CPU_RUNTIME} CACHE STRING "" FORCE)
SET(DNNL_BUILD_TESTS FALSE CACHE BOOL "" FORCE)
SET(DNNL_BUILD_EXAMPLES FALSE CACHE BOOL "" FORCE)
SET(DNNL_LIBRARY_TYPE STATIC CACHE STRING "" FORCE)
SET(DNNL_ENABLE_PRIMITIVE_CACHE TRUE CACHE BOOL "" FORCE)
IF(MKLDNN_USE_NATIVE_ARCH) # Disable HostOpts in MKLDNN unless MKLDNN_USE_NATIVE_ARCH is set.
SET(MKLDNN_ARCH_OPT_FLAGS "HostOpts" CACHE STRING "" FORCE)
SET(DNNL_ARCH_OPT_FLAGS "HostOpts" CACHE STRING "" FORCE)
ELSE()
IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
IF(CPU_INTEL)
SET(MKLDNN_ARCH_OPT_FLAGS "-msse4" CACHE STRING "" FORCE)
SET(DNNL_ARCH_OPT_FLAGS "-msse4" CACHE STRING "" FORCE)
ENDIF()
ELSE()
SET(MKLDNN_ARCH_OPT_FLAGS "" CACHE STRING "" FORCE)
SET(DNNL_ARCH_OPT_FLAGS "" CACHE STRING "" FORCE)
ENDIF()
ENDIF()

Expand Down
2 changes: 1 addition & 1 deletion third_party/ideep
34 changes: 31 additions & 3 deletions third_party/mkl-dnn.BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,34 @@ _DNNL_RUNTIME_OMP = {
"#cmakedefine DNNL_WITH_SYCL": "/* #undef DNNL_WITH_SYCL */",
"#cmakedefine DNNL_WITH_LEVEL_ZERO": "/* #undef DNNL_WITH_LEVEL_ZERO */",
"#cmakedefine DNNL_SYCL_CUDA": "/* #undef DNNL_SYCL_CUDA */",
"#cmakedefine DNNL_ENABLE_STACK_CHECKER": "#undef DNNL_ENABLE_STACK_CHECKER",
"#cmakedefine01 BUILD_TRAINING": "#define BUILD_TRAINING 1",
"#cmakedefine01 BUILD_INFERENCE": "#define BUILD_INFERENCE 0",
"#cmakedefine01 BUILD_PRIMITIVE_ALL": "#define BUILD_PRIMITIVE_ALL 1",
"#cmakedefine01 BUILD_BATCH_NORMALIZATION": "#define BUILD_BATCH_NORMALIZATION 0",
"#cmakedefine01 BUILD_BINARY": "#define BUILD_BINARY 0",
"#cmakedefine01 BUILD_CONCAT": "#define BUILD_CONCAT 0",
"#cmakedefine01 BUILD_CONVOLUTION": "#define BUILD_CONVOLUTION 0",
"#cmakedefine01 BUILD_DECONVOLUTION": "#define BUILD_DECONVOLUTION 0",
"#cmakedefine01 BUILD_ELTWISE": "#define BUILD_ELTWISE 0",
"#cmakedefine01 BUILD_INNER_PRODUCT": "#define BUILD_INNER_PRODUCT 0",
"#cmakedefine01 BUILD_LAYER_NORMALIZATION": "#define BUILD_LAYER_NORMALIZATION 0",
"#cmakedefine01 BUILD_LRN": "#define BUILD_LRN 0",
"#cmakedefine01 BUILD_MATMUL": "#define BUILD_MATMUL 0",
"#cmakedefine01 BUILD_POOLING": "#define BUILD_POOLING 0",
"#cmakedefine01 BUILD_PRELU": "#define BUILD_PRELU 0",
"#cmakedefine01 BUILD_REDUCTION": "#define BUILD_REDUCTION 0",
"#cmakedefine01 BUILD_REORDER": "#define BUILD_REORDER 0",
"#cmakedefine01 BUILD_RESAMPLING": "#define BUILD_RESAMPLING 0",
"#cmakedefine01 BUILD_RNN": "#define BUILD_RNN 0",
"#cmakedefine01 BUILD_SHUFFLE": "#define BUILD_SHUFFLE 0",
"#cmakedefine01 BUILD_SOFTMAX": "#define BUILD_SOFTMAX 0",
"#cmakedefine01 BUILD_SUM": "#define BUILD_SUM 0",
"#cmakedefine01 BUILD_PRIMITIVE_CPU_ISA_ALL": "#define BUILD_PRIMITIVE_CPU_ISA_ALL 1",
"#cmakedefine01 BUILD_SSE41": "#define BUILD_SSE41 0",
"#cmakedefine01 BUILD_AVX2": "#define BUILD_AVX2 0",
"#cmakedefine01 BUILD_AVX512": "#define BUILD_AVX512 0",
"#cmakedefine01 BUILD_AMX": "#define BUILD_AMX 0",
}

template_rule(
Expand All @@ -17,9 +45,9 @@ template_rule(
out = "third_party/oneDNN/include/oneapi/dnnl/dnnl_version.h",
substitutions = {
"@DNNL_VERSION_MAJOR@": "2",
"@DNNL_VERSION_MINOR@": "3",
"@DNNL_VERSION_PATCH@": "3",
"@DNNL_VERSION_HASH@": "f40443c413429c29570acd6cf5e3d1343cf647b4",
"@DNNL_VERSION_MINOR@": "5",
"@DNNL_VERSION_PATCH@": "2",
"@DNNL_VERSION_HASH@": "a9302535553c73243c632ad3c4c80beec3d19a1e",
},
)

Expand Down

0 comments on commit a5870fa

Please sign in to comment.