diff --git a/3rdparty/mshadow/mshadow/tensor.h b/3rdparty/mshadow/mshadow/tensor.h
index f8f63388155c..8b24d1fa6136 100644
--- a/3rdparty/mshadow/mshadow/tensor.h
+++ b/3rdparty/mshadow/mshadow/tensor.h
@@ -108,7 +108,10 @@ struct Shape {
    * \return the corresponding dimension size
    */
   MSHADOW_XINLINE const index_t &operator[](int idx) const {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Warray-bounds"
     return shape_[idx];
+#pragma GCC diagnostic pop
   }
   /*!
    * \return whether two shape equals
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7d34d5f1935e..dd1206adeab9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,7 +77,6 @@ option(USE_JEMALLOC "Build with Jemalloc support" OFF)
 option(USE_LIBJPEG_TURBO "Use libjpeg-turbo" OFF)
 option(USE_DIST_KVSTORE "Build with DIST_KVSTORE support" OFF)
 option(USE_PLUGINS_WARPCTC "Use WARPCTC Plugins" OFF)
-option(USE_CPP_PACKAGE "Build C++ Package" OFF)
 option(USE_MXNET_LIB_NAMING "Use MXNet library naming conventions." ON)
 option(USE_GPROF "Compile with gprof (profiling) flag" OFF)
 option(USE_VTUNE "Enable use of Intel Amplifier XE (VTune)" OFF) # one could set VTUNE_ROOT for search path
diff --git a/ci/docker/Dockerfile.build.ubuntu b/ci/docker/Dockerfile.build.ubuntu
index e68e2f94bc02..73494a6784a8 100644
--- a/ci/docker/Dockerfile.build.ubuntu
+++ b/ci/docker/Dockerfile.build.ubuntu
@@ -21,8 +21,8 @@
 # See docker-compose.yml for supported BASE_IMAGE ARGs and targets.
 ####################################################################################################
-# The Dockerfile uses a dynamic BASE_IMAGE (for example ubuntu:18.04
-# nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 etc).
+# The Dockerfile uses a dynamic BASE_IMAGE (for example ubuntu:20.04
+# nvidia/cuda:11.1-cudnn8-devel-ubuntu20.04 etc).
 # On top of BASE_IMAGE we install all dependencies shared by all MXNet build
 # environments into a "base" target. At the end of this file, we can specialize
 # "base" for specific usecases. The target built by docker can be selected via
@@ -36,9 +36,7 @@ WORKDIR /work/deps
 RUN export DEBIAN_FRONTEND=noninteractive && \
     apt-get update && \
     apt-get install -y wget software-properties-common && \
-    wget -qO - http://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - && \
     wget -qO - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | apt-key add - && \
-    apt-add-repository "deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-10 main" && \
     apt-add-repository "deb https://apt.repos.intel.com/mkl all main" && \
     apt-get update && \
     apt-get install -y \
@@ -47,6 +45,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         unzip \
         pandoc \
         ## Development tools
+        cmake \
         build-essential \
         ninja-build \
         git \
@@ -123,43 +122,33 @@ COPY runtime_functions.sh /work/
 # The target built by docker can be selected via "--target" option or docker-compose.yml
 ####################################################################################################
 FROM base as gpu
-# Install Thrust 1.9.8 to be shipped with Cuda 11.
-# Fixes https://github.com/thrust/thrust/issues/1072 for Clang 10
-# This file can be deleted when using Cuda 11 on CI
-RUN cd /usr/local && \
-    git clone https://github.com/thrust/thrust.git && \
-    cd thrust && \
-    git checkout 1.9.8
-# Install TensorRT
+# Install TensorRT and cuDNN
+# Use bash as it has better support for string comparisons in if clauses
+SHELL ["/bin/bash", "-c"]
 # We need to redeclare ARG due to
 # https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
 ARG BASE_IMAGE
-# due to issue https://gitlab.com/nvidia/container-images/cuda/-/issues/92, we
-# get the cuda version from installed deb package if CUDA_VERSION is not set
-RUN if [ -z "$CUDA_VERSION" ]; then export CUDA_VERSION=$( \
-    dpkg --list cuda-libraries*| grep "cuda-libraries-dev-" | awk '{print $3}' | sed 's/-1$//'); \
-    fi && export SHORT_CUDA_VERSION=${CUDA_VERSION%.*} && \
+RUN export SHORT_CUDA_VERSION=${CUDA_VERSION%.*} && \
+    export OS_RELEASE="$(cat /etc/os-release)" && \
     apt-get update && \
-    if [ ${SHORT_CUDA_VERSION} = 10.0 ]; then \
-        TRT_VERSION="7.0.0-1+cuda10.0"; \
-        TRT_MAJOR_VERSION=7; \
-    elif [ ${SHORT_CUDA_VERSION} = 10.1 ]; then \
-        TRT_VERSION="6.0.1-1+cuda10.1"; \
-        TRT_MAJOR_VERSION=6; \
-    elif [ ${SHORT_CUDA_VERSION} = 10.2 ]; then \
-        TRT_VERSION="7.0.0-1+cuda10.2"; \
-        TRT_MAJOR_VERSION=7; \
-    elif [ ${SHORT_CUDA_VERSION} = 11.0 ]; then \
-        TRT_VERSION="7.2.0-1+cuda11.0"; \
-        TRT_MAJOR_VERSION=7; \
-    else \
-        echo "ERROR: Cuda ${SHORT_CUDA_VERSION} not yet supported in Dockerfile.build.ubuntu"; \
-        exit 1; \
+    if [[ ${OS_RELEASE} == *"Bionic"* ]]; then \
+        if [ ${SHORT_CUDA_VERSION} = 11.0 ]; then \
+            TRT_VERSION="7.2.0-1+cuda11.0"; \
+            TRT_MAJOR_VERSION=7; \
+        elif [ ${SHORT_CUDA_VERSION} = 11.1 ]; then \
+            TRT_VERSION="7.2.1-1+cuda11.1"; \
+            TRT_MAJOR_VERSION=7; \
+        else \
+            echo "ERROR: Cuda ${SHORT_CUDA_VERSION} not yet supported in Dockerfile.build.ubuntu"; \
+            exit 1; \
+        fi; \
+        apt-get install -y libnvinfer${TRT_MAJOR_VERSION}=${TRT_VERSION} \
+            libnvinfer-dev=${TRT_VERSION} \
+            libnvinfer-plugin${TRT_MAJOR_VERSION}=${TRT_VERSION} \
+            libnvinfer-plugin-dev=${TRT_VERSION}; \
     fi && \
-    apt-get install -y libnvinfer${TRT_MAJOR_VERSION}=${TRT_VERSION} \
-        libnvinfer-dev=${TRT_VERSION} \
-        libnvinfer-plugin${TRT_MAJOR_VERSION}=${TRT_VERSION} \
-        libnvinfer-plugin-dev=${TRT_VERSION} && \
+    apt-get install -y libcudnn8-dev && \
     rm -rf /var/lib/apt/lists/*
+ENV CUDNN_VERSION=8.0.5
diff --git a/ci/docker/docker-compose.yml b/ci/docker/docker-compose.yml
index c7a11c499fdf..6ebc6a0e025e 100644
--- a/ci/docker/docker-compose.yml
+++ b/ci/docker/docker-compose.yml
@@ -85,49 +85,29 @@ services:
       dockerfile: Dockerfile.build.ubuntu
       target: base
       args:
-        BASE_IMAGE: ubuntu:18.04
+        BASE_IMAGE: ubuntu:20.04
       cache_from:
         - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_cpu:latest
-  ubuntu_gpu_cu101:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
+  ubuntu_tensorrt_cu111:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu111:latest
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
       target: gpu
       args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
+        BASE_IMAGE: nvidia/cuda:11.1-devel-ubuntu18.04
       cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu101:latest
-  ubuntu_gpu_cu102:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu102:latest
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_tensorrt_cu111:latest
+  ubuntu_gpu_cu111:
+    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu111:latest
     build:
       context: .
       dockerfile: Dockerfile.build.ubuntu
       target: gpu
       args:
-        BASE_IMAGE: nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04
+        BASE_IMAGE: nvidia/cuda:11.1-devel-ubuntu20.04
       cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu102:latest
-  ubuntu_gpu_cu110:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu110:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu110:latest
-  ubuntu_build_cuda:
-    image: ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
-    build:
-      context: .
-      dockerfile: Dockerfile.build.ubuntu
-      target: gpu
-      args:
-        BASE_IMAGE: nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04
-      cache_from:
-        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_build_cuda:latest
+        - ${DOCKER_CACHE_REGISTRY}/build.ubuntu_gpu_cu111:latest
 ###################################################################################################
 # Dockerfile.build.android based images used for testing cross-compilation for plain ARM
 ###################################################################################################
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 88e77a77a73e..1af9d878cb0f 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -314,11 +314,10 @@ build_ubuntu_cpu() {
 build_ubuntu_cpu_openblas() {
     set -ex
     cd /work/build
-    CXXFLAGS="-Wno-error=strict-overflow" CC=gcc-7 CXX=g++-7 cmake \
+    CXXFLAGS="-Wno-error=strict-overflow" cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DENABLE_TESTCOVERAGE=ON \
         -DUSE_TVM_OP=ON \
-        -DUSE_CPP_PACKAGE=ON \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_MKLDNN=OFF \
         -DUSE_CUDA=OFF \
@@ -332,7 +331,7 @@ build_ubuntu_cpu_openblas() {
 build_ubuntu_cpu_mkl() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DENABLE_TESTCOVERAGE=OFF \
         -DUSE_MKLDNN=OFF \
@@ -348,7 +347,7 @@ build_ubuntu_cpu_mkl() {
 build_ubuntu_cpu_cmake_debug() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE=Debug \
         -DENABLE_TESTCOVERAGE=ON \
         -DUSE_CUDA=OFF \
@@ -365,7 +364,7 @@ build_ubuntu_cpu_cmake_debug() {
 build_ubuntu_cpu_cmake_no_tvm_op() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
        -DUSE_CUDA=OFF \
        -DUSE_TVM_OP=OFF \
        -DUSE_MKL_IF_AVAILABLE=OFF \
@@ -384,8 +383,6 @@ build_ubuntu_cpu_cmake_asan() {
     set -ex
     cd /work/build
-    export CXX=g++-8
-    export CC=gcc-8
     cmake \
         -DUSE_CUDA=OFF \
         -DUSE_MKL_IF_AVAILABLE=OFF \
@@ -396,8 +393,6 @@ build_ubuntu_cpu_cmake_asan() {
         -DUSE_GPERFTOOLS=OFF \
         -DUSE_JEMALLOC=OFF \
         -DUSE_ASAN=ON \
-        -DUSE_CPP_PACKAGE=ON \
-        -DMXNET_USE_CPU=ON \
         /work/mxnet
     make -j $(nproc) mxnet
 }
@@ -405,11 +400,9 @@ build_ubuntu_cpu_cmake_asan() {
 build_ubuntu_cpu_gcc8_werror() {
     set -ex
     cd /work/build
-    CXX=g++-8 CC=gcc-8 cmake \
+    CC=gcc-8 CXX=g++-8 cmake \
         -DUSE_CUDA=OFF \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-        -DUSE_CPP_PACKAGE=ON \
-        -DMXNET_USE_CPU=ON \
         -GNinja /work/mxnet
     ninja
 }
@@ -420,8 +413,6 @@ build_ubuntu_cpu_clang10_werror() {
     CXX=clang++-10 CC=clang-10 cmake \
         -DUSE_CUDA=OFF \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-        -DUSE_CPP_PACKAGE=ON \
-        -DMXNET_USE_CPU=ON \
         -GNinja /work/mxnet
     ninja
 }
@@ -438,9 +429,9 @@ build_ubuntu_gpu_clang10_werror() {
     CXX=clang++-10 CC=clang-10 cmake \
         -DUSE_CUDA=ON \
+        -DUSE_NVML=OFF \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
-        -DUSE_CPP_PACKAGE=OFF \
         -GNinja /work/mxnet
     ninja
 }
@@ -454,7 +445,6 @@ build_ubuntu_cpu_clang6() {
         -DUSE_CUDA=OFF \
         -DUSE_OPENMP=OFF \
         -DUSE_DIST_KVSTORE=ON \
-        -DUSE_CPP_PACKAGE=ON \
         -G Ninja /work/mxnet
     ninja
 }
@@ -468,7 +458,6 @@ build_ubuntu_cpu_clang100() {
         -DUSE_CUDA=OFF \
         -DUSE_OPENMP=ON \
         -DUSE_DIST_KVSTORE=ON \
-        -DUSE_CPP_PACKAGE=ON \
         -G Ninja /work/mxnet
     ninja
 }
@@ -484,7 +473,6 @@ build_ubuntu_cpu_clang_tidy() {
         -DUSE_OPENMP=OFF \
         -DCMAKE_BUILD_TYPE=Debug \
         -DUSE_DIST_KVSTORE=ON \
-        -DUSE_CPP_PACKAGE=ON \
         -DCMAKE_CXX_CLANG_TIDY=clang-tidy-10 \
         -G Ninja /work/mxnet
     ninja
@@ -497,7 +485,6 @@ build_ubuntu_cpu_clang6_mkldnn() {
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_MKLDNN=ON \
         -DUSE_CUDA=OFF \
-        -DUSE_CPP_PACKAGE=ON \
         -DUSE_OPENMP=OFF \
         -G Ninja /work/mxnet
     ninja
@@ -510,7 +497,6 @@ build_ubuntu_cpu_clang100_mkldnn() {
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_MKLDNN=ON \
         -DUSE_CUDA=OFF \
-        -DUSE_CPP_PACKAGE=ON \
         -G Ninja /work/mxnet
     ninja
 }
@@ -518,14 +504,13 @@ build_ubuntu_cpu_clang100_mkldnn() {
 build_ubuntu_cpu_mkldnn() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DENABLE_TESTCOVERAGE=ON \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_TVM_OP=ON \
         -DUSE_MKLDNN=ON \
         -DUSE_CUDA=OFF \
-        -DUSE_CPP_PACKAGE=ON \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
         -G Ninja /work/mxnet
     ninja
@@ -534,29 +519,24 @@ build_ubuntu_cpu_mkldnn() {
 build_ubuntu_cpu_mkldnn_mkl() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DENABLE_TESTCOVERAGE=OFF \
         -DUSE_MKLDNN=ON \
         -DUSE_CUDA=OFF \
         -DUSE_TVM_OP=ON \
         -DUSE_MKL_IF_AVAILABLE=ON \
+        -DMKL_USE_STATIC_LIBS=OFF \
         -DUSE_BLAS=MKL \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
         -GNinja /work/mxnet
     ninja
 }
 
-build_ubuntu_gpu() {
-    build_ubuntu_gpu_cuda101_cudnn7
-}
-
 build_ubuntu_gpu_tensorrt() {
     set -ex
-    export CC=gcc-7
-    export CXX=g++-7
     export ONNX_NAMESPACE=onnx
     # Build ONNX
@@ -597,6 +577,7 @@ build_ubuntu_gpu_tensorrt() {
         -DUSE_TENSORRT=1 \
         -DUSE_OPENMP=0 \
         -DUSE_MKLDNN=0 \
+        -DUSE_NVML=OFF \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -G Ninja \
@@ -608,12 +589,12 @@ build_ubuntu_gpu_tensorrt() {
 build_ubuntu_gpu_mkldnn() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_CUDA=ON \
+        -DUSE_NVML=OFF \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-        -DUSE_CPP_PACKAGE=ON \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
         -G Ninja /work/mxnet
     ninja
@@ -622,29 +603,29 @@ build_ubuntu_gpu_mkldnn() {
 build_ubuntu_gpu_mkldnn_nocudnn() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_CUDA=ON \
+        -DUSE_NVML=OFF \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DUSE_CUDNN=OFF \
-        -DUSE_CPP_PACKAGE=ON \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
         -G Ninja /work/mxnet
     ninja
 }
 
-build_ubuntu_gpu_cuda101_cudnn7() {
+build_ubuntu_gpu() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE="RelWithDebInfo" \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_CUDA=ON \
+        -DUSE_NVML=OFF \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DUSE_CUDNN=ON \
         -DUSE_MKLDNN=OFF \
-        -DUSE_CPP_PACKAGE=ON \
         -DUSE_DIST_KVSTORE=ON \
         -DBUILD_CYTHON_MODULES=ON \
         -DBUILD_EXTENSION_PATH=/work/mxnet/example/extensions/lib_external_ops \
@@ -652,47 +633,27 @@
     ninja
 }
 
-build_ubuntu_gpu_cuda101_cudnn7_debug() {
+build_ubuntu_gpu_debug() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DCMAKE_BUILD_TYPE=Debug \
         -DUSE_MKL_IF_AVAILABLE=OFF \
         -DUSE_CUDA=ON \
+        -DUSE_NVML=OFF \
         -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
         -DUSE_CUDNN=ON \
         -DUSE_MKLDNN=OFF \
-        -DUSE_CPP_PACKAGE=ON \
         -DUSE_DIST_KVSTORE=ON \
         -DBUILD_CYTHON_MODULES=ON \
         -G Ninja /work/mxnet
     ninja
 }
 
-build_ubuntu_gpu_cmake() {
-    set -ex
-    cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
-        -DUSE_SIGNAL_HANDLER=ON \
-        -DUSE_CUDA=ON \
-        -DUSE_CUDNN=ON \
-        -DUSE_MKL_IF_AVAILABLE=OFF \
-        -DUSE_MKLML_MKL=OFF \
-        -DUSE_MKLDNN=OFF \
-        -DUSE_DIST_KVSTORE=ON \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DMXNET_CUDA_ARCH="$CI_CMAKE_CUDA_ARCH" \
-        -DBUILD_CYTHON_MODULES=1 \
-        -G Ninja \
-        /work/mxnet
-
-    ninja
-}
-
 build_ubuntu_cpu_large_tensor() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DUSE_SIGNAL_HANDLER=ON \
         -DUSE_CUDA=OFF \
         -DUSE_CUDNN=OFF \
@@ -706,12 +667,12 @@ build_ubuntu_cpu_large_tensor() {
 build_ubuntu_gpu_large_tensor() {
     set -ex
     cd /work/build
-    CC=gcc-7 CXX=g++-7 cmake \
+    cmake \
         -DUSE_SIGNAL_HANDLER=ON \
         -DUSE_CUDA=ON \
         -DUSE_CUDNN=ON \
+        -DUSE_NVML=OFF \
         -DUSE_MKL_IF_AVAILABLE=OFF \
-        -DUSE_MKLML_MKL=OFF \
         -DUSE_MKLDNN=ON \
         -DUSE_DIST_KVSTORE=ON \
         -DCMAKE_BUILD_TYPE=Release \
diff --git a/ci/jenkins/Jenkins_steps.groovy b/ci/jenkins/Jenkins_steps.groovy
index fca8e44b25ff..1413756dc2c8 100644
--- a/ci/jenkins/Jenkins_steps.groovy
+++ b/ci/jenkins/Jenkins_steps.groovy
@@ -139,7 +139,7 @@ def compile_unix_int64_gpu(lib_name) {
       ws('workspace/build-gpu-int64') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.init_git()
-          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_large_tensor', false)
+          utils.docker_run('ubuntu_gpu_cu111', 'build_ubuntu_gpu_large_tensor', false)
           utils.pack_lib(lib_name, mx_cmake_lib)
         }
       }
@@ -195,7 +195,7 @@ def compile_unix_mkldnn_gpu(lib_name) {
       ws('workspace/build-mkldnn-gpu') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.init_git()
-          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn', false)
+          utils.docker_run('ubuntu_gpu_cu111', 'build_ubuntu_gpu_mkldnn', false)
           utils.pack_lib(lib_name, mx_mkldnn_lib)
         }
       }
@@ -209,7 +209,7 @@ def compile_unix_mkldnn_nocudnn_gpu(lib_name) {
      ws('workspace/build-mkldnn-gpu-nocudnn') {
        timeout(time: max_time, unit: 'MINUTES') {
          utils.init_git()
-          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
+          utils.docker_run('ubuntu_gpu_cu111', 'build_ubuntu_gpu_mkldnn_nocudnn', false)
          utils.pack_lib(lib_name, mx_mkldnn_lib)
        }
      }
@@ -218,12 +218,12 @@
 def compile_unix_full_gpu(lib_name) {
-  return ['GPU: CUDA10.1+cuDNN7': {
+  return ['GPU: CUDA+cuDNN': {
     node(NODE_LINUX_CPU) {
       ws('workspace/build-gpu') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.init_git()
-          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7', false)
+          utils.docker_run('ubuntu_gpu_cu111', 'build_ubuntu_gpu', false)
           utils.pack_lib(lib_name, mx_lib_cpp_examples)
         }
       }
@@ -232,12 +232,12 @@
 def compile_unix_full_gpu_debug(lib_name) {
-  return ['GPU: CUDA10.1+cuDNN7, debug': {
+  return ['GPU: CUDA+cuDNN, debug': {
     node(NODE_LINUX_CPU) {
       ws('workspace/build-gpu') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.init_git()
-          utils.docker_run('ubuntu_build_cuda', 'build_ubuntu_gpu_cuda101_cudnn7_debug', false)
+          utils.docker_run('ubuntu_gpu_cu111', 'build_ubuntu_gpu_debug', false)
           utils.pack_lib(lib_name, mx_lib_cpp_examples)
         }
       }
@@ -245,27 +245,13 @@ def compile_unix_full_gpu_debug(lib_name) {
   }]
 }
 
-def compile_unix_cmake_gpu(lib_name) {
-  return ['GPU: CMake': {
-    node(NODE_LINUX_CPU) {
-      ws('workspace/build-cmake-gpu') {
-        timeout(time: max_time, unit: 'MINUTES') {
-          utils.init_git()
-          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_cmake', false)
-          utils.pack_lib(lib_name, mx_cmake_lib_cython)
-        }
-      }
-    }
-  }]
-}
-
 def compile_unix_tensorrt_gpu(lib_name) {
   return ['TensorRT': {
     node(NODE_LINUX_CPU) {
       ws('workspace/build-tensorrt') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.init_git()
-          utils.docker_run('ubuntu_gpu_cu102', 'build_ubuntu_gpu_tensorrt', false)
+          utils.docker_run('ubuntu_tensorrt_cu111', 'build_ubuntu_gpu_tensorrt', false)
           utils.pack_lib(lib_name, mx_tensorrt_lib)
         }
       }
@@ -510,7 +496,7 @@ def compile_unix_clang10_cuda_werror(lib_name) {
       ws('workspace/build-cpu-clang10') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.init_git()
-          utils.docker_run('ubuntu_gpu_cu101', 'build_ubuntu_gpu_clang10_werror', false)
+          utils.docker_run('ubuntu_gpu_cu111', 'build_ubuntu_gpu_clang10_werror', false)
           utils.pack_lib(lib_name, mx_lib)
         }
       }
@@ -696,7 +682,7 @@ def test_unix_python3_gpu(lib_name) {
       ws('workspace/ut-python3-gpu') {
         try {
           utils.unpack_and_init(lib_name, mx_lib_cython)
-          python3_gpu_ut_cython('ubuntu_gpu_cu101')
+          python3_gpu_ut_cython('ubuntu_gpu_cu111')
           utils.publish_test_coverage()
         } finally {
           utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_gpu.xml')
@@ -778,7 +764,7 @@ def test_unix_python3_mkldnn_gpu(lib_name) {
       ws('workspace/ut-python3-mkldnn-gpu') {
         try {
           utils.unpack_and_init(lib_name, mx_mkldnn_lib)
-          python3_gpu_ut('ubuntu_gpu_cu101')
+          python3_gpu_ut('ubuntu_gpu_cu111')
           utils.publish_test_coverage()
         } finally {
           utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu.xml')
@@ -794,7 +780,7 @@ def test_unix_python3_mkldnn_nocudnn_gpu(lib_name) {
       ws('workspace/ut-python3-mkldnn-gpu-nocudnn') {
         try {
           utils.unpack_and_init(lib_name, mx_mkldnn_lib)
-          python3_gpu_ut_nocudnn('ubuntu_gpu_cu101')
+          python3_gpu_ut_nocudnn('ubuntu_gpu_cu111')
           utils.publish_test_coverage()
         } finally {
           utils.collect_test_results_unix('tests_gpu.xml', 'tests_python3_mkldnn_gpu_nocudnn.xml')
@@ -838,7 +824,7 @@ def test_unix_byteps_gpu(lib_name) {
       ws('workspace/it-byteps') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.unpack_and_init(lib_name, mx_lib)
-          utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_byteps', true, '32768m')
+          utils.docker_run('ubuntu_gpu_cu111', 'integrationtest_ubuntu_gpu_byteps', true, '32768m')
           utils.publish_test_coverage()
         }
       }
@@ -852,7 +838,7 @@ def test_unix_distributed_kvstore_gpu(lib_name) {
       ws('workspace/it-dist-kvstore') {
         timeout(time: max_time, unit: 'MINUTES') {
           utils.unpack_and_init(lib_name, mx_lib)
-          utils.docker_run('ubuntu_gpu_cu101', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
+          utils.docker_run('ubuntu_gpu_cu111', 'integrationtest_ubuntu_gpu_dist_kvstore', true)
           utils.publish_test_coverage()
         }
       }
diff --git a/ci/jenkins/Jenkinsfile_unix_gpu b/ci/jenkins/Jenkinsfile_unix_gpu
index 6fbdc751ea90..63bd3d29b641 100644
--- a/ci/jenkins/Jenkinsfile_unix_gpu
+++ b/ci/jenkins/Jenkinsfile_unix_gpu
@@ -38,7 +38,6 @@ core_logic: {
     custom_steps.compile_unix_mkldnn_nocudnn_gpu('mkldnn_gpu_nocudnn'),
     custom_steps.compile_unix_full_gpu('gpu'),
     custom_steps.compile_unix_full_gpu_debug('gpu_debug'),
-    custom_steps.compile_unix_cmake_gpu('cmake_gpu'),
     custom_steps.compile_unix_tensorrt_gpu('tensorrt'),
     custom_steps.compile_unix_int64_gpu('gpu_int64'),
   ])
diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index daa33b856dbc..2be3448d3279 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -281,7 +281,11 @@ def _load_lib():
         # pylint: disable=E1123
         lib = ctypes.CDLL(lib_path[0], winmode=0x00000008)
     else:
-        lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_LOCAL)
+        # We use RTLD_GLOBAL as, when dynamically linking with MKL,
+        # libmkl_core.so may load libmkl_avx512.so via dlopen. When opening
+        # libmxnet and its dependencies (libmkl_core.so) via RTLD_LOCAL, MKL's
+        # dlopen calls will fail with undefined symbol errors.
+        lib = ctypes.CDLL(lib_path[0], ctypes.RTLD_GLOBAL)
     # DMatrix functions
     lib.MXGetLastError.restype = ctypes.c_char_p
     return lib
diff --git a/python/mxnet/contrib/text/utils.py b/python/mxnet/contrib/text/utils.py
index 79cf903edbee..46e93daecb01 100644
--- a/python/mxnet/contrib/text/utils.py
+++ b/python/mxnet/contrib/text/utils.py
@@ -77,7 +77,7 @@ def count_tokens_from_str(source_str, token_delim=' ', seq_delim='\n',
         source_str = [t.lower() for t in source_str]
 
     if counter_to_update is None:
-        return collections.Counter(source_str)
+        return collections.Counter(source_str)  # pylint: disable=too-many-function-args
     else:
         counter_to_update.update(source_str)
         return counter_to_update
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 6f74b49d01a2..e260a41849ad 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -260,8 +260,8 @@ def _wrap_mxnp_np_ufunc(x1, x2):
         return func(x1, x2)
     return _wrap_mxnp_np_ufunc
 
-@set_module('mxnet.numpy')  # pylint: disable=invalid-name
-class ndarray(NDArray):
+@set_module('mxnet.numpy')
+class ndarray(NDArray):  # pylint: disable=invalid-name
     """
     ndarray(handle, writable=True):
diff --git a/src/operator/numpy/np_init_op.cc b/src/operator/numpy/np_init_op.cc
index e30e977c34bb..6e7aca221e9e 100644
--- a/src/operator/numpy/np_init_op.cc
+++ b/src/operator/numpy/np_init_op.cc
@@ -47,11 +47,13 @@ inline bool NumpyIndicesShape(const nnvm::NodeAttrs& attrs,
       << "_npi_indices dimensions the number of dim must not be less than 0";
   mxnet::TShape param_dim = param.dimensions;
   if (!shape_is_known(param_dim)) return false;
+  CHECK_LT(param_dim.Size(), INT32_MAX) << "ValueError: np.indices does not support large"
+                                        << " input tensors (containing >= 2^31 elements).";
   const int indim = param.dimensions.ndim();
   mxnet::TShape ret(indim + 1, -1);
   ret[0] = indim;
   for (int i = 1; i < indim + 1; ++i) {
-    ret[i] = param.dimensions[i-1];
+    ret[i] = param_dim[i-1];
   }
   SHAPE_ASSIGN_CHECK(*out_shapes, 0, ret);
   return shape_is_known(out_shapes->at(0));
diff --git a/src/operator/numpy/np_repeat_op-inl.h b/src/operator/numpy/np_repeat_op-inl.h
index 638f1dee921a..aa51d080e57e 100644
--- a/src/operator/numpy/np_repeat_op-inl.h
+++ b/src/operator/numpy/np_repeat_op-inl.h
@@ -110,10 +110,14 @@ inline bool RepeatsOpShape(const nnvm::NodeAttrs& attrs,
         shape[i] = ishape[i];
       }
     }
+    CHECK_LT(shape.Size(), INT32_MAX) << "ValueError: np.repeat does not support large"
+                                      << " input tensors (containing >= 2^31 elements).";
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape);
   } else {  // If axis is not input by user, return a flat 1D array of size = repeats
     repeats = param.repeats.value().ndim() == 1 ?
               ishape.Size() * repeats : repeats;
     mxnet::TShape shape(1, repeats);
+    CHECK_LT(shape.Size(), INT32_MAX) << "ValueError: np.repeat does not support large"
+                                      << " input tensors (containing >= 2^31 elements).";
     SHAPE_ASSIGN_CHECK(*out_attrs, 0, shape);
   }
   return shape_is_known(out_attrs->at(0));
diff --git a/src/operator/numpy/np_unique_op.cc b/src/operator/numpy/np_unique_op.cc
index 39a84bad3779..7076b44ac3af 100644
--- a/src/operator/numpy/np_unique_op.cc
+++ b/src/operator/numpy/np_unique_op.cc
@@ -348,6 +348,8 @@ void NumpyUniqueCPUForward(const nnvm::NodeAttrs& attrs,
       const_cast<NDArray &>(outputs[output_flag]).Init(shape_0);
     }
   } else {
+    CHECK_LT(inputs[0].shape().Size(), INT32_MAX) << "ValueError: np.unique does not support large"
+                                                  << " input tensors (containing >= 2^31 elements).";
     if (!param.axis.has_value()) {
       NumpyUniqueCPUNoneAxisImpl(param, ctx, inputs, req, outputs);
     } else {
diff --git a/src/profiler/profiler.h b/src/profiler/profiler.h
index 132a9f90ec68..6dc3cf46c895 100644
--- a/src/profiler/profiler.h
+++ b/src/profiler/profiler.h
@@ -54,7 +54,12 @@ struct static_string {
   inline explicit static_string(const char *s) { set(s); }
   inline const char *c_str() const { return &string_[0]; }
   inline void set(const char *s) {
+#pragma GCC diagnostic push
+#if __GNUC__ >= 8
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+#endif
     strncpy(&string_[0], s, string_size - 1);
+#pragma GCC diagnostic pop
     string_[string_size - 1] = '\0';
   }
   inline void append(const char *s) {
diff --git a/tests/nightly/test_np_large_array.py b/tests/nightly/test_np_large_array.py
index a1a34d878c84..dd4a94c1a1cb 100644
--- a/tests/nightly/test_np_large_array.py
+++ b/tests/nightly/test_np_large_array.py
@@ -2132,6 +2132,23 @@ def test_dsplit():
 
 
 @use_np
+def test_unique():
+    inp = np.zeros((2, HALF_INT_OVERFLOW))
+    assertRaises(ValueError, np.unique, inp, axis=1)
+
+
+@use_np
+def test_repeat():
+    inp = np.ones((2, HALF_INT_OVERFLOW))
+    assertRaises(ValueError, np.repeat, inp, repeats=2, axis=1)
+
+
+@use_np
+def test_indices():
+    assertRaises(ValueError, np.indices, (2, HALF_INT_OVERFLOW))
+
+
+@use_np
 def test_tril_indices():
     N = 2**16
     data = np.tril_indices(N, -1)
@@ -2328,4 +2345,3 @@ def test_insert():
     assert out[0, 1] == 1 and out[-1, 1] == 2
     assert out2[1] == 5 and out2[2] == 6
     assertRaises(MXNetError, np.insert, arr=inp3, obj=np.array([2, 2], dtype=np.int64), values=np.array([5, 6]))
-
diff --git a/tests/python/unittest/onnx/test_onnxruntime.py b/tests/python/unittest/onnx/test_onnxruntime.py
index dfd114a9ff23..3737b7ad0a79 100644
--- a/tests/python/unittest/onnx/test_onnxruntime.py
+++ b/tests/python/unittest/onnx/test_onnxruntime.py
@@ -22,12 +22,11 @@
 import json
 import os
 import shutil
-import tempfile
 
 import pytest
 
 
-def run_cv_model_test(model):
+def run_cv_model_test(model, tmpdir):
     def get_gluon_cv_model(model_name, tmp):
         tmpfile = os.path.join(tmp, model_name)
         ctx = mx.cpu(0)
@@ -66,11 +65,13 @@ def softmax(x):
         e_x = np.exp(x - np.max(x))
         return e_x / e_x.sum(axis=0)
 
-    def load_imgnet_labels():
-        mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/onnx/image_net_labels.json')
-        return np.array(json.load(open('image_net_labels.json', 'r')))
+    def load_imgnet_labels(tmpdir):
+        tmpfile = os.path.join(tmpdir, 'image_net_labels.json')
+        mx.test_utils.download('https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/onnx/image_net_labels.json',
+                               fname=tmpfile)
+        return np.array(json.load(open(tmpfile, 'r')))
 
-    def download_test_images():
+    def download_test_images(tmpdir):
         test_images = [
             ['dog.jpg',['boxer']],
             ['apron.jpg', ['apron', 'maillot']],
@@ -80,13 +81,11 @@ def download_test_images():
         ]
         for f,_ in test_images:
             mx.test_utils.download('https://github.com/dmlc/web-data/blob/master/mxnet/doc/tutorials/onnx/images/'+f+'?raw=true',
-                                   fname=f)
+                                   fname=os.path.join(tmpdir, f))
         return test_images
 
-    labels = load_imgnet_labels()
-    test_images = download_test_images()
-
-    tmpdir = tempfile.mkdtemp()
+    labels = load_imgnet_labels(tmpdir)
+    test_images = download_test_images(tmpdir)
     sym_file, params_file = get_gluon_cv_model(model, tmpdir)
     onnx_file = export_model_to_onnx(sym_file, params_file)
     #print("exported onnx file: ",onnx_file)
@@ -98,7 +97,7 @@ def download_test_images():
     input_name = session.get_inputs()[0].name
 
     for img,classes in test_images:
-        img_data = normalize_image(img)
+        img_data = normalize_image(os.path.join(tmpdir, img))
         raw_result = session.run([], {input_name: img_data})
         res = softmax(np.array(raw_result)).tolist()
         class_idx = np.argmax(res)
@@ -116,67 +115,51 @@ def download_test_images():
     shutil.rmtree(tmpdir)
 
 @pytest.mark.skip(reason="Older gluon models are not supported, tracked with #19580")
-def test_cv_model_inference_onnxruntime_mobilenet0_5():
-    run_cv_model_test('mobilenet0.5')
+def test_cv_model_inference_onnxruntime_mobilenet0_5(tmp_path):
+    run_cv_model_test('mobilenet0.5', tmp_path)
 
-@pytest.mark.flaky
-def test_cv_model_inference_onnxruntime_mobilenetv2_1_0():
-    run_cv_model_test('mobilenetv2_1.0')
+def test_cv_model_inference_onnxruntime_mobilenetv2_1_0(tmp_path):
+    run_cv_model_test('mobilenetv2_1.0', tmp_path)
 
-def test_cv_model_inference_onnxruntime_resnet18_v1():
-    run_cv_model_test('resnet18_v1')
+def test_cv_model_inference_onnxruntime_resnet18_v1(tmp_path):
+    run_cv_model_test('resnet18_v1', tmp_path)
 
-def test_cv_model_inference_onnxruntime_resnet18_v2():
-    run_cv_model_test('resnet18_v2')
+def test_cv_model_inference_onnxruntime_resnet18_v2(tmp_path):
+    run_cv_model_test('resnet18_v2', tmp_path)
 
-def test_cv_model_inference_onnxruntime_resnet101_v1():
-    run_cv_model_test('resnet101_v1')
+def test_cv_model_inference_onnxruntime_resnet101_v1(tmp_path):
+    run_cv_model_test('resnet101_v1', tmp_path)
 
-def test_cv_model_inference_onnxruntime_resnet101_v2():
-    run_cv_model_test('resnet101_v2')
+def test_cv_model_inference_onnxruntime_resnet101_v2(tmp_path):
+    run_cv_model_test('resnet101_v2', tmp_path)
 
-def test_cv_model_inference_onnxruntime_resnet152_v1():
-    run_cv_model_test('resnet152_v1')
+def test_cv_model_inference_onnxruntime_resnet152_v1(tmp_path):
+    run_cv_model_test('resnet152_v1', tmp_path)
 
-def test_cv_model_inference_onnxruntime_resnet152_v2():
-    run_cv_model_test('resnet152_v2')
+def test_cv_model_inference_onnxruntime_resnet152_v2(tmp_path):
+    run_cv_model_test('resnet152_v2', tmp_path)
 
 @pytest.mark.skip(reason="Older gluon models are not supported, tracked with #19580")
-def test_cv_model_inference_onnxruntime_squeezenet1_0():
-    run_cv_model_test('squeezenet1.0')
+def test_cv_model_inference_onnxruntime_squeezenet1_0(tmp_path):
+    run_cv_model_test('squeezenet1.0', tmp_path)
 
 @pytest.mark.skip(reason="Older gluon models are not supported, tracked with #19580")
-def test_cv_model_inference_onnxruntime_squeezenet1_1():
-    run_cv_model_test('squeezenet1.1')
+def test_cv_model_inference_onnxruntime_squeezenet1_1(tmp_path):
+    run_cv_model_test('squeezenet1.1', tmp_path)
 
 @pytest.mark.skip(reason="Older gluon models are not supported, tracked with #19580")
-def test_cv_model_inference_onnxruntime_vgg11():
-    run_cv_model_test('vgg11')
+def test_cv_model_inference_onnxruntime_vgg11(tmp_path):
+    run_cv_model_test('vgg11', tmp_path)
 
 @pytest.mark.skip(reason="Older gluon models are not supported, tracked with #19580")
-def test_cv_model_inference_onnxruntime_vgg11_bn():
-    run_cv_model_test('vgg11_bn')
-
-def test_cv_model_inference_onnxruntime_vgg19():
-    run_cv_model_test('vgg19')
-
-def test_cv_model_inference_onnxruntime_vgg19_bn():
-    run_cv_model_test('vgg19_bn')
-
-if __name__ == "__main__":
-    test_cv_model_inference_onnxruntime_mobilenet0_5()
-    test_cv_model_inference_onnxruntime_mobilenetv2_1_0()
-    test_cv_model_inference_onnxruntime_resnet18_v1()
-    test_cv_model_inference_onnxruntime_resnet18_v2()
-    test_cv_model_inference_onnxruntime_resnet101_v1()
-    test_cv_model_inference_onnxruntime_resnet101_v2()
-    test_cv_model_inference_onnxruntime_resnet152_v1()
-    test_cv_model_inference_onnxruntime_resnet152_v2()
-    test_cv_model_inference_onnxruntime_squeezenet1_0()
-    test_cv_model_inference_onnxruntime_squeezenet1_1()
-    test_cv_model_inference_onnxruntime_vgg11()
-    test_cv_model_inference_onnxruntime_vgg11_bn()
-    test_cv_model_inference_onnxruntime_vgg19()
-    test_cv_model_inference_onnxruntime_vgg19_bn()
+def test_cv_model_inference_onnxruntime_vgg11_bn(tmp_path):
+    run_cv_model_test('vgg11_bn', tmp_path)
+
+def test_cv_model_inference_onnxruntime_vgg19(tmp_path):
+    run_cv_model_test('vgg19', tmp_path)
+
+def test_cv_model_inference_onnxruntime_vgg19_bn(tmp_path):
+    run_cv_model_test('vgg19_bn', tmp_path)
+
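Note on the `python/mxnet/base.py` hunk above: with `RTLD_LOCAL`, the symbols of `libmxnet.so` and of the libraries loaded with it (such as `libmkl_core.so`) stay private to that handle, so when `libmkl_core.so` later `dlopen`s `libmkl_avx512.so`, the plugin cannot bind its unresolved references and fails with undefined-symbol errors; `RTLD_GLOBAL` promotes those symbols into the process-global namespace. A minimal sketch of the two modes, using `libm` as an illustrative stand-in (this is not MXNet's actual loader code):

```python
import ctypes
import ctypes.util

# Resolve a real shared library to load; fall back to a common soname.
libm_path = ctypes.util.find_library("m") or "libm.so.6"

# RTLD_LOCAL (old behaviour): symbols stay private to this handle, so a
# nested dlopen() performed *by the loaded library itself* cannot resolve
# symbols against it.
local_handle = ctypes.CDLL(libm_path, mode=ctypes.RTLD_LOCAL)

# RTLD_GLOBAL (new behaviour): symbols are promoted to the global namespace,
# where nested dlopen() calls, like libmkl_core.so loading libmkl_avx512.so,
# can bind against them.
global_handle = ctypes.CDLL(libm_path, mode=ctypes.RTLD_GLOBAL)

# Either handle is directly usable from Python; the flag only affects
# symbol visibility for libraries loaded afterwards.
local_handle.cos.restype = ctypes.c_double
local_handle.cos.argtypes = [ctypes.c_double]
print(local_handle.cos(0.0))  # 1.0
```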
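Note on the new `CHECK_LT(..., INT32_MAX)` guards in `np_init_op.cc`, `np_repeat_op-inl.h`, and `np_unique_op.cc`: their messages are prefixed with `ValueError:`, which the Python frontend surfaces as a Python `ValueError`; that is exactly what the new nightly tests assert with `assertRaises(ValueError, ...)`. A short user-level sketch of the guarded behaviour, assuming a default build without 64-bit tensor indexing (`2**30` mirrors `HALF_INT_OVERFLOW` from the test suite):

```python
from mxnet import np, npx

npx.set_np()  # same effect as the @use_np decorator used in the tests

try:
    # np.indices((2, 2**30)) would describe a result holding >= 2^31
    # elements, so shape inference now fails fast instead of letting
    # 32-bit index arithmetic silently overflow.
    np.indices((2, 2**30))
except ValueError as e:
    print(e)  # "...np.indices does not support large input tensors..."
```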