Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
MKL2017 implemented layers integration to improve IA perf. on mxnet (#…
Browse files Browse the repository at this point in the history
…3581)

* Add MKL2017 memory struct support and mklconv into mxnet

* fix cuda compile problem

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* remove mkldnnchunk from memory and add all necessary file for compile

* add mkl pooling

* add mkl relu support

* add mkl lrn

* add mkl batch norm

* add mkl concat

* add mkl elementwise sum

* add mkl fc

* disable USE_MKL2017 by default

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* revert mkl for sgemm

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* add fix mkl path script & README

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* use prepare_mkl.sh to download mkl

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* Update for download failed for MKL

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* add step to avoid mkl lib missing when excute python script

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* update python install to sudo

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* rollback padding change in pooling-inh.h

Since mxnet update the new inception-bn model
do not need to add padding patch to use old model

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* correct pooling formula

* minor change for pooling-inl.h

* support new pooling_convention for converter

* add MKL support in dropout

* fix concat unit test issue since MKL concat doesn't support concatenation across the minibatch dimension

* disable external mkl if USE_BLAS is mkl

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* fix compile error when mkl is disable

Signed-off-by: Lingyan <lingyan.guo@intel.com>

* remove debug msg and fix lint

* remove MKL_DEBUG by default
  • Loading branch information
zhenlinluo authored and piiswrong committed Oct 21, 2016
1 parent ae3b210 commit 8e1e7f0
Show file tree
Hide file tree
Showing 33 changed files with 4,301 additions and 25 deletions.
28 changes: 28 additions & 0 deletions MKL_README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# MKL2017 PLUGIN

MKL2017 is an Intel-released library that accelerates Deep Neural Network (DNN) applications on Intel architecture.
This README shows users how to set up and install the MKL2017 library with mxnet.


## Build/Install Instructions:
```
Download MKL:
```

## Build/Install MxNet
1. Enable USE_MKL2017=1 in make/config.mk
1.1 USE_BLAS should be atlas by default
1.2 if need USE_BLAS to be mkl, please Navigate here - https://registrationcenter.intel.com/en/forms/?productid=2558&licensetype=2 to do a full MKL installation
2. Run 'make -jX'
2.1 The Makefile will execute "prepare_mkl.sh" to download MKL under the root folder, e.g. <MXNET ROOTDIR>/mklml_lnx_2017.0.0.20160801
2.2 if download failed because of proxy setting, please do it manually before make
2.2.1 wget https://github.com/intel/caffe/releases/download/self_containted_MKLGOLD/mklml_lnx_2017.0.0.20160801.tgz
2.2.2 tar zxvf mklml_lnx_2017.0.0.20160801.tgz

3. Navigate into the python directory
4. Run 'sudo python setup.py install'
5. Before executing a Python script, set LD_LIBRARY_PATH:
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:<MXNET ROOTDIR>/mklml_lnx_2017.0.0.20160801/lib
```
20 changes: 19 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,24 @@ ifeq ($(USE_OPENMP), 1)
CFLAGS += -fopenmp
endif

# Build against the Intel MKL2017 DNN primitives when enabled in config.mk.
ifeq ($(USE_MKL2017), 1)
CFLAGS += -DMXNET_USE_MKL2017=1
CFLAGS += -DUSE_MKL=1
# If BLAS is already a full MKL install, its headers/libs are reused; the
# bundled MKLML package is only fetched for non-mkl BLAS builds.
ifneq ($(USE_BLAS), mkl)
# ICC_ON=0 makes prepare_mkl.sh select the GCC-flavored libmklml_gnu.so.
ICC_ON=0
# prepare_mkl.sh echoes "MKLROOT LIBRARIES OMP" on one line; split it here.
RETURN_STRING=$(shell ./prepare_mkl.sh $(ICC_ON))
MKLROOT=$(firstword $(RETURN_STRING))
MKL_LDFLAGS=-l$(word 2, $(RETURN_STRING))
MKL_EXTERNAL=$(lastword $(RETURN_STRING))
# MKL_EXTERNAL=1 means the downloaded MKLML package (not a system MKL) is in
# use: embed an rpath to it and add its include/lib directories.
ifeq ($(MKL_EXTERNAL), 1)
MKL_LDFLAGS+=-Wl,-rpath,$(MKLROOT)/lib
CFLAGS += -I$(MKLROOT)/include
# NOTE(review): both -lmklml_gnu and -lmklml_intel are linked here although
# MKL_LDFLAGS selects only one of them above — confirm this is intentional.
LDFLAGS += -L$(MKLROOT)/lib/ -liomp5 -lmklml_gnu -lmklml_intel
endif
endif
endif


ifeq ($(USE_CUDNN), 1)
CFLAGS += -DMSHADOW_USE_CUDNN=1
LDFLAGS += -lcudnn
Expand Down Expand Up @@ -93,7 +111,7 @@ endif

all: lib/libmxnet.a lib/libmxnet.so $(BIN)

SRC = $(wildcard src/*.cc src/*/*.cc)
SRC = $(wildcard src/*.cc src/*/*.cc src/*/*/*.cc)
OBJ = $(patsubst %.cc, build/%.o, $(SRC))
CUSRC = $(wildcard src/*/*.cu)
CUOBJ = $(patsubst %.cu, build/%_gpu.o, $(CUSRC))
Expand Down
55 changes: 55 additions & 0 deletions include/mxnet/mkl_memory.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*******************************************************************************
* Copyright 2016 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* \file mkl_memory.cc
* \brief
* \author lingyan.guo@intel.com
* zhenlin.luo@intel.com
*
*******************************************************************************/
#ifndef MXNET_MKL_MEMORY_H_
#define MXNET_MKL_MEMORY_H_

#include <string>
#include <vector>
#include <memory>


namespace mxnet {
// Abstract base describing an engine-private (e.g. MKL internal layout)
// memory buffer and the conversions between it and plain CPU memory.
// Concrete descriptors are provided per accelerator engine (see
// PrvDescrType below).
struct PrvMemDescr {
  // Virtual destructor: descriptors are owned and destroyed through
  // std::shared_ptr<PrvMemDescr> base pointers (see convert_from_other),
  // so destruction must dispatch to the derived class.
  virtual ~PrvMemDescr() = default;
  // Copy the private-layout buffer back into caller-owned CPU memory.
  virtual void convert_from_prv(void* cpu_ptr) = 0;
  // Fill the private-layout buffer from caller-owned CPU memory.
  virtual void convert_to_prv(void* cpu_ptr) = 0;
  // Convert directly from another descriptor's private buffer.
  virtual void convert_from_other(std::shared_ptr<PrvMemDescr> other) = 0;
  // Raw pointer to the private-layout buffer.
  virtual void* prv_ptr() = 0;
  // returns true for matching layouts
  virtual bool layout_compare(std::shared_ptr<PrvMemDescr> other) = 0;
  // Number of elements held in the private buffer.
  virtual size_t prv_count() = 0;
  // Total size of the private buffer (presumably bytes — confirm against
  // the concrete implementations).
  virtual size_t prv_size() = 0;
  // This might help using prv_ptr_ by different accelerators/engines
  enum PrvDescrType {
    PRV_DESCR_MKL2017,
    PRV_DESCR_MKLDNN
  };
  virtual PrvDescrType get_descr_type() = 0;
};

// Abstract holder through which a tensor exposes its engine-private
// memory descriptor.
struct MKLMemHolder {
 public:
  // Virtual destructor: holders are used polymorphically, so deletion
  // through a base pointer must invoke the derived destructor.
  virtual ~MKLMemHolder() = default;
  // Returns the descriptor of the private (engine-layout) buffer.
  virtual std::shared_ptr<PrvMemDescr> get_prv_descriptor() = 0;
};

} // namespace mxnet
#endif // MXNET_MKL_MEMORY_H_
3 changes: 3 additions & 0 deletions make/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ USE_OPENCV = 1
# use openmp for parallelization
USE_OPENMP = 1

# whether use MKL2017 library
USE_MKL2017 = 0

# choose the version of blas you want to use
# can be: mkl, blas, atlas, openblas
# in default use atlas for linux while apple for osx
Expand Down
98 changes: 98 additions & 0 deletions prepare_mkl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/bin/bash
# set -ex
#
# All modification made by Intel Corporation: © 2016 Intel Corporation
#
# All contributions by the University of California:
# Copyright (c) 2014, 2015, The Regents of the University of California (Regents)
# All rights reserved.
#
# All other contributions:
# Copyright (c) 2014, 2015, the respective contributors
# All rights reserved.
# For the list of contributors go to https://github.com/BVLC/caffe/blob/master/CONTRIBUTORS.md
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of Intel Corporation nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
FindLibrary()
{
# Resolve LOCALMKL to the MKLML shared library found under $DST.
# $1: "intel" or "1" selects the ICC-flavored build; anything else the GCC one.
  local wanted
  case "$1" in
    intel|1) wanted=libmklml_intel.so ;;
    *)       wanted=libmklml_gnu.so ;;
  esac
  LOCALMKL=`find $DST -name $wanted` # name of MKL SDL lib
}

GetVersionName()
{
# Echo the __INTEL_MKL_BUILD_DATE from $1/include/mkl_version.h, or 0 when
# $1 is empty, the header is missing, or no build date line is found.
  VERSION_LINE=0
  # Quote "$1": the unquoted form mis-parses empty args and paths with spaces.
  if [ -n "$1" ]; then
    VERSION_LINE=`grep __INTEL_MKL_BUILD_DATE "$1/include/mkl_version.h" 2>/dev/null | sed -e 's/.* //'`
  fi
  # Quote the expansion: an empty or multi-token value would otherwise make
  # this test malformed ("unary operator expected").
  if [ -z "$VERSION_LINE" ]; then
    VERSION_LINE=0
  fi
  echo $VERSION_LINE # Return Version Line
}

# MKL
# Locate (or download) an MKLML/MKL installation and report, on the final
# line, "MKLROOT LIBRARIES OMP" for the calling Makefile/cmake to parse.
DST=`dirname $0`
OMP=0
VERSION_MATCH=20160706
ARCHIVE_BASENAME=mklml_lnx_2017.0.0.20160801.tgz
# Strip the trailing ".tgz" to get the directory name inside the archive.
MKL_CONTENT_DIR=`echo $ARCHIVE_BASENAME | rev | cut -d "." -f 2- | rev`
GITHUB_RELEASE_TAG=self_containted_MKLGOLD
MKLURL="https://github.com/intel/caffe/releases/download/$GITHUB_RELEASE_TAG/$ARCHIVE_BASENAME"
# there are different MKL libs to be used for GCC and for ICC
# NOTE(review): 'reg' is never used below — dead variable; confirm before removing.
reg='^[0-9]+$'
VERSION_LINE=`GetVersionName $MKLROOT`
# If MKLROOT is unset, or the install it points at predates VERSION_MATCH,
# fall back to the copy bundled next to this script.
if [ -z $MKLROOT ] || [ $VERSION_LINE -lt $VERSION_MATCH ]; then
  # ..check whether a sufficiently new MKL was already downloaded here
  VERSION_LINE=`GetVersionName $DST/$MKL_CONTENT_DIR`
  if [ $VERSION_LINE -lt $VERSION_MATCH ] ; then
    # ...if not, download the archive and unpack it in place
    wget --no-check-certificate -P $DST $MKLURL -O $DST/$ARCHIVE_BASENAME
    tar -xzf $DST/$ARCHIVE_BASENAME -C $DST
  fi
  FindLibrary $1
  # Derive MKLROOT from the located library path (everything before "lib...").
  MKLROOT=$PWD/`echo $LOCALMKL | sed -e 's/lib.*$//'`
fi

# Check what MKL lib we have in MKLROOT: a full MKL install ships the
# single-dynamic-library libmkl_rt; otherwise report the MKLML lib name and
# OMP=1 (the Makefile reads this last word as MKL_EXTERNAL and then adds
# rpath/include/lib flags for the bundled package).
if [ -z `find $MKLROOT -name libmkl_rt.so -print -quit` ]; then
  LIBRARIES=`basename $LOCALMKL | sed -e 's/^.*lib//' | sed -e 's/\.so.*$//'`
  OMP=1
else
  LIBRARIES="mkl_rt"
fi


# return value to calling script (Makefile,cmake)
echo $MKLROOT $LIBRARIES $OMP
18 changes: 18 additions & 0 deletions src/operator/activation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,30 @@
*/
#include "./activation-inl.h"
#include "./mshadow_op.h"
#if MXNET_USE_MKL2017 == 1
#include <mxnet/mkl_memory.h>
#include "./mkl/mkl_memory-inl.h"
#include "./mkl/mkl_relu-inl.h"
#endif // MXNET_USE_MKL2017

namespace mxnet {
namespace op {
template<>
Operator *CreateOp<cpu>(ActivationParam param, int dtype) {
Operator *op = NULL;
#if MXNET_USE_MKL2017 == 1
if (param.act_type == activation::kReLU) {
switch (dtype) {
case mshadow::kFloat32:
return new MKLReluOp<cpu, float>();
case mshadow::kFloat64:
return new MKLReluOp<cpu, double>();
default:
break;
}
}

#endif
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
switch (param.act_type) {
case activation::kReLU:
Expand Down
10 changes: 8 additions & 2 deletions src/operator/batch_norm-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ class BatchNormOp : public Operator {
}; // class BatchNormOp

template<typename xpu>
Operator *CreateOp(BatchNormParam param);
Operator *CreateOp(BatchNormParam param, int dtype);


#if DMLC_USE_CXX11
Expand Down Expand Up @@ -296,7 +296,13 @@ class BatchNormProp : public OperatorProperty {
return {"moving_mean", "moving_var"};
}

Operator* CreateOperator(Context ctx) const override;
// Shape/type-agnostic creation is unsupported for BatchNorm: picking the
// implementation requires the input dtype, so creation goes through
// CreateOperatorEx instead. Reaching this overload is a programming error.
Operator* CreateOperator(Context ctx) const override {
LOG(FATAL) << "Not Implemented.";
return NULL;
}

// Creates the operator once input shapes and types are known; the first
// input's dtype selects the kernel implementation (see batch_norm.cc).
Operator* CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const override;

private:
BatchNormParam param_;
Expand Down
21 changes: 18 additions & 3 deletions src/operator/batch_norm.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,31 @@
*/

#include "./batch_norm-inl.h"
#if MXNET_USE_MKL2017 == 1
#include <mxnet/mkl_memory.h>
#include "./mkl/mkl_memory-inl.h"
#include "./mkl/mkl_batch_norm-inl.h"
#endif // MXNET_USE_MKL2017

namespace mxnet {
namespace op {
// Creates the CPU BatchNorm operator for the requested dtype.
template<>
Operator *CreateOp<cpu>(BatchNormParam param, int dtype) {
#if MXNET_USE_MKL2017 == 1
  // Only a float32 MKL kernel is instantiated here. The previous code
  // returned MKLBatchNormOp<cpu, float> for every dtype; restrict the MKL
  // path to kFloat32 and fall back to the generic operator otherwise,
  // matching the dtype-dispatch pattern of the other MKL-enabled operators
  // (see activation.cc / convolution.cc in this change).
  if (dtype == mshadow::kFloat32) {
    return new MKLBatchNormOp<cpu, float>(param);
  }
#endif
  // Generic (non-MKL) CPU implementation.
  return new BatchNormOp<cpu>(param);
}

Operator *BatchNormProp::CreateOperator(Context ctx) const {
DO_BIND_DISPATCH(CreateOp, param_);
// DO_BIND_DISPATCH comes from operator_common.h
// Infers the full shape/type signature first so the dispatch below can pick
// an implementation based on the first input's dtype; DO_BIND_DISPATCH
// expands to the return statement.
Operator *BatchNormProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
std::vector<int> *in_type) const {
std::vector<TShape> out_shape, aux_shape;
std::vector<int> out_type, aux_type;
CHECK(InferType(in_type, &out_type, &aux_type));
CHECK(InferShape(in_shape, &out_shape, &aux_shape));
DO_BIND_DISPATCH(CreateOp, param_, (*in_type)[0]);
}

DMLC_REGISTER_PARAMETER(BatchNormParam);
Expand Down
2 changes: 1 addition & 1 deletion src/operator/batch_norm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace mxnet {
namespace op {
template<>
Operator *CreateOp<gpu>(BatchNormParam param) {
Operator *CreateOp<gpu>(BatchNormParam param, int dtype) {
#if MXNET_USE_CUDNN == 1 && CUDNN_MAJOR >= 5
if (!param.use_global_stats) {
return new CuDNNBatchNormOp(param);
Expand Down
19 changes: 19 additions & 0 deletions src/operator/concat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,31 @@
*/

#include "./concat-inl.h"
#if MXNET_USE_MKL2017 == 1
#include <mxnet/mkl_memory.h>
#include "./mkl/mkl_memory-inl.h"
#include "./mkl/mkl_concat-inl.h"
#endif // MXNET_USE_MKL2017

namespace mxnet {
namespace op {
template<>
Operator* CreateOp<cpu>(ConcatParam param, int dtype) {
Operator *op = NULL;
#if MXNET_USE_MKL2017 == 1
if (1 == param.dim) {
switch (dtype) {
case mshadow::kFloat32:
op = new MKLConcatOp<cpu, float>(param);
break;
case mshadow::kFloat64:
op = new MKLConcatOp<cpu, double>(param);
break;
default:
break;
}
}
#endif
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
op = new ConcatOp<cpu, DType>(param);
});
Expand Down
18 changes: 18 additions & 0 deletions src/operator/convolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
*/

#include "./convolution-inl.h"
#if MXNET_USE_MKL2017 == 1
#include <mxnet/mkl_memory.h>
#include "./mkl/mkl_memory-inl.h"
#include "./mkl/mkl_convolution-inl.h"
#endif // MXNET_USE_MKL2017

namespace mxnet {
namespace op {
Expand All @@ -15,6 +20,19 @@ Operator* CreateOp<cpu>(ConvolutionParam param, int dtype,
std::vector<TShape> *out_shape,
Context ctx) {
Operator *op = NULL;
#if MXNET_USE_MKL2017 == 1
if ((param.dilate[0] == 1 && param.dilate[1] == 1)
&& param.kernel.ndim() == 2) {
switch (dtype) {
case mshadow::kFloat32:
return new MKLConvolutionOp<cpu, float>(param);
case mshadow::kFloat64:
return new MKLConvolutionOp<cpu, double>(param);
default:
break;
}
}
#endif
MSHADOW_REAL_TYPE_SWITCH(dtype, DType, {
op = new ConvolutionOp<cpu, DType>(param);
})
Expand Down
Loading

0 comments on commit 8e1e7f0

Please sign in to comment.