Skip to content

Commit c15ac20

Browse files
authored
Merge pull request #4199 from tensor-tang/mkldnn_act
add MKLDNN relu and tanh
2 parents 5b42d2b + 9d692e3 commit c15ac20

10 files changed

+361
-23
lines changed

paddle/gserver/activations/ActivationFunction.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@ limitations under the License. */
2222
#include <type_traits>
2323
#include "paddle/parameter/Argument.h"
2424
#include "paddle/utils/ClassRegistrar.h"
25-
2625
#include "paddle/utils/Logging.h"
2726

27+
#ifdef PADDLE_USE_MKLDNN
28+
#include "MKLDNNActivation.h"
29+
#endif
30+
2831
namespace paddle {
2932

3033
static ClassRegistrar<ActivationFunction> gActivationRegistrar;
@@ -456,6 +459,12 @@ Error __must_check backward(Argument& act) {
456459
END_DEFINE_ACTIVATION(log)
457460

458461
/**
 * Factory: instantiate an activation function by its registered type name.
 * Type names beginning with "mkldnn_" are routed to the MKLDNN registrar
 * when MKLDNN support is compiled in; all other names fall through to the
 * default activation registrar.
 */
ActivationFunction* ActivationFunction::create(const std::string& type) {
#ifdef PADDLE_USE_MKLDNN
  // rfind(prefix, 0) == 0 is a prefix test; it is false for the empty string.
  static const char kMKLDNNPrefix[] = "mkldnn_";
  if (type.rfind(kMKLDNNPrefix, 0) == 0) {
    return MKLDNNActivation::create(type);
  }
#endif
  return gActivationRegistrar.createByType(type);
}
461470

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "MKLDNNActivation.h"
16+
#include "mkldnn.hpp"
17+
#include "paddle/utils/ClassRegistrar.h"
18+
19+
namespace paddle {
20+
21+
static ClassRegistrar<ActivationFunction> gMKLDNNActivationRegistrar;
22+
/**
23+
* @def MKLDNN_ACTIVATION_CLASS_NAME
24+
* @note MKLDNN_ACTIVATION_CLASS_NAME(relu) relu_;
25+
* means mkldnn_reluActivation relu_;
26+
*/
27+
#define MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) mkldnn_##ACT_TYPE##Activation
28+
29+
/**
30+
* @def DEFINE_MKLDNN_ELTWISE_ACTIVATION
31+
*/
32+
#define DEFINE_MKLDNN_ELTWISE_ACTIVATION(ACT_TYPE, ALPHA, BWD_ALPHA) \
33+
class MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE) \
34+
: public MKLDNNEltwiseActivation { \
35+
private: \
36+
static const std::string name; \
37+
static const float alpha; \
38+
static const float bwdAlpha; \
39+
\
40+
public: \
41+
const std::string& getName() const { return name; } \
42+
float getAlpha() const { return alpha; } \
43+
float getBwdAlpha() const { return bwdAlpha; } \
44+
}; \
45+
const std::string MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::name = \
46+
"mkldnn_" #ACT_TYPE; \
47+
const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::alpha = ALPHA; \
48+
const float MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)::bwdAlpha = BWD_ALPHA; \
49+
static InitFunction __reg_activation__mkldnn_##ACT_TYPE([] { \
50+
gMKLDNNActivationRegistrar \
51+
.registerClass<MKLDNN_ACTIVATION_CLASS_NAME(ACT_TYPE)>( \
52+
"mkldnn_" #ACT_TYPE); \
53+
});
54+
55+
/**
56+
* @brief MKLDNN Relu Activation.
57+
* Actually mkldnn_relu is Leaky Relu.
58+
* f(x) = x (x >= 0)
59+
* f(x) = negative_slope * x (x < 0)
60+
* @note the negative_slope should be -0.f in forward
61+
*/
62+
DEFINE_MKLDNN_ELTWISE_ACTIVATION(relu, -0.f, 0.f)
63+
64+
/**
65+
* @brief MKLDNN Tanh Activation.
66+
*/
67+
DEFINE_MKLDNN_ELTWISE_ACTIVATION(tanh, 0.f, 0.f)
68+
69+
/**
70+
* @brief MKLDNN ELU(Exponential Linear Unit) Activation.
71+
* f(x) = x (x >= 0)
72+
* f(x) = negative_slope * (exp(x) - 1) (x < 0)
73+
*/
74+
DEFINE_MKLDNN_ELTWISE_ACTIVATION(elu, 0.f, 0.f)
75+
76+
/**
 * Create an MKLDNN activation instance by its registered type name
 * (e.g. "mkldnn_relu"); fails via the registrar if the name is unknown.
 */
ActivationFunction* MKLDNNActivation::create(const std::string& type) {
  return gMKLDNNActivationRegistrar.createByType(type);
}
79+
80+
/**
 * Collect the type names of every activation currently registered with the
 * MKLDNN registrar.
 */
std::vector<std::string> MKLDNNActivation::getAllRegisteredTypes() {
  std::vector<std::string> registeredTypes;
  gMKLDNNActivationRegistrar.forEachType([&registeredTypes](
      const std::string& type) { registeredTypes.push_back(type); });
  return registeredTypes;
}
86+
87+
} // namespace paddle
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/* Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
#include "ActivationFunction.h"
17+
#include "mkldnn.hpp"
18+
#include "paddle/gserver/layers/MKLDNNBase.h"
19+
#include "paddle/math/MKLDNNMatrix.h"
20+
#include "paddle/parameter/Argument.h"
21+
22+
namespace paddle {
23+
24+
/**
25+
* @brief Base class of MKLDNN Activation.
26+
* Common activation function are provieded,
27+
* including mkldnn_relu, mkldnn_elu, mkldnn_tanh, mkldnn_softmax
28+
*/
29+
class MKLDNNActivation : public ActivationFunction {
30+
protected:
31+
// input value element count
32+
size_t cnt_;
33+
// should not merge the resetBwd into resetFwd,
34+
// because the grad data would be changing before backward.
35+
bool needResetBwd_;
36+
// mkldnn matrix, primitive, stream and pipeline
37+
MKLDNNMatrixPtr val_;
38+
MKLDNNMatrixPtr grad_;
39+
std::shared_ptr<MKLDNNStream> stream_;
40+
std::shared_ptr<mkldnn::primitive> fwd_;
41+
std::shared_ptr<mkldnn::primitive> bwd_;
42+
std::vector<mkldnn::primitive> pipelineFwd_;
43+
std::vector<mkldnn::primitive> pipelineBwd_;
44+
45+
public:
46+
MKLDNNActivation() : cnt_(0), needResetBwd_(true) {}
47+
~MKLDNNActivation() {}
48+
static ActivationFunction* create(const std::string& type);
49+
static std::vector<std::string> getAllRegisteredTypes();
50+
virtual const std::string& getName() const = 0;
51+
virtual Error __must_check forward(Argument& act) = 0;
52+
virtual Error __must_check backward(Argument& act) = 0;
53+
};
54+
55+
/**
56+
* @brief Base class of MKLDNN Eltwise Activation,
57+
* includes mkldnn_relu, mkldnn_elu and mkldnn_tanh.
58+
*/
59+
class MKLDNNEltwiseActivation : public MKLDNNActivation {
60+
typedef mkldnn::eltwise_forward eltwise_fwd;
61+
typedef mkldnn::eltwise_backward eltwise_bwd;
62+
63+
protected:
64+
// save the forward primitive desc, which can be used backward
65+
std::shared_ptr<eltwise_fwd::primitive_desc> fwdPD_;
66+
// eltwise_bwd need src input value
67+
MKLDNNMatrixPtr inVal_;
68+
// use for copy data
69+
std::shared_ptr<mkldnn::reorder> copyInVal_;
70+
71+
public:
72+
MKLDNNEltwiseActivation() {}
73+
74+
~MKLDNNEltwiseActivation() {}
75+
76+
virtual const std::string& getName() const = 0;
77+
78+
// in common, the alpha of forward and backward should be equal.
79+
// but for relu, to avoid negative value, they should be opposite
80+
virtual float getAlpha() const = 0;
81+
virtual float getBwdAlpha() const = 0;
82+
virtual float getBeta() const { return 0.f; }
83+
virtual mkldnn::algorithm getAlgo(const std::string& type) const {
84+
if (type == "mkldnn_relu") {
85+
return mkldnn::algorithm::eltwise_relu;
86+
} else if (type == "mkldnn_tanh") {
87+
return mkldnn::algorithm::eltwise_tanh;
88+
} else if (type == "mkldnn_elu") {
89+
return mkldnn::algorithm::eltwise_elu;
90+
} else {
91+
LOG(FATAL) << "Unkown eltwise activation type: " << type;
92+
}
93+
return (mkldnn::algorithm)0;
94+
}
95+
96+
/**
97+
* reshape and reset the forward primitives
98+
*/
99+
void resetFwd(Argument& act) {
100+
if (cnt_ == act.value->getElementCnt()) {
101+
return;
102+
}
103+
cnt_ = act.value->getElementCnt();
104+
stream_.reset(new MKLDNNStream());
105+
auto eng = CPUEngine::Instance().getEngine();
106+
107+
// get algo setting
108+
mkldnn::algorithm algo = getAlgo(this->getName());
109+
// note: alpha represents the NegativeSlope when used in relu.
110+
float alpha = getAlpha();
111+
float beta = getBeta();
112+
113+
/// forward
114+
pipelineFwd_.clear();
115+
val_ = std::dynamic_pointer_cast<MKLDNNMatrix>(act.value);
116+
if (val_ == nullptr) {
117+
int bs = act.getBatchSize();
118+
int ih = act.getFrameHeight() > 0 ? act.getFrameHeight() : 1;
119+
int iw = act.getFrameWidth() > 0 ? act.getFrameWidth() : 1;
120+
int ic = cnt_ / bs / ih / iw;
121+
CHECK_EQ(cnt_, (size_t)bs * ic * ih * iw);
122+
val_ = MKLDNNMatrix::create(
123+
act.value, {bs, ic, ih, iw}, mkldnn::memory::format::nchw, eng);
124+
CHECK(val_);
125+
}
126+
auto fwdDesc = eltwise_fwd::desc(mkldnn::prop_kind::forward_training,
127+
algo,
128+
val_->getMemoryDesc(),
129+
alpha,
130+
beta);
131+
fwdPD_.reset(new eltwise_fwd::primitive_desc(fwdDesc, eng));
132+
// use inplace for forward but save input value before submit
133+
inVal_ = val_;
134+
if (act.grad) {
135+
// only copy when need do backward
136+
inVal_ = MKLDNNMatrix::create(nullptr, val_->getPrimitiveDesc());
137+
copyInVal_ = std::make_shared<mkldnn::reorder>(*val_, *inVal_);
138+
CHECK(copyInVal_) << "should not be emptry";
139+
pipelineFwd_.push_back(*copyInVal_);
140+
}
141+
fwd_.reset(new eltwise_fwd(*fwdPD_, *val_, *val_));
142+
pipelineFwd_.push_back(*fwd_);
143+
needResetBwd_ = true;
144+
}
145+
146+
/**
147+
* reset the backward primitives, can not merge into resetFwd as the grad data
148+
* would be changing before backward.
149+
*/
150+
void resetBwd(Argument& act) {
151+
if (!needResetBwd_) {
152+
return;
153+
}
154+
needResetBwd_ = false;
155+
mkldnn::algorithm algo = getAlgo(this->getName());
156+
float alpha = getBwdAlpha();
157+
float beta = getBeta();
158+
grad_ = MKLDNNMatrix::create(act.grad, val_->getPrimitiveDesc());
159+
auto eng = CPUEngine::Instance().getEngine();
160+
auto bwdDesc = eltwise_bwd::desc(
161+
algo, grad_->getMemoryDesc(), val_->getMemoryDesc(), alpha, beta);
162+
auto bwdPD = eltwise_bwd::primitive_desc(bwdDesc, eng, *fwdPD_);
163+
CHECK(inVal_);
164+
bwd_.reset(new eltwise_bwd(bwdPD, *inVal_, *grad_, *grad_));
165+
pipelineBwd_.clear();
166+
pipelineBwd_.push_back(*bwd_);
167+
}
168+
169+
Error __must_check forward(Argument& act) {
170+
resetFwd(act);
171+
stream_->submit(pipelineFwd_);
172+
return Error();
173+
}
174+
175+
Error __must_check backward(Argument& act) {
176+
resetBwd(act);
177+
stream_->submit(pipelineBwd_);
178+
return Error();
179+
}
180+
};
181+
182+
} // namespace paddle

paddle/gserver/layers/MKLDNNConvLayer.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -294,12 +294,9 @@ void MKLDNNConvLayer::resetOutValue(
294294
std::shared_ptr<conv_fwd::primitive_desc>& pd, MKLDNNMatrixPtr& out) {
295295
out = MKLDNNMatrix::create(output_.value, pd->dst_primitive_desc());
296296

297-
// change original output value from cpu matrix to mkldnn matrix
298-
output_.value = std::dynamic_pointer_cast<Matrix>(out);
299-
300297
// create reorder if output value has cpu device and pd do not match
301298
cpuOutVal_ = nullptr;
302-
cpuOutVal_ = nullptr;
299+
cvtOutVal_ = nullptr;
303300
if (!outputIsOnlyMKLDNN()) {
304301
const MatrixPtr& cpuOut = getOutput(CPU_DEVICE).value;
305302
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};

paddle/gserver/layers/MKLDNNFcLayer.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -172,12 +172,10 @@ void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt,
172172

173173
void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) {
  out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_);
  if (outputIsOnlyMKLDNN()) {
    return;
  }
  // fc cpu output value do not need create convert
  // just share the data pointer with the mkldnn output
  getOutput(CPU_DEVICE).value->setData(out->getData());
}
183181

paddle/gserver/layers/MKLDNNLayer.h

+4
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ class MKLDNNLayer : public Layer {
119119
inputElemenCnt_ = elemenCnt;
120120
reshape(bs_, ic_, ih_, iw_, oc_, oh_, ow_);
121121
resetFwd(pipelineFwd_, inVal_, wgtVal_, biasVal_, outVal_);
122+
if (outVal_) {
123+
// change original output value to mkldnn output value
124+
output_.value = std::dynamic_pointer_cast<Matrix>(outVal_);
125+
}
122126
convertWeightsFromPaddle();
123127
needResetBwd_ = true;
124128
}

paddle/gserver/layers/MKLDNNPoolLayer.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,6 @@ void MKLDNNPoolLayer::resetOutValue(MKLDNNMatrixPtr& out) {
134134
memory::dims outDims = memory::dims{bs_, oc_, oh_, ow_};
135135
out = MKLDNNMatrix::create(
136136
output_.value, outDims, inVal_->getFormat(), engine_);
137-
output_.value = std::dynamic_pointer_cast<Matrix>(out);
138137

139138
// create reorder if output value has cpu device and pd do not match
140139
cpuOutVal_ = nullptr;

0 commit comments

Comments
 (0)