
Commit adab3b2

public gelu
1 parent a5b7556 commit adab3b2

23 files changed (+522, -2 lines)
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "Gelu"
+}
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+op {
+  graph_op_name: "GeluGrad"
+}

tensorflow/core/kernels/BUILD

Lines changed: 7 additions & 0 deletions
@@ -4718,6 +4718,7 @@ cc_library(
         ":depthwise_conv_op",
         ":dilation_ops",
         ":fused_batch_norm_op",
+        ":gelu_op",
         ":in_topk_op",
         ":l2loss_op",
         ":lrn_op",
@@ -4803,6 +4804,12 @@ tf_kernel_library(
     deps = NN_DEPS + if_rocm([":conv_ops_gpu_hdrs"]),
 )
 
+tf_kernel_library(
+    name = "gelu_op",
+    prefix = "gelu_op",
+    deps = NN_DEPS,
+)
+
 tf_kernel_library(
     name = "relu_op",
     prefix = "relu_op",
tensorflow/core/kernels/gelu_op.cc

Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/nn_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/gelu_op.h"
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename Device, typename T>
+class GeluOp : public UnaryElementWiseOp<T, GeluOp<Device, T>> {
+ public:
+  explicit GeluOp(OpKernelConstruction* context)
+      : UnaryElementWiseOp<T, GeluOp<Device, T>>(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("approximate", &approximate_));
+  }
+
+  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
+    functor::Gelu<Device, T> functor;
+    functor(context->eigen_device<Device>(), input.flat<T>(), approximate_,
+            output->flat<T>());
+  }
+
+ private:
+  bool approximate_;
+};
+
+template <typename Device, typename T>
+class GeluGradOp : public BinaryElementWiseOp<T, GeluGradOp<Device, T>> {
+ public:
+  explicit GeluGradOp(OpKernelConstruction* context)
+      : BinaryElementWiseOp<T, GeluGradOp<Device, T>>(context) {
+    OP_REQUIRES_OK(context, context->GetAttr("approximate", &approximate_));
+  }
+
+  void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
+                         const Tensor& a, Tensor* output);
+  // INPUTS:
+  //   g (gradients): backpropagated gradients.
+  //   a (inputs): inputs that were passed to GeluOp().
+  // OUTPUT:
+  //   gradients to backprop.
+  template <int NDIMS>
+  void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a,
+               Tensor* output) {
+    OperateNoTemplate(context, g, a, output);
+  }
+
+ private:
+  bool approximate_;
+};
+
+template <typename Device, typename T>
+void GeluGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
+                                              const Tensor& g, const Tensor& a,
+                                              Tensor* output) {
+  OP_REQUIRES(context, a.IsSameSize(g),
+              errors::InvalidArgument("g and a must be the same size"));
+  functor::GeluGrad<Device, T> functor;
+  functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(),
+          approximate_, output->flat<T>());
+}
+
+#define REGISTER_KERNELS(type)                                       \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Gelu").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
+      GeluOp<CPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("GeluGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
+      GeluGradOp<CPUDevice, type>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNELS);
+#undef REGISTER_KERNELS
+
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
+// Forward declarations of the functor specializations for GPU.
+namespace functor {
+#define DECLARE_GPU_SPEC(T)                                           \
+  template <>                                                         \
+  void Gelu<GPUDevice, T>::operator()(                                \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor features,   \
+      bool approximate, typename TTypes<T>::Tensor activations);      \
+  extern template struct Gelu<GPUDevice, T>;                          \
+                                                                      \
+  template <>                                                         \
+  void GeluGrad<GPUDevice, T>::operator()(                            \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor gradients,  \
+      typename TTypes<T>::ConstTensor features, bool approximate,     \
+      typename TTypes<T>::Tensor backprops);                          \
+  extern template struct GeluGrad<GPUDevice, T>;
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
+}  // namespace functor
+
+// Registration of the GPU implementations.
+#define REGISTER_GPU_KERNELS(type)                                   \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("Gelu").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      GeluOp<GPUDevice, type>);                                      \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name("GeluGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      GeluGradOp<GPUDevice, type>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
+#undef REGISTER_GPU_KERNELS
+
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+
+}  // namespace tensorflow

tensorflow/core/kernels/gelu_op.h

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_KERNELS_GELU_OP_H_
+#define TENSORFLOW_CORE_KERNELS_GELU_OP_H_
+
+#include "tensorflow/core/framework/tensor_types.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+namespace internal {
+constexpr double kCoeff = 0.044715;
+constexpr double kSqrtHalf = 0.7071067811865476;
+constexpr double kTwoRsqrtPi = 1.1283791670955126;
+constexpr double kAlpha = kSqrtHalf * kTwoRsqrtPi;
+}  // namespace internal
+
+namespace functor {
+
+// Functor used by GeluOp to do the computations.
+template <typename Device, typename T>
+struct Gelu {
+  // Computes Gelu activation.
+  //
+  // features: any shape.
+  // approximate: whether to enable approximation.
+  // activations: same shape as "features".
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor features,
+                  bool approximate, typename TTypes<T>::Tensor activations) {
+    const T one = static_cast<T>(1);
+    const T half = static_cast<T>(0.5);
+    if (approximate) {
+      // y = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
+      activations.device(d) =
+          half * features *
+          (one +
+           (static_cast<T>(internal::kAlpha) *
+            (features + static_cast<T>(internal::kCoeff) * features.cube()))
+               .tanh());
+    } else {
+      // y = x * normcdf(x) = 0.5 * x * (1 + erf(x / sqrt(2)))
+      activations.device(d) =
+          half * features *
+          (one + (features * static_cast<T>(internal::kSqrtHalf)).erf());
+    }
+  }
+};
+
+// Functor used by GeluGradOp to do the computations.
+template <typename Device, typename T>
+struct GeluGrad {
+  // Computes GeluGrad backprops.
+  //
+  // gradients: gradients backpropagated to the Gelu op.
+  // features: inputs that were passed to the Gelu op.
+  // approximate: whether to enable approximation.
+  // backprops: gradients to backpropagate to the Gelu inputs.
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
+                  typename TTypes<T>::ConstTensor features, bool approximate,
+                  typename TTypes<T>::Tensor backprops) {
+    const T one = static_cast<T>(1);
+    const T half = static_cast<T>(0.5);
+    if (approximate) {
+      const T kBeta = static_cast<T>(internal::kAlpha) *
+                      static_cast<T>(internal::kCoeff) * static_cast<T>(3);
+      const auto y =
+          (static_cast<T>(internal::kAlpha) *
+           ((static_cast<T>(internal::kCoeff) * features.cube()) + features))
+              .tanh();
+      backprops.device(d) =
+          ((-features * y.square() + features) *
+               (kBeta * features.square() + static_cast<T>(internal::kAlpha)) +
+           one + y) *
+          gradients * half;
+    } else {
+      backprops.device(d) =
+          gradients *
+          (static_cast<T>(internal::kAlpha * 0.5) * features *
+               (-features.square() * half).exp() +
+           (half * (one + (features * static_cast<T>(internal::kSqrtHalf)).erf())));
+    }
+  }
+};
+
+}  // namespace functor
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_GELU_OP_H_
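
The Eigen expression inside the GeluGrad functor is dense, so here, as a reading aid rather than part of the commit, is the math it appears to evaluate, writing alpha for kAlpha = sqrt(2/pi) and c for kCoeff = 0.044715:

\mathrm{gelu}(x) = x\,\Phi(x) = \tfrac{1}{2}\,x\,\bigl(1 + \operatorname{erf}(x/\sqrt{2})\bigr),
\qquad
\mathrm{gelu}'(x) = \Phi(x) + x\,\varphi(x)
  = \tfrac{1}{2}\bigl(1 + \operatorname{erf}(x/\sqrt{2})\bigr) + \tfrac{x}{\sqrt{2\pi}}\,e^{-x^{2}/2}

\mathrm{gelu}_{\mathrm{approx}}(x) = \tfrac{1}{2}\,x\,\bigl(1 + \tanh u\bigr),
\quad u = \alpha\,(x + c\,x^{3}),
\qquad
\mathrm{gelu}_{\mathrm{approx}}'(x)
  = \tfrac{1}{2}\bigl(1 + \tanh u\bigr)
  + \tfrac{1}{2}\,x\,\bigl(1 - \tanh^{2} u\bigr)\,\alpha\,\bigl(1 + 3c\,x^{2}\bigr)

The functor then sets backprops = gradients * gelu'(features); kBeta in the code is alpha * 3c, and kAlpha * 0.5 equals 1/sqrt(2*pi), which is why the non-approximate branch is written the way it is.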
Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
+    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow/core/kernels/gelu_op.h"
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+// Definition of the GPU implementations declared in gelu_op.cc.
+#define DEFINE_GPU_KERNELS(T)                    \
+  template struct functor::Gelu<GPUDevice, T>;   \
+  template struct functor::GeluGrad<GPUDevice, T>;
+
+TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
+
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM

tensorflow/core/ops/nn_ops.cc

Lines changed: 15 additions & 0 deletions
@@ -1070,6 +1070,21 @@ REGISTER_OP("Dilation2DBackpropFilter")
 
 // --------------------------------------------------------------------------
 
+REGISTER_OP("Gelu")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: {half, float, double}")
+    .Attr("approximate: bool = true")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("GeluGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("T: {half, float, double}")
+    .Attr("approximate: bool = true")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
+
 REGISTER_OP("Relu")
     .Input("features: T")
     .Output("activations: T")

tensorflow/python/eager/pywrap_gradient_exclusions.cc

Lines changed: 2 additions & 1 deletion
@@ -410,7 +410,7 @@ absl::optional<tensorflow::gtl::FlatSet<int>> OpGradientUnusedInputIndices(
 
 absl::optional<tensorflow::gtl::FlatSet<int>> OpGradientUnusedOutputIndices(
     const tensorflow::string &op_name) {
-  static std::array<OpIndexInfo, 459> a = {{
+  static std::array<OpIndexInfo, 460> a = {{
       {"Abs"},
       {"AccumulateNV2"},
       {"Acos"},
@@ -539,6 +539,7 @@ absl::optional<tensorflow::gtl::FlatSet<int>> OpGradientUnusedOutputIndices(
       {"Gather"},
       {"GatherNd"},
       {"GatherV2"},
+      {"Gelu"},
       {"GenerateBoundingBoxProposals"},
       {"GenerateVocabRemapping"},
       {"GetSessionHandle"},

tensorflow/python/keras/activations.py

Lines changed: 20 additions & 0 deletions
@@ -81,6 +81,26 @@ def softmax(x, axis=-1):
                      'Received input: %s' % (x,))
 
 
+@keras_export('keras.activations.gelu')
+def gelu(x, approximate=True):
+  """Gaussian Error Linear Unit.
+
+  Arguments:
+      x: Input tensor.
+
+  Returns:
+      The gaussian error linear activation:
+      `0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))`
+      if `approximate` is `True` or
+      `x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`, where P(X) ~ N(0, 1),
+      if `approximate` is `False`.
+
+  Reference:
+    - [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415)
+  """
+  return nn.gelu(x, approximate)
+
+
 @keras_export('keras.activations.elu')
 def elu(x, alpha=1.0):
   """Exponential linear unit.
