Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions paddle/fluid/operators/math/softmax.cu
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ void SoftmaxGradCUDNNFunctor<T>::operator()(
XGrad->mutable_data<T>(context.GetPlace())));
}

template class SoftmaxCUDNNFunctor<platform::float16>;
template class SoftmaxCUDNNFunctor<float>;
template class SoftmaxCUDNNFunctor<double>;
template class SoftmaxGradCUDNNFunctor<float>;
Expand Down
8 changes: 5 additions & 3 deletions paddle/fluid/operators/softmax_cudnn_op.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ class SoftmaxGradCUDNNKernel : public framework::OpKernel<T> {
} // namespace paddle

namespace ops = paddle::operators;
REGISTER_OP_KERNEL(softmax, CUDNN, ::paddle::platform::CUDAPlace,
ops::SoftmaxCUDNNKernel<float>);
REGISTER_OP_KERNEL(softmax_grad, CUDNN, ::paddle::platform::CUDAPlace,
namespace plat = paddle::platform;
REGISTER_OP_KERNEL(softmax, CUDNN, plat::CUDAPlace,
ops::SoftmaxCUDNNKernel<float>,
ops::SoftmaxCUDNNKernel<plat::float16>);
REGISTER_OP_KERNEL(softmax_grad, CUDNN, plat::CUDAPlace,
ops::SoftmaxGradCUDNNKernel<float>);
43 changes: 20 additions & 23 deletions paddle/fluid/operators/softmax_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/softmax_op.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif

#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
Expand Down Expand Up @@ -41,29 +44,30 @@ class SoftmaxOp : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
// choose cudnn kernel if the runtime supported.
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
bool runtime_cudnn_support = false;
framework::LibraryType library_{framework::LibraryType::kPlain};
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) {
auto& dev_ctx =
ctx.template device_context<platform::CUDADeviceContext>();
runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr ? true : false;
}
#endif
framework::LibraryType library_ = framework::LibraryType::kPlain;
if (use_cudnn && runtime_cudnn_support) {
if (platform::CanCUDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kCUDNN;
}
#endif
#ifdef PADDLE_WITH_MKLDNN
if (library_ == framework::LibraryType::kPlain &&
platform::CanMKLDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kMKLDNN;
}
#endif

auto input_data_type =
framework::ToDataType(ctx.Input<Tensor>("X")->type());
if (input_data_type == framework::proto::VarType::FP16) {
PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
"float16 can only be used when CUDNN is used");
}

std::string data_format = ctx.Attr<std::string>("data_format");
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
framework::StringToDataLayout(data_format), library_);
return framework::OpKernelType(input_data_type, ctx.GetPlace(),
framework::StringToDataLayout(data_format),
library_);
}
};
class SoftmaxOpMaker : public framework::OpProtoAndCheckerMaker {
Expand Down Expand Up @@ -130,19 +134,12 @@ class SoftmaxOpGrad : public framework::OperatorWithKernel {
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
// choose cudnn kernel if the runtime supported.
bool use_cudnn = ctx.Attr<bool>("use_cudnn");
bool runtime_cudnn_support = false;
framework::LibraryType library_{framework::LibraryType::kPlain};
#ifdef PADDLE_WITH_CUDA
if (platform::is_gpu_place(ctx.GetPlace())) {
auto& dev_ctx =
ctx.template device_context<platform::CUDADeviceContext>();
runtime_cudnn_support = dev_ctx.cudnn_handle() != nullptr ? true : false;
}
#endif
framework::LibraryType library_ = framework::LibraryType::kPlain;
if (use_cudnn && runtime_cudnn_support) {
if (platform::CanCUDNNBeUsed(ctx)) {
library_ = framework::LibraryType::kCUDNN;
}
#endif
std::string data_format = ctx.Attr<std::string>("data_format");
return framework::OpKernelType(
framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace(),
Expand Down
38 changes: 26 additions & 12 deletions python/paddle/fluid/tests/unittests/test_softmax_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,22 @@ def stable_softmax(x):

class TestSoftmaxOp(OpTest):
def setUp(self):
self.use_mkldnn = False
self.op_type = "softmax"
self.use_cudnn = False
self.init_op_type()
self.inputs = {
'X': np.random.uniform(0.1, 1, [10, 10]).astype("float32")
}
self.outputs = {
'Out': np.apply_along_axis(stable_softmax, 1, self.inputs['X'])
}
self.use_mkldnn = False
self.dtype = np.float32
self.init_kernel_type()

x = np.random.uniform(0.1, 1, [10, 10]).astype(self.dtype)
out = np.apply_along_axis(stable_softmax, 1, x)
self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
self.outputs = {'Out': out}
self.attrs = {
'use_cudnn': self.use_cudnn,
'use_mkldnn': self.use_mkldnn
}

def init_op_type(self):
def init_kernel_type(self):
pass

def test_check_output(self):
Expand All @@ -53,6 +53,8 @@ def test_check_output(self):
self.check_output()

def test_check_grad(self):
if self.dtype == np.float16:
return
if self.use_cudnn:
place = core.CUDAPlace(0)
self.check_grad_with_place(
Expand All @@ -62,12 +64,24 @@ def test_check_grad(self):


class TestSoftmaxCUDNNOp(TestSoftmaxOp):
def init_op_type(self):
def init_kernel_type(self):
self.use_cudnn = True


class TestMKLDNN(TestSoftmaxOp):
def init_op_type(self):
class TestSoftmaxFP16CUDNNOp(TestSoftmaxOp):
def init_kernel_type(self):
self.use_cudnn = True
self.dtype = np.float16

def test_check_output(self):
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
if core.is_float16_supported(place):
self.check_output_with_place(place, atol=1e-3)


class TestSoftmaxMKLDNNOp(TestSoftmaxOp):
def init_kernel_type(self):
self.use_mkldnn = True


Expand Down