Skip to content

Commit

Permalink
Restore "Add mkldnn bfloat16 option to C-API " (PaddlePaddle#26882)
Browse files Browse the repository at this point in the history
* Add mkldnn bfloat16 option to C-API

* Add test for bfloat16 gpu

* Change coverage test

* Repair capi_gpu test
  • Loading branch information
wozna authored Sep 2, 2020
1 parent 5e874cc commit 0627a31
Show file tree
Hide file tree
Showing 10 changed files with 110 additions and 2 deletions.
7 changes: 7 additions & 0 deletions go/paddle/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,17 @@ func (config *AnalysisConfig) EnableMkldnnQuantizer() {
C.PD_EnableMkldnnQuantizer(config.c)
}

// EnableMkldnnBfloat16 requests the MKLDNN bfloat16 option on the wrapped C
// config by forwarding to PD_EnableMkldnnBfloat16.
func (config *AnalysisConfig) EnableMkldnnBfloat16() {
C.PD_EnableMkldnnBfloat16(config.c)
}

// MkldnnQuantizerEnabled returns the result of PD_MkldnnQuantizerEnabled for
// the wrapped C config, converted from a C boolean to a Go bool.
func (config *AnalysisConfig) MkldnnQuantizerEnabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnQuantizerEnabled(config.c))
}

// MkldnnBfloat16Enabled returns the result of PD_MkldnnBfloat16Enabled for
// the wrapped C config, converted from a C boolean to a Go bool.
func (config *AnalysisConfig) MkldnnBfloat16Enabled() bool {
return ConvertCBooleanToGo(C.PD_MkldnnBfloat16Enabled(config.c))
}
// SetModelBuffer
// ModelFromMemory

Expand Down
18 changes: 18 additions & 0 deletions paddle/fluid/inference/api/analysis_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,17 @@ void AnalysisConfig::EnableMkldnnQuantizer() {
Update();
}

// Requests MKLDNN bfloat16 for this config and then re-runs Update().
//
// The flag can only become true in builds compiled with PADDLE_WITH_MKLDNN.
// In any other build an error is logged and the flag is forced to false, so
// the call degrades to a no-op request instead of failing.
void AnalysisConfig::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
  constexpr bool kBuiltWithMkldnn = true;
#else
  constexpr bool kBuiltWithMkldnn = false;
  LOG(ERROR) << "Please compile with MKLDNN first to use MkldnnBfloat16";
#endif

  use_mkldnn_bfloat16_ = kBuiltWithMkldnn;
  Update();
}

MkldnnQuantizerConfig *AnalysisConfig::mkldnn_quantizer_config() const {
PADDLE_ENFORCE_NOT_NULL(mkldnn_quantizer_config_,
"MkldnnQuantizer was not enabled yet.");
Expand Down Expand Up @@ -330,6 +341,12 @@ void AnalysisConfig::Update() {
#endif
}

// Forward a pending bfloat16 request to the pass builder. The call itself is
// compiled out when Paddle is built without MKLDNN, leaving the branch empty.
if (use_mkldnn_bfloat16_) {
#ifdef PADDLE_WITH_MKLDNN
pass_builder()->EnableMkldnnBfloat16();
#endif
}

#ifdef PADDLE_WITH_MKLDNN
// Do not optimize when mkldnn is on
if (enable_memory_optim_ && !use_mkldnn_) {
Expand Down Expand Up @@ -398,6 +415,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << ";";

ss << use_mkldnn_quantizer_;
ss << use_mkldnn_bfloat16_;
ss << model_from_memory_;

ss << with_profile_;
Expand Down
21 changes: 21 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -485,4 +485,25 @@ TEST_F(MkldnnQuantizerTest, kl_scaling_factor_unsigned) {
}
#endif

#ifdef PADDLE_WITH_CUDA
// Checks that, on a config with GPU enabled, the bfloat16 flag reported by
// the config still follows the build setting: true when compiled with MKLDNN
// and false otherwise.
TEST(AnalysisPredictor, bf16_gpu_pass_strategy) {
#ifdef PADDLE_WITH_MKLDNN
  const bool expect_bf16_flag = true;
#else
  const bool expect_bf16_flag = false;
#endif

  AnalysisConfig gpu_config;
  gpu_config.SetModel(FLAGS_dirname);
  gpu_config.SwitchIrOptim(true);
  gpu_config.EnableUseGpu(100, 0);
  gpu_config.EnableMkldnnBfloat16();

  ASSERT_EQ(gpu_config.mkldnn_bfloat16_enabled(), expect_bf16_flag);
}
#endif

// Smoke test: calling EnableMkldnnBfloat16() on a plain PassStrategy built
// from an empty pass list must simply run; nothing is asserted afterwards.
TEST(AnalysisPredictor, bf16_pass_strategy) {
  const std::vector<std::string> no_passes;
  PassStrategy base_strategy(no_passes);
  base_strategy.EnableMkldnnBfloat16();
}

} // namespace paddle
14 changes: 14 additions & 0 deletions paddle/fluid/inference/api/paddle_analysis_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,19 @@ struct PD_INFER_DECL AnalysisConfig {
///
void EnableMkldnnQuantizer();

///
/// \brief Turn on MKLDNN bfloat16.
///
/// The request only takes effect in builds compiled with PADDLE_WITH_MKLDNN;
/// in other builds an error is logged and the option stays disabled.
///
void EnableMkldnnBfloat16();

///
/// \brief A boolean state telling whether to use the MKLDNN Bfloat16.
///
/// \return bool The current value of the MKLDNN Bfloat16 flag stored in this
/// config.
///
bool mkldnn_bfloat16_enabled() const { return use_mkldnn_bfloat16_; }

///
/// \brief A boolean state telling whether the thread local CUDA stream is
/// enabled.
Expand Down Expand Up @@ -592,6 +605,7 @@ struct PD_INFER_DECL AnalysisConfig {
int mkldnn_cache_capacity_{0};
bool use_mkldnn_quantizer_{false};
std::shared_ptr<MkldnnQuantizerConfig> mkldnn_quantizer_config_;
bool use_mkldnn_bfloat16_{false};

// If the config is already used on a predictor, it becomes invalid.
// Any config can only be used with one predictor.
Expand Down
12 changes: 12 additions & 0 deletions paddle/fluid/inference/api/paddle_pass_builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ void GpuPassStrategy::EnableMkldnnQuantizer() {
LOG(ERROR) << "GPU not support MKL-DNN quantization";
}

// MKL-DNN bfloat16 is not available for the GPU pass strategy: this override
// only logs an error and leaves the strategy unchanged.
void GpuPassStrategy::EnableMkldnnBfloat16() {
LOG(ERROR) << "GPU not support MKL-DNN bfloat16";
}

CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// NOTE the large fusions should be located in the front, so that they will
// not be damaged by smaller ones.
Expand Down Expand Up @@ -225,4 +229,12 @@ void CpuPassStrategy::EnableMkldnnQuantizer() {
#endif
}

// Records the bfloat16 request on the CPU strategy. The flag is set to true
// only in builds compiled with PADDLE_WITH_MKLDNN and is reset to false in
// every other build.
void CpuPassStrategy::EnableMkldnnBfloat16() {
#ifdef PADDLE_WITH_MKLDNN
  constexpr bool kMkldnnCompiledIn = true;
#else
  constexpr bool kMkldnnCompiledIn = false;
#endif
  use_mkldnn_bfloat16_ = kMkldnnCompiledIn;
}

} // namespace paddle
11 changes: 11 additions & 0 deletions paddle/fluid/inference/api/paddle_pass_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ class PD_INFER_DECL PassStrategy : public PaddlePassBuilder {
/// \brief Enable MKLDNN quantize optimization.
virtual void EnableMkldnnQuantizer() {}

/// \brief Enable MKLDNN bfloat16.
/// The base implementation is intentionally empty; derived strategies
/// override it to provide their own behavior.
virtual void EnableMkldnnBfloat16() {}

/// \brief Check if we are using gpu.
/// \return A bool variable implying whether we are in gpu mode.
bool use_gpu() const { return use_gpu_; }
Expand Down Expand Up @@ -161,6 +164,7 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
use_gpu_ = other.use_gpu_;
use_mkldnn_ = other.use_mkldnn_;
use_mkldnn_quantizer_ = other.use_mkldnn_quantizer_;
use_mkldnn_bfloat16_ = other.use_mkldnn_bfloat16_;
}
/// \brief Default destructor.
virtual ~CpuPassStrategy() = default;
Expand All @@ -174,9 +178,13 @@ class PD_INFER_DECL CpuPassStrategy : public PassStrategy {
/// \brief Enable MKLDNN quantize optimization.
void EnableMkldnnQuantizer() override;

/// \brief Enable MKLDNN bfloat16.
/// Sets the strategy's bfloat16 flag when built with PADDLE_WITH_MKLDNN and
/// clears it otherwise.
void EnableMkldnnBfloat16() override;

protected:
/// \cond Protected
bool use_mkldnn_quantizer_{false};
bool use_mkldnn_bfloat16_{false};
/// \endcond
};

Expand Down Expand Up @@ -205,6 +213,9 @@ class PD_INFER_DECL GpuPassStrategy : public PassStrategy {
/// \brief Not supported in GPU mode yet.
void EnableMkldnnQuantizer() override;

/// \brief Not supported in GPU mode yet.
/// Calling it only logs an error.
void EnableMkldnnBfloat16() override;

/// \brief Default destructor.
virtual ~GpuPassStrategy() = default;

Expand Down
6 changes: 6 additions & 0 deletions paddle/fluid/inference/capi/paddle_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnQuantizer(
PADDLE_CAPI_EXPORT extern bool PD_MkldnnQuantizerEnabled(
const PD_AnalysisConfig* config);

// Requests MKLDNN bfloat16 on the given analysis config. `config` must not be
// null.
PADDLE_CAPI_EXPORT extern void PD_EnableMkldnnBfloat16(
PD_AnalysisConfig* config);

// Returns whether MKLDNN bfloat16 is currently enabled on the given analysis
// config. `config` must not be null.
PADDLE_CAPI_EXPORT extern bool PD_MkldnnBfloat16Enabled(
const PD_AnalysisConfig* config);

PADDLE_CAPI_EXPORT extern void PD_SetModelBuffer(PD_AnalysisConfig* config,
const char* prog_buffer,
size_t prog_buffer_size,
Expand Down
12 changes: 12 additions & 0 deletions paddle/fluid/inference/capi/pd_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,18 @@ bool PD_MkldnnQuantizerEnabled(const PD_AnalysisConfig* config) {
return config->config.mkldnn_quantizer_enabled();
}

// C API wrapper around AnalysisConfig::EnableMkldnnBfloat16().
// Enforces that `config` is non-null (reported as a NotFound error) before
// forwarding the call to the wrapped C++ config.
void PD_EnableMkldnnBfloat16(PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
"PD_AnalysisConfig should not be null"));
config->config.EnableMkldnnBfloat16();
}

// C API wrapper around AnalysisConfig::mkldnn_bfloat16_enabled().
// Enforces that `config` is non-null (reported as a NotFound error) and then
// returns the bfloat16 flag of the wrapped C++ config.
bool PD_MkldnnBfloat16Enabled(const PD_AnalysisConfig* config) {
PADDLE_ENFORCE_NOT_NULL(config, paddle::platform::errors::NotFound(
"PD_AnalysisConfig should not be null"));
return config->config.mkldnn_bfloat16_enabled();
}

void PD_SetModelBuffer(PD_AnalysisConfig* config, const char* prog_buffer,
size_t prog_buffer_size, const char* params_buffer,
size_t params_buffer_size) {
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ TEST(PD_AnalysisConfig, profile_mkldnn) {
PD_EnableMkldnnQuantizer(config);
bool quantizer_enable = PD_MkldnnQuantizerEnabled(config);
CHECK(quantizer_enable) << "NO";
PD_EnableMkldnnBfloat16(config);
bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config);
CHECK(bfloat16_enable) << "NO";
PD_SetMkldnnCacheCapacity(config, 0);
PD_SetModel(config, prog_file.c_str(), params_file.c_str());
PD_DeleteAnalysisConfig(config);
Expand Down
8 changes: 6 additions & 2 deletions paddle/fluid/pybind/inference_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,7 @@ void BindAnalysisConfig(py::module *m) {
&AnalysisConfig::cpu_math_library_num_threads)
.def("to_native_config", &AnalysisConfig::ToNativeConfig)
.def("enable_quantizer", &AnalysisConfig::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &AnalysisConfig::EnableMkldnnBfloat16)
#ifdef PADDLE_WITH_MKLDNN
.def("quantizer_config", &AnalysisConfig::mkldnn_quantizer_config,
py::return_value_policy::reference)
Expand Down Expand Up @@ -565,21 +566,24 @@ void BindPaddlePassBuilder(py::module *m) {
.def("enable_cudnn", &PassStrategy::EnableCUDNN)
.def("enable_mkldnn", &PassStrategy::EnableMKLDNN)
.def("enable_mkldnn_quantizer", &PassStrategy::EnableMkldnnQuantizer)
.def("enable_mkldnn_bfloat16", &PassStrategy::EnableMkldnnBfloat16)
.def("use_gpu", &PassStrategy::use_gpu);

// Python binding for CpuPassStrategy (derived from PassStrategy): default and
// copy constructors plus the enable_* switches. Each method name is registered
// exactly once and the chain ends in a single terminating semicolon.
py::class_<CpuPassStrategy, PassStrategy>(*m, "CpuPassStrategy")
    .def(py::init<>())
    .def(py::init<const CpuPassStrategy &>())
    .def("enable_cudnn", &CpuPassStrategy::EnableCUDNN)
    .def("enable_mkldnn", &CpuPassStrategy::EnableMKLDNN)
    .def("enable_mkldnn_quantizer", &CpuPassStrategy::EnableMkldnnQuantizer)
    .def("enable_mkldnn_bfloat16", &CpuPassStrategy::EnableMkldnnBfloat16);

// Python binding for GpuPassStrategy (derived from PassStrategy): default and
// copy constructors plus the enable_* switches. Each method name is registered
// exactly once and the chain ends in a single terminating semicolon.
py::class_<GpuPassStrategy, PassStrategy>(*m, "GpuPassStrategy")
    .def(py::init<>())
    .def(py::init<const GpuPassStrategy &>())
    .def("enable_cudnn", &GpuPassStrategy::EnableCUDNN)
    .def("enable_mkldnn", &GpuPassStrategy::EnableMKLDNN)
    .def("enable_mkldnn_quantizer", &GpuPassStrategy::EnableMkldnnQuantizer)
    .def("enable_mkldnn_bfloat16", &GpuPassStrategy::EnableMkldnnBfloat16);
}
} // namespace
} // namespace pybind
Expand Down

0 comments on commit 0627a31

Please sign in to comment.