Commit cfc91e1

Merge branch 'develop' into fuse_fusion_group
test=develop

2 parents: 9f0c215 + 8cb54ed

106 files changed: 2805 additions, 751 deletions


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -92,7 +92,7 @@ set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
 
 # CMAKE_BUILD_TYPE
 if(NOT CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
+  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
       "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
       FORCE)
 endif()

cmake/external/openblas.cmake

Lines changed: 35 additions & 1 deletion
@@ -58,7 +58,41 @@ IF(NOT ${CBLAS_FOUND})
     UPDATE_COMMAND  ""
     CONFIGURE_COMMAND ""
   )
-ELSE()
+ELSE(NOT WIN32)
+  SET(CBLAS_FOUND false)
+  SET(CBLAS_LIBRARIES
+      "${CBLAS_INSTALL_DIR}/lib/openblas${CMAKE_STATIC_LIBRARY_SUFFIX}"
+      CACHE FILEPATH "openblas library." FORCE)
+  INCLUDE_DIRECTORIES(${CBLAS_INC_DIR}/openblas)  # For openblas code to include its own headers.
+  INCLUDE_DIRECTORIES(${THIRD_PARTY_PATH}/install)
+  ExternalProject_Add(
+    extern_openblas
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    GIT_REPOSITORY  https://github.com/xianyi/OpenBLAS.git
+    GIT_TAG         "v0.3.7"
+    PREFIX          ${CBLAS_SOURCES_DIR}
+    INSTALL_DIR     ${CBLAS_INSTALL_DIR}
+    BUILD_IN_SOURCE 0
+    UPDATE_COMMAND  ""
+    CMAKE_ARGS      -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+                    -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+                    -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                    -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                    -DCMAKE_INSTALL_PREFIX=${CBLAS_INSTALL_DIR}
+                    -DCMAKE_POSITION_INDEPENDENT_CODE=ON
+                    -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+                    -DBUILD_SHARED_LIBS=ON
+                    -DMSVC_STATIC_CRT=${MSVC_STATIC_CRT}
+                    ${EXTERNAL_OPTIONAL_ARGS}
+    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${CBLAS_INSTALL_DIR}
+                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
+                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
+  )
+  add_custom_command(TARGET extern_openblas POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy ${CBLAS_INSTALL_DIR}/bin/openblas${CMAKE_SHARED_LIBRARY_SUFFIX} ${CBLAS_INSTALL_DIR}/lib)
+  ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${CBLAS_LIBRARIES})
+  ADD_DEPENDENCIES(openblas extern_openblas)
 ENDIF(NOT WIN32)
 SET(CBLAS_PROVIDER openblas)
 ENDIF(NOT ${CBLAS_FOUND})

cmake/flags.cmake

Lines changed: 1 addition & 1 deletion
@@ -215,4 +215,4 @@ safe_set_static_flag()
     string(REGEX REPLACE "(^| )/W[0-9]( |$)" " " ${flag_var} "${${flag_var}}")
     set(flag_var "${flag_var} /w")
   endforeach(flag_var)
-endif(WIN32)
+endif()

paddle/fluid/API.spec

Lines changed: 2 additions & 0 deletions
@@ -196,6 +196,8 @@ paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale',
 paddle.fluid.layers.gather (ArgSpec(args=['input', 'index', 'overwrite'], varargs=None, keywords=None, defaults=(True,)), ('document', 'f985c9b66e3aec96fa753a8eb44c991c'))
 paddle.fluid.layers.gather_nd (ArgSpec(args=['input', 'index', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '3cc24f9cf135770aa6263dba25b457f9'))
 paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name', 'overwrite'], varargs=None, keywords=None, defaults=(None, True)), ('document', '69b22affd4a6326502af166f04c095ab'))
+paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c2fa5ee7484b52b95a28abf1d8827cd0'))
+paddle.fluid.layers.scatter_nd (ArgSpec(args=['index', 'updates', 'shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '14b5449ce42f8ff4ac4ce79b41c86cc5'))
 paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'abe3f714120117a5a3d3e639853932bf'))
 paddle.fluid.layers.random_crop (ArgSpec(args=['x', 'shape', 'seed'], varargs=None, keywords=None, defaults=(None,)), ('document', '042af0b8abea96b40c22f6e70d99e042'))
 paddle.fluid.layers.mean_iou (ArgSpec(args=['input', 'label', 'num_classes'], varargs=None, keywords=None, defaults=None), ('document', 'e714b4aa7993dfe9c1a38886875dbaac'))

paddle/fluid/framework/data_layout_transform.cc

Lines changed: 7 additions & 7 deletions
@@ -18,7 +18,6 @@
 
 #include "paddle/fluid/operators/math/math_function.h"
 #ifdef PADDLE_WITH_MKLDNN
-#include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
 #endif
@@ -135,9 +134,10 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
                                     const Tensor& in, Tensor* out,
                                     platform::Place place) {
 #ifdef PADDLE_WITH_MKLDNN
-  PADDLE_ENFORCE(in.format() != memory::format::format_undef &&
-                     in.format() != memory::format::any,
-                 "Input tensor should have specified memory format");
+  PADDLE_ENFORCE_NE(in.format(), MKLDNNMemoryFormat::format_undef,
+                    "Input tensor should have specified memory format");
+  PADDLE_ENFORCE_NE(in.format(), MKLDNNMemoryFormat::any,
+                    "Input tensor should have specified memory format");
 
   // Set default as NCHW in case not specified
   out_layout =
@@ -147,8 +147,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
   auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(pool.Get(place));
   auto& cpu_engine = dev_ctx->GetEngine();
 
-  std::vector<int> in_tz = paddle::framework::vectorize2int(in.dims());
-  std::vector<int> out_tz = in_tz;
+  auto in_tz = paddle::framework::vectorize<int>(in.dims());
+  auto out_tz = in_tz;
 
   memory::data_type in_type = ToMKLDNNDataType(in.type());
   PADDLE_ENFORCE(in_type != memory::data_type::data_undef,
@@ -183,7 +183,7 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,
   }
   out->set_layout(out_layout);
   // reset format since the out tensor will be feed to non-MKLDNN OPkernel
-  out->set_format(memory::format::format_undef);
+  out->set_format(MKLDNNMemoryFormat::format_undef);
 #endif
 }
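
Splitting the compound PADDLE_ENFORCE into two PADDLE_ENFORCE_NE calls means a failed check can now report exactly which comparison tripped, instead of one opaque message for both. A minimal self-contained sketch of that idea, using a toy ENFORCE_NE macro (illustrative only, not Paddle's real macro):

#include <cstdio>
#include <sstream>
#include <stdexcept>

// Toy stand-in for PADDLE_ENFORCE_NE: fails when the operands are equal
// and names the exact comparison in the error message.
#define ENFORCE_NE(a, b, msg)                          \
  do {                                                 \
    if ((a) == (b)) {                                  \
      std::ostringstream os;                           \
      os << msg << " (" << #a " == " #b << ")";        \
      throw std::runtime_error(os.str());              \
    }                                                  \
  } while (0)

enum class Fmt { format_undef, any, nchw };

int main() {
  Fmt f = Fmt::any;
  try {
    // Two separate checks instead of one `a != x && a != y` enforce:
    ENFORCE_NE(f, Fmt::format_undef, "Input tensor should have specified memory format");
    ENFORCE_NE(f, Fmt::any, "Input tensor should have specified memory format");
  } catch (const std::exception& e) {
    std::puts(e.what());  // "... memory format (f == Fmt::any)"
  }
}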

paddle/fluid/framework/data_layout_transform.h

Lines changed: 10 additions & 7 deletions
@@ -21,30 +21,33 @@
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/variable.h"
 
+#ifdef PADDLE_WITH_MKLDNN
+#include "paddle/fluid/platform/mkldnn_helper.h"
+#endif
+
 namespace paddle {
 namespace framework {
 
 #ifdef PADDLE_WITH_MKLDNN
-using MKLDNNFormat = mkldnn::memory::format;
 using MKLDNNDataType = mkldnn::memory::data_type;
 
-inline MKLDNNFormat ToMKLDNNFormat(const DataLayout& layout) {
+inline MKLDNNMemoryFormat ToMKLDNNFormat(const DataLayout& layout) {
   switch (layout) {
     case DataLayout::kNHWC:
-      return MKLDNNFormat::nhwc;
+      return MKLDNNMemoryFormat::nhwc;
     case DataLayout::kNCHW:
-      return MKLDNNFormat::nchw;
+      return MKLDNNMemoryFormat::nchw;
     default:
       PADDLE_THROW("Fail to convert layout %s to MKLDNN format",
                    DataLayoutToString(layout));
   }
 }
 
-inline DataLayout ToPaddleLayout(const MKLDNNFormat& format) {
+inline DataLayout ToPaddleLayout(const MKLDNNMemoryFormat& format) {
   switch (format) {
-    case MKLDNNFormat::nhwc:
+    case MKLDNNMemoryFormat::nhwc:
       return DataLayout::kNHWC;
-    case MKLDNNFormat::nchw:
+    case MKLDNNMemoryFormat::nchw:
       return DataLayout::kNCHW;
     default:
       PADDLE_THROW("Fail to convert MKLDNN format to paddle layout");

paddle/fluid/framework/ddim.cc

Lines changed: 0 additions & 7 deletions
@@ -48,13 +48,6 @@ bool DDim::operator==(const DDim& d) const {
 
 bool DDim::operator!=(const DDim& d) const { return !(*this == d); }
 
-std::vector<int64_t> vectorize(const DDim& ddim) {
-  std::vector<int64_t> result(DDim::kMaxRank);
-  dynamic_dim_assign(ddim.Get(), result.data(), ddim.size());
-  result.resize(ddim.size());
-  return result;
-}
-
 // NOTE: framework::vectorize converts to type int64_t
 // which does not fit cudnn inputs.
 std::vector<int> vectorize2int(const DDim& ddim) {

paddle/fluid/framework/ddim.h

Lines changed: 7 additions & 1 deletion
@@ -170,7 +170,13 @@ DDim make_ddim(const std::vector<int>& dims);
 */
 DDim make_ddim(std::initializer_list<int64_t> dims);
 
-std::vector<int64_t> vectorize(const DDim& ddim);
+template <typename T = int64_t>
+std::vector<T> vectorize(const DDim& ddim) {
+  std::vector<T> result(DDim::kMaxRank);
+  dynamic_dim_assign(ddim.Get(), result.data(), ddim.size());
+  result.resize(ddim.size());
+  return result;
+}
 std::vector<int> vectorize2int(const DDim& ddim);
 
 int64_t product(const DDim& ddim);
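
The out-of-line, int64_t-only vectorize (deleted from ddim.cc above) becomes a header template defaulting to int64_t, so callers that need int shapes can write vectorize<int>(dims), which is exactly what the updated data_layout_transform.cc does in place of vectorize2int. A standalone sketch of the two call styles, with a toy DDim and a plain loop standing in for dynamic_dim_assign:

#include <cstdint>
#include <iostream>
#include <vector>

// Toy stand-in for framework::DDim; the real class lives in ddim.h.
struct DDim {
  static constexpr int kMaxRank = 9;
  int64_t dims[kMaxRank];
  int rank;
  const int64_t* Get() const { return dims; }
  int size() const { return rank; }
};

template <typename T = int64_t>
std::vector<T> vectorize(const DDim& ddim) {
  std::vector<T> result(DDim::kMaxRank);  // allocate once at max rank
  for (int i = 0; i < ddim.size(); ++i)   // stand-in for dynamic_dim_assign
    result[i] = static_cast<T>(ddim.Get()[i]);
  result.resize(ddim.size());             // shrink to the actual rank
  return result;
}

int main() {
  DDim d{{2, 3, 4}, 3};
  auto v64 = vectorize(d);       // std::vector<int64_t>, the default
  auto v32 = vectorize<int>(d);  // the new spelling of vectorize2int
  std::cout << v64.size() << " " << v32[2] << "\n";  // prints "3 4"
}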

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 8 additions & 1 deletion
@@ -87,11 +87,18 @@ cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executo
     DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context)
 cc_test(fused_broadcast_op_test SRCS fused_broadcast_op_handle_test.cc DEPS fused_broadcast_op_handle)
 
+if(WITH_NGRAPH)
+  set(NGRAPH_BS_DEPS ngraph)
+else()
+  set(NGRAPH_BS_DEPS)
+endif()
+
 cc_library(build_strategy SRCS build_strategy.cc DEPS
             graph_viz_pass multi_devices_graph_pass
             multi_devices_graph_print_pass multi_devices_graph_check_pass
             fuse_elewise_add_act_pass multi_batch_merge_pass
             fuse_relu_depthwise_conv_pass
             lock_free_optimize_pass
             coalesce_grad_tensor_pass fuse_all_reduce_op_pass backward_optimizer_op_deps_pass
-            fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass)
+            fuse_adam_op_pass fuse_sgd_op_pass fuse_momentum_op_pass
+            ${NGRAPH_BS_DEPS})

paddle/fluid/framework/details/build_strategy.cc

Lines changed: 22 additions & 0 deletions
@@ -27,6 +27,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h"
 
 DECLARE_bool(use_mkldnn);
+DECLARE_bool(use_ngraph);
 
 namespace paddle {
 namespace framework {
@@ -53,6 +54,8 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
                         "sequential_execution_pass");
     AppendPassWithCheck(strategy_.sync_batch_norm_, "sync_batch_norm_pass");
 
+    AppendPassToUseNgraph("ngraph_subgraph_pass");
+
     AppendOpFusePasses();
     AppendPrintGraphPass("graph_viz_pass", "_fused_graph");
 
@@ -220,6 +223,22 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
 #endif
   }
 
+  void AppendPassToUseNgraph(const std::string &pass_name) {
+#ifdef PADDLE_WITH_NGRAPH
+    if (FLAGS_use_ngraph) {
+      if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kAllReduce) {
+        LOG(WARNING) << "Currently ngraph_subgraph_pass works under AllReduce,"
+                        "please set FLAGS_use_ngraph=false.";
+      } else {
+        AppendPass(pass_name);
+      }
+    }
+#else
+    PADDLE_ENFORCE_NE(FLAGS_use_ngraph, true,
+                      "Please compile with NGRAPH first to use NGRAPH");
+#endif
+  }
+
  private:
  BuildStrategy strategy_;
 };
@@ -360,3 +379,6 @@ USE_PASS(runtime_context_cache_pass);
 #ifdef PADDLE_WITH_MKLDNN
 USE_PASS(mkldnn_placement_pass);
 #endif
+#ifdef PADDLE_WITH_NGRAPH
+USE_PASS(ngraph_subgraph_pass);
+#endif
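
USE_PASS(ngraph_subgraph_pass) forces the pass's registration object to be linked into the binary, and AppendPass later resolves the pass by its string name. A simplified sketch of that name-keyed registry pattern (toy types and registration; not Paddle's actual ir::Pass machinery):

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Pass {
  virtual ~Pass() = default;
  virtual void Run() = 0;
};

// Global name -> factory map; USE_PASS-style statics populate it at startup.
std::map<std::string, std::function<std::unique_ptr<Pass>()>>& Registry() {
  static std::map<std::string, std::function<std::unique_ptr<Pass>()>> r;
  return r;
}

struct NgraphSubgraphPass : Pass {
  void Run() override { std::cout << "rewriting supported subgraphs\n"; }
};

// Stand-in for the object a REGISTER_PASS/USE_PASS pair links in.
const bool kNgraphRegistered = [] {
  Registry()["ngraph_subgraph_pass"] = [] {
    return std::unique_ptr<Pass>(new NgraphSubgraphPass);
  };
  return true;
}();

int main() {
  // Conceptually what AppendPass does: look the pass up by name, then run it.
  if (kNgraphRegistered) Registry().at("ngraph_subgraph_pass")()->Run();
}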
