Fuse AllReduce Operator #11141

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed · wants to merge 54 commits.
Commits (54)

80ad101
Async delete scope
reyoung Jun 1, 2018
39f225a
Start graph builder factory
reyoung Jun 4, 2018
d696964
Refine code
reyoung Jun 4, 2018
a99e6ca
Add fused strategy
reyoung Jun 4, 2018
f4674a5
Init FuseAllReduceGraphBuilder
reyoung Jun 4, 2018
150f45a
Fuse AllReduce Operations
reyoung Jun 4, 2018
594f529
Use unordered_set to save Graph::ops
reyoung Jun 4, 2018
861eafc
Merge branch 'feature/use_unordered_set_for_ops' into perf_tuning
reyoung Jun 4, 2018
526d39e
Complete GetNotDependedAllReduceOp
reyoung Jun 4, 2018
f58f0be
Add debug str
reyoung Jun 5, 2018
3ce37ca
Add FuseVarsOpHandle
chengduoZH Jun 4, 2018
ea79771
Fix bug
reyoung Jun 5, 2018
db0e616
Merge branch 'perf_tuning' of https://github.com/reyoung/Paddle into …
reyoung Jun 5, 2018
4f5a11b
Add FuseVarsOpHandle
chengduoZH Jun 4, 2018
5d802c9
Merge branch 'perf_tuning' of https://github.com/reyoung/Paddle into …
reyoung Jun 5, 2018
c514ec4
small fix
chengduoZH Jun 5, 2018
8527148
Add more log
reyoung Jun 5, 2018
a8bef59
Merge branch 'perf_tuning' of https://github.com/reyoung/Paddle into …
reyoung Jun 5, 2018
0174e91
Consider type of graph
reyoung Jun 5, 2018
732c0de
refine FuseAllReduceOp
chengduoZH Jun 5, 2018
bb1d4ab
small fix
chengduoZH Jun 5, 2018
3847ba4
Add ExtractVariable, InsertVaraible
reyoung Jun 5, 2018
77021b0
Add stub
reyoung Jun 5, 2018
ac2e758
Merge branch 'perf_tuning' of https://github.com/reyoung/Paddle into …
reyoung Jun 5, 2018
9b6988d
Stash
reyoung Jun 5, 2018
3821600
Add FuseVariable
chengduoZH Jun 5, 2018
456dc66
GetFusedGradient complete
reyoung Jun 5, 2018
2e40cf1
Merge branch 'perf_tuning' of https://github.com/reyoung/Paddle into …
reyoung Jun 5, 2018
006f558
Add TODO
reyoung Jun 5, 2018
9552584
Add dependence
chengduoZH Jun 5, 2018
ee8e259
Main logic done
reyoung Jun 6, 2018
f02af59
Fix typo
reyoung Jun 6, 2018
bb348a7
Add mutable API
reyoung Jun 6, 2018
2a4b4ea
Complete logic
reyoung Jun 6, 2018
2de22ab
Fix bug
reyoung Jun 6, 2018
9c15abd
Add Unittest
reyoung Jun 6, 2018
b190c8f
Update source
reyoung Jun 6, 2018
cc3a0f0
Add FusedVarOpHandle
reyoung Jun 6, 2018
91ac02d
Complete unittest
reyoung Jun 6, 2018
975c07c
Revert operator.cc
reyoung Jun 6, 2018
12d714b
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
reyoung Jun 6, 2018
c6bde65
Default is AllReduce
reyoung Jun 6, 2018
63c2fc0
Add in deps
reyoung Jun 6, 2018
4a9474d
small fix
chengduoZH Jun 6, 2018
5eb681f
small fix
chengduoZH Jun 6, 2018
148931f
Fix memleak
reyoung Jun 6, 2018
14f8377
Merge branch 'perf_tuning' of https://github.com/reyoung/Paddle into …
reyoung Jun 6, 2018
ef99aba
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
reyoung Jun 7, 2018
bce6986
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
reyoung Jun 7, 2018
1fa1352
Remove hack code
reyoung Jun 7, 2018
9705ab4
remove duplicate NoDummyInputSize
chengduoZH Jun 11, 2018
6959a8b
refine test_parallel_executor_fused_allreduce.py
chengduoZH Jun 11, 2018
3ae0707
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
chengduoZH Jun 11, 2018
35a057e
test fuse_all_reduce on mnist
chengduoZH Jun 11, 2018
Files changed
paddle/fluid/framework/details/CMakeLists.txt (9 changes: 8 additions & 1 deletion)

```diff
@@ -31,8 +31,15 @@ cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base s
 cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
     scale_loss_grad_op_handle rpc_op_handle ${multi_devices_graph_builder_deps} reduce_op_handle broadcast_op_handle)
 
+if(WITH_GPU)
+  nv_library(fuse_all_reduce_graph_builder SRCS fuse_all_reduce_graph_builder.cc
+          DEPS ssa_graph_builder nccl_all_reduce_op_handle fuse_vars_op_handle)
+  set(graph_builder_factory_deps fuse_all_reduce_graph_builder)
+else()
+  set(graph_builder_factory_deps)
+endif()
+
-cc_library(ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker)
+cc_library(ssa_graph_builder_factory SRCS ssa_graph_builder_factory.cc DEPS multi_devices_graph_builder ssa_graph_printer ssa_graph_checker ${graph_builder_factory_deps})
 
 cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto)
 cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
```
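The fusing builder is compiled only under WITH_GPU because it depends on nccl_all_reduce_op_handle, which requires NCCL; on CPU-only builds the factory's extra dependency list stays empty. Below is a self-contained sketch of the selection pattern this sets up. All class names and the CreateBuilder signature are illustrative stand-ins, not the PR's actual factory code (which lives in ssa_graph_builder_factory.cc); kFusedBit reuses the value from the build_strategy.h diff further down.

```cpp
#include <cstdint>
#include <memory>
#include <utility>

// Illustrative stand-ins only -- not Paddle's real classes or signatures.
struct SSAGraphBuilder {
  virtual ~SSAGraphBuilder() = default;
};
struct MultiDevSSAGraphBuilder : SSAGraphBuilder {};

// One plausible shape for the fusing builder: wrap an inner builder and
// post-process its graph to merge the per-parameter AllReduce operations.
struct FuseAllReduceGraphBuilder : SSAGraphBuilder {
  explicit FuseAllReduceGraphBuilder(std::unique_ptr<SSAGraphBuilder> inner)
      : inner_(std::move(inner)) {}
  std::unique_ptr<SSAGraphBuilder> inner_;
};

constexpr uint16_t kFusedBit = 0x0100;  // same value as in build_strategy.h

std::unique_ptr<SSAGraphBuilder> CreateBuilder(uint16_t reduce_strategy) {
  std::unique_ptr<SSAGraphBuilder> builder =
      std::make_unique<MultiDevSSAGraphBuilder>();
#ifdef PADDLE_WITH_CUDA  // mirrors the WITH_GPU guard in CMakeLists.txt
  if (reduce_strategy & kFusedBit) {
    // Decorate the base builder so AllReduce ops get fused after graph build.
    builder = std::make_unique<FuseAllReduceGraphBuilder>(std::move(builder));
  }
#endif
  return builder;
}
```

The decorator shape keeps the CPU path untouched: when PADDLE_WITH_CUDA is not defined, the fused bit is simply ignored and the plain multi-device builder is returned.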
paddle/fluid/framework/details/build_strategy.h (18 changes: 17 additions & 1 deletion)

```diff
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #pragma once
+#include <stdint.h>
 
 #include <string>
 
@@ -21,7 +22,22 @@ namespace framework {
 namespace details {
 
 struct BuildStrategy {
-  enum class ReduceStrategy { kAllReduce = 0, kReduce = 1 };
+  enum class ReduceStrategy {
+    kAllReduce = 0x0000,
+    kReduce = 0x0001,
+
+    kOperationMask = 0x00FF,
+    kFusedBit = 0x0100,
+
+    kFusedAllReduce = kFusedBit | kAllReduce,
+    kFusedReduce = kFusedBit | kReduce,
+  };
+
+  ReduceStrategy ReduceOperation() const {
+    return static_cast<ReduceStrategy>(
+        static_cast<uint16_t>(reduce_) &
+        static_cast<uint16_t>(ReduceStrategy::kOperationMask));
+  }
 
   enum class GradientScaleStrategy {
     kCoeffNumDevice = 0,
```
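The new encoding packs two independent facts into one enum value: the low byte (kOperationMask) selects which reduce operation to run, and kFusedBit records whether the per-parameter AllReduce ops should be fused into one, so ReduceOperation() recovers the base strategy by masking. A minimal standalone demo of that arithmetic follows; the enum values are copied from the diff above, and the free function stands in for the ReduceOperation() member:

```cpp
#include <cstdint>
#include <iostream>

// Same values as the ReduceStrategy enum above; standalone for the demo.
enum class ReduceStrategy : uint16_t {
  kAllReduce = 0x0000,
  kReduce = 0x0001,
  kOperationMask = 0x00FF,  // low byte selects the reduce operation
  kFusedBit = 0x0100,       // high bit requests fused execution
  kFusedAllReduce = kFusedBit | kAllReduce,  // 0x0100
  kFusedReduce = kFusedBit | kReduce,        // 0x0101
};

// Mask off the fused bit, leaving the base operation; this mirrors
// BuildStrategy::ReduceOperation() as a free function.
ReduceStrategy ReduceOperation(ReduceStrategy s) {
  return static_cast<ReduceStrategy>(
      static_cast<uint16_t>(s) &
      static_cast<uint16_t>(ReduceStrategy::kOperationMask));
}

int main() {
  ReduceStrategy s = ReduceStrategy::kFusedAllReduce;
  // Graph-building code can ask two separate questions of one value:
  bool is_all_reduce = ReduceOperation(s) == ReduceStrategy::kAllReduce;
  bool is_fused = (static_cast<uint16_t>(s) &
                   static_cast<uint16_t>(ReduceStrategy::kFusedBit)) != 0;
  std::cout << std::boolalpha << is_all_reduce << " " << is_fused << "\n";
  // prints: true true
}
```

Because kFusedAllReduce masked with kOperationMask equals kAllReduce, existing code that only switches on the base operation keeps working when a user opts into fusion.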