Skip to content

Commit 1907345

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into zyf_slice
2 parents deff69c + 9d985ca commit 1907345

File tree

63 files changed

+2144
-554
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

63 files changed

+2144
-554
lines changed

cmake/generic.cmake

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -932,12 +932,8 @@ function(generate_dummy_static_lib)
932932
if(NOT dummy_GENERATOR)
933933
message(FATAL_ERROR "You must provide a generator file name.")
934934
endif()
935-
# if ${dummy_GENERATOR} contains "/", it may be a file path
936-
if(NOT ${dummy_GENERATOR} MATCHES ".*/.*")
937-
set(dummy_GENERATOR "${CMAKE_CURRENT_LIST_DIR}/${dummy_GENERATOR}")
938-
endif()
939935
if(NOT dummy_CONTENT)
940-
set(dummy_CONTENT "${dummy_FILE_PATH} for lib ${dummy_LIB_NAME}")
936+
set(dummy_CONTENT "${dummy_LIB_NAME}_dummy.c for lib ${dummy_LIB_NAME}")
941937
endif()
942938

943939
configure_file(${PROJECT_SOURCE_DIR}/cmake/dummy.c.in ${dummy_FILE_PATH} @ONLY)

cmake/unity_build.cmake

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,14 @@ function(compose_unity_target_sources TARGET TYPE)
7777
get_property(unity_group_index_max GLOBAL PROPERTY ${TARGET}_${TYPE}_group_index)
7878
foreach(src ${ARGN})
7979
set(unity_file "")
80-
# UB use absolute path of source.
80+
# Note(zhouwei25): UB uses the path relative to CMAKE_SOURCE_DIR.
81+
# If an absolute path is used, the sccache/ccache hit rate will be reduced.
8182
if(IS_ABSOLUTE ${src})
8283
set(src_absolute_path ${src})
84+
file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src})
8385
else()
8486
set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
87+
file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src_absolute_path})
8588
endif()
8689
# If `unity_group_index_max` is empty, there is no combination
8790
# relationship.
@@ -106,7 +109,7 @@ function(compose_unity_target_sources TARGET TYPE)
106109
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} ${UNITY_CU_BEFORE_CODE})
107110
endif()
108111
endif()
109-
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_absolute_path}\"")
112+
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources} "#include \"${src_relative_path}\"")
110113
set(unity_target_sources ${unity_target_sources} ${unity_file})
111114
break()
112115
endif()

paddle/fluid/distributed/service/communicator.h

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -68,31 +68,62 @@ class BlockingQueue {
6868
}
6969

7070
bool Push(const T &elem) {
71-
{
72-
std::unique_lock<std::mutex> lock(mutex_);
73-
cv_.wait(lock, [&] { return queue_.size() < capacity_; });
74-
queue_.push_back(elem);
71+
std::unique_lock<std::mutex> lock(mutex_);
72+
WaitForWrite(lock);
73+
74+
queue_.push_back(elem);
75+
76+
Notify();
77+
return true;
78+
}
79+
bool WaitForWrite(std::unique_lock<std::mutex> &lock) { // NOLINT
80+
while (FullUnlocked()) {
81+
if (empty_waiters_ != 0) {
82+
empty_cond_.notify_one();
83+
}
84+
full_waiters_++;
85+
full_cond_.wait(lock);
86+
full_waiters_--;
7587
}
76-
cv_.notify_one();
7788
return true;
7889
}
79-
80-
bool Push(T &&elem) {
81-
{
82-
std::unique_lock<std::mutex> lock(mutex_);
83-
cv_.wait(lock, [&] { return queue_.size() < capacity_; });
84-
queue_.emplace_back(std::move(elem));
90+
bool WaitForRead(std::unique_lock<std::mutex> &lock) { // NOLINT
91+
while (EmptyUnlocked()) {
92+
if (full_waiters_ != 0) {
93+
full_cond_.notify_one();
94+
}
95+
empty_waiters_++;
96+
empty_cond_.wait(lock);
97+
empty_waiters_--;
8598
}
86-
cv_.notify_one();
8799
return true;
88100
}
101+
bool EmptyUnlocked() { return queue_.empty(); }
102+
103+
bool FullUnlocked() { return queue_.size() >= capacity_; }
104+
void Notify() {
105+
if (empty_waiters_ != 0 && (!EmptyUnlocked())) {
106+
empty_cond_.notify_one();
107+
}
108+
if (full_waiters_ != 0 && (!FullUnlocked())) {
109+
full_cond_.notify_one();
110+
}
111+
}
112+
113+
bool Push(T &&elem) {
114+
std::unique_lock<std::mutex> lock(mutex_);
115+
WaitForWrite(lock);
116+
queue_.emplace_back(std::move(elem));
89117

118+
Notify();
119+
return true;
120+
}
90121
T Pop() {
91122
std::unique_lock<std::mutex> lock(mutex_);
92-
cv_.wait(lock, [=] { return !queue_.empty(); });
123+
WaitForRead(lock);
93124
T rc(std::move(queue_.front()));
94125
queue_.pop_front();
95-
cv_.notify_one();
126+
Notify();
96127
return rc;
97128
}
98129

@@ -107,11 +138,14 @@ class BlockingQueue {
107138
}
108139

109140
private:
141+
int empty_waiters_ = 0;
142+
int full_waiters_ = 0;
143+
std::condition_variable empty_cond_;
144+
std::condition_variable full_cond_;
110145
const size_t capacity_;
111146
std::deque<T> queue_;
112147

113148
mutable std::mutex mutex_;
114-
std::condition_variable cv_;
115149
};
116150

117151
template <typename T, int MajorType = Eigen::RowMajor,

paddle/fluid/framework/distributed_strategy.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ message DistributedStrategy {
188188
optional bool find_unused_parameters = 28 [ default = false ];
189189
optional bool tensor_parallel = 29 [ default = false ];
190190
optional bool without_graph_optimization = 30 [ default = false ];
191-
optional int32 fuse_grad_size_in_num = 31 [ default = 1 ];
191+
optional int32 fuse_grad_size_in_num = 31 [ default = 8 ];
192192
optional bool calc_comm_same_stream = 32 [ default = false ];
193193
optional bool asp = 33 [ default = false ];
194194

paddle/fluid/framework/ir/graph.cc

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ limitations under the License. */
1717
#include "paddle/fluid/framework/ir/graph.h"
1818
#include "paddle/fluid/framework/operator.h"
1919

20-
DEFINE_bool(convert_all_blocks, false,
20+
DEFINE_bool(convert_all_blocks, true,
2121
"Convert all blocks in program into SSAgraphs");
2222

2323
namespace paddle {
@@ -56,10 +56,12 @@ Graph::Graph(const ProgramDesc &program, const int64_t start_op_index,
5656
// sub_graph.
5757
std::unique_ptr<Graph> first_sub_graph = std::make_unique<Graph>(
5858
program_.Block(0), this, start_op_index, end_op_index);
59+
first_sub_graph->block_id_ = 0;
5960
sub_graphs_.push_back(std::move(first_sub_graph));
6061
for (size_t idx = 1; idx < program_.Size(); ++idx) {
6162
std::unique_ptr<Graph> sub_graph =
6263
std::make_unique<Graph>(program_.Block(idx), this);
64+
sub_graph->block_id_ = idx;
6365
sub_graphs_.push_back(std::move(sub_graph));
6466
}
6567
} else {
@@ -90,14 +92,32 @@ std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
9092
std::map<std::string, std::vector<ir::Node *>> Graph::InitFromBlock(
9193
const BlockDesc &block, const int64_t start_op_index,
9294
const int64_t end_op_index) {
93-
std::unordered_map<std::string, VarDesc *> all_vars;
95+
std::unordered_map<std::string, std::pair<VarDesc *, int>>
96+
name_to_desc_block_id;
97+
98+
const BlockDesc *block_var_visible = &block;
99+
while (block_var_visible != nullptr) {
100+
for (auto *var : block_var_visible->AllVars()) {
101+
name_to_desc_block_id.emplace(
102+
var->Name(), std::make_pair(var, block_var_visible->ID()));
103+
}
104+
const BlockDesc *forward_block = block_var_visible->ForwardBlock();
105+
if (forward_block != nullptr) {
106+
for (auto *var : forward_block->AllVars()) {
107+
name_to_desc_block_id.emplace(var->Name(),
108+
std::make_pair(var, forward_block->ID()));
109+
}
110+
}
111+
block_var_visible = block_var_visible->ParentBlock();
112+
}
94113
// var nodes for each var name, will have multiple versions in SSA
95114
std::map<std::string, std::vector<ir::Node *>> var_nodes;
115+
std::unordered_map<std::string, VarDesc *> not_visited_vars;
96116
for (auto *var : block.AllVars()) {
97-
all_vars.emplace(var->Name(), var);
117+
not_visited_vars.emplace(var->Name(), var);
98118
}
99119

100-
auto not_visited_vars = all_vars;
120+
int desc_order = 0;
101121
auto all_ops = block.AllOps();
102122
PADDLE_ENFORCE_LE(
103123
end_op_index, all_ops.size(),
@@ -109,15 +129,18 @@ std::map<std::string, std::vector<ir::Node *>> Graph::InitFromBlock(
109129
auto *op = all_ops[i];
110130
VLOG(3) << "create OpNode by " << op->Type();
111131
ir::Node *node = CreateOpNode(op);
132+
node->SetDescOrder(desc_order);
133+
++desc_order;
112134
// For input args, reuse the same var name if it was created before.
113135
// Otherwise, create a new one.
114136
for (auto &each_var_name : op->InputArgumentNames()) {
115137
not_visited_vars.erase(each_var_name);
116138
ir::Node *var = nullptr;
117139
if (var_nodes.find(each_var_name) != var_nodes.end()) {
118140
var = var_nodes.at(each_var_name).back();
119-
} else if (all_vars.count(each_var_name) != 0) {
120-
var = CreateVarNode(all_vars.at(each_var_name));
141+
} else if (name_to_desc_block_id.count(each_var_name) != 0) {
142+
auto desc_and_block_id = name_to_desc_block_id.at(each_var_name);
143+
var = CreateVarNode(desc_and_block_id.first, desc_and_block_id.second);
121144
var_nodes[each_var_name].push_back(var);
122145
} else {
123146
// Operation input var can be optional (dispensable). Which means
@@ -143,8 +166,9 @@ std::map<std::string, std::vector<ir::Node *>> Graph::InitFromBlock(
143166
}
144167

145168
ir::Node *var = nullptr;
146-
if (all_vars.count(each_var_name) != 0) {
147-
var = CreateVarNode(all_vars.at(each_var_name));
169+
if (name_to_desc_block_id.count(each_var_name) != 0) {
170+
auto desc_and_block_id = name_to_desc_block_id.at(each_var_name);
171+
var = CreateVarNode(desc_and_block_id.first, desc_and_block_id.second);
148172
} else {
149173
// Operation output vars can be @EMPTY@. For example, while_grad
150174
// can have multi @EMPTY@ outputs with no VarDesc.
@@ -270,6 +294,7 @@ std::shared_ptr<Graph> Graph::Clone() {
270294
auto cloned_graph = std::make_shared<Graph>(this->program_);
271295
cloned_graph->ReleaseNodes();
272296
cloned_graph->num_node_created_ = 0;
297+
cloned_graph->block_id_ = this->block_id_;
273298
std::unordered_map<ir::Node *, ir::Node *> origin_to_cloned;
274299
for (auto *n : this->node_set_) {
275300
PADDLE_ENFORCE_NOT_NULL(n, platform::errors::InvalidArgument(
@@ -313,6 +338,7 @@ std::unique_ptr<Graph> Graph::CloneSubGraph(const size_t idx) {
313338
std::make_unique<Graph>(this->program_.Block(idx), this);
314339
cloned_sub_graph->ReleaseNodes();
315340
cloned_sub_graph->num_node_created_ = 0;
341+
cloned_sub_graph->block_id_ = idx;
316342
std::unordered_map<ir::Node *, ir::Node *> origin_to_cloned;
317343
for (auto *n : this->sub_graphs_.at(idx)->Nodes()) {
318344
PADDLE_ENFORCE_NOT_NULL(n, platform::errors::InvalidArgument(

paddle/fluid/framework/ir/graph.h

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,14 @@ class Graph {
104104
attr_dels_.clear();
105105
}
106106

107-
bool IsConstructedByPartialProgram() const { return is_partial_; }
107+
bool IsConstructedByPartialProgram() const {
108+
if (FLAGS_convert_all_blocks) {
109+
if (IsMainGraph()) {
110+
return GetSubGraph(0)->IsConstructedByPartialProgram();
111+
}
112+
}
113+
return is_partial_;
114+
}
108115

109116
bool Has(const std::string &attr_name) const {
110117
if (FLAGS_convert_all_blocks) {
@@ -210,7 +217,7 @@ class Graph {
210217
}
211218

212219
// Create a normal variable with non-null VarDesc.
213-
ir::Node *CreateVarNode(VarDesc *var_desc) {
220+
ir::Node *CreateVarNode(VarDesc *var_desc, int block_id = -1) {
214221
if (FLAGS_convert_all_blocks) {
215222
if (IsMainGraph()) {
216223
return GetSubGraph(0)->CreateVarNode(var_desc);
@@ -219,7 +226,8 @@ class Graph {
219226
PADDLE_ENFORCE_NOT_NULL(
220227
var_desc, platform::errors::InvalidArgument(
221228
"The VarDesc used to create variable node is null."));
222-
auto *x = AddNode(new ir::Node(var_desc));
229+
auto *x =
230+
AddNode(new ir::Node(var_desc, block_id == -1 ? block_id_ : block_id));
223231
x->SetId(num_node_created_++);
224232
return x;
225233
}
@@ -252,7 +260,7 @@ class Graph {
252260
const std::string name = string::Sprintf(
253261
"%s@%llu", static_cast<const char *>(ir::Node::kControlDepVarName),
254262
num_node_created_);
255-
auto *x = AddNode(new ir::Node(name, ir::Node::Type::kVariable));
263+
auto *x = AddNode(new ir::Node(name, ir::Node::Type::kVariable, block_id_));
256264
x->SetId(num_node_created_++);
257265
return x;
258266
}
@@ -265,7 +273,7 @@ class Graph {
265273
return GetSubGraph(0)->CreateEmptyNode(name, type);
266274
}
267275
}
268-
auto *x = AddNode(new ir::Node(name, type));
276+
auto *x = AddNode(new ir::Node(name, type, block_id_));
269277
x->SetId(num_node_created_++);
270278
return x;
271279
}
@@ -365,6 +373,15 @@ class Graph {
365373
return sub_graphs_.at(idx).get();
366374
}
367375

376+
int GetBlockId() const {
377+
if (FLAGS_convert_all_blocks) {
378+
if (IsMainGraph()) {
379+
return GetSubGraph(0)->block_id_;
380+
}
381+
}
382+
return block_id_;
383+
}
384+
368385
size_t SubGraphsSize() const {
369386
PADDLE_ENFORCE_EQ(
370387
this->IsMainGraph(), true,
@@ -394,6 +411,9 @@ class Graph {
394411
PADDLE_ENFORCE_EQ(
395412
this->IsMainGraph(), true,
396413
platform::errors::InvalidArgument("This graph is not main_graph"));
414+
PADDLE_ENFORCE_EQ(sub_graphs_.size(), sub_graph->block_id_,
415+
platform::errors::InvalidArgument(
416+
"sub_graph idx is not equal to block_id_"));
397417
sub_graphs_.push_back(std::move(sub_graph));
398418
}
399419

@@ -416,6 +436,8 @@ class Graph {
416436
// parts: forward graph and backward graph, which can be executed
417437
// independently.
418438
bool is_partial_{false};
439+
// The block this SubGraph belongs to.
440+
int block_id_{0};
419441
};
420442

421443
bool IsControlDepVar(const ir::Node &var);

0 commit comments

Comments
 (0)