Skip to content

Commit 13ca9c0

Browse files
committed
add more log
1 parent bc082c7 commit 13ca9c0

File tree

2 files changed

+23 -10 lines changed

2 files changed

+23 -10 lines changed

paddle/fluid/operators/distributed/brpc_client.cc

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ void HandleSendResponse(brpc::Controller* cntl, sendrecv::VoidMessage* response,
4747
}
4848
var_h->Finish(true);
4949

50-
VLOG(4) << "Received SendResponse from: " << cntl->remote_side()
50+
VLOG(4) << "HandleSendResponse from: " << cntl->remote_side()
5151
<< ", varname: " << var_h->name()
5252
<< ", latency: " << cntl->latency_us() << "us";
5353
VLOG(4) << "Finish HandleSendResponse";
@@ -113,7 +113,7 @@ void HandleFetchBarrierResponse(brpc::Controller* cntl,
113113
}
114114

115115
var_h->Finish(true);
116-
VLOG(4) << "Received HandleFetchBarrierResponse from: " << cntl->remote_side()
116+
VLOG(4) << "HandleFetchBarrierResponse from: " << cntl->remote_side()
117117
<< ", varname: " << var_h->name()
118118
<< ", latency: " << cntl->latency_us() << "us";
119119
VLOG(4) << "Finish HandleFetchBarrierResponse";
@@ -130,14 +130,14 @@ void HandleGetResponse(brpc::Controller* cntl,
130130
ch_ptr->Push(ch_ctx);
131131

132132
if (cntl->Failed()) {
133-
LOG(FATAL) << "Fail to send SendVar: " << var_h->name()
133+
LOG(FATAL) << "Fail to GetVar: " << var_h->name()
134134
<< ", error text: " << cntl->ErrorText();
135135
cls->DecreaseReqCount();
136136
var_h->Finish(false);
137137
return;
138138
}
139139

140-
VLOG(4) << "Received SendResponse from: " << cntl->remote_side()
140+
VLOG(4) << "HandleGetResponse from: " << cntl->remote_side()
141141
<< ", varname: " << var_h->name()
142142
<< ", latency: " << cntl->latency_us() << "us";
143143

paddle/fluid/operators/distributed/brpc_server.cc

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ class BRPCServiceImpl : public SendRecvService {
7272
brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_butil);
7373

7474
std::string varname = request->varname();
75-
VLOG(3) << "RequestSend var_name:" << varname;
75+
VLOG(3) << "RequestSend var_name:" << varname
76+
<< ", trainer_id:" << request->trainer_id()
77+
<< ", from:" << cntl->remote_side();
7678

7779
distributed::BRPCVariableResponse resp(request_send_h_->scope(),
7880
request_send_h_->dev_ctx(),
@@ -98,7 +100,9 @@ class BRPCServiceImpl : public SendRecvService {
98100
brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_butil);
99101

100102
std::string varname = request->varname();
101-
VLOG(3) << "RequestGet " << varname;
103+
VLOG(3) << "RequestGet varname:" << varname
104+
<< ", trainer_id:" << request->trainer_id()
105+
<< ", from:" << cntl->remote_side();
102106

103107
auto scope = request_get_h_->scope();
104108
auto invar = scope->FindVar(varname);
@@ -128,7 +132,9 @@ class BRPCServiceImpl : public SendRecvService {
128132
std::string in_var_name = request->varname();
129133
std::string out_var_name = request->out_varname();
130134
VLOG(3) << "RequestPrefetch, in_var_name: " << in_var_name
131-
<< ", out_var_name: " << out_var_name;
135+
<< ", out_var_name: " << out_var_name
136+
<< ", trainer_id:" << request->trainer_id()
137+
<< ", from:" << cntl->remote_side();
132138

133139
distributed::BRPCVariableResponse resp(
134140
request_prefetch_h_->scope(), request_prefetch_h_->dev_ctx(), true);
@@ -158,6 +164,7 @@ class BRPCServiceImpl : public SendRecvService {
158164
"kRequestCheckpointNotify handler should be registed first!");
159165

160166
brpc::ClosureGuard done_guard(done);
167+
brpc::Controller* cntl = static_cast<brpc::Controller*>(cntl_butil);
161168

162169
distributed::BRPCVariableResponse resp(request_checkpoint_h_->scope(),
163170
request_checkpoint_h_->dev_ctx());
@@ -169,7 +176,9 @@ class BRPCServiceImpl : public SendRecvService {
169176
int trainer_id = request->trainer_id();
170177

171178
VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify
172-
<< ", dir: " << checkpoint_dir;
179+
<< ", dir: " << checkpoint_dir
180+
<< ", trainer_id:" << request->trainer_id()
181+
<< ", from:" << cntl->remote_side();
173182

174183
request_checkpoint_h_->Handle(checkpoint_notify, scope, nullptr, nullptr,
175184
trainer_id, checkpoint_dir);
@@ -188,7 +197,9 @@ class BRPCServiceImpl : public SendRecvService {
188197

189198
// proc request.
190199
std::string varname = request->varname();
191-
VLOG(3) << "GetMonomerVariable " << varname;
200+
VLOG(3) << "GetMonomerVariable " << varname
201+
<< ", trainer_id:" << request->trainer_id()
202+
<< ", from:" << cntl->remote_side();
192203

193204
rpc_server_->WaitVarCond(varname);
194205
distributed::MonomerHandle h = rpc_server_->GetMonomer(varname);
@@ -215,7 +226,9 @@ class BRPCServiceImpl : public SendRecvService {
215226
brpc::ClosureGuard done_guard(done);
216227

217228
std::string varname = request->varname();
218-
VLOG(3) << "RequestGetMonomerBarrier var_name:" << varname;
229+
VLOG(3) << "RequestGetMonomerBarrier var_name:" << varname
230+
<< ", trainer_id:" << request->trainer_id()
231+
<< ", from:" << cntl->remote_side();
219232

220233
rpc_server_->WaitVarCond(varname);
221234
distributed::MonomerHandle h = rpc_server_->GetMonomer(varname);

0 commit comments

Comments
 (0)