@@ -155,13 +155,9 @@ void ParallelExecutor::BCastParamsToGPUs(
155155#endif
156156}
157157
158- void ParallelExecutor::Run (
159- const std::vector<std::string> &fetch_tensors,
160- const std::string &fetched_var_name,
161- const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
158+ void ParallelExecutor::Run (const std::vector<std::string> &fetch_tensors,
159+ const std::string &fetched_var_name) {
162160 platform::RecordBlock b (0 );
163- SplitTensorToPlaces (feed_tensors);
164-
165161 // Create local scopes.
166162 for (auto &scope : member_->local_scopes_ ) {
167163 Scope &local_scope = scope->NewScope ();
@@ -195,14 +191,28 @@ void ParallelExecutor::Run(
195191 auto &local_scope =
196192 *scope->Var (details::kLocalExecScopeName )->GetMutable <Scope *>();
197193 scope->DeleteScope (local_scope);
198- local_scope = nullptr ;
199194 }
200195}
201196
202- void ParallelExecutor::SplitTensorToPlaces (
203- const std::unordered_map<std::string, LoDTensor> &feed_tensors) {
204- for (auto it : feed_tensors) {
205- auto lod_tensors = it.second .SplitLoDTensor (member_->places_ );
// Feeds pre-split inputs into the executor's local scopes: the i-th map in
// `tensors` is written into member_->local_scopes_[i].
//
// tensors: one map per local scope, keyed by variable name, holding the
//          LoDTensor to expose under that name in the matching scope.
//
// The number of maps must equal the number of local scopes; this is checked
// with PADDLE_ENFORCE_EQ before anything is written.
197+ void ParallelExecutor::FeedTensorsIntoLocalScopes (
198+ const std::vector<std::unordered_map<std::string, LoDTensor>> &tensors) {
199+ PADDLE_ENFORCE_EQ (member_->local_scopes_ .size (), tensors.size ());
200+
201+ for (size_t i = 0 ; i < tensors.size (); ++i) {
202+ auto &map = tensors[i];  // inputs destined for the i-th scope
203+ auto *scope = member_->local_scopes_ [i];
204+ for (auto &pair : map) {  // pair.first: variable name, pair.second: tensor
205+ auto *trg = scope->Var (pair.first )->GetMutable <LoDTensor>();  // NOTE(review): presumably Var() creates the variable if absent -- confirm
206+ trg->ShareDataWith (pair.second );  // NOTE(review): presumably aliases the source buffer rather than copying -- confirm
207+ trg->set_lod (pair.second .lod ());  // carry the source tensor's LoD over to the target
208+ }
209+ }
210+ }
211+
212+ void ParallelExecutor::FeedAndSplitTensorIntoLocalScopes (
213+ const std::unordered_map<std::string, LoDTensor> &tensors) {
214+ for (auto pair : tensors) {
215+ auto lod_tensors = pair.second .SplitLoDTensor (member_->places_ );
206216 PADDLE_ENFORCE_EQ (
207217 member_->places_ .size (), lod_tensors.size (),
208218 " The number of samples of current batch is less than the count of "
@@ -211,7 +221,7 @@ void ParallelExecutor::SplitTensorToPlaces(
211221 for (size_t j = 0 ; j < member_->places_ .size (); ++j) {
212222 // TODO(panxy0718): Do I need to delete this var?
213223 auto t =
214- member_->local_scopes_ [j]->Var (it .first )->GetMutable <LoDTensor>();
224+ member_->local_scopes_ [j]->Var (pair .first )->GetMutable <LoDTensor>();
215225 t->ShareDataWith (lod_tensors[j]);
216226 t->set_lod (lod_tensors[j].lod ());
217227 }
0 commit comments