@@ -142,10 +142,9 @@ public:
     RepartDistMatrix &operator=(const RepartDistMatrix &other)
     {
         if (&other != this) {
-            std::cout << __FILE__ << __LINE__ << " WARN issued a copy of RepartDistMatrix\n";
-            // FatalErrorInFunction << "Copying the RepartDistMatrix is disallowed "
-            //     "for performance reasons"
-            //     << abort(FatalError);
+            // FatalErrorInFunction << "Copying the RepartDistMatrix is disallowed "
+            //     "for performance reasons"
+            //     << abort(FatalError);
             gko::experimental::EnableDistributedLinOp<
                 RepartDistMatrix>::operator=(std::move(other));
             this->dist_mtx_ = other.dist_mtx_;
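Note on the hunk above: the runtime `std::cout` warning is dropped, but the `FatalErrorInFunction` guard stays commented out, so copies are still silently permitted. If copying really is disallowed for performance reasons, deleting the copy operations enforces that at compile time. A minimal sketch of that alternative, assuming moves should remain legal (illustration only, not the patch author's code):

```cpp
#include <utility>

// Sketch: forbid copies at compile time instead of warning at runtime.
class RepartDistMatrix {
public:
    RepartDistMatrix() = default;

    // Any attempt to copy becomes a compile error rather than a log line.
    RepartDistMatrix(const RepartDistMatrix &) = delete;
    RepartDistMatrix &operator=(const RepartDistMatrix &) = delete;

    // Moves stay cheap and allowed.
    RepartDistMatrix(RepartDistMatrix &&) = default;
    RepartDistMatrix &operator=(RepartDistMatrix &&) = default;
};

int main()
{
    RepartDistMatrix a;
    RepartDistMatrix b{std::move(a)};  // OK: move is defaulted
    // RepartDistMatrix c{b};          // would not compile: copy is deleted
    return 0;
}
```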
@@ -197,13 +196,15 @@ public:
 
         // create original communicator pattern
         auto ranks_per_gpu = repartitioner.get_ranks_per_gpu();
+        label rank{comm.rank()};
+
         auto src_comm_pattern =
             host_A.get()->create_communication_pattern(exec_handler);
         // create partition here and pass to constructor
+        //
         auto dst_comm_pattern = repartitioner.repartition_comm_pattern(
             exec_handler, src_comm_pattern, orig_partition);
 
-        label rank{src_comm_pattern->get_comm().rank()};
         label owner_rank = repartitioner.get_owner_rank(exec_handler);
         bool owner = repartitioner.is_owner(exec_handler);
 
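Note on the hunk above: hoisting `rank` and reading it from `comm` directly is behavior-preserving as long as `src_comm_pattern->get_comm()` wraps the same MPI communicator, because a process's rank is fixed for the lifetime of a communicator. A toy sketch of the query path, assuming Ginkgo's `gko::experimental::mpi` wrapper and a build with MPI support:

```cpp
#include <ginkgo/ginkgo.hpp>
#include <iostream>

int main(int argc, char *argv[])
{
    // RAII wrapper around MPI_Init/MPI_Finalize.
    gko::experimental::mpi::environment env(argc, argv);
    gko::experimental::mpi::communicator comm(MPI_COMM_WORLD);

    // The rank is a property of the communicator itself, so it can be
    // queried before or after any communication pattern is built.
    const auto rank = comm.rank();
    std::cout << "rank " << rank << " of " << comm.size() << "\n";
    return 0;
}
```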
@@ -214,37 +215,17 @@ public:
         local_sparsity_ = repart_loc_sparsity;
         non_local_sparsity_ = repart_non_loc_sparsity;
 
-        // std::cout << __FILE__ << " rank " << rank
-        //           << " build_localized_partition "
-        //           << " dim " << local_sparsity_->dim[0] << " send idxs size "
-        //           << dst_comm_pattern.send_idxs.size() << " target ids "
-        //           << dst_comm_pattern.target_ids << " target sizes "
-        //           << dst_comm_pattern.target_sizes << "\n";
-
         auto localized_partition = local_part_type::build_from_blocked_recv(
             exec, local_sparsity_->dim[0], dst_comm_pattern->send_idxs,
             dst_comm_pattern->target_ids, dst_comm_pattern->target_sizes);
 
-        // std::cout << __FILE__ << " rank " << rank << " local sparsity size "
-        //           << local_sparsity_->size_ << " local sparsity dim ["
-        //           << local_sparsity_->dim[0] << "x" << local_sparsity_->dim[1]
-        //           << "] non_local sparsity size " << non_local_sparsity_->size_
-        //           << " non local sparsity dim [" << non_local_sparsity_->dim[0]
-        //           << "x" << non_local_sparsity_->dim[1] << "] target_ids "
-        //           << dst_comm_pattern->target_ids << " target_sizes "
-        //           << dst_comm_pattern->target_sizes << " target_send_idxs.size "
-        //           << dst_comm_pattern->send_idxs.size()
-        //           << " non_local_sparsity.size " << non_local_sparsity_->size_
-        //           << " get_recv_indices "
-        //           << localized_partition->get_recv_indices().get_num_elems()
-        //           << " \n";
-
         auto sparse_comm =
             sparse_communicator::create(comm, localized_partition);
 
         auto device_exec = exec_handler.get_device_exec();
         auto dist_A = gko::share(generate_dist_mtx_with_inner_type<dist_mtx>(
-            matrix_format, exec, sparse_comm, local_sparsity_,
+            matrix_format, device_exec, sparse_comm, local_sparsity_,
             non_local_sparsity_));
 
         auto local_coeffs = gko::array<scalar>(exec, local_sparsity_->size_);
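Note on the hunk above: the one-word fix (`exec` → `device_exec`) controls where the distributed matrix is assembled. A minimal sketch of the executor distinction, assuming a CUDA build; the executor types and `gko::array` are Ginkgo's public API, while `generate_dist_mtx_with_inner_type` is project-specific and not reproduced here:

```cpp
#include <ginkgo/ginkgo.hpp>

int main()
{
    // Host-side executor: kernels run on the CPU.
    auto host_exec = gko::ReferenceExecutor::create();

    // Device executor: kernels and memory live on GPU 0, with
    // host_exec as the associated master executor for staging.
    auto device_exec = gko::CudaExecutor::create(0, host_exec);

    // An array created on device_exec is allocated in GPU memory;
    // the same call with host_exec would allocate on the host. The
    // executor passed at construction is what decides this.
    auto coeffs = gko::array<double>(device_exec, 128);
    coeffs.fill(0.0);
    return 0;
}
```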
@@ -257,6 +238,23 @@ public:
             non_local_coeffs.fill(0.0);
         }
 
+
+        // std::cout << __FILE__ << " rank " << rank << "\n\tlocal sparsity size "
+        //           << local_sparsity_->size_ << " dim ["
+        //           << local_sparsity_->dim[0] << "x" << local_sparsity_->dim[1]
+        //           << "]\n\tnon_local sparsity size " << non_local_sparsity_->size_
+        //           << " dim [" << non_local_sparsity_->dim[0]
+        //           << "x" << non_local_sparsity_->dim[1] << "]\n\tcomm pattern:\n"
+        //           << "\ttarget_ids: "
+        //           << dst_comm_pattern->target_ids << "\n\ttarget_sizes: "
+        //           << dst_comm_pattern->target_sizes << "\n\ttarget_send_idxs.size "
+        //           << dst_comm_pattern->send_idxs.size()
+        //           << "\n\tget_recv_indices "
+        //           << localized_partition->get_recv_indices().get_num_elems()
+        //           << "\n\tget_send_indices "
+        //           << localized_partition->get_send_indices().get_num_elems()
+        //           << "\n";
+
         // FIXME make sure that we work on the device executor
         dist_A->read_distributed(
             device_matrix_data(exec, local_sparsity_->dim,
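Note on the (truncated) `read_distributed` call above: it ingests a `device_matrix_data` buffer built on `exec`, which is why the FIXME about the device executor matters. A single-process sketch of the same ingestion path on a plain `Csr` matrix, using only Ginkgo public API; the distributed overload additionally takes the row partition (toy 2x2 values for illustration only):

```cpp
#include <ginkgo/ginkgo.hpp>

int main()
{
    auto exec = gko::ReferenceExecutor::create();

    // Assemble COO-style triplets on the host ...
    gko::matrix_data<double, int> host_data{gko::dim<2>{2, 2}};
    host_data.nonzeros.emplace_back(0, 0, 4.0);
    host_data.nonzeros.emplace_back(1, 1, 2.0);

    // ... and move them into the executor's memory space.
    auto dev_data =
        gko::device_matrix_data<double, int>::create_from_host(exec, host_data);

    // Reading consumes the device-side buffer directly, without an
    // extra host round trip.
    auto mtx = gko::matrix::Csr<double, int>::create(exec);
    mtx->read(dev_data);
    return 0;
}
```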