Skip to content

Commit 7a15cfa

Browse files
committed
Fix typo and protect the autotuning code.
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
1 parent 0b2f141 commit 7a15cfa

File tree

8 files changed

+133
-143
lines changed

8 files changed

+133
-143
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 56 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424

2525
BEGIN_C_DECLS
2626

27+
/**
28+
* Auto-tuning is disabled for now.
29+
*/
30+
#define OMPI_MCA_COLL_HAN_AUTO_TUNE 0
31+
2732
/*
2833
* Today;
2934
* . only 2 modules available for intranode (low) level
@@ -203,21 +208,24 @@ typedef struct mca_coll_han_component_t {
203208
uint32_t han_scatter_up_module;
204209
/* low level module for scatter */
205210
uint32_t han_scatter_low_module;
206-
/* whether enable auto tune */
207-
uint32_t han_auto_tune;
208211
/* whether we need reproducible results
209212
* (but disables topological optimisations)
210213
*/
211214
uint32_t han_reproducible;
215+
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
216+
/* whether to enable auto tune */
217+
uint32_t han_auto_tune;
212218
/* create a 3D array
213219
* num_processes (n): 2 4 8 16 32 64 (6)
214220
* num_core (c): 2 4 8 12 (4)
215221
* message size (m): 1 - 4194304 (23)
216222
*/
217-
uint32_t han_auto_tune_n;
218-
uint32_t han_auto_tune_c;
219-
uint32_t han_auto_tune_m;
220-
selection *han_auto_tuned;
223+
uint32_t han_auto_tune_n;
224+
uint32_t han_auto_tune_c;
225+
uint32_t han_auto_tune_m;
226+
char* han_auto_tune_filename;
227+
selection* han_auto_tuned;
228+
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
221229
bool use_simple_algorithm[COLLCOUNT];
222230

223231
/* Dynamic configuration rules */
@@ -382,11 +390,11 @@ mca_coll_han_scatter_intra_dynamic(SCATTER_BASE_ARGS,
382390

383391
/* Bcast */
384392
int mca_coll_han_bcast_intra_simple(void *buff,
385-
int count,
386-
struct ompi_datatype_t *dtype,
387-
int root,
388-
struct ompi_communicator_t *comm,
389-
mca_coll_base_module_t *module);
393+
int count,
394+
struct ompi_datatype_t *dtype,
395+
int root,
396+
struct ompi_communicator_t *comm,
397+
mca_coll_base_module_t *module);
390398
void mac_coll_han_set_bcast_argu(mca_bcast_argu_t * argu, mca_coll_task_t * cur_task, void *buff,
391399
int seg_count, struct ompi_datatype_t *dtype,
392400
int root_up_rank, int root_low_rank,
@@ -449,23 +457,23 @@ int mca_coll_han_reduce_t1_task(void *task_argu);
449457
/* Allreduce */
450458
int
451459
mca_coll_han_allreduce_intra_simple(const void *sbuf,
452-
void *rbuf,
453-
int count,
454-
struct ompi_datatype_t *dtype,
455-
struct ompi_op_t *op,
456-
struct ompi_communicator_t *comm,
457-
mca_coll_base_module_t *module);
460+
void *rbuf,
461+
int count,
462+
struct ompi_datatype_t *dtype,
463+
struct ompi_op_t *op,
464+
struct ompi_communicator_t *comm,
465+
mca_coll_base_module_t *module);
458466
int
459467
mca_coll_han_allreduce_reproducible_decision(struct ompi_communicator_t *comm,
460468
mca_coll_base_module_t *module);
461469
int
462470
mca_coll_han_allreduce_reproducible(const void *sbuf,
463471
void *rbuf,
464-
int count,
465-
struct ompi_datatype_t *dtype,
466-
struct ompi_op_t *op,
467-
struct ompi_communicator_t *comm,
468-
mca_coll_base_module_t *module);
472+
int count,
473+
struct ompi_datatype_t *dtype,
474+
struct ompi_op_t *op,
475+
struct ompi_communicator_t *comm,
476+
mca_coll_base_module_t *module);
469477

470478
void mac_coll_han_set_allreduce_argu(mca_allreduce_argu_t * argu,
471479
mca_coll_task_t * cur_task,
@@ -497,11 +505,11 @@ int mca_coll_han_allreduce_t3_task(void *task_argu);
497505
/* Scatter */
498506
int
499507
mca_coll_han_scatter_intra(const void *sbuf, int scount,
500-
struct ompi_datatype_t *sdtype,
501-
void *rbuf, int rcount,
502-
struct ompi_datatype_t *rdtype,
503-
int root,
504-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
508+
struct ompi_datatype_t *sdtype,
509+
void *rbuf, int rcount,
510+
struct ompi_datatype_t *rdtype,
511+
int root,
512+
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
505513
int mca_coll_han_scatter_us_task(void *task_argu);
506514
int mca_coll_han_scatter_ls_task(void *task_argu);
507515
void mac_coll_han_set_scatter_argu(mca_scatter_argu_t * argu,
@@ -524,11 +532,11 @@ void mac_coll_han_set_scatter_argu(mca_scatter_argu_t * argu,
524532
/* Gather */
525533
int
526534
mca_coll_han_gather_intra(const void *sbuf, int scount,
527-
struct ompi_datatype_t *sdtype,
528-
void *rbuf, int rcount,
529-
struct ompi_datatype_t *rdtype,
530-
int root,
531-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
535+
struct ompi_datatype_t *sdtype,
536+
void *rbuf, int rcount,
537+
struct ompi_datatype_t *rdtype,
538+
int root,
539+
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
532540
int mca_coll_han_gather_lg_task(void *task_argu);
533541
int mca_coll_han_gather_ug_task(void *task_argu);
534542
void mac_coll_han_set_gather_argu(mca_gather_argu_t * argu,
@@ -548,19 +556,19 @@ void mac_coll_han_set_gather_argu(mca_gather_argu_t * argu,
548556
int w_rank, bool noop, ompi_request_t * req);
549557
int
550558
mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
551-
struct ompi_datatype_t *sdtype,
552-
void *rbuf, int rcount,
553-
struct ompi_datatype_t *rdtype,
554-
int root,
555-
struct ompi_communicator_t *comm,
556-
mca_coll_base_module_t *module);
559+
struct ompi_datatype_t *sdtype,
560+
void *rbuf, int rcount,
561+
struct ompi_datatype_t *rdtype,
562+
int root,
563+
struct ompi_communicator_t *comm,
564+
mca_coll_base_module_t *module);
557565
/* reordering after gather, for unordered ranks */
558566
void
559567
ompi_coll_han_reorder_gather(const void *sbuf,
560-
void *rbuf, int rcount,
561-
struct ompi_datatype_t *rdtype,
562-
struct ompi_communicator_t *comm,
563-
int * topo);
568+
void *rbuf, int rcount,
569+
struct ompi_datatype_t *rdtype,
570+
struct ompi_communicator_t *comm,
571+
int * topo);
564572

565573

566574

@@ -590,11 +598,12 @@ void mac_coll_han_set_allgather_argu(mca_allgather_argu_t * argu,
590598
bool noop, bool is_mapbycore, int *topo, ompi_request_t * req);
591599
int
592600
mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
593-
struct ompi_datatype_t *sdtype,
594-
void* rbuf, int rcount,
595-
struct ompi_datatype_t *rdtype,
596-
struct ompi_communicator_t *comm,
597-
mca_coll_base_module_t *module);
601+
struct ompi_datatype_t *sdtype,
602+
void* rbuf, int rcount,
603+
struct ompi_datatype_t *rdtype,
604+
struct ompi_communicator_t *comm,
605+
mca_coll_base_module_t *module);
598606

599607
END_C_DECLS
608+
600609
#endif /* MCA_COLL_HAN_EXPORT_H */

ompi/mca/coll/han/coll_han_allgather.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
286286
ptrdiff_t rsize, rgap = 0;
287287
/* Compute the size to receive all the local data, including datatypes empty gaps */
288288
rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * low_size, &rgap);
289-
// intermediary buffer on node leaders to gather on low comm
289+
/* intermediary buffer on node leaders to gather on low comm */
290290
tmp_buf = (char *) malloc(rsize);
291291
tmp_buf_start = tmp_buf - rgap;
292292
}
@@ -298,17 +298,18 @@ mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
298298
/* 2. allgather between node leaders, from tmp_buf to reorder_buf */
299299
if (low_rank == root_low_rank) {
300300
/* allocate buffer to store unordered result on node leaders
301-
* * if the processes are mapped-by core, no need to reorder:
302-
* * distribution of ranks on core first and node next,
303-
* * in a increasing order for both patterns */
301+
* if the processes are mapped-by core, no need to reorder:
302+
* distribution of ranks on core first and node next,
303+
* in an increasing order for both patterns.
304+
*/
304305
char *reorder_buf = NULL;
305306
char *reorder_buf_start = NULL;
306307
if (han_module->is_mapbycore) {
307308
reorder_buf_start = rbuf;
308309
} else {
309310
if (0 == low_rank && 0 == up_rank) { // first rank displays message
310311
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output,
311-
"[%d]: Future Allgather needs reordering: ", w_rank));
312+
"[%d]: Future Allgather needs reordering: ", up_rank));
312313
}
313314
ptrdiff_t rsize, rgap = 0;
314315
rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * low_size * up_size, &rgap);

ompi/mca/coll/han/coll_han_allreduce.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ mca_coll_han_allreduce_intra(const void *sbuf,
103103
mca_coll_han_comm_create(comm, han_module);
104104
ompi_communicator_t *low_comm;
105105
ompi_communicator_t *up_comm;
106+
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
106107
/* Auto tune is enabled */
107108
if (mca_coll_han_component.han_auto_tune && mca_coll_han_component.han_auto_tuned != NULL) {
108109
uint32_t n = han_auto_tuned_get_n(ompi_comm_size(han_module->cached_up_comms[0]));
@@ -140,12 +141,14 @@ mca_coll_han_allreduce_intra(const void *sbuf,
140141
adapt_ibcast_segment_size = us;
141142
}
142143
*/
143-
} else {
144-
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
145-
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
146-
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_allreduce_segsize, typelng,
147-
seg_count);
148-
}
144+
} else
145+
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
146+
{
147+
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
148+
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
149+
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_allreduce_segsize, typelng,
150+
seg_count);
151+
}
149152

150153
/* Determine number of elements sent per task. */
151154
OPAL_OUTPUT_VERBOSE((10, mca_coll_han_component.han_output,

ompi/mca/coll/han/coll_han_bcast.c

Lines changed: 9 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ mca_coll_han_bcast_intra(void *buff,
8484
ompi_communicator_t *low_comm;
8585
ompi_communicator_t *up_comm;
8686
/* Auto tune is enabled */
87+
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
8788
if (mca_coll_han_component.han_auto_tune && mca_coll_han_component.han_auto_tuned != NULL) {
8889
uint32_t n = han_auto_tuned_get_n(ompi_comm_size(han_module->cached_up_comms[0]));
8990
uint32_t c = han_auto_tuned_get_c(ompi_comm_size(han_module->cached_low_comms[0]));
@@ -105,23 +106,15 @@ mca_coll_han_bcast_intra(void *buff,
105106
low_comm = han_module->cached_low_comms[lmod];
106107
/* Set up fs */
107108
COLL_BASE_COMPUTED_SEGCOUNT((size_t) fs, typelng, seg_count);
108-
/* Set up ualg and us, which is only available when using ADAPT */
109-
/*
110-
if (umod == 1) {
111-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
112-
adapt_ibcast_algorithm = ualg;
113-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
114-
adapt_ibcast_segment_size = us;
109+
} else
110+
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
111+
{
112+
/* If auto tune is disabled, use MCA parameters */
113+
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
114+
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
115+
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_bcast_segsize, typelng,
116+
seg_count);
115117
}
116-
*/
117-
118-
} else {
119-
/* If auto tune is disabled, use MCA parameters */
120-
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
121-
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
122-
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_bcast_segsize, typelng,
123-
seg_count);
124-
}
125118

126119
int num_segments = (count + seg_count - 1) / seg_count;
127120
OPAL_OUTPUT_VERBOSE((20, mca_coll_han_component.han_output,

0 commit comments

Comments
 (0)