Skip to content

Commit a43c46f

Browse files
authored
Merge pull request #10500 from samnordmann/ucc_dt_patch
coll/ucc: add support for dt float128 float32(64,128)_complex
2 parents 8b7976a + 8b99a15 commit a43c46f

File tree

3 files changed, with 120 additions and 49 deletions.

config/ompi_check_ucc.m4

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,15 @@ AC_DEFUN([OMPI_CHECK_UCC],[
4141
LIBS="${$1_LIBS} ${LIBS}"
4242
AC_CHECK_FUNCS(ucc_comm_free, [], [])
4343

44+
AC_MSG_CHECKING([if UCC supports float128 and float32(64,128)_complex datatypes])
45+
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <ucc/api/ucc.h>]],
46+
[[ucc_datatype_t dt = UCC_DT_FLOAT32_COMPLEX;]])],
47+
[flag=1
48+
AC_MSG_RESULT([yes])],
49+
[flag=0
50+
AC_MSG_RESULT([no])])
51+
AC_DEFINE_UNQUOTED(UCC_HAVE_COMPLEX_AND_FLOAT128_DT, $flag, [Check if float128 and float32(64,128)_complex dt are available in ucc.])
52+
4453
CPPFLAGS=$CPPFLAGS_save
4554
LDFLAGS=$LDFLAGS_save
4655
LIBS=$LIBS_save])

ompi/mca/coll/ucc/coll_ucc_dtypes.h

Lines changed: 49 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -17,39 +17,58 @@
1717
#define COLL_UCC_OP_UNSUPPORTED ((ucc_reduction_op_t)-1)
1818

1919
static ucc_datatype_t ompi_datatype_2_ucc_dt[OPAL_DATATYPE_MAX_PREDEFINED] = {
20-
[OPAL_DATATYPE_LOOP] = COLL_UCC_DT_UNSUPPORTED,
21-
[OPAL_DATATYPE_END_LOOP] = COLL_UCC_DT_UNSUPPORTED,
22-
[OPAL_DATATYPE_LB] = COLL_UCC_DT_UNSUPPORTED,
23-
[OPAL_DATATYPE_UB] = COLL_UCC_DT_UNSUPPORTED,
24-
[OPAL_DATATYPE_INT1] = UCC_DT_INT8,
25-
[OPAL_DATATYPE_INT2] = UCC_DT_INT16,
26-
[OPAL_DATATYPE_INT4] = UCC_DT_INT32,
27-
[OPAL_DATATYPE_INT8] = UCC_DT_INT64,
28-
[OPAL_DATATYPE_INT16] = UCC_DT_INT128,
29-
[OPAL_DATATYPE_UINT1] = UCC_DT_UINT8,
30-
[OPAL_DATATYPE_UINT2] = UCC_DT_UINT16,
31-
[OPAL_DATATYPE_UINT4] = UCC_DT_UINT32,
32-
[OPAL_DATATYPE_UINT8] = UCC_DT_UINT64,
33-
[OPAL_DATATYPE_UINT16] = UCC_DT_UINT128,
34-
[OPAL_DATATYPE_FLOAT2] = UCC_DT_FLOAT16,
35-
[OPAL_DATATYPE_FLOAT4] = UCC_DT_FLOAT32,
36-
[OPAL_DATATYPE_FLOAT8] = UCC_DT_FLOAT64,
37-
[OPAL_DATATYPE_FLOAT12] = COLL_UCC_DT_UNSUPPORTED,
38-
[OPAL_DATATYPE_FLOAT16] = COLL_UCC_DT_UNSUPPORTED,
39-
[OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
40-
[OPAL_DATATYPE_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
41-
[OPAL_DATATYPE_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
42-
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
43-
[OPAL_DATATYPE_BOOL] = COLL_UCC_DT_UNSUPPORTED,
44-
[OPAL_DATATYPE_WCHAR] = COLL_UCC_DT_UNSUPPORTED,
20+
[OPAL_DATATYPE_LOOP] = COLL_UCC_DT_UNSUPPORTED,
21+
[OPAL_DATATYPE_END_LOOP] = COLL_UCC_DT_UNSUPPORTED,
22+
[OPAL_DATATYPE_LB] = COLL_UCC_DT_UNSUPPORTED,
23+
[OPAL_DATATYPE_UB] = COLL_UCC_DT_UNSUPPORTED,
24+
[OPAL_DATATYPE_INT1] = UCC_DT_INT8,
25+
[OPAL_DATATYPE_INT2] = UCC_DT_INT16,
26+
[OPAL_DATATYPE_INT4] = UCC_DT_INT32,
27+
[OPAL_DATATYPE_INT8] = UCC_DT_INT64,
28+
[OPAL_DATATYPE_INT16] = UCC_DT_INT128,
29+
[OPAL_DATATYPE_UINT1] = UCC_DT_UINT8,
30+
[OPAL_DATATYPE_UINT2] = UCC_DT_UINT16,
31+
[OPAL_DATATYPE_UINT4] = UCC_DT_UINT32,
32+
[OPAL_DATATYPE_UINT8] = UCC_DT_UINT64,
33+
[OPAL_DATATYPE_UINT16] = UCC_DT_UINT128,
34+
[OPAL_DATATYPE_FLOAT2] = UCC_DT_FLOAT16,
35+
[OPAL_DATATYPE_FLOAT4] = UCC_DT_FLOAT32,
36+
[OPAL_DATATYPE_FLOAT8] = UCC_DT_FLOAT64,
37+
[OPAL_DATATYPE_FLOAT12] = COLL_UCC_DT_UNSUPPORTED,
38+
[OPAL_DATATYPE_BOOL] = COLL_UCC_DT_UNSUPPORTED,
39+
[OPAL_DATATYPE_WCHAR] = COLL_UCC_DT_UNSUPPORTED,
40+
[OPAL_DATATYPE_SHORT_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
4541
#if SIZEOF_LONG == 4
46-
[OPAL_DATATYPE_LONG] = UCC_DT_INT32,
47-
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT32,
42+
[OPAL_DATATYPE_LONG] = UCC_DT_INT32,
43+
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT32,
4844
#elif SIZEOF_LONG == 8
49-
[OPAL_DATATYPE_LONG] = UCC_DT_INT64,
50-
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT64,
45+
[OPAL_DATATYPE_LONG] = UCC_DT_INT64,
46+
[OPAL_DATATYPE_UNSIGNED_LONG] = UCC_DT_UINT64,
5147
#endif
52-
[OPAL_DATATYPE_UNAVAILABLE] = COLL_UCC_DT_UNSUPPORTED
48+
#if UCC_HAVE_COMPLEX_AND_FLOAT128_DT
49+
[OPAL_DATATYPE_FLOAT16] = UCC_DT_FLOAT128,
50+
#if SIZEOF_FLOAT__COMPLEX == 8
51+
[OPAL_DATATYPE_FLOAT_COMPLEX] = UCC_DT_FLOAT32_COMPLEX,
52+
#else
53+
[OPAL_DATATYPE_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
54+
#endif
55+
#if SIZEOF_DOUBLE__COMPLEX == 16
56+
[OPAL_DATATYPE_DOUBLE_COMPLEX] = UCC_DT_FLOAT64_COMPLEX,
57+
#else
58+
[OPAL_DATATYPE_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
59+
#endif
60+
#if SIZEOF_LONG_DOUBLE__COMPLEX == 32
61+
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = UCC_DT_FLOAT128_COMPLEX,
62+
#else
63+
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
64+
#endif
65+
#else
66+
[OPAL_DATATYPE_FLOAT16] = COLL_UCC_DT_UNSUPPORTED,
67+
[OPAL_DATATYPE_FLOAT_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
68+
[OPAL_DATATYPE_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
69+
[OPAL_DATATYPE_LONG_DOUBLE_COMPLEX] = COLL_UCC_DT_UNSUPPORTED,
70+
#endif
71+
[OPAL_DATATYPE_UNAVAILABLE] = COLL_UCC_DT_UNSUPPORTED
5372
};
5473

5574
static inline ucc_datatype_t ompi_dtype_to_ucc_dtype(ompi_datatype_t *dtype)

oshmem/mca/scoll/ucc/scoll_ucc_dtypes.h

Lines changed: 62 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -19,25 +19,68 @@
1919
#define SCOLL_UCC_OP_UNSUPPORTED -1
2020

2121
static ucc_datatype_t shmem_datatype_to_ucc_dt[OSHMEM_OP_TYPE_NUMBER + 1] = {
22-
UCC_DT_INT16, /* OSHMEM_OP_TYPE_SHORT 0 */
23-
UCC_DT_INT32, /* OSHMEM_OP_TYPE_INT 1 */
24-
UCC_DT_INT64, /* OSHMEM_OP_TYPE_LONG 2 */
25-
UCC_DT_INT64, /* OSHMEM_OP_TYPE_LLONG 3 */
26-
UCC_DT_INT16, /* OSHMEM_OP_TYPE_INT16_T 4 */
27-
UCC_DT_INT32, /* OSHMEM_OP_TYPE_INT32_T 5 */
28-
UCC_DT_INT64, /* OSHMEM_OP_TYPE_INT64_T 6 */
29-
UCC_DT_FLOAT32, /* OSHMEM_OP_TYPE_FLOAT 7 */
30-
UCC_DT_FLOAT64, /* OSHMEM_OP_TYPE_DOUBLE 8 */
31-
SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_LDOUBLE 9 */
32-
SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_FCOMPLEX 10 */
33-
SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_DCOMPLEX 11 */
34-
UCC_DT_INT16, /* OSHMEM_OP_TYPE_FINT2 12 */
35-
UCC_DT_INT32, /* OSHMEM_OP_TYPE_FINT4 13 */
36-
UCC_DT_INT64, /* OSHMEM_OP_TYPE_FINT8 14 */
37-
UCC_DT_FLOAT32, /* OSHMEM_OP_TYPE_FREAL4 15 */
38-
UCC_DT_FLOAT64, /* OSHMEM_OP_TYPE_FREAL8 16 */
39-
SCOLL_UCC_DT_UNSUPPORTED, /* OSHMEM_OP_TYPE_FREAL16 17 */
40-
SCOLL_UCC_DT_UNSUPPORTED /* OSHMEM_OP_TYPE_NUMBER 18 */
22+
#if SIZEOF_SHORT == 2
23+
[OSHMEM_OP_TYPE_SHORT] = UCC_DT_INT16,
24+
#else
25+
[OSHMEM_OP_TYPE_SHORT] = SCOLL_UCC_DT_UNSUPPORTED,
26+
#endif
27+
#if SIZEOF_INT == 4
28+
[OSHMEM_OP_TYPE_INT] = UCC_DT_INT32,
29+
#else
30+
[OSHMEM_OP_TYPE_INT] = SCOLL_UCC_DT_UNSUPPORTED,
31+
#endif
32+
#if SIZEOF_LONG == 8
33+
[OSHMEM_OP_TYPE_LONG] = UCC_DT_INT64,
34+
#else
35+
[OSHMEM_OP_TYPE_LONG] = SCOLL_UCC_DT_UNSUPPORTED,
36+
#endif
37+
#if SIZEOF_LONG_LONG == 8
38+
[OSHMEM_OP_TYPE_LLONG] = UCC_DT_INT64,
39+
#else
40+
[OSHMEM_OP_TYPE_LLONG] = SCOLL_UCC_DT_UNSUPPORTED,
41+
#endif
42+
[OSHMEM_OP_TYPE_INT16_T] = UCC_DT_INT16,
43+
[OSHMEM_OP_TYPE_INT32_T] = UCC_DT_INT32,
44+
[OSHMEM_OP_TYPE_INT64_T] = UCC_DT_INT64,
45+
#if SIZEOF_FLOAT == 4
46+
[OSHMEM_OP_TYPE_FLOAT] = UCC_DT_FLOAT32,
47+
#else
48+
[OSHMEM_OP_TYPE_FLOAT] = SCOLL_UCC_DT_UNSUPPORTED,
49+
#endif
50+
#if SIZEOF_DOUBLE == 8
51+
[OSHMEM_OP_TYPE_DOUBLE] = UCC_DT_FLOAT64,
52+
#else
53+
[OSHMEM_OP_TYPE_DOUBLE] = SCOLL_UCC_DT_UNSUPPORTED,
54+
#endif
55+
#if UCC_HAVE_COMPLEX_AND_FLOAT128_DT
56+
[OSHMEM_OP_TYPE_FREAL16] = UCC_DT_FLOAT128,
57+
#if SIZEOF_LONG_DOUBLE == 16
58+
[OSHMEM_OP_TYPE_LDOUBLE] = UCC_DT_FLOAT128,
59+
#else
60+
[OSHMEM_OP_TYPE_LDOUBLE] = SCOLL_UCC_DT_UNSUPPORTED,
61+
#endif
62+
#if SIZEOF_FLOAT__COMPLEX == 8
63+
[OSHMEM_OP_TYPE_FCOMPLEX] = UCC_DT_FLOAT32_COMPLEX,
64+
#else
65+
[OSHMEM_OP_TYPE_FCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
66+
#endif
67+
#if SIZEOF_DOUBLE__COMPLEX == 16
68+
[OSHMEM_OP_TYPE_DCOMPLEX] = UCC_DT_FLOAT64_COMPLEX,
69+
#else
70+
[OSHMEM_OP_TYPE_DCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
71+
#endif
72+
#else
73+
[OSHMEM_OP_TYPE_FREAL16] = SCOLL_UCC_DT_UNSUPPORTED,
74+
[OSHMEM_OP_TYPE_LDOUBLE] = SCOLL_UCC_DT_UNSUPPORTED,
75+
[OSHMEM_OP_TYPE_FCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
76+
[OSHMEM_OP_TYPE_DCOMPLEX] = SCOLL_UCC_DT_UNSUPPORTED,
77+
#endif
78+
[OSHMEM_OP_TYPE_FINT2] = UCC_DT_INT16,
79+
[OSHMEM_OP_TYPE_FINT4] = UCC_DT_INT32,
80+
[OSHMEM_OP_TYPE_FINT8] = UCC_DT_INT64,
81+
[OSHMEM_OP_TYPE_FREAL4] = UCC_DT_FLOAT32,
82+
[OSHMEM_OP_TYPE_FREAL8] = UCC_DT_FLOAT64,
83+
[OSHMEM_OP_TYPE_NUMBER] = SCOLL_UCC_DT_UNSUPPORTED
4184
};
4285

4386
static inline ucc_datatype_t shmem_op_to_ucc_dtype(oshmem_op_t *op)

0 commit comments

Comments
 (0)