32
32
#define RAJA_config_HPP
33
33
34
34
#include <utility>
35
+ #include <cstddef>
35
36
#include <type_traits>
36
37
37
38
#if defined(_MSVC_LANG)
@@ -239,6 +240,15 @@ static_assert(RAJA_HAS_SOME_CXX14,
239
240
#define RAJA_PRAGMA(x) _Pragma(RAJA_STRINGIFY(x))
240
241
#endif
241
242
243
+
244
+ /* NOTE: Below we define RAJA_MAX_ALIGN for each compiler. Currently it is set to 16 bytes
245
+ for all cases, except MSVC. Previously this was set by alignof(std::max_align_t) which, in Clang,
246
+ is based on the sizeof(long double). This causes an inconsistency as CUDA/HIP long doubles
247
+ are demoted to doubles causing alignof(std::max_align_t) to return 8 bytes on the device and
248
+ 16 bytes on the host. We therefore set a standard size and ensure validity through a
249
+ static_assert.
250
+ */
251
+
242
252
namespace RAJA {
243
253
244
254
#if defined(RAJA_ENABLE_OPENMP) && !defined(__HIP_DEVICE_COMPILE__)
@@ -374,7 +384,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
374
384
//
375
385
// Configuration options for Intel compilers
376
386
//
377
-
387
+ #define RAJA_MAX_ALIGN 16
378
388
#if defined (RAJA_ENABLE_FORCEINLINE_RECURSIVE)
379
389
#define RAJA_FORCEINLINE_RECURSIVE RAJA_PRAGMA(forceinline recursive)
380
390
#else
@@ -387,6 +397,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
387
397
#define RAJA_INLINE inline __attribute__((always_inline))
388
398
#endif
389
399
400
+
390
401
#define RAJA_UNROLL RAJA_PRAGMA(unroll)
391
402
#define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N))
392
403
@@ -412,9 +423,9 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
412
423
//
413
424
// Configuration options for GNU compilers
414
425
//
426
+ #define RAJA_MAX_ALIGN 16
415
427
#define RAJA_FORCEINLINE_RECURSIVE
416
428
#define RAJA_INLINE inline __attribute__((always_inline))
417
-
418
429
#if !defined(__NVCC__)
419
430
#define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
420
431
#define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N)
@@ -446,11 +457,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
446
457
//
447
458
// Configuration options for xlc compiler (i.e., bgq/sequoia).
448
459
//
460
+ #define RAJA_MAX_ALIGN 16
449
461
#define RAJA_FORCEINLINE_RECURSIVE
450
462
#define RAJA_INLINE inline __attribute__((always_inline))
451
463
#define RAJA_UNROLL
452
464
#define RAJA_UNROLL_COUNT(N)
453
-
454
465
// FIXME: alignx is breaking CUDA+xlc
455
466
#if defined(RAJA_ENABLE_CUDA)
456
467
#define RAJA_ALIGN_DATA(d) d
@@ -476,12 +487,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
476
487
//
477
488
// Configuration options for clang compilers
478
489
//
490
+ #define RAJA_MAX_ALIGN 16
479
491
#define RAJA_FORCEINLINE_RECURSIVE
480
492
#define RAJA_INLINE inline __attribute__((always_inline))
481
493
#define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
482
494
#define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))
483
-
484
-
485
495
// note that neither nvcc nor the Apple Clang compiler currently supports
486
496
// the __builtin_assume_aligned attribute
487
497
#if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__)
@@ -514,7 +524,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
514
524
515
525
// This is the same as undefined compiler, but squelches the warning message
516
526
#elif defined(RAJA_COMPILER_MSVC)
517
-
527
+ #define RAJA_MAX_ALIGN alignof(std::max_align_t)
518
528
#define RAJA_FORCEINLINE_RECURSIVE
519
529
#define RAJA_INLINE inline
520
530
#define RAJA_ALIGN_DATA(d) d
@@ -526,6 +536,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
526
536
#else
527
537
528
538
#pragma message("RAJA_COMPILER unknown, using default empty macros.")
539
+ #define RAJA_MAX_ALIGN 16
529
540
#define RAJA_FORCEINLINE_RECURSIVE
530
541
#define RAJA_INLINE inline
531
542
#define RAJA_ALIGN_DATA(d) d
@@ -536,6 +547,9 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
536
547
537
548
#endif
538
549
550
+ static_assert(RAJA_MAX_ALIGN >= alignof(std::max_align_t) && (RAJA_MAX_ALIGN/alignof(std::max_align_t))*alignof(std::max_align_t) == RAJA_MAX_ALIGN,
551
+ "Inconsistent RAJA_MAX_ALIGN size");
552
+
539
553
#cmakedefine RAJA_HAVE_POSIX_MEMALIGN
540
554
#cmakedefine RAJA_HAVE_ALIGNED_ALLOC
541
555
#cmakedefine RAJA_HAVE_MM_MALLOC
0 commit comments