Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
33937a4
[cpp17] move CUDA builds from c++14 to c++17 (#333)
valassi Jan 26, 2022
06d674b
[cpp17] require cuda >=11.2 to be able to use c++17 (#333 - see also …
valassi Jan 26, 2022
4739f55
[cpp17] improve a comment about nvcc builds of c++ code with c++17 (#…
valassi Jan 26, 2022
1c34929
[cpp17] use "if constexpr" in MemoryAccessHelpers also in cuda, thank…
valassi Jan 26, 2022
48615c2
[cpp17] enable if constexpr in Bridge.h (needs c++17 and cuda>=11.2)
valassi Jan 26, 2022
db70f9c
[cpp17] bug fix in OMPFLAGS - effectively reenable OpenMP (code was n…
valassi Jan 26, 2022
f4c80a1
[cpp17] remove OMPFLAGS support for gcc<9.3 in Makefile
valassi Jan 26, 2022
9f86299
[cpp17] require gcc>=9.3 (#96) - it was observed in the past that Ope…
valassi Jan 26, 2022
d93bfb9
[cpp17] fix OpenMP MT builds after effectively reenabling OpenMP
valassi Jan 26, 2022
be34933
[cpp17] remove CUFLAGS CXXFLAGS CPPFLAGS from link instructions (keep…
valassi Jan 26, 2022
d4feb80
[cpp17] add back OMPFAGS to linker after removing CXXFLAGS
valassi Jan 26, 2022
5c7ebbe
[cpp17] Use __CUDACC_VER_MAJOR__ instead of __CUDACC__ in getCompiler…
valassi Jan 26, 2022
7415f1f
[cpp17] improve the previous patch: use __NVCC__ instead of __CUDA_MA…
valassi Jan 26, 2022
5245d4b
[cpp17] TEMPORARELY DISABLE OMP (need to reassess MT)
valassi Jan 26, 2022
d2725d6
[cpp17] further use __NVCC__ instead of __CUDACC__ throughout getComp…
valassi Jan 26, 2022
87bde06
[cpp17] complete backport to codegen and regenerate eemumu auto
valassi Jan 26, 2022
e8979bc
[cpp17] BUG FIX IN LOGS! recover gcc102 baselines from c2e67b4a425319…
valassi Jan 26, 2022
157d430
[cpp17] rerun eemumu basic log, check all is ok
valassi Jan 26, 2022
0654f26
[cpp17] add ggtg and ggttggg to sync script
valassi Jan 26, 2022
fd6d8de
[cpp17] regenerate ggtt* auto and resync manu
valassi Jan 26, 2022
b26408b
[cpp17] ** COMPLETE CPP17 ** rerun ggtt/ggttg/ggttgg, all ok
valassi Jan 26, 2022
f2669dd
[cpp17] allow gcc<=9.3 if gcc is used below clang (#96 and #355)
valassi Jan 26, 2022
cb0f512
[cpp17] backport to codegen and regenerate eemumu auto
valassi Jan 26, 2022
95474a6
[cpp17] ** COMPLETE CPP17 ** regenerate gg_tt* with gcc check bug fix…
valassi Jan 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ template <typename T>
void Bridge<T>::gpu_sequence( const T *momenta, T *mes, const bool goodHelOnly )
{
constexpr int neppM = MemoryAccessMomenta::neppM;
if ( neppM == 1 ) // eventually move to "if constexpr" (need c++17, not available in cuda 11.1)
if constexpr ( neppM == 1 ) // needs c++17 and cuda >=11.2 (#333)
{
checkCuda( cudaMemcpy( m_devMomentaC.data(), momenta, m_devMomentaC.bytes(), cudaMemcpyHostToDevice ) );
}
Expand Down Expand Up @@ -297,7 +297,7 @@ void hst_transposeMomentaF2C( const T *in, T *out, const int evt )
constexpr int npar = mgOnGpu::npar;
constexpr int np4 = mgOnGpu::np4;
constexpr int neppM = MemoryAccessMomenta::neppM;
if ( neppM == 1 ) // eventually move to "if constexpr" (need c++17, not available in cuda 11.1)
if constexpr ( neppM == 1 ) // needs c++17 and cuda >=11.2 (#333)
{
memcpy( out, in, evt * npar * np4 * sizeof(T) );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@ TESTDIR = ../../../../../test
GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/
GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a

# OpenMP flags
# OpenMP flags (on gcc this requires gcc>=9.3, issue #269)
ifneq ($(shell $(CXX) --version | grep ^Intel),)
override OMPFLAGS =
else ifeq ($(shell $(CXX) --version | grep GCC | cut -d' ' -f3 | cut -d'.' -f1),9) # disable OMP on gcc9 (issue #269)
override OMPFLAGS =
OMPFLAGS ?= -fopenmp
endif
###OMPFLAGS ?= -fopenmp # TEMPORARELY DISABLE OMP (need to reassess MT)
override OMPFLAGS = # TEMPORARELY DISABLE OMP (need to reassess MT)
$(info OMPFLAGS=$(OMPFLAGS))
CXXFLAGS += $(OMPFLAGS)

Expand Down Expand Up @@ -99,17 +98,8 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)
CUOPTFLAGS = -lineinfo
CUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math
###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow
NVCC_VERSION = $(shell $(NVCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1)
# TEMPORARY! For as long as cuda 11.0/11.1 are supported (issues #282 and #292)
# (Eventually, use only c++17; previously, c++14 in cuda110/111 and c++17 above; now c++14 everywhere)
###ifeq ($(NVCC_VERSION),11.0)
### CUFLAGS += -std=c++14
###else ifeq ($(NVCC_VERSION),11.1)
### CUFLAGS += -std=c++14
###else
### CUFLAGS += -std=c++17
###endif
CUFLAGS += -std=c++14
###NVCC_VERSION = $(shell $(NVCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1)
CUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h
# Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12)
###CUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12)
###CUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12)
Expand Down Expand Up @@ -323,7 +313,7 @@ $(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MODELLIB).a $(cu_objects) $(c
$(NVCC) $< -o $@ $(cu_objects) $(cucxx_objects) $(CUARCHFLAGS) $(LIBFLAGS) $(CULIBFLAGS)

$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cucxx_objects)
$(CXX) $< -o $@ $(cxx_objects) $(cucxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
$(CXX) $< -o $@ $(cxx_objects) $(cucxx_objects) $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)

$(BUILDDIR)/testxxx.o: $(GTESTLIBS)
$(BUILDDIR)/testxxx.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include
Expand Down Expand Up @@ -373,11 +363,11 @@ endif
ifeq ($(NVCC),)
# Link only runTest.o
$(testmain): $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
$(CXX) -o $@ $(cxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
$(CXX) -o $@ $(cxx_objects) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
else
# Link both runTest.o and runTest_cu.o
$(testmain): $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cu_objects) $(GTESTLIBS)
$(NVCC) -o $@ $(cxx_objects) $(cu_objects) $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
$(NVCC) -o $@ $(cxx_objects) $(cu_objects) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
endif

$(GTESTLIBS):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ class KernelAccessHelper : public MemoryAccessHelper<T>
__host__ __device__ inline
fptype* kernelAccessRecord( fptype* buffer )
{
//if constexpr ( !onDevice ) // FIXME! enable this when we move to nvcc supporting c++17
if ( !onDevice )
if constexpr ( !onDevice ) // requires c++17 also in CUDA (#333)
{
return T::ieventAccessRecord( buffer, 0 );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ namespace mgOnGpu

// Alignment requirement for using reinterpret_cast with SIMD vectorized code
// (using reinterpret_cast with non aligned memory may lead to segmentation faults!)
// [NB eventually define this also for code built with nvcc (#318), however this requires c++17 and nvcc >= 11.2]
// Only needed for C++ code but can be enforced also in NVCC builds of C++ code using CUDA>=11.2 and C++17 (#318, #319, #333)
#ifndef __CUDACC__
constexpr int cppAlign = 64; // alignment requirement for SIMD vectorization (64-byte i.e. 512-bit)
#endif
Expand Down Expand Up @@ -190,4 +190,24 @@ using mgOnGpu::fptype;
// For SANITY CHECKS: check that neppR, neppM, neppV... are powers of two (https://stackoverflow.com/a/108360)
// [A positive integer is a power of two iff clearing its lowest set bit yields zero]
inline constexpr bool ispoweroftwo( int n ){ return ( n > 0 ) && ( ( n & ( n - 1 ) ) == 0 ); }

// Compiler version support (#96): require nvcc from CUDA >= 11.2, e.g. to use C++17 (see #333)
// [NB __CUDACC_VER_MAJOR__/__CUDACC_VER_MINOR__ are defined whenever nvcc drives the build]
#ifdef __NVCC__
#if ( __CUDACC_VER_MAJOR__ < 11 ) || ( __CUDACC_VER_MAJOR__ == 11 && __CUDACC_VER_MINOR__ < 2 )
#error Unsupported CUDA version: please use CUDA >= 11.2
#endif
#endif

// Compiler version support (#96): require clang >= 11
#if defined __clang__
#if ( __clang_major__ < 11 )
#error Unsupported clang version: please use clang >= 11
#endif
// Compiler version support (#96): require gcc >= 9.3, e.g. for some OMP issues (see #269)
// [NB skip this check for the gcc toolchain below clang or icx (TEMPORARY? #355)]
#elif defined __GNUC__
#if ( __GNUC__ < 9 ) || ( __GNUC__ == 9 && __GNUC_MINOR__ < 3 )
#error Unsupported gcc version: please use gcc >= 9.3
#endif
#endif

#endif // MGONGPUCONFIG_H
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@ namespace mg5amcCpu
{
std::stringstream out;
// CUDA version (NVCC)
#ifdef __CUDACC__
// [Use __NVCC__ instead of __CUDACC__ here!]
// [This tests if 'nvcc' was used even to build a .cc file, even if not necessarily 'nvcc -x cu' for a .cu file]
// [Check 'nvcc --compiler-options -dM -E dummy.c | grep CUDA': see https://stackoverflow.com/a/53713712]
#ifdef __NVCC__
#if defined __CUDACC_VER_MAJOR__ && defined __CUDACC_VER_MINOR__ && defined __CUDACC_VER_BUILD__
out << "nvcc " << __CUDACC_VER_MAJOR__ << "." << __CUDACC_VER_MINOR__ << "." << __CUDACC_VER_BUILD__;
#else
Expand All @@ -148,7 +151,7 @@ namespace mg5amcCpu
#error "icc is no longer supported: please use icx"
#elif defined __INTEL_LLVM_COMPILER // alternative: __INTEL_CLANG_COMPILER
out << "icx " << __INTEL_LLVM_COMPILER;
#ifdef __CUDACC__
#ifdef __NVCC__
out << ", ";
#else
out << " (";
Expand All @@ -166,7 +169,7 @@ namespace mg5amcCpu
std::array<char, 128> tchainbuf;
while ( fgets( tchainbuf.data(), tchainbuf.size(), tchainpipe.get() ) != nullptr ) tchainout += tchainbuf.data();
tchainout.pop_back(); // remove trailing newline
#if defined __CUDACC__ or defined __INTEL_LLVM_COMPILER
#if defined __NVCC__ or defined __INTEL_LLVM_COMPILER
out << ", gcc " << tchainout;
#else
out << " (gcc " << tchainout << ")";
Expand All @@ -182,7 +185,7 @@ namespace mg5amcCpu
out << "gcc UNKNOWKN";
#endif
#endif
#if defined __CUDACC__ or defined __INTEL_LLVM_COMPILER
#if defined __NVCC__ or defined __INTEL_LLVM_COMPILER
out << ")";
#endif
return out.str();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -887,9 +887,9 @@ def get_all_sigmaKin_lines(self, color_amplitudes, class_name):
// - private: give each thread its own copy, without initialising
// - firstprivate: give each thread its own copy, and initialise with value from outside
#ifdef MGONGPU_CPPSIMD
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV,isAligned_allMEs) private (amp_sv,w_sv,jamp_sv)
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV,amp_fp,w_fp,isAligned_allMEs) private (amp_sv,w_sv,jamp_sv)
#else
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV) private (amp_sv,w_sv,jamp_sv)
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV,amp_fp,w_fp) private (amp_sv,w_sv,jamp_sv)
#endif
#endif
for ( int ipagV = 0; ipagV < npagV; ++ipagV )
Expand Down
12 changes: 11 additions & 1 deletion epochX/cudacpp/CODEGEN/syncManu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

eemumu=0
ggtt=0
ggttg=0
ggttgg=0
ggttggg=0

function usage()
{
Expand All @@ -17,18 +19,26 @@ while [ "$1" != "" ]; do
elif [ "$1" == "-ggtt" ]; then
ggtt=1
shift
elif [ "$1" == "-ggttg" ]; then
ggttg=1
shift
elif [ "$1" == "-ggttgg" ]; then
ggttgg=1
shift
elif [ "$1" == "-ggttggg" ]; then
ggttggg=1
shift
else
usage
fi
done

# Check that at least one process has been selected
processes=
if [ "${ggttgg}" == "1" ]; then processes="gg_ttgg $processes"; fi
if [ "${ggtt}" == "1" ]; then processes="gg_tt $processes"; fi
if [ "${ggttg}" == "1" ]; then processes="gg_ttg $processes"; fi
if [ "${ggttgg}" == "1" ]; then processes="gg_ttgg $processes"; fi
if [ "${ggttggg}" == "1" ]; then processes="gg_ttggg $processes"; fi
if [ "${eemumu}" == "1" ]; then processes="ee_mumu $processes"; fi
if [ "${processes}" == "" ]; then usage; fi

Expand Down
8 changes: 4 additions & 4 deletions epochX/cudacpp/ee_mumu.auto/CODEGEN_cudacpp_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.006892681121826172 
DEBUG: model prefixing takes 0.006869077682495117 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
INFO: Change particles name to pass to MG5 convention
DEBUG: True [misc.py at line 2192] 
Expand Down Expand Up @@ -120,6 +120,6 @@ INFO: /data/avalassi/GPU2020/MG5aMC/2.7.0_gpu/CODEGEN_cudacpp_ee_mumu/src/. and
DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 178] 
quit

real 0m3.732s
user 0m0.933s
sys 0m0.140s
real 0m3.952s
user 0m0.946s
sys 0m0.125s
4 changes: 2 additions & 2 deletions epochX/cudacpp/ee_mumu.auto/SubProcesses/Bridge.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ template <typename T>
void Bridge<T>::gpu_sequence( const T *momenta, T *mes, const bool goodHelOnly )
{
constexpr int neppM = MemoryAccessMomenta::neppM;
if ( neppM == 1 ) // eventually move to "if constexpr" (need c++17, not available in cuda 11.1)
if constexpr ( neppM == 1 ) // needs c++17 and cuda >=11.2 (#333)
{
checkCuda( cudaMemcpy( m_devMomentaC.data(), momenta, m_devMomentaC.bytes(), cudaMemcpyHostToDevice ) );
}
Expand Down Expand Up @@ -297,7 +297,7 @@ void hst_transposeMomentaF2C( const T *in, T *out, const int evt )
constexpr int npar = mgOnGpu::npar;
constexpr int np4 = mgOnGpu::np4;
constexpr int neppM = MemoryAccessMomenta::neppM;
if ( neppM == 1 ) // eventually move to "if constexpr" (need c++17, not available in cuda 11.1)
if constexpr ( neppM == 1 ) // needs c++17 and cuda >=11.2 (#333)
{
memcpy( out, in, evt * npar * np4 * sizeof(T) );
}
Expand Down
26 changes: 8 additions & 18 deletions epochX/cudacpp/ee_mumu.auto/SubProcesses/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@ TESTDIR = ../../../../../test
GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/
GTESTLIBS = $(GTESTLIBDIR)/libgtest.a $(GTESTLIBDIR)/libgtest_main.a

# OpenMP flags
# OpenMP flags (on gcc this requires gcc>=9.3, issue #269)
ifneq ($(shell $(CXX) --version | grep ^Intel),)
override OMPFLAGS =
else ifeq ($(shell $(CXX) --version | grep GCC | cut -d' ' -f3 | cut -d'.' -f1),9) # disable OMP on gcc9 (issue #269)
override OMPFLAGS =
OMPFLAGS ?= -fopenmp
endif
###OMPFLAGS ?= -fopenmp # TEMPORARELY DISABLE OMP (need to reassess MT)
override OMPFLAGS = # TEMPORARELY DISABLE OMP (need to reassess MT)
$(info OMPFLAGS=$(OMPFLAGS))
CXXFLAGS += $(OMPFLAGS)

Expand Down Expand Up @@ -99,17 +98,8 @@ ifneq ($(wildcard $(CUDA_HOME)/bin/nvcc),)
CUOPTFLAGS = -lineinfo
CUFLAGS = $(OPTFLAGS) $(CUOPTFLAGS) $(INCFLAGS) $(CUINC) $(USE_NVTX) $(CUARCHFLAGS) -use_fast_math
###CUFLAGS += -Xcompiler -Wall -Xcompiler -Wextra -Xcompiler -Wshadow
NVCC_VERSION = $(shell $(NVCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1)
# TEMPORARY! For as long as cuda 11.0/11.1 are supported (issues #282 and #292)
# (Eventually, use only c++17; previously, c++14 in cuda110/111 and c++17 above; now c++14 everywhere)
###ifeq ($(NVCC_VERSION),11.0)
### CUFLAGS += -std=c++14
###else ifeq ($(NVCC_VERSION),11.1)
### CUFLAGS += -std=c++14
###else
### CUFLAGS += -std=c++17
###endif
CUFLAGS += -std=c++14
###NVCC_VERSION = $(shell $(NVCC) --version | grep 'Cuda compilation tools' | cut -d' ' -f5 | cut -d, -f1)
CUFLAGS += -std=c++17 # need CUDA >= 11.2 (see #333): this is enforced in mgOnGpuConfig.h
# Without -maxrregcount: baseline throughput: 6.5E8 (16384 32 12) up to 7.3E8 (65536 128 12)
###CUFLAGS+= --maxrregcount 160 # improves throughput: 6.9E8 (16384 32 12) up to 7.7E8 (65536 128 12)
###CUFLAGS+= --maxrregcount 128 # improves throughput: 7.3E8 (16384 32 12) up to 7.6E8 (65536 128 12)
Expand Down Expand Up @@ -323,7 +313,7 @@ $(cu_main): $(BUILDDIR)/gcheck_sa.o $(LIBDIR)/lib$(MODELLIB).a $(cu_objects) $(c
$(NVCC) $< -o $@ $(cu_objects) $(cucxx_objects) $(CUARCHFLAGS) $(LIBFLAGS) $(CULIBFLAGS)

$(cxx_main): $(BUILDDIR)/check_sa.o $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cucxx_objects)
$(CXX) $< -o $@ $(cxx_objects) $(cucxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
$(CXX) $< -o $@ $(cxx_objects) $(cucxx_objects) $(OMPFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)

$(BUILDDIR)/testxxx.o: $(GTESTLIBS)
$(BUILDDIR)/testxxx.o: INCFLAGS += -I$(TESTDIR)/googletest/googletest/include
Expand Down Expand Up @@ -373,11 +363,11 @@ endif
ifeq ($(NVCC),)
# Link only runTest.o
$(testmain): $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(GTESTLIBS)
$(CXX) -o $@ $(cxx_objects) $(CPPFLAGS) $(CXXFLAGS) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
$(CXX) -o $@ $(cxx_objects) -ldl -pthread $(LIBFLAGS) $(CULIBFLAGS)
else
# Link both runTest.o and runTest_cu.o
$(testmain): $(LIBDIR)/lib$(MODELLIB).a $(cxx_objects) $(cu_objects) $(GTESTLIBS)
$(NVCC) -o $@ $(cxx_objects) $(cu_objects) $(CPPFLAGS) $(CUFLAGS) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
$(NVCC) -o $@ $(cxx_objects) $(cu_objects) -ldl $(LIBFLAGS) $(CULIBFLAGS) -lcuda -lgomp
endif

$(GTESTLIBS):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,7 @@ class KernelAccessHelper : public MemoryAccessHelper<T>
__host__ __device__ inline
fptype* kernelAccessRecord( fptype* buffer )
{
//if constexpr ( !onDevice ) // FIXME! enable this when we move to nvcc supporting c++17
if ( !onDevice )
if constexpr ( !onDevice ) // requires c++17 also in CUDA (#333)
{
return T::ieventAccessRecord( buffer, 0 );
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,9 @@ namespace mg5amcCpu
// - private: give each thread its own copy, without initialising
// - firstprivate: give each thread its own copy, and initialise with value from outside
#ifdef MGONGPU_CPPSIMD
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV,isAligned_allMEs) private (amp_sv,w_sv,jamp_sv)
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV,amp_fp,w_fp,isAligned_allMEs) private (amp_sv,w_sv,jamp_sv)
#else
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV) private (amp_sv,w_sv,jamp_sv)
#pragma omp parallel for default(none) shared(allmomenta,allMEs,cHel,cIPC,cIPD,ihel,npagV,amp_fp,w_fp) private (amp_sv,w_sv,jamp_sv)
#endif
#endif
for ( int ipagV = 0; ipagV < npagV; ++ipagV )
Expand Down Expand Up @@ -356,7 +356,10 @@ namespace mg5amcCpu
{
std::stringstream out;
// CUDA version (NVCC)
#ifdef __CUDACC__
// [Use __NVCC__ instead of __CUDACC__ here!]
// [This tests if 'nvcc' was used even to build a .cc file, even if not necessarily 'nvcc -x cu' for a .cu file]
// [Check 'nvcc --compiler-options -dM -E dummy.c | grep CUDA': see https://stackoverflow.com/a/53713712]
#ifdef __NVCC__
#if defined __CUDACC_VER_MAJOR__ && defined __CUDACC_VER_MINOR__ && defined __CUDACC_VER_BUILD__
out << "nvcc " << __CUDACC_VER_MAJOR__ << "." << __CUDACC_VER_MINOR__ << "." << __CUDACC_VER_BUILD__;
#else
Expand All @@ -369,7 +372,7 @@ namespace mg5amcCpu
#error "icc is no longer supported: please use icx"
#elif defined __INTEL_LLVM_COMPILER // alternative: __INTEL_CLANG_COMPILER
out << "icx " << __INTEL_LLVM_COMPILER;
#ifdef __CUDACC__
#ifdef __NVCC__
out << ", ";
#else
out << " (";
Expand All @@ -387,7 +390,7 @@ namespace mg5amcCpu
std::array<char, 128> tchainbuf;
while ( fgets( tchainbuf.data(), tchainbuf.size(), tchainpipe.get() ) != nullptr ) tchainout += tchainbuf.data();
tchainout.pop_back(); // remove trailing newline
#if defined __CUDACC__ or defined __INTEL_LLVM_COMPILER
#if defined __NVCC__ or defined __INTEL_LLVM_COMPILER
out << ", gcc " << tchainout;
#else
out << " (gcc " << tchainout << ")";
Expand All @@ -403,7 +406,7 @@ namespace mg5amcCpu
out << "gcc UNKNOWKN";
#endif
#endif
#if defined __CUDACC__ or defined __INTEL_LLVM_COMPILER
#if defined __NVCC__ or defined __INTEL_LLVM_COMPILER
out << ")";
#endif
return out.str();
Expand Down
Loading