Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
2cc22ce
update mg5amcnlo
roiser Mar 12, 2025
d7c4c96
change the number of numerators to MAXAMPS (number of diagrams
roiser Mar 31, 2025
9cd9c4d
add kernel access returning pointer instead of array
roiser Mar 31, 2025
c2ba0f9
change CPPProcess.cc
roiser Apr 2, 2025
8d1edec
forgot a hardcoded 3 in the code, replace by ndiagrams
roiser Apr 2, 2025
b3a4710
trigger CI
roiser Apr 3, 2025
d8e1624
trigger CI 2
roiser Apr 3, 2025
9753e6b
code formatting
roiser Apr 3, 2025
f14d0d5
fix getChannelId for cuda
roiser Apr 3, 2025
ebc6bdb
fix indentation
roiser Apr 3, 2025
ab338f7
C++ starts counting at 0 ;-)
roiser Apr 4, 2025
a041e62
added some verbose/debug flags, disabled FPE traps by default
zeniheisser Mar 11, 2025
f2eff61
added mod tag and date
zeniheisser Mar 11, 2025
21b75f5
lorem ipsum
zeniheisser Apr 3, 2025
8fb363a
move submodule to 3.6.2
roiser Apr 4, 2025
fb557d0
fixed bug where precision wasn't passed properly from the run_card
zeniheisser Apr 24, 2025
dec050d
trigger CI
roiser Apr 3, 2025
90973b4
trigger CI 2
roiser Apr 3, 2025
34ec964
buffer for numerators cannot be aligned, it depends on the number of
roiser Apr 28, 2025
62c204a
Revert "buffer for numerators cannot be aligned, it depends on the nu…
roiser Apr 28, 2025
b39e508
for the CPU version correct the index access, GPU version still needs to
roiser Apr 30, 2025
0701742
change to processConfig for diagram numbers
roiser May 12, 2025
557b125
change to processConfig for diagram numbers
roiser May 12, 2025
5e1b675
change to processConfig for diagram numbers
roiser May 12, 2025
13fcd5e
fix formatting
roiser May 12, 2025
8e28871
move submodule
roiser May 12, 2025
e61241f
move submodule
roiser May 12, 2025
778f242
move submodule
roiser May 12, 2025
ddb25e8
move to 3.6.3
roiser May 12, 2025
f731bf2
move to 3.6.2
roiser May 12, 2025
70cfe63
move to 3.6.3
roiser May 12, 2025
2cc58f5
trigger CI
roiser May 12, 2025
3a44412
trigger CI
roiser May 12, 2025
0c78649
collect all channelids
roiser Jun 3, 2025
ec116a9
fix formatting
roiser Jun 3, 2025
47b0e7c
regenerate all processes
roiser Nov 21, 2025
975a113
regenerate all processes
roiser Nov 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion MG5aMC/mg5amcnlo
Submodule mg5amcnlo updated 609 files
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def compile(self, *args, **opts):
if 'cwd' in opts and os.path.basename(opts['cwd']) == 'Source':
path = pjoin(opts['cwd'], 'make_opts')
common_run_interface.CommonRunCmd.update_make_opts_full(path,
{'FPTYPE': self.run_card['floating_type'] })
{'override FPTYPE': self.run_card['floating_type'] })
misc.sprint('FPTYPE checked')
cudacpp_supported_backends = [ 'fortran', 'cuda', 'hip', 'cpp', 'cppnone', 'cppsse4', 'cppavx2', 'cpp512y', 'cpp512z', 'cppauto' ]
if args and args[0][0] == 'madevent' and hasattr(self, 'run_card'):
Expand Down Expand Up @@ -76,7 +76,7 @@ def reset_makeopts(self, old_value, new_value, name):
if not hasattr(self, 'path'):
raise Exception
if name == 'floating_type':
common_run_interface.CommonRunCmd.update_make_opts_full({'FPTYPE': new_value})
common_run_interface.CommonRunCmd.update_make_opts_full({'override FPTYPE': new_value})
else:
raise Exception
Sourcedir = pjoin(os.path.dirname(os.path.dirname(self.path)), 'Source')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2024 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: S. Roiser (Nov 2021) for the MG5aMC CUDACPP plugin.
// Further modified by: S. Roiser, J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin.
// Further modified by: S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2021-2025) for the MG5aMC CUDACPP plugin.

#ifndef BRIDGE_H
#define BRIDGE_H 1
Expand Down Expand Up @@ -255,11 +255,15 @@ namespace mg5amcCpu
throw std::logic_error( "Bridge constructor: FIXME! cannot choose gputhreads" ); // this should never happen!
m_gpublocks = m_nevt / m_gputhreads;
}
#ifdef MGONGPUCPP_VERBOSE
std::cout << "WARNING! Instantiate device Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads
<< ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl;
#endif
m_pmek.reset( new MatrixElementKernelDevice( m_devMomentaC, m_devGs, m_devRndHel, m_devRndCol, m_devChannelIds, m_devMEs, m_devSelHel, m_devSelCol, m_gpublocks, m_gputhreads ) );
#else
#ifdef MGONGPUCPP_VERBOSE
std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl;
#endif
m_pmek.reset( new MatrixElementKernelHost( m_hstMomentaC, m_hstGs, m_hstRndHel, m_hstRndCol, m_hstChannelIds, m_hstMEs, m_hstSelHel, m_hstSelCol, m_nevt ) );
#endif // MGONGPUCPP_GPUIMPL
// Create a process object, read param card and set parameters
Expand Down Expand Up @@ -290,8 +294,10 @@ namespace mg5amcCpu
throw std::runtime_error( "Bridge: gpublocks*gputhreads must equal m_nevt in set_gpugrid" );
m_gpublocks = gpublocks;
m_gputhreads = gputhreads;
#ifdef MGONGPUCPP_VERBOSE
std::cout << "WARNING! Set grid in Bridge (nevt=" << m_nevt << ", gpublocks=" << m_gpublocks << ", gputhreads=" << m_gputhreads
<< ", gpublocks*gputhreads=" << m_gpublocks * m_gputhreads << ")" << std::endl;
#endif
m_pmek->setGrid( m_gpublocks, m_gputhreads );
}
#endif
Expand Down Expand Up @@ -347,7 +353,9 @@ namespace mg5amcCpu
if( goodHelOnly ) return;
m_pmek->computeMatrixElements( useChannelIds );
copyHostFromDevice( m_hstMEs, m_devMEs );
#ifdef MGONGPUCPP_VERBOSE
flagAbnormalMEs( m_hstMEs.data(), m_nevt );
#endif
copyHostFromDevice( m_hstSelHel, m_devSelHel );
copyHostFromDevice( m_hstSelCol, m_devSelCol );
if constexpr( std::is_same_v<FORTRANFPTYPE, fptype> )
Expand Down Expand Up @@ -400,7 +408,9 @@ namespace mg5amcCpu
}
if( goodHelOnly ) return;
m_pmek->computeMatrixElements( useChannelIds );
#ifdef MGONGPUCPP_VERBOSE
flagAbnormalMEs( m_hstMEs.data(), m_nevt );
#endif
if constexpr( std::is_same_v<FORTRANFPTYPE, fptype> )
{
memcpy( mes, m_hstMEs.data(), m_hstMEs.bytes() );
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2024 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: J. Teig (Jun 2023, based on earlier work by S. Roiser) for the MG5aMC CUDACPP plugin.
// Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin.
// Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2025) for the MG5aMC CUDACPP plugin.

#ifndef MG5AMC_GPURUNTIME_H
#define MG5AMC_GPURUNTIME_H 1
Expand Down Expand Up @@ -50,7 +50,7 @@ namespace mg5amcGpu
// Set up CUDA application
// ** NB: strictly speaking this is not needed when using the CUDA runtime API **
// Calling cudaSetDevice on startup is useful to properly book-keep the time spent in CUDA initialization
static void setUp( const bool debug = true )
static void setUp( const bool debug = false ) // ZW: changed debug default to false
{
// ** NB: it is useful to call cudaSetDevice, or cudaFree, to properly book-keep the time spent in CUDA initialization
// ** NB: otherwise, the first CUDA operation (eg a cudaMemcpyToSymbol in CPPProcess ctor) appears to take much longer!
Expand All @@ -71,7 +71,7 @@ namespace mg5amcGpu
// ** NB: strictly speaking this is not needed when using the CUDA runtime API **
// Calling cudaDeviceReset on shutdown is only needed for checking memory leaks in cuda-memcheck
// See https://docs.nvidia.com/cuda/cuda-memcheck/index.html#leak-checking
static void tearDown( const bool debug = true )
static void tearDown( const bool debug = false ) // ZW: changed debug default to false
{
if( debug ) std::cout << "__GpuRuntime: calling GpuDeviceReset()" << std::endl;
checkGpu( gpuDeviceReset() );
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2024 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2025) for the MG5aMC CUDACPP plugin.

#include "MatrixElementKernels.h"

Expand Down Expand Up @@ -60,7 +60,9 @@ namespace mg5amcCpu
#ifdef MGONGPU_CHANNELID_DEBUG
MatrixElementKernelBase::dumpNevtProcessedByChannel();
#endif
#ifdef MGONGPUCPP_VERBOSE
MatrixElementKernelBase::dumpSignallingFPEs();
#endif
}

//--------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2024 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2022-2024) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi, Z. Wettersten (2022-2025) for the MG5aMC CUDACPP plugin.

#ifndef MATRIXELEMENTKERNELS_H
#define MATRIXELEMENTKERNELS_H 1
Expand Down Expand Up @@ -134,7 +134,7 @@ namespace mg5amcCpu

// Does this host system support the SIMD used in the matrix element calculation?
// [NB: this is private, SIMD vectorization in mg5amc C++ code is currently only used in the ME calculations below MatrixElementKernelHost!]
static bool hostSupportsSIMD( const bool verbose = true );
static bool hostSupportsSIMD( const bool verbose = false ); // ZW: default verbose false

private:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ namespace mg5amcCpu
#endif
}

// Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal)
// [Signature (SCALAR OR VECTOR) ===> fptype_sv* kernelAccess( fptype* buffer ) <===]
static __host__ __device__ inline fptype_sv*
kernelAccessP( fptype* buffer )
{
return reinterpret_cast<fptype_sv*>( buffer );
}

// Locate a field (output) in a memory buffer (input) from a kernel event-indexing mechanism (internal) and the given field indexes (input)
// [Signature (const, SCALAR) ===> const fptype& kernelAccessConst( const fptype* buffer ) <===]
static constexpr auto kernelAccessConst_s =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "CPPProcess.h"
#include "GpuRuntime.h"
#include "Parameters_%(model_name)s.h"
#include "processConfig.h"

#include <sstream>

Expand Down Expand Up @@ -274,7 +275,8 @@ namespace mg5amcCpu
typedef BufferBase<fptype> BufferNumerators;

// The size (number of elements) per event in a memory buffer for numerators
constexpr size_t sizePerEventNumerators = 1;
// (should be equal to the number of diagrams in the process)
constexpr size_t sizePerEventNumerators = processConfig::ndiagrams;

#ifndef MGONGPUCPP_GPUIMPL
// A class encapsulating a C++ host buffer for gs
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright (C) 2025 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: S. Roiser (May 2025) for the MG5aMC CUDACPP plugin.
// Further modified by: ... for the MG5aMC CUDACPP plugin.


#ifndef MG5_CONFIG_%(processid_uppercase)s_H
#define MG5_CONFIG_%(processid_uppercase)s_H 1

namespace processConfig {

constexpr int ndiagrams = %(ndiagrams)d;

}

#endif // MG5_CONFIG_%(processid_uppercase)s_H
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "MemoryAccessMatrixElements.h"
#include "MemoryAccessMomenta.h"
#include "MemoryAccessWavefunctions.h"
#include "processConfig.h"

#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
#include "MemoryAccessDenominators.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
! Copyright (C) 2020-2024 CERN and UCLouvain.
! Licensed under the GNU Lesser General Public License (version 3 or later).
! Modified by: A. Valassi (Sep 2021) for the MG5aMC CUDACPP plugin.
! Further modified by: J. Teig, A. Valassi (2021-2024) for the MG5aMC CUDACPP plugin.
! Further modified by: J. Teig, A. Valassi, Z. Wettersten (2021-2025) for the MG5aMC CUDACPP plugin.
!==========================================================================
//==========================================================================
// Class member functions for calculating the matrix elements for
Expand All @@ -16,6 +16,69 @@ namespace mg5amcGpu
namespace mg5amcCpu
#endif
{
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
__device__ INLINE unsigned int getChannelId( const unsigned int* allChannelIds
#ifndef MGONGPUCPP_GPUIMPL
,
const int ievt00,
bool sanityCheckMixedPrecision = true
#endif
)
{
unsigned int channelId = 0; // disable multichannel single-diagram enhancement unless allChannelIds != nullptr
#ifdef MGONGPUCPP_GPUIMPL
using CID_ACCESS = DeviceAccessChannelIds; // non-trivial access: buffer includes all events
// SCALAR channelId for the current event (CUDA) or for the whole SIMD event page (C++)
// The cudacpp implementation ASSUMES (and checks! #898) that all channelIds are the same in a SIMD event page
if( allChannelIds != nullptr )
{
const unsigned int* channelIds = allChannelIds; // fix #899 (distinguish channelIds and allChannelIds)
const uint_sv channelIds_sv = CID_ACCESS::kernelAccessConst( channelIds ); // fix #895 (compute this only once for all diagrams)
// NB: channelIds_sv is a scalar in CUDA
channelId = channelIds_sv;
assert( channelId > 0 ); // SANITY CHECK: scalar channelId must be > 0 if multichannel is enabled (allChannelIds != nullptr)
}
#else // Cuda or C++
using CID_ACCESS = HostAccessChannelIds; // non-trivial access: buffer includes all events
// SCALAR channelId for the whole SIMD neppV2 event page (C++), i.e. one or two neppV event page(s)
// The cudacpp implementation ASSUMES (and checks! #898) that all channelIds are the same in a neppV2 SIMD event page
// **NB! in "mixed" precision, using SIMD, calculate_wavefunctions computes MEs for TWO neppV pages with a single channelId! #924
if( allChannelIds != nullptr )
{
// First - and/or only - neppV page of channels (iParity=0 => ievt0 = ievt00 + 0 * neppV)
const unsigned int* channelIds = CID_ACCESS::ieventAccessRecordConst( allChannelIds, ievt00 ); // fix bug #899/#911
uint_sv channelIds_sv = CID_ACCESS::kernelAccessConst( channelIds ); // fix #895 (compute this only once for all diagrams)
#ifndef MGONGPU_CPPSIMD
// NB: channelIds_sv is a scalar in no-SIMD C++
channelId = channelIds_sv;
#else
// NB: channelIds_sv is a vector in SIMD C++
channelId = channelIds_sv[0]; // element[0]
for( int i = 1; i < neppV; ++i ) // elements[1...neppV-1]
{
assert( channelId == channelIds_sv[i] ); // SANITY CHECK #898: check that all events in a SIMD vector have the same channelId
}
#endif
assert( channelId > 0 ); // SANITY CHECK: scalar channelId must be > 0 if multichannel is enabled (allChannelIds != nullptr)
if( sanityCheckMixedPrecision )
{
#if defined MGONGPU_CPPSIMD and defined MGONGPU_FPTYPE_DOUBLE and defined MGONGPU_FPTYPE2_FLOAT
// Second neppV page of channels (iParity=1 => ievt0 = ievt00 + 1 * neppV)
const unsigned int* channelIds2 = CID_ACCESS::ieventAccessRecordConst( allChannelIds, ievt00 + neppV ); // fix bug #899/#911
uint_v channelIds2_v = CID_ACCESS::kernelAccessConst( channelIds2 ); // fix #895 (compute this only once for all diagrams)
// **NB! in "mixed" precision, using SIMD, calculate_wavefunctions computes MEs for TWO neppV pages with a single channelId! #924
for( int i = 0; i < neppV; ++i )
{
assert( channelId == channelIds2_v[i] ); // SANITY CHECKS #898 #924: all events in the 2nd SIMD vector have the same channelId as that of the 1st SIMD vector
}
#endif
}
}
#endif // MGONGPUCPP_GPUIMPL
return channelId;
}
#endif // MGONGPU_SUPPORTS_MULTICHANNEL

constexpr int nw6 = CPPProcess::nw6; // dimensions of each wavefunction (HELAS KEK 91-11): e.g. 6 for e+ e- -> mu+ mu- (fermions and vectors)
constexpr int npar = CPPProcess::npar; // #particles in total (external = initial + final): e.g. 4 for e+ e- -> mu+ mu-
constexpr int ncomb = CPPProcess::ncomb; // #helicity combinations: e.g. 16 for e+ e- -> mu+ mu- (2**4 = fermion spin up/down ** npar)
Expand Down Expand Up @@ -117,7 +180,9 @@ namespace mg5amcCpu
#else
memcpy( cHel, tHel, ncomb * npar * sizeof( short ) );
#endif
#ifdef MGONGPUCPP_DEBUG
fpeEnable(); // enable SIGFPE traps for Floating Point Exceptions
#endif
}

//--------------------------------------------------------------------------
Expand Down Expand Up @@ -476,17 +541,13 @@ namespace mg5amcCpu
#ifdef MGONGPUCPP_GPUIMPL
// Remember: in CUDA this is a kernel for one event, in c++ this processes n events
const int ievt = blockDim.x * blockIdx.x + threadIdx.x; // index of event (thread) in grid
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
using CID_ACCESS = DeviceAccessChannelIds; // non-trivial access: buffer includes all events
#endif
#else
//assert( (size_t)(allmomenta) %% mgOnGpu::cppAlign == 0 ); // SANITY CHECK: require SIMD-friendly alignment [COMMENT OUT TO TEST MISALIGNED ACCESS]
//assert( (size_t)(allMEs) %% mgOnGpu::cppAlign == 0 ); // SANITY CHECK: require SIMD-friendly alignment [COMMENT OUT TO TEST MISALIGNED ACCESS]
using E_ACCESS = HostAccessMatrixElements; // non-trivial access: buffer includes all events
#ifdef MGONGPU_SUPPORTS_MULTICHANNEL
using NUM_ACCESS = HostAccessNumerators; // non-trivial access: buffer includes all events
using DEN_ACCESS = HostAccessDenominators; // non-trivial access: buffer includes all events
using CID_ACCESS = HostAccessChannelIds; // non-trivial access: buffer includes all events
#endif
#endif

Expand Down
Loading