Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
22a0ac0
[CODEGEN] Added GPU abstraction to CODEGEN
Jooorgen Jul 13, 2023
81cf765
[jthip] change % to %% in CODEGEN cudacpp.mk
valassi Jul 13, 2023
b83f8c9
[jthip] clang-format GpuAbstraction.h both in CODEGEN and in ggttgg.mad
valassi Jul 13, 2023
1afbafc
[jthip] clang-format GpuRuntime.h both in CODEGEN and in ggttgg.mad
valassi Jul 13, 2023
d1f5c5b
Made the codegenerated files same as the templated files in gg_ttgg
Jooorgen Jul 17, 2023
1b5c0fd
[jthip] backport to CODEGEN from ggttgg.mad
valassi Jul 18, 2023
0f1b811
[jthip] complete backport to CODEGEN from ggttgg.mad, including a few…
valassi Jul 18, 2023
71ff5e2
[jthip] in CODEGEN, remove the copying to src of GpuRuntime.h and Gpu…
valassi Jul 18, 2023
a37fb41
[jthip] In CODEGEN, acknowledge Joergen in each file and in COPYRIGHT…
valassi Jul 18, 2023
428aa50
[CODEGEN] Added HIP runtime include in mgOnGpuConfig.h in codegen
Jooorgen Jul 20, 2023
24fbbb6
[jthip/namespace] backport latest changes from ggttgg.mad to CODEGEN
valassi Jul 25, 2023
10df703
[jthip] in CODEGEN, backport also cudacpp_src.mk using GPUCC instead …
valassi Jul 25, 2023
43e0c64
[CODEGEN] Added changes from gg_ttgg.mad to code generator
Jooorgen Aug 10, 2023
e99a2b8
[CODEGEN] Added export of GPUCC and GPUFLAGS to codegen
Jooorgen Aug 10, 2023
4adb62f
Fixed warning and changed HIPARCHFLAGS export so it exports to cudacp…
Jooorgen Aug 10, 2023
e18c882
[CODEGEN] Fixed error in runTest.cc and reverted changes in cudacpp_s…
Jooorgen Aug 10, 2023
f588cd4
Merge branch 'madgraph5:master' into gpu_abstraction_only
Jooorgen Sep 28, 2023
597de73
[CODEGEN] Added GPU abstraction to CODEGEN
Jooorgen Jul 13, 2023
1a6496a
Updated first name in Author list
Jooorgen Sep 29, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ generates includes the following authors:
Stephan Hageboeck (CERN)
Olivier Mattelaer (Universite Catholique de Louvain, original author)
Stefan Roiser (CERN, original author)
Jorgen Teig (CERN)
Andrea Valassi (CERN, original author)
Zenny Wettersten (CERN)

Expand All @@ -28,5 +29,4 @@ acknowledged collaboration with the following collaborators:
Taran Singhania (PES University Bangalore)
David Smith (CERN)
Carl Vuosalo (University of Wisconsin-Madison)
Joergen Teig (CERN)

Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The full development team currently includes the following authors :
Stephan Hageboeck (CERN)
Olivier Mattelaer (Universite Catholique de Louvain, original author)
Stefan Roiser (CERN, original author)
Joergen Teig (CERN)
Andrea Valassi (CERN, original author)
Zenny Wettersten (CERN)
See https://github.com/madgraph5/madgraph4gpu for more details. For the full
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Modified by: A. Valassi (Sep 2021) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin.
//==========================================================================
// This file has been automatically generated for CUDA/C++ standalone by
%(info_lines)s
Expand All @@ -15,7 +15,7 @@
#include <iomanip>
#include <iostream>

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
using namespace mg5amcGpu;
#else
using namespace mg5amcCpu;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "read_slha.h"

// NB: namespaces mg5amcGpu and mg5amcCpu include types which are defined in different ways for CPU and GPU builds (see #318 and #725)
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -85,7 +85,7 @@ namespace mg5amcCpu
#include <limits>

// NB: namespaces mg5amcGpu and mg5amcCpu include types which are defined in different ways for CPU and GPU builds (see #318 and #725)
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -155,7 +155,7 @@ namespace mg5amcCpu
//==========================================================================

// NB: namespaces mg5amcGpu and mg5amcCpu include types which are defined in different ways for CPU and GPU builds (see #318 and #725)
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand All @@ -172,7 +172,7 @@ namespace mg5amcCpu
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable" // e.g. <<warning: unused variable ‘mdl_G__exp__2’ [-Wunused-variable]>>
#pragma GCC diagnostic ignored "-Wunused-parameter" // e.g. <<warning: unused parameter ‘G’ [-Wunused-parameter]>>
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
#pragma nv_diagnostic push
#pragma nv_diag_suppress 177 // e.g. <<warning #177-D: variable "mdl_G__exp__2" was declared but never referenced>>
#endif
Expand All @@ -194,9 +194,9 @@ namespace mg5amcCpu
%(dcoupsetdcoup)s
}
%(eftspecial2)s
return out;
}
#ifdef __CUDACC__
return out;
}
#ifdef MGONGPUCPP_GPUIMPL
#pragma GCC diagnostic pop
#pragma nv_diagnostic pop
#endif
Expand All @@ -212,6 +212,12 @@ namespace mg5amcCpu

//==========================================================================

#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
#endif
{
#pragma GCC diagnostic push
#ifndef __clang__
#pragma GCC diagnostic ignored "-Wunused-but-set-variable" // e.g. <<warning: variable ‘couplings_sv’ set but not used [-Wunused-but-set-variable]>>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: S. Roiser (Nov 2021) for the MG5aMC CUDACPP plugin.
// Further modified by: S. Roiser, A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: S. Roiser, J. Teig, A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin.

#ifndef BRIDGE_H
#define BRIDGE_H 1
Expand All @@ -23,7 +23,7 @@
#include <memory>
#include <type_traits>

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -83,7 +83,7 @@ namespace mg5amcCpu
Bridge& operator=( const Bridge& ) = delete;
Bridge& operator=( Bridge&& ) = delete;

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
/**
* Set the gpublocks and gputhreads for the gpusequence - throws if nevt != gpublocks*gputhreads
* (this is needed for BridgeKernel tests rather than for actual production use in Fortran)
Expand Down Expand Up @@ -150,7 +150,7 @@ namespace mg5amcCpu
unsigned int m_nevt; // number of events
int m_nGoodHel; // the number of good helicities (-1 initially when they have not yet been calculated)

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
int m_gputhreads; // number of gpu threads (default set from number of events, can be modified)
int m_gpublocks; // number of gpu blocks (default set from number of events, can be modified)
DeviceBuffer<FORTRANFPTYPE, sizePerEventMomenta> m_devMomentaF;
Expand Down Expand Up @@ -187,12 +187,12 @@ namespace mg5amcCpu
// Forward declare transposition methods
//

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL

template<typename Tin, typename Tout>
__global__ void dev_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt );

#endif // __CUDACC__
#endif // MGONGPUCPP_GPUIMPL

template<typename Tin, typename Tout>
void hst_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt );
Expand All @@ -209,7 +209,7 @@ namespace mg5amcCpu
Bridge<FORTRANFPTYPE>::Bridge( unsigned int nevtF, unsigned int nparF, unsigned int np4F )
: m_nevt( nevtF )
, m_nGoodHel( -1 )
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
, m_gputhreads( 256 ) // default number of gpu threads
, m_gpublocks( m_nevt / m_gputhreads ) // this ensures m_nevt <= m_gpublocks*m_gputhreads
, m_devMomentaF( m_nevt )
Expand All @@ -233,7 +233,7 @@ namespace mg5amcCpu
{
if( nparF != CPPProcess::npar ) throw std::runtime_error( "Bridge constructor: npar mismatch" );
if( np4F != CPPProcess::np4 ) throw std::runtime_error( "Bridge constructor: np4 mismatch" );
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
if( ( m_nevt < s_gputhreadsmin ) || ( m_nevt % s_gputhreadsmin != 0 ) )
throw std::runtime_error( "Bridge constructor: nevt should be a multiple of " + std::to_string( s_gputhreadsmin ) );
while( m_nevt != m_gpublocks * m_gputhreads )
Expand All @@ -249,7 +249,7 @@ namespace mg5amcCpu
#else
std::cout << "WARNING! Instantiate host Bridge (nevt=" << m_nevt << ")" << std::endl;
m_pmek.reset( new MatrixElementKernelHost( m_hstMomentaC, m_hstGs, m_hstRndHel, m_hstRndCol, m_hstMEs, m_hstSelHel, m_hstSelCol, m_nevt ) );
#endif // __CUDACC__
#endif // MGONGPUCPP_GPUIMPL
// Create a process object, read param card and set parameters
// FIXME: the process instance can happily go out of scope because it is only needed to read parameters?
// FIXME: the CPPProcess should really be a singleton? what if fbridgecreate is called from several Fortran threads?
Expand All @@ -262,7 +262,7 @@ namespace mg5amcCpu
process.initProc( paramCard );
}

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
template<typename FORTRANFPTYPE>
void Bridge<FORTRANFPTYPE>::set_gpugrid( const int gpublocks, const int gputhreads )
{
Expand All @@ -276,7 +276,7 @@ namespace mg5amcCpu
}
#endif

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
template<typename FORTRANFPTYPE>
void Bridge<FORTRANFPTYPE>::gpu_sequence( const FORTRANFPTYPE* momenta,
const FORTRANFPTYPE* gs,
Expand All @@ -291,14 +291,14 @@ namespace mg5amcCpu
constexpr int neppM = MemoryAccessMomenta::neppM;
if constexpr( neppM == 1 && std::is_same_v<FORTRANFPTYPE, fptype> )
{
checkCuda( cudaMemcpy( m_devMomentaC.data(), momenta, m_devMomentaC.bytes(), cudaMemcpyHostToDevice ) );
gpuMemcpy( m_devMomentaC.data(), momenta, m_devMomentaC.bytes(), gpuMemcpyHostToDevice );
}
else
{
checkCuda( cudaMemcpy( m_devMomentaF.data(), momenta, m_devMomentaF.bytes(), cudaMemcpyHostToDevice ) );
gpuMemcpy( m_devMomentaF.data(), momenta, m_devMomentaF.bytes(), gpuMemcpyHostToDevice );
const int thrPerEvt = CPPProcess::npar * CPPProcess::np4; // AV: transpose alg does 1 element per thread (NOT 1 event per thread)
//const int thrPerEvt = 1; // AV: try new alg with 1 event per thread... this seems slower
dev_transposeMomentaF2C<<<m_gpublocks * thrPerEvt, m_gputhreads>>>( m_devMomentaF.data(), m_devMomentaC.data(), m_nevt );
gpuLaunchKernel( dev_transposeMomentaF2C, m_gpublocks * thrPerEvt, m_gputhreads, m_devMomentaF.data(), m_devMomentaC.data(), m_nevt );
}
if constexpr( std::is_same_v<FORTRANFPTYPE, fptype> )
{
Expand Down Expand Up @@ -341,7 +341,7 @@ namespace mg5amcCpu
}
#endif

#ifndef __CUDACC__
#ifndef MGONGPUCPP_GPUIMPL
template<typename FORTRANFPTYPE>
void Bridge<FORTRANFPTYPE>::cpu_sequence( const FORTRANFPTYPE* momenta,
const FORTRANFPTYPE* gs,
Expand Down Expand Up @@ -396,7 +396,7 @@ namespace mg5amcCpu
// - C++ array: momenta[npagM][npar][np4][neppM] with nevt=npagM*neppM (AOSOA)
//

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
template<typename Tin, typename Tout>
__global__ void dev_transposeMomentaF2C( const Tin* in, Tout* out, const unsigned int nevt )
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.

#include "BridgeKernels.h"

#include "GpuAbstraction.h"
#include "MemoryAccessMomenta.h"

#include <sstream>

//============================================================================

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -45,7 +46,7 @@ namespace mg5amcCpu

//============================================================================

#ifndef __CUDACC__
#ifndef MGONGPUCPP_GPUIMPL
namespace mg5amcCpu
{

Expand Down Expand Up @@ -96,7 +97,7 @@ namespace mg5amcCpu

//============================================================================

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
{

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.

#ifndef BRIDGEKERNELS_H
#define BRIDGEKERNELS_H 1
Expand All @@ -12,7 +12,7 @@
#include "MatrixElementKernels.h"
#include "MemoryBuffers.h"

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -49,7 +49,7 @@ namespace mg5amcCpu

//--------------------------------------------------------------------------

#ifndef __CUDACC__
#ifndef MGONGPUCPP_GPUIMPL
// A Bridge wrapper class encapsulating matrix element calculations on a CPU host
class BridgeKernelHost final : public BridgeKernelBase
{
Expand Down Expand Up @@ -89,7 +89,7 @@ namespace mg5amcCpu

//--------------------------------------------------------------------------

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
// A Bridge wrapper class encapsulating matrix element calculations on a GPU device
class BridgeKernelDevice : public BridgeKernelBase
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Dec 2021) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2021-2023) for the MG5aMC CUDACPP plugin.

#include "CommonRandomNumbers.h"
#include "GpuAbstraction.h"
#include "MemoryBuffers.h"
#include "RandomNumberKernels.h"

#include <cassert>

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.

#include "CrossSectionKernels.h"

#include "GpuAbstraction.h"
#include "MemoryAccessMatrixElements.h"
#include "MemoryAccessWeights.h"
#include "MemoryBuffers.h"
Expand Down Expand Up @@ -77,7 +78,7 @@ debug_me_is_abnormal( const fptype& me, size_t ievtALL )

//============================================================================

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -185,7 +186,7 @@ namespace mg5amcCpu

//============================================================================

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
{

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: A. Valassi (Jan 2022) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.
// Further modified by: J. Teig, A. Valassi (2022-2023) for the MG5aMC CUDACPP plugin.

#ifndef CROSSSECTIONKERNELS_H
#define CROSSSECTIONKERNELS_H 1
Expand All @@ -13,7 +13,7 @@

//============================================================================

#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
namespace mg5amcGpu
#else
namespace mg5amcCpu
Expand Down Expand Up @@ -96,7 +96,7 @@ namespace mg5amcCpu
//--------------------------------------------------------------------------

/*
#ifdef __CUDACC__
#ifdef MGONGPUCPP_GPUIMPL
// A class encapsulating the calculation of event statistics on a GPU device
class CrossSectionKernelDevice : public CrossSectionKernelBase, public NumberOfEvents
{
Expand Down
Loading