Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,88 +1,95 @@
/*
* CommonRandomNumbers.h
*
* Created on: 04.11.2020
* Author: Stephan Hageboeck
*/
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: S. Hageboeck (Nov 2020) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2023) for the MG5aMC CUDACPP plugin.

#ifndef COMMONRANDOMNUMBERS_H_
#define COMMONRANDOMNUMBERS_H_
#define COMMONRANDOMNUMBERS_H_ 1

#include <vector>
#include <future>
#include <random>
#include <thread>
#include <future>

namespace CommonRandomNumbers {

/// Create `n` random numbers using simple c++ engine.
template<typename T>
std::vector<T> generate(std::size_t n, std::minstd_rand::result_type seed = 1337) {
std::vector<T> result;
result.reserve(n);

std::minstd_rand generator(seed);
std::uniform_real_distribution<T> distribution(0.0, 1.0);
#include <vector>

for (std::size_t i=0; i<n; ++i) {
result.push_back(distribution(generator));
}
namespace CommonRandomNumbers
{

return result;
}
/// Create `n` random numbers using simple c++ engine.
template<typename T>
std::vector<T> generate( std::size_t n, std::minstd_rand::result_type seed = 1337 )
{
std::vector<T> result;
result.reserve( n );

std::minstd_rand generator( seed );
std::uniform_real_distribution<T> distribution( 0.0, 1.0 );

/// Create `nBlock` blocks of random numbers.
/// Each block uses a generator that's seeded with `seed + blockIndex`, and blocks are generated in parallel.
template<typename T>
std::vector<std::vector<T>> generateParallel(std::size_t nPerBlock, std::size_t nBlock, std::minstd_rand::result_type seed = 1337) {
std::vector<std::vector<T>> results(nBlock);
std::vector<std::thread> threads;
const auto partPerThread = nBlock/std::thread::hardware_concurrency() + (nBlock % std::thread::hardware_concurrency() != 0);

auto makeBlock = [nPerBlock,nBlock,seed,&results](std::size_t partitionBegin, std::size_t partitionEnd) {
for (std::size_t partition = partitionBegin; partition < partitionEnd && partition < nBlock; ++partition) {
results[partition] = generate<T>(nPerBlock, seed + partition);
for( std::size_t i = 0; i < n; ++i )
{
result.push_back( distribution( generator ) );
}
};

for (unsigned int threadId = 0; threadId < std::thread::hardware_concurrency(); ++threadId) {
threads.emplace_back(makeBlock, threadId * partPerThread, (threadId+1) * partPerThread);
return result;
}

for (auto& thread : threads) {
thread.join();
}
/// Create `nBlock` blocks of random numbers.
/// Each block uses a generator that's seeded with `seed + blockIndex`, and blocks are generated in parallel.
template<typename T>
std::vector<std::vector<T>> generateParallel( std::size_t nPerBlock, std::size_t nBlock, std::minstd_rand::result_type seed = 1337 )
{
std::vector<std::vector<T>> results( nBlock );
std::vector<std::thread> threads;
const auto partPerThread = nBlock / std::thread::hardware_concurrency() + ( nBlock % std::thread::hardware_concurrency() != 0 );

auto makeBlock = [nPerBlock, nBlock, seed, &results]( std::size_t partitionBegin, std::size_t partitionEnd )
{
for( std::size_t partition = partitionBegin; partition < partitionEnd && partition < nBlock; ++partition )
{
results[partition] = generate<T>( nPerBlock, seed + partition );
}
};

for( unsigned int threadId = 0; threadId < std::thread::hardware_concurrency(); ++threadId )
{
threads.emplace_back( makeBlock, threadId * partPerThread, ( threadId + 1 ) * partPerThread );
}

return results;
}
for( auto& thread: threads )
{
thread.join();
}

return results;
}

/// Starts asynchronous generation of random numbers. This uses as many threads as cores, and generates blocks of random numbers.
/// These become available at unspecified times, but the blocks 0, 1, 2, ... are generated first.
/// Each block is seeded with seed + blockIndex to generate stable sequences.
/// \param[in/out] promises Vector of promise objects storing blocks of random numbers.
/// \param[in] nPerBlock Configures number of entries generated per block.
/// \param[in] nBlock Configures the number of blocks generated.
/// \param[in] nThread Optional concurrency.
/// \param[in] seed Optional seed.
template<typename T>
void startGenerateAsync(std::vector<std::promise<std::vector<T>>>& promises, std::size_t nPerBlock, std::size_t nBlock,
unsigned int nThread = std::thread::hardware_concurrency(), std::minstd_rand::result_type seed = 1337) {
promises.resize(nBlock);
std::vector<std::thread> threads;

auto makeBlocks = [=,&promises](std::size_t threadID) {
for (std::size_t partition = threadID; partition < nBlock; partition += nThread) {
auto values = generate<T>(nPerBlock, seed + partition);
promises[partition].set_value(std::move(values));
/// Starts asynchronous generation of random numbers. This uses as many threads as cores, and generates blocks of random numbers.
/// These become available at unspecified times, but the blocks 0, 1, 2, ... are generated first.
/// Each block is seeded with seed + blockIndex to generate stable sequences.
/// \param[in/out] promises Vector of promise objects storing blocks of random numbers.
/// \param[in] nPerBlock Configures number of entries generated per block.
/// \param[in] nBlock Configures the number of blocks generated.
/// \param[in] nThread Optional concurrency.
/// \param[in] seed Optional seed.
template<typename T>
void startGenerateAsync( std::vector<std::promise<std::vector<T>>>& promises, std::size_t nPerBlock, std::size_t nBlock, unsigned int nThread = std::thread::hardware_concurrency(), std::minstd_rand::result_type seed = 1337 )
{
promises.resize( nBlock );
std::vector<std::thread> threads;

auto makeBlocks = [=, &promises]( std::size_t threadID )
{
for( std::size_t partition = threadID; partition < nBlock; partition += nThread )
{
auto values = generate<T>( nPerBlock, seed + partition );
promises[partition].set_value( std::move( values ) );
}
};

for( unsigned int threadId = 0; threadId < nThread; ++threadId )
{
std::thread( makeBlocks, threadId ).detach();
}
};

for (unsigned int threadId = 0; threadId < nThread; ++threadId) {
std::thread(makeBlocks, threadId).detach();
}
}

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ MG5AMC_COMMONLIB = mg5amc_common
LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
INCFLAGS += -I../../src

# Dependency on tools directory
TOOLSDIR = ../../../../../tools
INCFLAGS += -I$(TOOLSDIR)

# Dependency on test directory
TESTDIR = ../../../../../test
GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/
Expand Down
18 changes: 9 additions & 9 deletions epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.00481867790222168 
DEBUG: model prefixing takes 0.004959821701049805 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -176,7 +176,7 @@ INFO: Creating files in directory P1_epem_mupmum
DEBUG: Entering PLUGIN_OneProcessExporter.__init__ [model_handling.py at line 1027] 
DEBUG: kwargs[prefix] = 0 [model_handling.py at line 1028] 
DEBUG: proc_id =  1 [model_handling.py at line 1033] 
DEBUG: process_exporter_cpp =  <PLUGIN.CUDACPP_SA_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fb98cd4f430> [export_v4.py at line 6163] 
DEBUG: process_exporter_cpp =  <PLUGIN.CUDACPP_SA_OUTPUT.model_handling.PLUGIN_OneProcessExporter object at 0x7fe3be9aa430> [export_v4.py at line 6163] 
INFO: Creating files in directory .
DEBUG: Entering PLUGIN_OneProcessExporter.generate_process_files [model_handling.py at line 1281] 
DEBUG: self.include_multi_channel is already defined: this is madevent+second_exporter mode [model_handling.py at line 1283] 
Expand Down Expand Up @@ -208,20 +208,20 @@ INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./.
WARNING: vector code for lepton pdf not implemented. We removed the option to run dressed lepton 
INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1
INFO: Finding symmetric diagrams for subprocess group epem_mupmum
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s
Wrote files for 8 helas calls in 0.088 s
Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s
Wrote files for 8 helas calls in 0.087 s
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates 3 routines in 0.173 s
ALOHA: aloha creates 3 routines in 0.174 s
DEBUG: Entering PLUGIN_ProcessExporter.convert_model (create the model) [output.py at line 187] 
ALOHA: aloha starts to compute helicity amplitudes
ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates FFV2_4 routines
ALOHA: aloha creates 7 routines in 0.216 s
ALOHA: aloha creates 7 routines in 0.224 s
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
Expand Down Expand Up @@ -259,6 +259,6 @@ Type "launch" to generate events from this process, or see
Run "open index.html" to see more information about this process.
quit

real 0m1.847s
user 0m1.529s
sys 0m0.217s
real 0m1.810s
user 0m1.562s
sys 0m0.223s
96 changes: 96 additions & 0 deletions epochX/cudacpp/ee_mumu.mad/SubProcesses/CommonRandomNumbers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
// Copyright (C) 2020-2023 CERN and UCLouvain.
// Licensed under the GNU Lesser General Public License (version 3 or later).
// Created by: S. Hageboeck (Nov 2020) for the MG5aMC CUDACPP plugin.
// Further modified by: A. Valassi (2023) for the MG5aMC CUDACPP plugin.

#ifndef COMMONRANDOMNUMBERS_H_
#define COMMONRANDOMNUMBERS_H_ 1

#include <future>
#include <random>
#include <thread>
#include <vector>

namespace CommonRandomNumbers
{

  /// Create `n` random numbers using a simple C++ standard library engine.
  /// The values are drawn from a `std::uniform_real_distribution<T>( 0.0, 1.0 )` fed by a `std::minstd_rand`
  /// seeded with `seed`, so the same (`n`, `seed`) pair yields the same sequence for a given standard library.
  /// \param[in] n Number of values to generate.
  /// \param[in] seed Optional seed of the engine.
  /// \return Vector holding the `n` generated values (empty if `n` is 0).
  template<typename T>
  std::vector<T> generate( std::size_t n, std::minstd_rand::result_type seed = 1337 )
  {
    std::vector<T> result;
    result.reserve( n ); // single allocation up front

    std::minstd_rand generator( seed );
    std::uniform_real_distribution<T> distribution( 0.0, 1.0 );

    for( std::size_t i = 0; i < n; ++i )
    {
      result.push_back( distribution( generator ) );
    }

    return result;
  }

  /// Create `nBlock` blocks of random numbers.
  /// Each block uses a generator that's seeded with `seed + blockIndex`, and blocks are generated in parallel.
  /// The call blocks until all worker threads have been joined.
  /// \param[in] nPerBlock Number of entries generated per block.
  /// \param[in] nBlock Number of blocks generated.
  /// \param[in] seed Optional seed of the first block.
  /// \return Vector of `nBlock` blocks, where block `i` equals `generate<T>( nPerBlock, seed + i )`.
  template<typename T>
  std::vector<std::vector<T>> generateParallel( std::size_t nPerBlock, std::size_t nBlock, std::minstd_rand::result_type seed = 1337 )
  {
    std::vector<std::vector<T>> results( nBlock );
    if( nBlock == 0 ) return results; // nothing to do: do not spawn any thread

    // hardware_concurrency() may return 0 if the value is not computable: fall back to one thread
    // instead of dividing by zero below. Also never start more threads than there are blocks.
    const std::size_t nHardware = std::thread::hardware_concurrency();
    const std::size_t nWanted = ( nHardware == 0 ? 1 : nHardware );
    const std::size_t nThread = ( nWanted < nBlock ? nWanted : nBlock );

    // Number of consecutive blocks handled by each thread (rounded up)
    const std::size_t partPerThread = nBlock / nThread + ( nBlock % nThread != 0 );

    // Each thread writes only to its own range of `results`, hence no locking is needed
    auto makeBlock = [nPerBlock, nBlock, seed, &results]( std::size_t partitionBegin, std::size_t partitionEnd )
    {
      for( std::size_t partition = partitionBegin; partition < partitionEnd && partition < nBlock; ++partition )
      {
        results[partition] = generate<T>( nPerBlock, seed + partition );
      }
    };

    std::vector<std::thread> threads;
    threads.reserve( nThread );
    for( std::size_t threadId = 0; threadId < nThread; ++threadId )
    {
      threads.emplace_back( makeBlock, threadId * partPerThread, ( threadId + 1 ) * partPerThread );
    }

    for( auto& thread: threads )
    {
      thread.join();
    }

    return results;
  }

  /// Starts asynchronous generation of random numbers. This uses `nThread` detached threads (by default as many as
  /// reported by std::thread::hardware_concurrency) and generates blocks of random numbers.
  /// These become available at unspecified times; thread `t` generates the blocks t, t + nThread, t + 2*nThread, ...
  /// in this order, so that low block indices are started first.
  /// Each block is seeded with seed + blockIndex to generate stable sequences.
  /// The worker threads keep a reference to `promises`: the caller must keep the vector alive, and must not resize
  /// it, until every block has been delivered.
  /// \param[in,out] promises Vector of promise objects storing blocks of random numbers (resized to `nBlock`).
  /// \param[in] nPerBlock Configures number of entries generated per block.
  /// \param[in] nBlock Configures the number of blocks generated.
  /// \param[in] nThread Optional concurrency (0 is treated as 1).
  /// \param[in] seed Optional seed.
  template<typename T>
  void startGenerateAsync( std::vector<std::promise<std::vector<T>>>& promises, std::size_t nPerBlock, std::size_t nBlock, unsigned int nThread = std::thread::hardware_concurrency(), std::minstd_rand::result_type seed = 1337 )
  {
    promises.resize( nBlock );

    // hardware_concurrency() may return 0: without any worker no promise would ever be fulfilled
    // and every consumer would wait forever, so always use at least one thread.
    const unsigned int nWorker = ( nThread == 0 ? 1u : nThread );

    auto makeBlocks = [nPerBlock, nBlock, nWorker, seed, &promises]( std::size_t threadID )
    {
      // Strided assignment of blocks to threads
      for( std::size_t partition = threadID; partition < nBlock; partition += nWorker )
      {
        auto values = generate<T>( nPerBlock, seed + partition );
        promises[partition].set_value( std::move( values ) );
      }
    };

    for( unsigned int threadId = 0; threadId < nWorker; ++threadId )
    {
      std::thread( makeBlocks, threadId ).detach();
    }
  }

} // namespace CommonRandomNumbers

#endif /* COMMONRANDOMNUMBERS_H_ */
4 changes: 0 additions & 4 deletions epochX/cudacpp/ee_mumu.mad/SubProcesses/cudacpp.mk
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,6 @@ MG5AMC_COMMONLIB = mg5amc_common
LIBFLAGS = -L$(LIBDIR) -l$(MG5AMC_COMMONLIB)
INCFLAGS += -I../../src

# Dependency on tools directory
TOOLSDIR = ../../../../../tools
INCFLAGS += -I$(TOOLSDIR)

# Dependency on test directory
TESTDIR = ../../../../../test
GTESTLIBDIR = $(TESTDIR)/googletest/build/lib/
Expand Down
10 changes: 5 additions & 5 deletions epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ generate e+ e- > mu+ mu-
No model currently active, so we import the Standard Model
INFO: load particles
INFO: load vertices
DEBUG: model prefixing takes 0.0048007965087890625 
DEBUG: model prefixing takes 0.004700660705566406 
INFO: Restrict model sm with file models/sm/restrict_default.dat .
DEBUG: Simplifying conditional expressions 
DEBUG: remove interactions: u s w+ at order: QED=1 
Expand Down Expand Up @@ -201,7 +201,7 @@ ALOHA: aloha creates FFV1 routines
ALOHA: aloha creates FFV2 routines
ALOHA: aloha creates FFV4 routines
ALOHA: aloha creates FFV2_4 routines
ALOHA: aloha creates 4 routines in 0.331 s
ALOHA: aloha creates 4 routines in 0.232 s
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV1
<class 'aloha.create_aloha.AbstractRoutine'> FFV2
Expand Down Expand Up @@ -230,6 +230,6 @@ INFO: /data/avalassi/GPU2023/MG5aMC/ghav-mg5amcnlo/CODEGEN_cudacpp_ee_mumu/src/.
DEBUG: Entering PLUGIN_ProcessExporter.finalize [output.py at line 196] 
quit

real 0m0.795s
user 0m0.591s
sys 0m0.051s
real 0m1.242s
user 0m0.577s
sys 0m0.061s
Loading