Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* CommonRandomNumbers.h
*
* Created on: 04.11.2020
* Author: Stephan Hageboeck
*/

#ifndef COMMONRANDOMNUMBERS_H_
#define COMMONRANDOMNUMBERS_H_

#include <algorithm>
#include <cstddef>
#include <exception>
#include <future>
#include <random>
#include <thread>
#include <vector>

namespace CommonRandomNumbers {

  /// Create `n` random numbers using a simple C++ engine.
  /// A `std::minstd_rand` seeded with `seed` is sampled `n` times through a
  /// `std::uniform_real_distribution<T>(0.0, 1.0)`.
  /// \tparam T Value type handed to `std::uniform_real_distribution`.
  /// \param[in] n Number of values to generate.
  /// \param[in] seed Optional seed of the engine.
  /// \return Vector holding the `n` values in the order they were drawn.
  template<typename T>
  std::vector<T> generate(std::size_t n, std::minstd_rand::result_type seed = 1337) {
    std::vector<T> result;
    result.reserve(n);

    std::minstd_rand generator(seed);
    std::uniform_real_distribution<T> distribution(0.0, 1.0);

    for (std::size_t i = 0; i < n; ++i) {
      result.push_back(distribution(generator));
    }

    return result;
  }


  /// Create `nBlock` blocks of random numbers.
  /// Each block uses a generator that's seeded with `seed + blockIndex`, and blocks are generated in parallel.
  /// The call returns once all worker threads have been joined.
  /// \param[in] nPerBlock Number of entries generated per block.
  /// \param[in] nBlock Number of blocks generated.
  /// \param[in] seed Optional seed of block 0.
  /// \return One vector per block; `result[i]` equals `generate<T>(nPerBlock, seed + i)`.
  template<typename T>
  std::vector<std::vector<T>> generateParallel(std::size_t nPerBlock, std::size_t nBlock, std::minstd_rand::result_type seed = 1337) {
    std::vector<std::vector<T>> results(nBlock);
    if (nBlock == 0) {
      return results;
    }

    // hardware_concurrency() may return 0 when the value cannot be determined;
    // fall back to a single worker instead of dividing by zero.
    const std::size_t nHardware = std::max<std::size_t>(1, std::thread::hardware_concurrency());
    // Contiguous blocks handled by each worker (rounded up) ...
    const std::size_t blocksPerThread = nBlock / nHardware + (nBlock % nHardware != 0);
    // ... and the number of workers that actually get a non-empty partition.
    const std::size_t nWorker = nBlock / blocksPerThread + (nBlock % blocksPerThread != 0);

    // Every worker writes to distinct elements of `results`, so no locking is needed.
    auto makeBlocks = [nPerBlock, seed, &results](std::size_t partitionBegin, std::size_t partitionEnd) {
      for (std::size_t partition = partitionBegin; partition < partitionEnd; ++partition) {
        results[partition] = generate<T>(nPerBlock, static_cast<std::minstd_rand::result_type>(seed + partition));
      }
    };

    std::vector<std::thread> threads;
    threads.reserve(nWorker);
    try {
      for (std::size_t worker = 0; worker < nWorker; ++worker) {
        const std::size_t partitionBegin = worker * blocksPerThread;
        const std::size_t partitionEnd = std::min(nBlock, partitionBegin + blocksPerThread);
        threads.emplace_back(makeBlocks, partitionBegin, partitionEnd);
      }
    } catch (...) {
      // Destroying a joinable std::thread calls std::terminate, so wait for the
      // workers that did start before reporting the failure to the caller.
      for (auto& thread : threads) {
        thread.join();
      }
      throw;
    }

    for (auto& thread : threads) {
      thread.join();
    }

    return results;
  }


  /// Starts asynchronous generation of random numbers on detached threads, and generates blocks of random numbers.
  /// These become available at unspecified times; thread `t` works through the blocks t, t + nThread, t + 2*nThread, ...
  /// in increasing order, so low block indices are started first.
  /// Each block is seeded with seed + blockIndex to generate stable sequences.
  /// If generating a block throws, the exception is stored in the corresponding promise.
  /// The worker threads keep a reference to `promises`: the vector must stay alive and must not be
  /// resized until every block has been delivered.
  /// \param[in,out] promises Vector of promise objects storing blocks of random numbers. It is resized to `nBlock`.
  /// \param[in] nPerBlock Configures number of entries generated per block.
  /// \param[in] nBlock Configures the number of blocks generated.
  /// \param[in] nThread Optional concurrency. A value of 0 (which `std::thread::hardware_concurrency()` may return) is treated as 1.
  /// \param[in] seed Optional seed.
  template<typename T>
  void startGenerateAsync(std::vector<std::promise<std::vector<T>>>& promises, std::size_t nPerBlock, std::size_t nBlock,
                          unsigned int nThread = std::thread::hardware_concurrency(), std::minstd_rand::result_type seed = 1337) {
    promises.resize(nBlock);
    if (nBlock == 0) {
      return;
    }

    // With zero threads no promise would ever be fulfilled and every waiting future
    // would block forever; more threads than blocks would just idle.
    const std::size_t nWorker = std::min<std::size_t>(std::max(nThread, 1u), nBlock);

    auto makeBlocks = [nPerBlock, nBlock, nWorker, seed, &promises](std::size_t threadID) {
      for (std::size_t partition = threadID; partition < nBlock; partition += nWorker) {
        try {
          promises[partition].set_value(generate<T>(nPerBlock, static_cast<std::minstd_rand::result_type>(seed + partition)));
        } catch (...) {
          // An exception escaping a detached thread terminates the program,
          // so hand the failure to whoever waits on the future instead.
          try {
            promises[partition].set_exception(std::current_exception());
          } catch (...) {
            // The promise was already satisfied; nothing left to report.
          }
        }
      }
    };

    for (std::size_t threadId = 0; threadId < nWorker; ++threadId) {
      std::thread(makeBlocks, threadId).detach();
    }
  }

}

#endif /* COMMONRANDOMNUMBERS_H_ */
4 changes: 2 additions & 2 deletions epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU):
s+'gpu/MemoryAccessAmplitudes.h', s+'gpu/MemoryAccessWavefunctions.h',
s+'gpu/MemoryAccessGs.h', s+'gpu/MemoryAccessCouplingsFixed.h',
s+'gpu/MemoryAccessNumerators.h', s+'gpu/MemoryAccessDenominators.h',
s+'gpu/EventStatistics.h',
s+'gpu/EventStatistics.h', s+'gpu/CommonRandomNumbers.h',
s+'gpu/CrossSectionKernels.cc', s+'gpu/CrossSectionKernels.h',
s+'gpu/MatrixElementKernels.cc', s+'gpu/MatrixElementKernels.h',
s+'gpu/RamboSamplingKernels.cc', s+'gpu/RamboSamplingKernels.h',
Expand All @@ -114,7 +114,7 @@ class PLUGIN_ProcessExporter(PLUGIN_export_cpp.ProcessExporterGPU):
'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h',
'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h',
'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h',
'EventStatistics.h',
'EventStatistics.h', 'CommonRandomNumbers.h',
'CrossSectionKernels.cc', 'CrossSectionKernels.h',
'MatrixElementKernels.cc', 'MatrixElementKernels.h',
'RamboSamplingKernels.cc', 'RamboSamplingKernels.h',
Expand Down