Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .github/workflows/regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,20 @@ jobs:
run: |
module load cuda/12.9 python3/3.12.3 opensn/gcc/14
test/run_tests --gpu -d test/python -j 32 -v 1
# HIP regression job. The build step exports HIP_PLATFORM=nvidia and
# HIP_COMPILER=nvcc, i.e. HIP is compiled through the NVIDIA toolchain, and the
# job is scheduled on the self-hosted runner labelled `cuda`.
# The same module set is loaded again in the test step.
hip:
runs-on: [self-hosted, cuda]
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: build
shell: bash
run: |
module load cuda/12.9 python3/3.12.3 rocm/7.1.1 opensn/clang/19
export HIP_PLATFORM=nvidia
export HIP_COMPILER=nvcc
mkdir build && cd build && cmake -DOPENSN_WITH_HIP=ON -DCMAKE_HIP_ARCHITECTURES=89 -DOPENSN_WITH_PYTHON_MODULE=ON .. && make -j && cd ..
- name: test
shell: bash
run: |
module load cuda/12.9 python3/3.12.3 rocm/7.1.1 opensn/clang/19
test/run_tests --gpu -d test/python -j 32 -v 1
20 changes: 15 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 3.25.2)
cmake_minimum_required(VERSION 3.29)

project(opensn
VERSION 0.0.1
Expand Down Expand Up @@ -103,6 +103,11 @@ set(OPENSN_WITH_GPU OFF)
if (OPENSN_WITH_CUDA OR OPENSN_WITH_HIP)
set(OPENSN_WITH_GPU ON FORCE)
endif()
# HIP_ON_NVIDIA is ON only when HIP support is requested and CMAKE_HIP_PLATFORM
# equals "nvidia"; in every other configuration it is explicitly set to OFF.
# Later in this file it gates the nvcc-specific compile options for libopensngpu.
if(OPENSN_WITH_HIP AND CMAKE_HIP_PLATFORM STREQUAL "nvidia")
set(HIP_ON_NVIDIA ON)
else()
set(HIP_ON_NVIDIA OFF)
endif()
option(OPENSN_WITH_PYTHON "Build with python support" ON)
option(OPENSN_WITH_PYTHON_MODULE "Build with python module" OFF)

Expand Down Expand Up @@ -252,13 +257,19 @@ add_library(libopensn SHARED ${LIBOPENSN_SRCS})

if (OPENSN_WITH_GPU)
if (OPENSN_WITH_CUDA)
target_compile_options(libopensngpu PRIVATE --expt-extended-lambda --expt-relaxed-constexpr)
target_compile_options(libopensngpu PRIVATE $<BUILD_INTERFACE:-diag-suppress 1301>)
target_compile_features(libopensngpu PUBLIC cuda_std_20)
elseif (OPENSN_WITH_HIP)
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}")
else ()
set_property(TARGET libopensngpu PROPERTY HIP_STANDARD 20)
set_property(TARGET libopensngpu PROPERTY HIP_STANDARD_REQUIRED ON)
endif()
if (HIP_ON_NVIDIA)
target_compile_options(libopensngpu PRIVATE $<$<COMPILE_LANGUAGE:HIP>:-ccbin=${CMAKE_CXX_COMPILER}>)
endif()
if (OPENSN_WITH_CUDA OR HIP_ON_NVIDIA)
target_compile_options(libopensngpu PRIVATE --expt-extended-lambda --expt-relaxed-constexpr)
target_compile_options(libopensngpu PRIVATE $<BUILD_INTERFACE:-diag-suppress 1301>)
endif()
target_compile_definitions(libopensngpu PRIVATE __OPENSN_WITH_GPU__)
target_compile_definitions(libopensn PRIVATE __OPENSN_WITH_GPU__)
set_property(TARGET libopensngpu PROPERTY POSITION_INDEPENDENT_CODE ON)
Expand All @@ -284,7 +295,6 @@ target_include_directories(libopensn
)

if (OPENSN_WITH_GPU)
target_include_directories(libopensngpu PUBLIC $<BUILD_INTERFACE:${MPI_CXX_INCLUDE_DIRS}>)
target_compile_options(libopensngpu PRIVATE $<BUILD_INTERFACE:${OPENSN_CXX_FLAGS}>)
set(GPU_LANG_CONDITION $<OR:$<COMPILE_LANGUAGE:CUDA>,$<COMPILE_LANGUAGE:HIP>>)
target_compile_options(libopensngpu PRIVATE
Expand Down
17 changes: 17 additions & 0 deletions external/caribou/device_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "api_mapping.hpp" // GPU_API
#include "exception.hpp" // caribou::check_error
#include "stream.hpp" // caribou::Stream

namespace caribou {

Expand Down Expand Up @@ -57,6 +58,9 @@ class DeviceMemory : public impl::MemoryImpl<T> {
DeviceMemory(void) = default;
/** @brief Allocate memory for holding n elements.*/
DeviceMemory(std::size_t n) : impl::MemoryImpl<T>(DeviceMemory<T>::malloc_(n)), size_(n) {}
/**
 * @brief Allocate storage for n elements through the stream-based allocation path.
 * @param n Number of elements of type T to allocate.
 * @param stream Stream forwarded to malloc_async_ for the allocation.
 */
DeviceMemory(std::size_t n, Stream & stream)
  : impl::MemoryImpl<T>(DeviceMemory::malloc_async_(n, stream)), size_(n)
{
}
/** @brief Owning a pre-allocated memory.*/
DeviceMemory(T * ptr, std::size_t n = 0) : impl::MemoryImpl<T>(ptr), size_(n) {}
/// @}
Expand Down Expand Up @@ -91,6 +95,11 @@ class DeviceMemory : public impl::MemoryImpl<T> {
size_ = 0;
return impl::MemoryImpl<T>::release();
}
/**
 * @brief Give up ownership of the managed memory and hand it to FreeAsync on a stream.
 * @param stream Stream passed to the FreeAsync call.
 * @note The status returned by FreeAsync is discarded (cast to void); no error is reported
 * to the caller.
 */
void async_free(Stream & stream) noexcept {
  // release() detaches the pointer from this object before it is freed.
  T * const released = this->release();
  static_cast<void>(::GPU_API(FreeAsync)(static_cast<void *>(released), stream));
}
/** @brief Zero-fill device memory with zeros.*/
void zero_fill(void) { check_error(::GPU_API(Memset)(this->get(), 0, this->size_ * sizeof(T))); }
/// @}
Expand All @@ -106,6 +115,14 @@ class DeviceMemory : public impl::MemoryImpl<T> {
check_error(::GPU_API(Memset)(result, 0, sizeof(T) * size));
return result;
}

/**
 * @brief Allocate and zero-fill memory for `size` elements using the stream-based API calls.
 * @param size Number of elements of type T to allocate.
 * @param stream Stream passed to both the MallocAsync and the MemsetAsync call.
 * @return Pointer produced by MallocAsync.
 * @note Both API results are passed through check_error.
 */
static T * malloc_async_(std::size_t size, Stream & stream) {
  // The byte count is shared by the allocation and the zero-fill.
  const std::size_t num_bytes = sizeof(T) * size;
  T * device_ptr;
  check_error(::GPU_API(MallocAsync)(&device_ptr, num_bytes, stream));
  check_error(::GPU_API(MemsetAsync)(device_ptr, 0, num_bytes, stream));
  return device_ptr;
}
};

} // namespace caribou
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ DiscreteOrdinatesProblem::InitializeSweepDataStructures()
quadrature_fluds_commondata_map_.clear();
if (sweep_type_ == "AAH" && use_gpus_)
{
CreateFLUDSCommonDataForDevice();
CreateAAHD_FLUDSCommonData();
}
else if (sweep_type_ == "AAH")
{
Expand Down Expand Up @@ -968,21 +968,55 @@ DiscreteOrdinatesProblem::InitializeSweepDataStructures()

#ifndef __OPENSN_WITH_GPU__
void
DiscreteOrdinatesProblem::CreateFLUDSCommonDataForDevice()
DiscreteOrdinatesProblem::CreateAAHD_FLUDSCommonData()
{
throw std::runtime_error(
"DiscreteOrdinatesProblem::CreateFLUDSCommonDataForDevice : OPENSN_WITH_CUDA not enabled.");
"DiscreteOrdinatesProblem::CreateAAHD_FLUDSCommonData : OPENSN_WITH_CUDA not enabled.");
}

std::shared_ptr<FLUDS>
DiscreteOrdinatesProblem::CreateFLUDSForDevice(std::size_t num_groups,
std::size_t num_angles,
const FLUDSCommonData& common_data)
DiscreteOrdinatesProblem::CreateAAHD_FLUDS(std::size_t num_groups,
std::size_t num_angles,
const FLUDSCommonData& common_data)
{
throw std::runtime_error(
"DiscreteOrdinatesProblem::CreateFLUDSForDevice : OPENSN_WITH_CUDA not enabled.");
"DiscreteOrdinatesProblem::CreateAAHD_FLUDS : OPENSN_WITH_CUDA not enabled.");
return {};
}

// Stub compiled only when __OPENSN_WITH_GPU__ is not defined.
// It never produces an angle set: every call throws std::runtime_error and the
// arguments are ignored.
std::shared_ptr<AngleSet>
DiscreteOrdinatesProblem::CreateAAHD_AngleSet(
  size_t id,
  size_t num_groups,
  const SPDS& spds,
  std::shared_ptr<FLUDS>& fluds,
  std::vector<size_t>& angle_indices,
  std::map<uint64_t, std::shared_ptr<SweepBoundary>>& boundaries,
  int maximum_message_size,
  const MPICommunicatorSet& in_comm_set)
{
  static constexpr const char* error_text =
    "DiscreteOrdinatesProblem::CreateAAHD_AngleSet : OPENSN_WITH_CUDA not enabled.";
  throw std::runtime_error(error_text);
}

// Stub compiled only when __OPENSN_WITH_GPU__ is not defined.
// Every call throws std::runtime_error; the groupset argument is ignored.
std::shared_ptr<SweepChunk>
DiscreteOrdinatesProblem::CreateAAHD_SweepChunk(LBSGroupset& groupset)
{
  static constexpr const char* error_text =
    "DiscreteOrdinatesProblem::CreateAAHD_SweepChunk : OPENSN_WITH_CUDA not enabled.";
  throw std::runtime_error(error_text);
}

// Definition used when __OPENSN_WITH_GPU__ is not defined: intentionally empty,
// so calling it in such a build does nothing (unlike the neighbouring AAHD
// factory stubs, it does not throw).
void
DiscreteOrdinatesProblem::CopyPhiAndSrcToDevice()
{
}

// Definition used when __OPENSN_WITH_GPU__ is not defined: intentionally empty,
// so calling it in such a build does nothing (unlike the neighbouring AAHD
// factory stubs, it does not throw).
void
DiscreteOrdinatesProblem::CopyPhiAndOutflowBackToHost()
{
}
#endif

std::pair<UniqueSOGroupings, DirIDToSOMap>
Expand Down Expand Up @@ -1195,7 +1229,7 @@ DiscreteOrdinatesProblem::InitFluxDataStructures(LBSGroupset& groupset)
std::shared_ptr<FLUDS> fluds;
if (use_gpus_)
{
fluds = CreateFLUDSForDevice(gs_num_grps, angle_indices.size(), fluds_common_data);
fluds = CreateAAHD_FLUDS(gs_num_grps, angle_indices.size(), fluds_common_data);
}
else
{
Expand All @@ -1205,16 +1239,29 @@ DiscreteOrdinatesProblem::InitFluxDataStructures(LBSGroupset& groupset)
dynamic_cast<const AAH_FLUDSCommonData&>(fluds_common_data));
}

auto angle_set = std::make_shared<AAH_AngleSet>(angle_set_id++,
gs_num_grps,
*sweep_ordering,
fluds,
angle_indices,
sweep_boundaries_,
options_.max_mpi_message_size,
*grid_local_comm_set_,
use_gpus_);

std::shared_ptr<AngleSet> angle_set;
if (use_gpus_)
{
angle_set = CreateAAHD_AngleSet(angle_set_id++,
gs_num_grps,
*sweep_ordering,
fluds,
angle_indices,
sweep_boundaries_,
options_.max_mpi_message_size,
*grid_local_comm_set_);
}
else
{
angle_set = std::make_shared<AAH_AngleSet>(angle_set_id++,
gs_num_grps,
*sweep_ordering,
fluds,
angle_indices,
sweep_boundaries_,
options_.max_mpi_message_size,
*grid_local_comm_set_);
}
groupset.angle_agg->GetAngleSetGroups().push_back(angle_set);
}
else if (sweep_type_ == "CBC")
Expand All @@ -1232,8 +1279,7 @@ DiscreteOrdinatesProblem::InitFluxDataStructures(LBSGroupset& groupset)
fluds,
angle_indices,
sweep_boundaries_,
*grid_local_comm_set_,
use_gpus_);
*grid_local_comm_set_);

groupset.angle_agg->GetAngleSetGroups().push_back(angle_set);
}
Expand Down Expand Up @@ -1266,6 +1312,13 @@ DiscreteOrdinatesProblem::SetSweepChunk(LBSGroupset& groupset)
return sweep_chunk;
}

if (use_gpus_)
{
auto sweep_chunk = CreateAAHD_SweepChunk(groupset);

return sweep_chunk;
}

auto sweep_chunk = std::make_shared<AAHSweepChunk>(*this, groupset);

return sweep_chunk;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
// SPDX-License-Identifier: MIT

#include "modules/linear_boltzmann_solvers/discrete_ordinates_problem/discrete_ordinates_problem.h"
#include "modules/linear_boltzmann_solvers/discrete_ordinates_problem/sweep/angle_set/aahd_angle_set.h"
#include "modules/linear_boltzmann_solvers/discrete_ordinates_problem/sweep/fluds/aahd_fluds_common_data.h"
#include "modules/linear_boltzmann_solvers/discrete_ordinates_problem/sweep/fluds/aahd_fluds.h"
#include "modules/linear_boltzmann_solvers/discrete_ordinates_problem/sweep_chunks/aahd_sweep_chunk.h"
#include "modules/linear_boltzmann_solvers/lbs_problem/device/memory_pinner.h"
#include "modules/linear_boltzmann_solvers/lbs_problem/device/carrier/outflow_carrier.h"

namespace opensn
{

void
DiscreteOrdinatesProblem::CreateFLUDSCommonDataForDevice()
DiscreteOrdinatesProblem::CreateAAHD_FLUDSCommonData()
{
for (const auto& [quadrature, spds_list] : quadrature_spds_map_)
{
Expand All @@ -22,12 +26,56 @@ DiscreteOrdinatesProblem::CreateFLUDSCommonDataForDevice()
}

std::shared_ptr<FLUDS>
DiscreteOrdinatesProblem::CreateFLUDSForDevice(std::size_t num_groups,
std::size_t num_angles,
const FLUDSCommonData& common_data)
DiscreteOrdinatesProblem::CreateAAHD_FLUDS(std::size_t num_groups,
std::size_t num_angles,
const FLUDSCommonData& common_data)
{
return std::make_shared<AAHD_FLUDS>(
num_groups, num_angles, dynamic_cast<const AAHD_FLUDSCommonData&>(common_data));
}

// Constructs an AAHD_AngleSet from the given arguments, forwarded unchanged and
// in the same order, and returns it through the AngleSet base pointer.
std::shared_ptr<AngleSet>
DiscreteOrdinatesProblem::CreateAAHD_AngleSet(
  size_t id,
  size_t num_groups,
  const SPDS& spds,
  std::shared_ptr<FLUDS>& fluds,
  std::vector<size_t>& angle_indices,
  std::map<uint64_t, std::shared_ptr<SweepBoundary>>& boundaries,
  int maximum_message_size,
  const MPICommunicatorSet& in_comm_set)
{
  auto angle_set = std::make_shared<AAHD_AngleSet>(id,
                                                   num_groups,
                                                   spds,
                                                   fluds,
                                                   angle_indices,
                                                   boundaries,
                                                   maximum_message_size,
                                                   in_comm_set);

  return angle_set;
}

// Constructs an AAHDSweepChunk bound to this problem and the given groupset and
// returns it through the SweepChunk base pointer.
std::shared_ptr<SweepChunk>
DiscreteOrdinatesProblem::CreateAAHD_SweepChunk(LBSGroupset& groupset)
{
  auto sweep_chunk = std::make_shared<AAHDSweepChunk>(*this, groupset);

  return sweep_chunk;
}

// Pushes the contents of pinner 0 and then pinner 1 to the device. Does nothing
// unless use_gpus_ is set.
// NOTE(review): the original local names suggest pinner 0 holds the source
// moments and pinner 1 holds phi - confirm against where the pinners are created.
void
DiscreteOrdinatesProblem::CopyPhiAndSrcToDevice()
{
  if (use_gpus_)
  {
    auto* src_pinner = reinterpret_cast<MemoryPinner<double>*>(GetPinner(0));
    src_pinner->CopyToDevice();

    auto* phi_pinner = reinterpret_cast<MemoryPinner<double>*>(GetPinner(1));
    phi_pinner->CopyToDevice();
  }
}

// Brings results back from the device: copies pinner 1 from the device, then
// has carrier 1 accumulate into the cell transport views and resets it. Does
// nothing unless use_gpus_ is set.
// NOTE(review): the original local names suggest pinner 1 holds phi and
// carrier 1 is the outflow carrier - confirm against where they are created.
void
DiscreteOrdinatesProblem::CopyPhiAndOutflowBackToHost()
{
  if (use_gpus_)
  {
    auto* phi_pinner = reinterpret_cast<MemoryPinner<double>*>(GetPinner(1));
    phi_pinner->CopyFromDevice();

    auto* outflow_carrier = reinterpret_cast<OutflowCarrier*>(GetCarrier(1));
    outflow_carrier->AccumulateBack(GetCellTransportViews());
    // The carrier is reset only after its contents have been accumulated.
    outflow_carrier->Reset();
  }
}

} // namespace opensn
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ class DiscreteOrdinatesProblem : public LBSProblem
void SetBoundaryOptions(const InputParameters& params) override;
void ClearBoundaries() override;

/// Copies pinners 0 and 1 to the device when GPUs are in use; otherwise (or in a
/// build without __OPENSN_WITH_GPU__) it does nothing.
void CopyPhiAndSrcToDevice();
/// Copies pinner 1 back from the device, then accumulates carrier 1 into the cell
/// transport views and resets it, when GPUs are in use; otherwise (or in a build
/// without __OPENSN_WITH_GPU__) it does nothing.
void CopyPhiAndOutflowBackToHost();

protected:
explicit DiscreteOrdinatesProblem(const std::string& name,
std::shared_ptr<MeshContinuum> grid_ptr);
Expand Down Expand Up @@ -134,10 +137,20 @@ class DiscreteOrdinatesProblem : public LBSProblem
std::vector<std::vector<double>> psi_old_local_;

private:
void CreateFLUDSCommonDataForDevice();
std::shared_ptr<FLUDS> CreateFLUDSForDevice(std::size_t num_groups,
std::size_t num_angles,
const FLUDSCommonData& common_data);
// AAHD factory helpers, used on the "AAH" sweep path when use_gpus_ is set.
// Builds without __OPENSN_WITH_GPU__ provide stub definitions for them; the
// stubs visible for these helpers throw std::runtime_error.

/// Called from InitializeSweepDataStructures when sweep_type_ is "AAH" and
/// use_gpus_ is set.
void CreateAAHD_FLUDSCommonData();
/// Returns an AAHD_FLUDS built from num_groups, num_angles and common_data.
/// common_data is dynamic_cast to const AAHD_FLUDSCommonData&.
std::shared_ptr<FLUDS> CreateAAHD_FLUDS(std::size_t num_groups,
std::size_t num_angles,
const FLUDSCommonData& common_data);
/// Returns an AAHD_AngleSet constructed from the arguments, forwarded unchanged.
std::shared_ptr<AngleSet>
CreateAAHD_AngleSet(size_t id,
size_t num_groups,
const SPDS& spds,
std::shared_ptr<FLUDS>& fluds,
std::vector<size_t>& angle_indices,
std::map<uint64_t, std::shared_ptr<SweepBoundary>>& boundaries,
int maximum_message_size,
const MPICommunicatorSet& in_comm_set);
/// Returns an AAHDSweepChunk constructed from this problem and the given groupset.
std::shared_ptr<SweepChunk> CreateAAHD_SweepChunk(LBSGroupset& groupset);

/**
* This routine groups angle-indices to groups sharing the same sweep ordering. It also takes
Expand Down
Loading