From eb3f1c0fa3e33e9d382255a6c493ca9c5e10fe3a Mon Sep 17 00:00:00 2001 From: Helen Schottenhamml Date: Mon, 9 Sep 2024 17:19:41 +0200 Subject: [PATCH] Shifted Periodicity --- src/CMakeLists.txt | 2 +- src/boundary/CMakeLists.txt | 1 + src/boundary/ShiftedPeriodicity.h | 611 ++++++++++++++++++ src/gpu/CMakeLists.txt | 2 + src/gpu/FieldAccessor.h | 30 +- src/gpu/FieldAccessor3D.h | 14 +- src/gpu/FieldAccessorXYZ.h | 8 +- src/gpu/ShiftedPeriodicity.cu | 53 ++ src/gpu/ShiftedPeriodicity.h | 144 +++++ tests/boundary/CMakeLists.txt | 11 + tests/boundary/TestShiftedPeriodicity.cpp | 310 +++++++++ tests/boundary/TestShiftedPeriodicitySetup.py | 43 ++ tests/gpu/CMakeLists.txt | 10 + tests/gpu/TestShiftedPeriodicityGPU.cpp | 319 +++++++++ tests/gpu/TestShiftedPeriodicitySetupGPU.py | 40 ++ 15 files changed, 1571 insertions(+), 27 deletions(-) create mode 100644 src/boundary/ShiftedPeriodicity.h create mode 100644 src/gpu/ShiftedPeriodicity.cu create mode 100644 src/gpu/ShiftedPeriodicity.h create mode 100644 tests/boundary/TestShiftedPeriodicity.cpp create mode 100644 tests/boundary/TestShiftedPeriodicitySetup.py create mode 100644 tests/gpu/TestShiftedPeriodicityGPU.cpp create mode 100644 tests/gpu/TestShiftedPeriodicitySetupGPU.py diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d49a1e63..13e7ac49 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -24,7 +24,7 @@ add_subdirectory( blockforest ) add_subdirectory( boundary ) add_subdirectory( communication ) add_subdirectory( core ) -add_subdirectory(gpu) +add_subdirectory( gpu ) add_subdirectory( domain_decomposition ) add_subdirectory( executiontree ) if ( WALBERLA_BUILD_WITH_FFT AND FFTW3_FOUND ) diff --git a/src/boundary/CMakeLists.txt b/src/boundary/CMakeLists.txt index c122cbdd..7da3ebfe 100644 --- a/src/boundary/CMakeLists.txt +++ b/src/boundary/CMakeLists.txt @@ -14,4 +14,5 @@ target_sources( boundary BoundaryHandlingCollection.h Boundary.cpp BoundaryUID.h + ShiftedPeriodicity.h ) diff --git a/src/boundary/ShiftedPeriodicity.h b/src/boundary/ShiftedPeriodicity.h new file mode 100644 index 00000000..ce2a32e5 --- /dev/null +++ b/src/boundary/ShiftedPeriodicity.h @@ -0,0 +1,611 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \file ShiftedPeriodicity.h +//! \ingroup boundary +//! 
\author Helen Schottenhamml
+//
+//======================================================================================================================
+
+#include "blockforest/Block.h"
+#include "blockforest/BlockID.h"
+#include "blockforest/StructuredBlockForest.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/debug/CheckFunctions.h"
+#include "core/debug/Debug.h"
+#include "core/logging/Logging.h"
+#include "core/math/AABBFwd.h"
+#include "core/math/Vector3.h"
+#include "core/mpi/MPIWrapper.h"
+#include "core/mpi/MPIManager.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+#include "domain_decomposition/IBlockID.h"
+#include "domain_decomposition/MapPointToPeriodicDomain.h"
+
+#include <array>
+#include <cstdlib>
+#include <functional>
+#include <map>
+#include <memory>
+#include <tuple>
+#include <vector>
+
+
+namespace walberla {
+namespace boundary {
+
+//*******************************************************************************************************************
+/*!
+ * A periodicity boundary condition that adds a user-defined spatial shift to the field when applied.
+ * This shift can prevent the locking of large-scale turbulent features in the flow direction, see e.g.,
+ * Munters et al. (https://doi.org/10.1063/1.4941912).
+ *
+ * Periodicity defined in the blockforest must be turned off in the normal direction. Only uniform grids are
+ * supported for the moment. Normal direction and shift direction must not coincide.
+ * Moreover, this boundary condition is only applied at the minimum and maximum of the domain in a given
+ * direction; it cannot be applied in the interior of the domain for now.
+ *
+ * This base class handles the logistics of splitting the field and the communication.
+ *
+ * @tparam Derived_T The CRTP-derived class providing packBuffer / unpackBuffer
+ * @tparam Field_T   Type of the ghost-layer field that is shifted periodically
+ */
+//*******************************************************************************************************************
+template< typename Derived_T, typename Field_T >
+class ShiftedPeriodicityBase {
+
+ public:
+
+   using FieldType = Field_T;
+   using ValueType = typename FieldType::value_type;
+   using ShiftType = int;
+
+   ShiftedPeriodicityBase( const std::weak_ptr< StructuredBlockForest > & blockForest,
+                           const BlockDataID & fieldID, const uint_t fieldGhostLayers,
+                           const uint_t normalDir, const uint_t shiftDir, const ShiftType shiftValue )
+      : blockForest_(blockForest), normalDir_(normalDir), shiftDir_(shiftDir),
+        fieldID_( fieldID ), fieldGhostLayers_(fieldGhostLayers)
+   {
+      auto sbf = blockForest_.lock();
+
+      WALBERLA_ASSERT_NOT_NULLPTR( sbf )
+
+      WALBERLA_CHECK(sbf->storesUniformBlockGrid(), "Periodic shift is currently only implemented for uniform grids.")
+      WALBERLA_CHECK(sbf->containsGlobalBlockInformation(), "For the periodic shift, the blockforest must be constructed to retain global information.")
+
+      // wrap the shift into [0, number of cells in shift direction)
+      shift_ = Vector3< ShiftType >{};
+      auto adaptedShiftValue = shiftValue % ShiftType(sbf->getNumberOfCells(shiftDir_));
+      shift_[shiftDir_] = ShiftType(adaptedShiftValue >= 0 ? adaptedShiftValue : adaptedShiftValue + ShiftType(sbf->getNumberOfCells(shiftDir_)));
+
+      // sanity checks
+      WALBERLA_CHECK_UNEQUAL( shiftDir_, normalDir_, "Direction of periodic shift and boundary normal must not coincide." )
+
+      WALBERLA_CHECK( sbf->isPeriodic(shiftDir_), "Blockforest must be periodic in direction " << shiftDir_ << "!" )
+      WALBERLA_CHECK( !sbf->isPeriodic(normalDir_), "Blockforest must NOT be periodic in direction " << normalDir_ << "!" )
+
+   }
+
+   Vector3< ShiftType > shift() const { return shift_; }
+   uint_t shiftDirection() const { return shiftDir_; }
+   uint_t normalDirection() const { return normalDir_; }
+
+   void operator()() {
+
+      auto mpiInstance = mpi::MPIManager::instance();
+      const auto currentRank = numeric_cast< mpi::MPIRank >(mpiInstance->rank());
+
+      const auto sbf = blockForest_.lock();
+      WALBERLA_ASSERT_NOT_NULLPTR( sbf )
+
+      // only set up send and receive information once at the beginning
+      if(!setupPeriodicity_){
+         setupPeriodicity();
+         setupPeriodicity_ = true;
+      }
+
+      // set up local buffers to avoid temporary fields; the key is a unique tag that ensures correct
+      // communication if several messages have the same source and destination
+      // thanks to the unique tag, the same buffer can be used for both local and MPI communication
+      std::map< int, std::vector< ValueType > > buffer;
+      std::map< BlockID, std::map< int, MPI_Request > > sendRequests;
+      std::map< BlockID, std::map< int, MPI_Request > > recvRequests;
+
+      std::vector< IBlock * > localBlocks{};
+      sbf->getBlocks(localBlocks);
+
+      /// packing and scheduling of sends
+
+      for( auto block : localBlocks ) {
+
+         const auto blockID = dynamic_cast< Block * >(block)->getId();
+
+         WALBERLA_ASSERT_EQUAL(sendAABBs_[blockID].size(), recvAABBs_[blockID].size() )
+
+         for(uint_t i = 0; i < sendAABBs_[blockID].size(); ++i) {
+
+            auto & sendInfo = sendAABBs_[blockID][i];
+
+            const auto sendRank = std::get<0>(sendInfo);
+            const auto sendTag = std::get<3>(sendInfo);
+
+            const auto sendCI = std::get<2>(sendInfo);
+
+            const auto sendSize = sendCI.numCells() * uint_c(fSize_);
+
+            buffer[sendTag].resize(sendSize);
+            static_cast< Derived_T * >(this)->packBuffer(block, sendCI, buffer[sendTag]);
+
+            WALBERLA_MPI_SECTION() {
+               // schedule sends if MPI
+               if (sendRank != currentRank)
+               {
+                  MPI_Isend(buffer[sendTag].data(), mpi::MPISize(buffer[sendTag].size() * sizeof(ValueType)), MPI_BYTE,
+                            sendRank, sendTag, mpiInstance->comm(), &sendRequests[blockID][sendTag]);
+               }
+            }
+
+         }
+
+      }
+
+      /// scheduling of receives
+
+      for( auto block : localBlocks ) {
+
+         const auto blockID = dynamic_cast< Block * >(block)->getId();
+
+         WALBERLA_ASSERT_EQUAL(sendAABBs_[blockID].size(), recvAABBs_[blockID].size() )
+
+         for(uint_t i = 0; i < recvAABBs_[blockID].size(); ++i) {
+
+            auto & recvInfo = recvAABBs_[blockID][i];
+
+            const auto recvRank = std::get<0>(recvInfo);
+            const auto recvTag = std::get<3>(recvInfo);
+            const auto recvCI = std::get<2>(recvInfo);
+
+            WALBERLA_MPI_SECTION() {
+               // do not schedule receives for local communication
+               if (recvRank != currentRank) {
+                  const auto recvSize = recvCI.numCells() * uint_c(fSize_);
+                  buffer[recvTag].resize(recvSize);
+
+                  // schedule receives
+                  MPI_Irecv(buffer[recvTag].data(), mpi::MPISize(buffer[recvTag].size() * sizeof(ValueType)), MPI_BYTE,
+                            recvRank, recvTag, mpiInstance->comm(), &recvRequests[blockID][recvTag]);
+               }
+            }
+
+         }
+
+      }
+
+      /// unpacking
+
+      for( auto block : localBlocks ) {
+
+         const auto blockID = dynamic_cast< Block * >(block)->getId();
+
+         for(uint_t i = 0; i < recvAABBs_[blockID].size(); ++i) {
+
+            auto & recvInfo = recvAABBs_[blockID][i];
+
+            const auto recvRank = std::get<0>(recvInfo);
+            const auto recvTag = std::get<3>(recvInfo);
+
+            const auto recvCI = std::get<2>(recvInfo);
+
+            if(recvRank == currentRank) {
+               WALBERLA_ASSERT_GREATER(buffer.count(recvTag), 0)
+               static_cast< Derived_T * >(this)->unpackBuffer(block, recvCI, buffer[recvTag]);
+            } else {
+               WALBERLA_MPI_SECTION() {
+                  MPI_Status status;
+                  MPI_Wait(&recvRequests[blockID][recvTag], &status);
+
+                  WALBERLA_ASSERT_GREATER(buffer.count(recvTag), 0)
+                  static_cast< Derived_T * >(this)->unpackBuffer(block, recvCI, buffer[recvTag]);
+               }
+            }
+
+         }
+
+      }
+
+      WALBERLA_MPI_SECTION() {
+         // wait for all communication to be finished
+         for (auto& block : localBlocks)
+         {
+            const auto blockID = dynamic_cast< Block* >(block)->getId();
+
+            // continue (not return) here: a block without pending sends must not cut the loop short
+            // for the remaining blocks
+            if (sendRequests[blockID].empty()) continue;
+
+            std::vector< MPI_Request > v;
+            std::transform(sendRequests[blockID].begin(), sendRequests[blockID].end(), std::back_inserter(v),
+                           second(sendRequests[blockID]));
+            MPI_Waitall(int_c(sendRequests[blockID].size()), v.data(), MPI_STATUSES_IGNORE);
+         }
+      }
+
+   }
+
+ private:
+
+   /*!
+    * Creates the send and receive information (source / target rank, source / target blocks, source and
+    * destination CI (here in the form of an AABB), and a unique communication tag) per block for the case
+    * that the shift does not coincide with the block size.
+    *
+    * In this case, each block is split into two send sub-blocks and two receive sub-blocks whose source /
+    * target blocks (and possibly ranks) differ.
+    * Afterwards, all the information necessary for the communication is determined and saved in the
+    * corresponding data structure.
+    */
+   void processDoubleAABB( const AABB & originalAABB, const std::shared_ptr< StructuredBlockForest > & sbf,
+                           const real_t nGL, const BlockID & blockID, const int normalShift ) {
+
+      WALBERLA_ASSERT(normalShift == -1 || normalShift == 1)
+
+      const auto localShift = normalShift == 1 ? shift_ : -shift_;
+
+      const auto offset = ShiftType(int_c(shift_[shiftDir_]) % int_c(sbf->getNumberOfCellsPerBlock(shiftDir_)));
+      Vector3< ShiftType > normalShiftVector{};
+      normalShiftVector[normalDir_] = ShiftType(normalShift * int_c(sbf->getNumberOfCellsPerBlock(normalDir_)));
+
+      const auto remDir = uint_t(3) - normalDir_ - shiftDir_;
+
+      AABB aabb1 = originalAABB;
+      AABB aabb2 = originalAABB;
+
+      if( normalShift == 1 ) {
+         aabb1.setAxisBounds(shiftDir_, aabb1.min(shiftDir_), aabb1.max(shiftDir_) - real_c(offset));
+         aabb2.setAxisBounds(shiftDir_, aabb2.max(shiftDir_) - real_c(offset), aabb2.max(shiftDir_));
+      } else {
+         aabb1.setAxisBounds(shiftDir_, aabb1.min(shiftDir_), aabb1.min(shiftDir_) + real_c(offset));
+         aabb2.setAxisBounds(shiftDir_, aabb2.min(shiftDir_) + real_c(offset), aabb2.max(shiftDir_));
+      }
+
+      auto center1 = aabb1.center();
+      auto center2 = aabb2.center();
+
+      // account for ghost layers
+      aabb1.setAxisBounds(remDir, aabb1.min(remDir) - nGL, aabb1.max(remDir) + nGL);
+      aabb2.setAxisBounds(remDir, aabb2.min(remDir) - nGL, aabb2.max(remDir) + nGL);
+
+      auto localSendAABB1 = aabb1;
+      auto localRecvAABB1 = aabb1;
+      auto localSendAABB2 = aabb2;
+      auto localRecvAABB2 = aabb2;
+
+      localSendAABB1.setAxisBounds(shiftDir_, localSendAABB1.min(shiftDir_), localSendAABB1.max(shiftDir_) + nGL);
+      localSendAABB2.setAxisBounds(shiftDir_, localSendAABB2.min(shiftDir_) - nGL, localSendAABB2.max(shiftDir_));
+      localRecvAABB1.setAxisBounds(shiftDir_, localRecvAABB1.min(shiftDir_) - nGL, localRecvAABB1.max(shiftDir_));
+      localRecvAABB2.setAxisBounds(shiftDir_, localRecvAABB2.min(shiftDir_), localRecvAABB2.max(shiftDir_) + nGL);
+
+      if(normalShift == 1) { // at maximum of domain -> send inner layers, receive ghost layers
+         localSendAABB1.setAxisBounds(normalDir_, localSendAABB1.max(normalDir_) - nGL, localSendAABB1.max(normalDir_));
+         localSendAABB2.setAxisBounds(normalDir_, localSendAABB2.max(normalDir_) - nGL, localSendAABB2.max(normalDir_));
+         localRecvAABB1.setAxisBounds(normalDir_, localRecvAABB1.max(normalDir_), localRecvAABB1.max(normalDir_) + nGL);
+         localRecvAABB2.setAxisBounds(normalDir_, localRecvAABB2.max(normalDir_), localRecvAABB2.max(normalDir_) + nGL);
+      } else { // at minimum of domain -> send inner layers, receive ghost layers
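+         // mirror image of the branch above: the ghost-layer-thick slab just inside the minimum border
+         // is sent, the slab just outside of it is received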
+         localSendAABB1.setAxisBounds(normalDir_, localSendAABB1.min(normalDir_), localSendAABB1.min(normalDir_) + nGL);
+         localSendAABB2.setAxisBounds(normalDir_, localSendAABB2.min(normalDir_), localSendAABB2.min(normalDir_) + nGL);
+         localRecvAABB1.setAxisBounds(normalDir_, localRecvAABB1.min(normalDir_) - nGL, localRecvAABB1.min(normalDir_));
+         localRecvAABB2.setAxisBounds(normalDir_, localRecvAABB2.min(normalDir_) - nGL, localRecvAABB2.min(normalDir_));
+      }
+
+      WALBERLA_ASSERT_FLOAT_EQUAL(localSendAABB1.volume(), localRecvAABB1.volume())
+      WALBERLA_ASSERT_FLOAT_EQUAL(localSendAABB2.volume(), localRecvAABB2.volume())
+
+      center1 += (localShift + normalShiftVector);
+      center2 += (localShift + normalShiftVector);
+
+      std::array< bool, 3 > virtualPeriodicity{false};
+      virtualPeriodicity[normalDir_] = true;
+      virtualPeriodicity[shiftDir_] = true;
+
+      domain_decomposition::mapPointToPeriodicDomain( virtualPeriodicity, sbf->getDomain(), center1 );
+      domain_decomposition::mapPointToPeriodicDomain( virtualPeriodicity, sbf->getDomain(), center2 );
+
+      uint_t rank1{};
+      uint_t rank2{};
+
+      BlockID id1{};
+      BlockID id2{};
+
+      const auto blockInformation = sbf->getBlockInformation();
+      WALBERLA_ASSERT(blockInformation.active())
+
+      blockInformation.getProcess(rank1, center1[0], center1[1], center1[2]);
+      blockInformation.getProcess(rank2, center2[0], center2[1], center2[2]);
+
+      blockInformation.getId(id1, center1[0], center1[1], center1[2]);
+      blockInformation.getId(id2, center2[0], center2[1], center2[2]);
+
+      // define unique send / receive tags for communication
+
+      const int atMaxTagSend = normalShift < 0 ? 0 : 1;
+      const int atMaxTagRecv = normalShift < 0 ? 1 : 0;
+
+      const int sendTag1 = ((int_c(blockID.getID()) + int_c(id1.getID() * numGlobalBlocks_)) * 2 + atMaxTagSend) * 2 + 0;
+      const int sendTag2 = ((int_c(blockID.getID()) + int_c(id2.getID() * numGlobalBlocks_)) * 2 + atMaxTagSend) * 2 + 1;
+      const int recvTag2 = ((int_c(id2.getID()) + int_c(blockID.getID() * numGlobalBlocks_)) * 2 + atMaxTagRecv) * 2 + 0;
+      const int recvTag1 = ((int_c(id1.getID()) + int_c(blockID.getID() * numGlobalBlocks_)) * 2 + atMaxTagRecv) * 2 + 1;
+
+      auto block = sbf->getBlock(blockID);
+
+      sendAABBs_[blockID].emplace_back(mpi::MPIRank(rank1), id1, globalAABBToLocalCI(localSendAABB1, sbf, block), sendTag1);
+      sendAABBs_[blockID].emplace_back(mpi::MPIRank(rank2), id2, globalAABBToLocalCI(localSendAABB2, sbf, block), sendTag2);
+      recvAABBs_[blockID].emplace_back(mpi::MPIRank(rank2), id2, globalAABBToLocalCI(localRecvAABB2, sbf, block), recvTag2);
+      recvAABBs_[blockID].emplace_back(mpi::MPIRank(rank1), id1, globalAABBToLocalCI(localRecvAABB1, sbf, block), recvTag1);
+
+      WALBERLA_LOG_DETAIL("blockID = " << blockID.getID() << ", normalShift = " << normalShift
+                          << "\n\tsendRank1 = " << rank1 << "\tsendID1 = " << id1.getID() << "\tsendTag1 = " << sendTag1 << "\taabb = " << localSendAABB1
+                          << "\n\tsendRank2 = " << rank2 << "\tsendID2 = " << id2.getID() << "\tsendTag2 = " << sendTag2 << "\taabb = " << localSendAABB2
+                          << "\n\trecvRank1 = " << rank1 << "\trecvID1 = " << id1.getID() << "\trecvTag1 = " << recvTag1 << "\taabb = " << localRecvAABB1
+                          << "\n\trecvRank2 = " << rank2 << "\trecvID2 = " << id2.getID() << "\trecvTag2 = " << recvTag2 << "\taabb = " << localRecvAABB2
+      )
+   }
+
+   /*!
+    * Does the same as `processDoubleAABB`, but assuming that the shift is a multiple of the block size, i.e.,
+    * every field slice is shifted by one or several entire blocks. In this case, every block has only ONE
+    * send and ONE receive block whose information must be stored.
+    */
+   void processSingleAABB( const AABB & originalAABB, const std::shared_ptr< StructuredBlockForest > & sbf,
+                           const real_t nGL, const BlockID & blockID, const int normalShift ) {
+
+      WALBERLA_ASSERT(normalShift == -1 || normalShift == 1)
+
+      const auto localShift = normalShift == 1 ? shift_ : -shift_;
+
+      Vector3< ShiftType > normalShiftVector{};
+      normalShiftVector[normalDir_] = ShiftType(normalShift * int_c(sbf->getNumberOfCellsPerBlock(normalDir_)));
+
+      // aabb
+      auto aabb = originalAABB.getExtended(nGL);
+      auto center = aabb.center();
+
+      // receive from the interior of the domain into the ghost layers
+      auto localSendAABB = aabb;
+      auto localRecvAABB = aabb;
+
+      if(normalShift == 1) { // at maximum of domain -> send inner layers, receive ghost layers
+         localSendAABB.setAxisBounds(normalDir_, localSendAABB.max(normalDir_) - 2 * nGL, localSendAABB.max(normalDir_) - nGL);
+         localRecvAABB.setAxisBounds(normalDir_, localRecvAABB.max(normalDir_) - nGL, localRecvAABB.max(normalDir_));
+      } else { // at minimum of domain -> send inner layers, receive ghost layers
+         localSendAABB.setAxisBounds(normalDir_, localSendAABB.min(normalDir_) + nGL, localSendAABB.min(normalDir_) + 2 * nGL);
+         localRecvAABB.setAxisBounds(normalDir_, localRecvAABB.min(normalDir_), localRecvAABB.min(normalDir_) + nGL);
+      }
+
+      WALBERLA_ASSERT_FLOAT_EQUAL(localSendAABB.volume(), localRecvAABB.volume())
+
+      center += (localShift + normalShiftVector);
+
+      std::array< bool, 3 > virtualPeriodicity{false};
+      virtualPeriodicity[normalDir_] = true;
+      virtualPeriodicity[shiftDir_] = true;
+
+      domain_decomposition::mapPointToPeriodicDomain( virtualPeriodicity, sbf->getDomain(), center );
+
+      uint_t rank{};
+
+      BlockID id{};
+
+      const auto blockInformation = sbf->getBlockInformation();
+      WALBERLA_ASSERT(blockInformation.active())
+
+      blockInformation.getProcess(rank, center[0], center[1], center[2]);
+
+      blockInformation.getId(id, center[0], center[1], center[2]);
+
+      // define unique send / receive tags for communication
+
+      const int atMaxTagSend = normalShift < 0 ? 0 : 1;
+      const int atMaxTagRecv = normalShift < 0 ? 1 : 0;
+
+      const int sendTag = ((int_c(blockID.getID()) + int_c(id.getID() * numGlobalBlocks_)) * 2 + atMaxTagSend) * 2 + 0;
+      const int recvTag = ((int_c(id.getID()) + int_c(blockID.getID() * numGlobalBlocks_)) * 2 + atMaxTagRecv) * 2 + 0;
+
+      auto block = sbf->getBlock(blockID);
+
+      sendAABBs_[blockID].emplace_back(mpi::MPIRank(rank), id, globalAABBToLocalCI(localSendAABB, sbf, block), sendTag);
+      recvAABBs_[blockID].emplace_back(mpi::MPIRank(rank), id, globalAABBToLocalCI(localRecvAABB, sbf, block), recvTag);
+
+      WALBERLA_LOG_DETAIL("blockID = " << blockID.getID() << ", normalShift = " << normalShift
+                          << "\n\tsendRank = " << rank << "\tsendID = " << id.getID() << "\tsendTag = " << sendTag << "\taabb = " << localSendAABB
+                          << "\n\trecvRank = " << rank << "\trecvID = " << id.getID() << "\trecvTag = " << recvTag << "\taabb = " << localRecvAABB
+      )
+   }
+
+   /*!
+    * Determines which blocks participate in the boundary condition / communication, and calls the appropriate
+    * functions to extract and save the communication information.
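+    *
+    * As a rough numerical illustration (numbers chosen for this example only): with 4 cells per block in
+    * the shift direction, a shift of 8 cells is block-aligned, so each boundary block exchanges exactly
+    * one whole face via processSingleAABB; a shift of 5 cells leaves a remainder of 1, so each boundary
+    * block is split and exchanges two sub-regions via processDoubleAABB.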
+    */
+   void setupPeriodicity() {
+
+      const auto sbf = blockForest_.lock();
+      WALBERLA_ASSERT_NOT_NULLPTR( sbf )
+
+      auto & blockInformation = sbf->getBlockInformation();
+      WALBERLA_ASSERT(blockInformation.active())
+      std::vector< shared_ptr< IBlockID > > globalBlocks;
+      blockInformation.getAllBlocks(globalBlocks);
+      numGlobalBlocks_ = globalBlocks.size();
+
+      // get blocks of current processor
+      std::vector< IBlock * > localBlocks;
+      sbf->getBlocks(localBlocks);
+
+      const auto nGL = real_c(fieldGhostLayers_);
+
+      const bool shiftWholeBlock = (shift_[shiftDir_] % ShiftType(sbf->getNumberOfCells(*localBlocks[0], shiftDir_))) == 0;
+
+      // get f-size
+      {
+         auto* field = localBlocks[0]->getData< FieldType >(fieldID_);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+         fSize_ = cell_idx_c(field->fSize());
+      }
+
+      for( auto block : localBlocks ) {
+
+         // get minimal ghost layer region (in normal direction)
+         const auto blockAABB = block->getAABB();
+         const auto blockID = dynamic_cast< Block * >(block)->getId();
+
+         const bool atMin = sbf->atDomainMinBorder(normalDir_, *block);
+         const bool atMax = sbf->atDomainMaxBorder(normalDir_, *block);
+
+         if(atMin) {
+            if(shiftWholeBlock) {
+               processSingleAABB(blockAABB, sbf, nGL, blockID, -1);
+            } else {
+               processDoubleAABB(blockAABB, sbf, nGL, blockID, -1);
+            }
+         }
+         if(atMax) {
+            if(shiftWholeBlock) {
+               processSingleAABB(blockAABB, sbf, nGL, blockID, +1);
+            } else {
+               processDoubleAABB(blockAABB, sbf, nGL, blockID, +1);
+            }
+         }
+
+      }
+
+   }
+
+   Vector3< cell_idx_t > toCellVector( const Vector3< real_t > & point ) {
+      return Vector3< cell_idx_t >{ cell_idx_c(point[0]), cell_idx_c(point[1]), cell_idx_c(point[2]) };
+   }
+
+   CellInterval globalAABBToLocalCI( const AABB & aabb, const std::shared_ptr< StructuredBlockForest > & sbf, IBlock * const block ) {
+      auto globalCI = CellInterval{toCellVector(aabb.min()), toCellVector(aabb.max()) - Vector3< cell_idx_t >(1, 1, 1)};
+      CellInterval localCI;
+      sbf->transformGlobalToBlockLocalCellInterval(localCI, *block, globalCI);
+
+      return localCI;
+   }
+
+   template< typename tPair >
+   struct second_t {
+      typename tPair::second_type operator()( const tPair& p ) const { return p.second; }
+   };
+   template< typename tMap >
+   second_t< typename tMap::value_type > second( const tMap& ) { return second_t< typename tMap::value_type >(); }
+
+
+   std::weak_ptr< StructuredBlockForest > blockForest_;
+
+   uint_t normalDir_;
+   uint_t shiftDir_;
+   Vector3< ShiftType > shift_;
+
+   // for each local block, stores the ranks where to send / receive, the corresponding block IDs,
+   // the local AABBs that need to be packed / unpacked, and a unique tag for communication
+   std::map< BlockID, std::vector< std::tuple< mpi::MPIRank, BlockID, CellInterval, int > > > sendAABBs_{};
+   std::map< BlockID, std::vector< std::tuple< mpi::MPIRank, BlockID, CellInterval, int > > > recvAABBs_{};
+
+   bool setupPeriodicity_{false};
+   uint_t numGlobalBlocks_{};
+
+ protected:
+
+   const BlockDataID fieldID_;
+
+   const uint_t fieldGhostLayers_;
+   cell_idx_t fSize_;
+
+}; // class ShiftedPeriodicityBase
+
+
+//*******************************************************************************************************************
+/*!
+ * A periodicity boundary condition that adds a user-defined spatial shift to the field when applied.
+ * This shift can prevent the locking of large-scale turbulent features in the flow direction, see e.g.,
+ * Munters et al. (https://doi.org/10.1063/1.4941912).
+ *
+ * Periodicity defined in the blockforest must be turned off in the normal direction.
+ *
+ * This class handles the CPU-specific packing and unpacking of the communication buffers.
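+ *
+ * A minimal usage sketch (a blockforest `blocks` and the field's BlockDataID `fieldID` are assumed to exist
+ * already; the concrete numbers are illustrative only):
+ * \code
+ * // shift by 5 cells in y (shift direction 1) across the non-periodic x-border (normal direction 0),
+ * // for a field with one ghost layer
+ * boundary::ShiftedPeriodicity< GhostLayerField< real_t, 3 > > shiftedPeriodicity(
+ *    blocks, fieldID, uint_t(1), uint_t(0), uint_t(1), 5 );
+ * shiftedPeriodicity(); // typically called once per time step
+ * \endcode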
+ *
+ * @tparam GhostLayerField_T Type of the ghost-layer field that is shifted periodically
+ */
+//*******************************************************************************************************************
+template< typename GhostLayerField_T >
+class ShiftedPeriodicity : public ShiftedPeriodicityBase< ShiftedPeriodicity< GhostLayerField_T >, GhostLayerField_T > {
+
+   using Base = ShiftedPeriodicityBase< ShiftedPeriodicity< GhostLayerField_T >, GhostLayerField_T >;
+   friend Base;
+
+ public:
+
+   using ValueType = typename GhostLayerField_T::value_type;
+   using ShiftType = typename Base::ShiftType;
+
+   ShiftedPeriodicity( const std::weak_ptr< StructuredBlockForest > & blockForest,
+                       const BlockDataID & fieldID, const uint_t fieldGhostLayers,
+                       const uint_t normalDir, const uint_t shiftDir, const ShiftType shiftValue )
+      : Base( blockForest, fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue )
+   {}
+
+ private:
+
+   void packBuffer(IBlock * const block, const CellInterval & ci,
+                   std::vector< ValueType > & buffer) {
+
+      // get field
+      auto field = block->getData< GhostLayerField_T >(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+      auto bufferIt = buffer.begin();
+
+      // forward-iterate over ci and copy the field values into the buffer
+      for(auto cellIt = ci.begin(); cellIt != ci.end(); ++cellIt) {
+         for(cell_idx_t f = 0; f < this->fSize_; ++f, ++bufferIt) {
+            WALBERLA_ASSERT(field->coordinatesValid(cellIt->x(), cellIt->y(), cellIt->z(), f))
+            *bufferIt = field->get(*cellIt, f);
+         }
+      }
+
+   }
+
+   void unpackBuffer(IBlock* const block, const CellInterval & ci,
+                     const std::vector< ValueType > & buffer) {
+
+      // get field
+      auto field = block->getData< GhostLayerField_T >(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+      auto bufferIt = buffer.begin();
+
+      // forward-iterate over ci and copy the buffer values into the field
+      for(auto cellIt = ci.begin(); cellIt != ci.end(); ++cellIt) {
+         for(cell_idx_t f = 0; f < this->fSize_; ++f, ++bufferIt) {
+            WALBERLA_ASSERT(field->coordinatesValid(cellIt->x(), cellIt->y(), cellIt->z(), f))
+            field->get(*cellIt, f) = *bufferIt;
+         }
+      }
+
+   }
+
+}; // class ShiftedPeriodicity
+
+} // namespace boundary
+} // namespace walberla
diff --git a/src/gpu/CMakeLists.txt b/src/gpu/CMakeLists.txt
index fb6810d4..e870cdb5 100644
--- a/src/gpu/CMakeLists.txt
+++ b/src/gpu/CMakeLists.txt
@@ -38,6 +38,8 @@ target_sources( gpu
       ParallelStreams.h
       GPURAII.h
       DeviceSelectMPI.cpp
+      ShiftedPeriodicity.cu
+      ShiftedPeriodicity.h
    )
 
 # sources only for CUDA
diff --git a/src/gpu/FieldAccessor.h b/src/gpu/FieldAccessor.h
index d737983d..e34d9c3c 100644
--- a/src/gpu/FieldAccessor.h
+++ b/src/gpu/FieldAccessor.h
@@ -56,29 +56,29 @@ namespace gpu
          fOffset_(fOffset),
         indexingScheme_(indexingScheme )
       {}
 
-      __device__ void set( uint3 blockIdx, uint3 threadIdx )
+      __device__ void set( uint3 _blockIdx, uint3 _threadIdx )
       {
          switch ( indexingScheme_)
          {
-            case FZYX: ptr_ += blockIdx.z * fOffset_ + blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break;
-            case FZY : ptr_ += blockIdx.y * fOffset_ + blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break;
-            case FZ  : ptr_ += blockIdx.x * fOffset_ + threadIdx.x * zOffset_; break;
-            case F   : ptr_ += threadIdx.x * fOffset_; break;
-
-            case ZYXF: ptr_ += blockIdx.z * zOffset_ + blockIdx.y * yOffset_ + blockIdx.x * xOffset_ + threadIdx.x * fOffset_; break;
-            case ZYX : ptr_ += blockIdx.y * zOffset_ + blockIdx.x * yOffset_ + threadIdx.x * xOffset_; break;
-            case ZY  : ptr_ += blockIdx.x * zOffset_ + threadIdx.x * yOffset_; break;
-            case Z   : ptr_ += threadIdx.x * zOffset_; break;
+            case FZYX: ptr_ += _blockIdx.z * fOffset_ + _blockIdx.y * zOffset_ + _blockIdx.x * yOffset_ + _threadIdx.x * xOffset_; break;
+            case FZY : ptr_ += 
_blockIdx.y * fOffset_ + _blockIdx.x * zOffset_ + _threadIdx.x * yOffset_; break; + case FZ : ptr_ += _blockIdx.x * fOffset_ + _threadIdx.x * zOffset_; break; + case F : ptr_ += _threadIdx.x * fOffset_; break; + + case ZYXF: ptr_ += _blockIdx.z * zOffset_ + _blockIdx.y * yOffset_ + _blockIdx.x * xOffset_ + _threadIdx.x * fOffset_; break; + case ZYX : ptr_ += _blockIdx.y * zOffset_ + _blockIdx.x * yOffset_ + _threadIdx.x * xOffset_; break; + case ZY : ptr_ += _blockIdx.x * zOffset_ + _threadIdx.x * yOffset_; break; + case Z : ptr_ += _threadIdx.x * zOffset_; break; } } - __device__ uint_t getLinearIndex( uint3 blockIdx, uint3 threadIdx, uint3 gridDim, uint3 blockDim ) + __device__ uint_t getLinearIndex( uint3 _blockIdx, uint3 _threadIdx, uint3 _gridDim, uint3 _blockDim ) { - return threadIdx.x + - blockIdx.x * blockDim.x + - blockIdx.y * blockDim.x * gridDim.x + - blockIdx.z * blockDim.x * gridDim.x * gridDim.y ; + return _threadIdx.x + + _blockIdx.x * _blockDim.x + + _blockIdx.y * _blockDim.x * _gridDim.x + + _blockIdx.z * _blockDim.x * _gridDim.x * _gridDim.y ; } // This is always true for this specific field indexing class. diff --git a/src/gpu/FieldAccessor3D.h b/src/gpu/FieldAccessor3D.h index 66e95f72..5f32a43c 100644 --- a/src/gpu/FieldAccessor3D.h +++ b/src/gpu/FieldAccessor3D.h @@ -38,17 +38,17 @@ namespace gpu uint_t yOffset, uint_t zOffset, uint_t fOffset, - const dim3 & idxDim, - const dim3 & blockDim ) + const dim3 & _idxDim, + const dim3 & _blockDim ) : ptr_( ptr ), xOffset_( xOffset ), yOffset_( yOffset ), zOffset_( zOffset ), fOffset_( fOffset ), - idxDim_( idxDim ), blockDim_( blockDim ) + idxDim_( _idxDim ), blockDim_( _blockDim ) {} - __device__ __forceinline__ void set( const uint3& blockIdx, const uint3& threadIdx ) + __device__ __forceinline__ void set( const uint3& _blockIdx, const uint3& _threadIdx ) { - uint_t x = blockIdx.x * blockDim_.x + threadIdx.x; - uint_t y = blockIdx.y * blockDim_.y + threadIdx.y; - uint_t z = blockIdx.z * blockDim_.z + threadIdx.z; + uint_t x = _blockIdx.x * blockDim_.x + _threadIdx.x; + uint_t y = _blockIdx.y * blockDim_.y + _threadIdx.y; + uint_t z = _blockIdx.z * blockDim_.z + _threadIdx.z; if ( x < idxDim_.x && y < idxDim_.y && z < idxDim_.z ) { diff --git a/src/gpu/FieldAccessorXYZ.h b/src/gpu/FieldAccessorXYZ.h index d9046a78..73c7a0a0 100644 --- a/src/gpu/FieldAccessorXYZ.h +++ b/src/gpu/FieldAccessorXYZ.h @@ -37,11 +37,11 @@ namespace gpu : ptr_(ptr), xOffset_(xOffset), yOffset_(yOffset), zOffset_(zOffset), fOffset_(fOffset) {} - __device__ void set( uint3 blockIdx, uint3 threadIdx ) + __device__ void set( uint3 _blockIdx, uint3 _threadIdx ) { - ptr_ += threadIdx.x * xOffset_ + - blockIdx.x * yOffset_ + - blockIdx.y * zOffset_ ; + ptr_ += _threadIdx.x * xOffset_ + + _blockIdx.x * yOffset_ + + _blockIdx.y * zOffset_ ; } __device__ T & get() { return * (T*)(ptr_); } diff --git a/src/gpu/ShiftedPeriodicity.cu b/src/gpu/ShiftedPeriodicity.cu new file mode 100644 index 00000000..d2ae3f70 --- /dev/null +++ b/src/gpu/ShiftedPeriodicity.cu @@ -0,0 +1,53 @@ +//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. 
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ShiftedPeriodicity.cu
+//! \ingroup gpu
+//! \author Helen Schottenhamml
+//
+//======================================================================================================================
+
+#include "gpu/ShiftedPeriodicity.h"
+
+namespace walberla {
+namespace gpu
+{
+
+namespace internal {
+#ifdef WALBERLA_BUILD_WITH_GPU_SUPPORT
+   __global__ void packBufferGPU( gpu::FieldAccessor< real_t > fa, real_t * const buffer ) {
+      fa.set(blockIdx, threadIdx);
+      if(fa.isValidPosition()) {
+         buffer[fa.getLinearIndex(blockIdx, threadIdx, gridDim, blockDim)] = fa.get();
+      }
+   }
+   __global__ void unpackBufferGPU( gpu::FieldAccessor< real_t > fa, const real_t * const buffer ) {
+      fa.set(blockIdx, threadIdx);
+      if(fa.isValidPosition()) {
+         fa.get() = buffer[fa.getLinearIndex(blockIdx, threadIdx, gridDim, blockDim)];
+      }
+   }
+#else
+   __global__ void packBufferGPU( gpu::FieldAccessor< real_t >, real_t * const ) {
+      WALBERLA_ABORT("gpu/ShiftedPeriodicity is only supported when built with GPU support. Please use boundary/ShiftedPeriodicity on CPUs.")
+   }
+   __global__ void unpackBufferGPU( gpu::FieldAccessor< real_t >, const real_t * const ) {
+      WALBERLA_ABORT("gpu/ShiftedPeriodicity is only supported when built with GPU support. Please use boundary/ShiftedPeriodicity on CPUs.")
+   }
+#endif
+}
+
+} // namespace gpu
+} // namespace walberla
diff --git a/src/gpu/ShiftedPeriodicity.h b/src/gpu/ShiftedPeriodicity.h
new file mode 100644
index 00000000..cf46577c
--- /dev/null
+++ b/src/gpu/ShiftedPeriodicity.h
@@ -0,0 +1,144 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file ShiftedPeriodicity.h
+//! \ingroup gpu
+//! \author Helen Schottenhamml
+//
+//======================================================================================================================
+
+#include "blockforest/StructuredBlockForest.h"
+
+#include "boundary/ShiftedPeriodicity.h"
+
+#include "core/DataTypes.h"
+#include "core/cell/CellInterval.h"
+#include "core/debug/Debug.h"
+#include "core/math/Vector3.h"
+
+#include "domain_decomposition/BlockDataID.h"
+#include "domain_decomposition/IBlock.h"
+
+#include "gpu/DeviceWrapper.h"
+#include "gpu/FieldAccessor.h"
+#include "gpu/FieldIndexing.h"
+#include "gpu/GPUField.h"
+#include "gpu/GPUWrapper.h"
+#include "gpu/Kernel.h"
+
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <vector>
+
+#include "ErrorChecking.h"
+
+namespace walberla {
+namespace gpu
+{
+
+namespace internal {
+   // GPU kernels - can be extended for other data types
+   __global__ void packBufferGPU( gpu::FieldAccessor< real_t > fa, real_t * const buffer );
+   __global__ void unpackBufferGPU( gpu::FieldAccessor< real_t > fa, const real_t * const buffer );
+}
+
+//*******************************************************************************************************************
+/*!
+ * A periodicity boundary condition that adds a user-defined spatial shift to the field when applied.
+ * This shift can prevent the locking of large-scale turbulent features in the flow direction, see e.g.,
+ * Munters et al. (https://doi.org/10.1063/1.4941912).
+ *
+ * Periodicity defined in the blockforest must be turned off in the normal direction.
+ *
+ * This class handles the GPU-specific packing and unpacking of the communication buffers.
+ *
+ * @tparam GPUField_T Type of the GPU field that is shifted periodically
+ */
+//*******************************************************************************************************************
+template< typename GPUField_T >
+class ShiftedPeriodicityGPU : public boundary::ShiftedPeriodicityBase< ShiftedPeriodicityGPU< GPUField_T >, GPUField_T > {
+
+   using Base = boundary::ShiftedPeriodicityBase< ShiftedPeriodicityGPU< GPUField_T >, GPUField_T >;
+   friend Base;
+
+ public:
+
+   using ValueType = typename GPUField_T::value_type;
+   using ShiftType = typename Base::ShiftType;
+   using FieldIdx_T = gpu::FieldIndexing< ValueType >;
+
+   ShiftedPeriodicityGPU(const std::weak_ptr< StructuredBlockForest >& blockForest,
+                         const BlockDataID& fieldID, const uint_t fieldGhostLayers,
+                         const uint_t normalDir, const uint_t shiftDir, const ShiftType shiftValue)
+      : Base(blockForest, fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue)
+   {}
+
+
+ private:
+
+   void packBuffer(IBlock* const block, const CellInterval& ci, std::vector< ValueType >& h_buffer) {
+
+      // get field
+      auto d_field = block->getData< GPUField_T >(this->fieldID_);
+      WALBERLA_ASSERT_NOT_NULLPTR(d_field)
+
+      const uint_t nValues = ci.numCells() * uint_c(this->fSize_);
+
+      // create GPU buffer
+      ValueType * d_buffer{};
+      WALBERLA_GPU_CHECK(gpuMalloc(&d_buffer, nValues * sizeof(ValueType)))
+
+      // fill buffer on GPU
+      auto packKernel = gpu::make_kernel( &internal::packBufferGPU );
+      packKernel.addFieldIndexingParam( FieldIdx_T::interval( *d_field, ci, 0, this->fSize_ ) );
+      packKernel.addParam(d_buffer);
+      packKernel();
+
+      // copy from device to host buffer
+      WALBERLA_GPU_CHECK(gpuMemcpy(h_buffer.data(), d_buffer, nValues * sizeof(ValueType), gpuMemcpyDeviceToHost))
+
+      WALBERLA_GPU_CHECK(gpuFree(d_buffer))
+
+   }
+
+   void unpackBuffer(IBlock* const block, const CellInterval& ci, const std::vector< ValueType >& h_buffer) {
+
+      // get field
+      auto d_field = block->getData< GPUField_T >(this->fieldID_);
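+      // note: h_buffer is the host-side staging buffer managed by the base class; its contents are
+      // routed through a temporary device buffer below and then scattered into the field by the
+      // unpack kernel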
+      WALBERLA_ASSERT_NOT_NULLPTR(d_field)
+
+      const uint_t nValues = ci.numCells() * uint_c(this->fSize_);
+
+      // create GPU buffer
+      ValueType * d_buffer{};
+      WALBERLA_GPU_CHECK(gpuMalloc(&d_buffer, nValues * sizeof(ValueType)))
+
+      // copy from host to device buffer
+      WALBERLA_GPU_CHECK(gpuMemcpy(d_buffer, h_buffer.data(), nValues * sizeof(ValueType), gpuMemcpyHostToDevice))
+
+      // unpack buffer on GPU
+      auto unpackKernel = gpu::make_kernel( &internal::unpackBufferGPU );
+      unpackKernel.addFieldIndexingParam( FieldIdx_T::interval( *d_field, ci, 0, this->fSize_ ) );
+      unpackKernel.addParam(d_buffer);
+      unpackKernel();
+
+      WALBERLA_GPU_CHECK(gpuFree(d_buffer))
+   }
+
+}; // class ShiftedPeriodicityGPU
+
+} // namespace gpu
+} // namespace walberla
diff --git a/tests/boundary/CMakeLists.txt b/tests/boundary/CMakeLists.txt
index 82ced321..8dd843d9 100644
--- a/tests/boundary/CMakeLists.txt
+++ b/tests/boundary/CMakeLists.txt
@@ -7,3 +7,14 @@ waLBerla_compile_test( FILES BoundaryHandling.cpp )
 waLBerla_execute_test( NAME BoundaryHandling )
+
+if (WALBERLA_BUILD_WITH_PYTHON)
+
+   waLBerla_link_files_to_builddir( *.py )
+
+   waLBerla_compile_test( FILES TestShiftedPeriodicity.cpp DEPENDS blockforest field python_coupling )
+   waLBerla_execute_test( NAME TestShiftedPeriodicity1 COMMAND $<TARGET_FILE:TestShiftedPeriodicity> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetup.py )
+   waLBerla_execute_test( NAME TestShiftedPeriodicity2 COMMAND $<TARGET_FILE:TestShiftedPeriodicity> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetup.py PROCESSES 2 )
+   waLBerla_execute_test( NAME TestShiftedPeriodicity4 COMMAND $<TARGET_FILE:TestShiftedPeriodicity> ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetup.py PROCESSES 4 )
+
+endif()
\ No newline at end of file
diff --git a/tests/boundary/TestShiftedPeriodicity.cpp b/tests/boundary/TestShiftedPeriodicity.cpp
new file mode 100644
index 00000000..2351a2f3
--- /dev/null
+++ b/tests/boundary/TestShiftedPeriodicity.cpp
@@ -0,0 +1,310 @@
+//======================================================================================================================
+//
+// This file is part of waLBerla. waLBerla is free software: you can
+// redistribute it and/or modify it under the terms of the GNU General Public
+// License as published by the Free Software Foundation, either version 3 of
+// the License, or (at your option) any later version.
+//
+// waLBerla is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with waLBerla (see COPYING.txt). If not, see <http://www.gnu.org/licenses/>.
+//
+//! \file TestShiftedPeriodicity.cpp
+//! \ingroup boundary
+//! 
\author Helen Schottenhamml +// +//====================================================================================================================== + +#include "blockforest/StructuredBlockForest.h" + +#include "core/cell/Cell.h" +#include "core/cell/CellInterval.h" +#include "core/debug/CheckFunctions.h" +#include "core/logging/Initialization.h" +#include "core/math/Vector3.h" +#include "core/mpi/Environment.h" +#include "core/mpi/MPIWrapper.h" +#include "core/mpi/MPIManager.h" +#include "core/mpi/Operation.h" +#include "core/mpi/Reduce.h" + +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" + +#include "field/Layout.h" +//#include "field/vtk/VTKWriter.h" + +#include "python_coupling/CreateConfig.h" + +#include "stencil/D3Q27.h" +#include "stencil/Directions.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace walberla { + +using Stencil_T = stencil::D3Q27; + +using ValueType_T = real_t; +using Field_T = GhostLayerField< ValueType_T, 3 >; + +constexpr cell_idx_t fieldGhostLayers = 1; + +////////// +// MAIN // +////////// + +template< typename FieldType_T > +class FieldInitialiser { + + public: + FieldInitialiser( const std::weak_ptr< StructuredBlockForest > & sbf, const BlockDataID fieldId ) + : sbf_(sbf), fieldId_(fieldId) + {} + + void operator()() { + + + const auto blocks = sbf_.lock(); + WALBERLA_ASSERT_NOT_NULLPTR(blocks) + + for ( auto & block : *blocks ) + { + // get field + auto * field = block.template getData(fieldId_); + WALBERLA_ASSERT_NOT_NULLPTR(field) + + // get cell interval + auto ci = field->xyzSizeWithGhostLayer(); + + for (const auto& cell : ci) { + + // get global coordinates + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, cell); + + for (uint_t d = 0; d < FieldType_T::F_SIZE; ++d) + { + field->get(cell, d) = real_c( (globalCell.x() + 2 * fieldGhostLayers) + * (globalCell.y() + 2 * fieldGhostLayers) + * (globalCell.z() + 2 * fieldGhostLayers) + * int_c(d + 1)); + } + } + } + + } + + private: + std::weak_ptr< StructuredBlockForest > sbf_{}; + const BlockDataID fieldId_{}; + +}; + + +int main( int argc, char **argv ) { + + const mpi::Environment env(argc, argv); + + for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg) + { + auto config = *cfg; + logging::configureLogging(config); + + // create the domain, flag field and vector field (non-uniform initialisation) + auto blocks = blockforest::createUniformBlockGridFromConfig(config->getBlock("DomainSetup"), nullptr, true); + + const auto fieldID = field::addToStorage< Field_T >(blocks, "test field", real_t(0), field::Layout::fzyx, fieldGhostLayers); + FieldInitialiser< Field_T > initialiser(blocks, fieldID); + + // re-initialise fields + initialiser(); + + // create periodic shift boundary condition + + const auto spConfig = config->getBlock("Boundaries").getBlock("ShiftedPeriodicity"); + const uint_t shiftDir = spConfig.getParameter("shiftDir"); + const int shiftValue = spConfig.getParameter("shiftValue"); + const uint_t normalDir = spConfig.getParameter("normalDir"); + + boundary::ShiftedPeriodicity shiftedPeriodicity( + blocks, fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue + ); + +// auto vtkOutput = field::createVTKOutput(fieldID, *blocks, "test_field", 1, fieldGhostLayers, +// false, "vtk_out", "simulation_step", false, false); +// vtkOutput(); + + // apply boundary condition and standard communication + shiftedPeriodicity(); + 
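// after this call, the inner layer at one domain border must reappear - shifted by `shiftValue`
+      // cells in the shift direction - in the ghost layer at the opposite border; the slices gathered
+      // below encode exactly this correspondence and are compared at the end of the test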
+// vtkOutput(); + + /// compare values + // create local domain slices to compare values before and after shift + + const uint_t remDir = 3 - shiftDir - normalDir; + const auto shift = shiftedPeriodicity.shift(); + + const uint_t shiftSize = blocks->getNumberOfCells(shiftDir); + const uint_t remSize = blocks->getNumberOfCells(remDir); + const uint_t sizeSlice = shiftSize * remSize * Field_T::F_SIZE; + + std::vector innerMin(sizeSlice, ValueType_T(0)); + std::vector innerMax(sizeSlice, ValueType_T(0)); + std::vector glMin(sizeSlice, ValueType_T(0)); + std::vector glMax(sizeSlice, ValueType_T(0)); + + auto getIdx = [&remSize](const cell_idx_t shiftIdx, const cell_idx_t remIdx){ + return shiftIdx * cell_idx_c(Field_T::F_SIZE * remSize) + + remIdx * cell_idx_c(Field_T::F_SIZE); + }; + + // fill slices for comparing values + for(auto & block : *blocks) { + const bool atMin = blocks->atDomainMinBorder(normalDir, block); + const bool atMax = blocks->atDomainMaxBorder(normalDir, block); + if(!atMin && !atMax) + continue; + + auto * field = block.getData(fieldID); + WALBERLA_ASSERT_NOT_NULLPTR(field) + + // fill innerMin and glMin + if(atMin) { + Vector3 dirVector{}; + dirVector[normalDir] = -1; + const auto dir = stencil::vectorToDirection(dirVector); + + CellInterval innerMinCI; + CellInterval glMinCI; + field->getSliceBeforeGhostLayer(dir, innerMinCI, fieldGhostLayers, false); + field->getGhostRegion(dir, glMinCI, fieldGhostLayers, false); + + // fill inner min + for(const auto & cell : innerMinCI){ + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, cell); + + cell_idx_t idxShiftDir = globalCell[shiftDir] - shift[shiftDir]; + if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize); + if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize); + + const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir])); + + for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) { + WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f)) + WALBERLA_ASSERT_LESS(idx + f, innerMin.size()) + + innerMin[uint_c(idx + f)] = field->get(cell, f); + } + } + + // fill gl min + for(const auto & cell : glMinCI){ + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, cell); + + const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), globalCell[remDir]); + + for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) { + WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f)) + WALBERLA_ASSERT_LESS(idx + f, glMin.size()) + + glMin[uint_c(idx + f)] = field->get(cell, f); + } + } + } + + // fill innerMax and glMax + if(atMax) { + Vector3 dirVector{}; + dirVector[normalDir] = 1; + const auto dir = stencil::vectorToDirection(dirVector); + + CellInterval innerMaxCI; + CellInterval glMaxCI; + field->getSliceBeforeGhostLayer(dir, innerMaxCI, fieldGhostLayers, false); + field->getGhostRegion(dir, glMaxCI, fieldGhostLayers, false); + + // fill inner max + for(const auto & cell : innerMaxCI){ + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, cell); + + cell_idx_t idxShiftDir = globalCell[shiftDir] + shift[shiftDir]; + if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize); + if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize); + + const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir])); + + for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) { + 
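// flattened slice index: `idx` already encodes the (shift-direction, remainder-direction)
+               // cell position, the innermost f component is added on top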
WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f)) + WALBERLA_ASSERT_LESS(idx + f, innerMax.size()) + + innerMax[uint_c(idx + f)] = field->get(cell, f); + } + } + + // fill gl max + for(const auto & cell : glMaxCI){ + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, cell); + + const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), cell_idx_c(globalCell[remDir])); + + for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) { + WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f)) + WALBERLA_ASSERT_LESS(idx + f, glMax.size()) + + glMax[uint_c(idx + f)] = field->get(cell, f); + } + } + } + + + } + + WALBERLA_MPI_SECTION() { + + mpi::reduceInplace(innerMin, mpi::SUM); + mpi::reduceInplace(innerMax, mpi::SUM); + mpi::reduceInplace(glMin, mpi::SUM); + mpi::reduceInplace(glMax, mpi::SUM); + + } + + WALBERLA_ROOT_SECTION() { + for(uint_t i = 0; i < sizeSlice; ++i) { + WALBERLA_CHECK_FLOAT_EQUAL(innerMin[i], glMax[i]) + WALBERLA_CHECK_FLOAT_EQUAL(innerMax[i], glMin[i]) + } + } + + } + + return 0; +} + +} // namespace walberla + + + +int main(int argc, char **argv) { + + walberla::debug::enterTestMode(); + + return walberla::main(argc,argv); +} diff --git a/tests/boundary/TestShiftedPeriodicitySetup.py b/tests/boundary/TestShiftedPeriodicitySetup.py new file mode 100644 index 00000000..1ca890c2 --- /dev/null +++ b/tests/boundary/TestShiftedPeriodicitySetup.py @@ -0,0 +1,43 @@ +import waLBerla as wlb + + +class Scenario: + def __init__(self, normal_dir, shift_dir, shift_value, periodicity): + self.normal_dir = normal_dir + self.shift_dir = shift_dir + self.shift_value = shift_value + self.periodicity = tuple(periodicity) + + @wlb.member_callback + def config(self): + + return { + 'DomainSetup': { + 'blocks': (3, 3, 3), + 'cellsPerBlock': (4, 4, 4), + 'cartesianSetup': 0, + 'periodic': self.periodicity, + }, + 'Boundaries': { + 'ShiftedPeriodicity': { + 'shiftDir': self.shift_dir, + 'shiftValue': self.shift_value, + 'normalDir': self.normal_dir + } + }, + 'Logging': { + 'logLevel': "Info" + } + } + + +scenarios = wlb.ScenarioManager() + +for normal_dir in (0, 1, 2): + for shift_dir in (0, 1, 2): + if normal_dir == shift_dir: + continue + periodicity = 3 * [0] + periodicity[shift_dir] = 1 + for shift_value in (-3, 0, 2, 5, 8, 15): + scenarios.add(Scenario(normal_dir, shift_dir, shift_value, periodicity)) diff --git a/tests/gpu/CMakeLists.txt b/tests/gpu/CMakeLists.txt index e760cca4..18dab47d 100644 --- a/tests/gpu/CMakeLists.txt +++ b/tests/gpu/CMakeLists.txt @@ -56,4 +56,14 @@ waLBerla_generate_target_from_python(NAME CodegenGeneratedGPUFieldPackInfo FILE waLBerla_compile_test( FILES codegen/GeneratedFieldPackInfoTestGPU.cpp DEPENDS blockforest core field CodegenGeneratedGPUFieldPackInfo ) waLBerla_execute_test( NAME GeneratedFieldPackInfoTestGPU ) + + if (WALBERLA_BUILD_WITH_PYTHON) + + waLBerla_link_files_to_builddir( *.py ) + + waLBerla_compile_test( FILES TestShiftedPeriodicityGPU.cpp DEPENDS gpu blockforest field python_coupling ) + waLBerla_execute_test( NAME TestShiftedPeriodicityGPU COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/TestShiftedPeriodicitySetupGPU.py ) + + endif() + endif() diff --git a/tests/gpu/TestShiftedPeriodicityGPU.cpp b/tests/gpu/TestShiftedPeriodicityGPU.cpp new file mode 100644 index 00000000..02cd9777 --- /dev/null +++ b/tests/gpu/TestShiftedPeriodicityGPU.cpp @@ -0,0 +1,319 @@ 
+//====================================================================================================================== +// +// This file is part of waLBerla. waLBerla is free software: you can +// redistribute it and/or modify it under the terms of the GNU General Public +// License as published by the Free Software Foundation, either version 3 of +// the License, or (at your option) any later version. +// +// waLBerla is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// for more details. +// +// You should have received a copy of the GNU General Public License along +// with waLBerla (see COPYING.txt). If not, see . +// +//! \file TestShiftedPeriodicityGPU.cpp +//! \ingroup gpu +//! \author Helen Schottenhamml +// +//====================================================================================================================== + +#include "blockforest/StructuredBlockForest.h" + +#include "core/cell/Cell.h" +#include "core/cell/CellInterval.h" +#include "core/debug/CheckFunctions.h" +#include "core/logging/Initialization.h" +#include "core/math/Vector3.h" +#include "core/mpi/Environment.h" +#include "core/mpi/MPIWrapper.h" +#include "core/mpi/MPIManager.h" +#include "core/mpi/Operation.h" +#include "core/mpi/Reduce.h" + +#include "domain_decomposition/BlockDataID.h" +#include "domain_decomposition/IBlock.h" + +#include "field/Layout.h" +#include "field/vtk/VTKWriter.h" + +#include "python_coupling/CreateConfig.h" + +#include "stencil/D3Q27.h" +#include "stencil/Directions.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace walberla { + +using Stencil_T = stencil::D3Q27; + +using ValueType_T = real_t; +using Field_T = GhostLayerField< ValueType_T, 3 >; +using GPUField_T = gpu::GPUField< ValueType_T >; + +constexpr cell_idx_t fieldGhostLayers = 1; + +////////// +// MAIN // +////////// + +template< typename FieldType_T > +class FieldInitialiser { + + public: + FieldInitialiser( const std::weak_ptr< StructuredBlockForest > & sbf, const BlockDataID fieldId ) + : sbf_(sbf), fieldId_(fieldId) + {} + + void operator()() { + + + const auto blocks = sbf_.lock(); + WALBERLA_ASSERT_NOT_NULLPTR(blocks) + + for ( auto & block : *blocks ) + { + // get field + auto * field = block.template getData(fieldId_); + WALBERLA_ASSERT_NOT_NULLPTR(field) + + // get cell interval + auto ci = field->xyzSizeWithGhostLayer(); + + for (const auto& cell : ci) { + + // get global coordinates + Cell globalCell; + blocks->transformBlockLocalToGlobalCell(globalCell, block, cell); + + for (uint_t d = 0; d < FieldType_T::F_SIZE; ++d) + { + field->get(cell, d) = real_c( (globalCell.x() + 2 * fieldGhostLayers) + * (globalCell.y() + 2 * fieldGhostLayers) + * (globalCell.z() + 2 * fieldGhostLayers) + * int_c(d + 1)); + } + } + } + + } + + private: + std::weak_ptr< StructuredBlockForest > sbf_{}; + const BlockDataID fieldId_{}; + +}; + + +int main( int argc, char **argv ) { + + const mpi::Environment env(argc, argv); + + for (auto cfg = python_coupling::configBegin(argc, argv); cfg != python_coupling::configEnd(); ++cfg) + { + auto config = *cfg; + logging::configureLogging(config); + + // create the domain, flag field and vector field (non-uniform initialisation) + auto blocks = blockforest::createUniformBlockGridFromConfig(config->getBlock("DomainSetup"), nullptr, true); + + const auto 
h_fieldID = field::addToStorage< Field_T >(blocks, "test field CPU", real_t(0), field::Layout::fzyx, fieldGhostLayers);
+      FieldInitialiser< Field_T > initialiser(blocks, h_fieldID);
+
+      const auto d_fieldID = gpu::addGPUFieldToStorage< Field_T >(blocks, h_fieldID, "test field GPU");
+
+      // re-initialise fields
+      initialiser();
+
+      // create periodic shift boundary condition
+
+      const auto spConfig = config->getBlock("Boundaries").getBlock("ShiftedPeriodicity");
+      const uint_t shiftDir = spConfig.getParameter< uint_t >("shiftDir");
+      const int shiftValue = spConfig.getParameter< int >("shiftValue");
+      const uint_t normalDir = spConfig.getParameter< uint_t >("normalDir");
+
+      gpu::ShiftedPeriodicityGPU< GPUField_T > shiftedPeriodicity(
+         blocks, d_fieldID, fieldGhostLayers, normalDir, shiftDir, shiftValue
+      );
+
+//      auto vtkOutput = field::createVTKOutput(fieldID, *blocks, "test_field", 1, fieldGhostLayers,
+//                                              false, "vtk_out", "simulation_step", false, false);
+//      vtkOutput();
+
+      // apply boundary condition and standard communication
+      shiftedPeriodicity();
+
+//      vtkOutput();
+
+      /// compare values
+
+      // copy the resulting GPU field back to the CPU
+      gpu::fieldCpy< Field_T, GPUField_T >(blocks, h_fieldID, d_fieldID);
+
+      // create local domain slices to compare values before and after shift
+
+      const uint_t remDir = 3 - shiftDir - normalDir;
+      const auto shift = shiftedPeriodicity.shift();
+
+      const uint_t shiftSize = blocks->getNumberOfCells(shiftDir);
+      const uint_t remSize = blocks->getNumberOfCells(remDir);
+      const uint_t sizeSlice = shiftSize * remSize * Field_T::F_SIZE;
+
+      std::vector< ValueType_T > innerMin(sizeSlice, ValueType_T(0));
+      std::vector< ValueType_T > innerMax(sizeSlice, ValueType_T(0));
+      std::vector< ValueType_T > glMin(sizeSlice, ValueType_T(0));
+      std::vector< ValueType_T > glMax(sizeSlice, ValueType_T(0));
+
+      auto getIdx = [&remSize](const cell_idx_t shiftIdx, const cell_idx_t remIdx){
+         return shiftIdx * cell_idx_c(Field_T::F_SIZE * remSize)
+                + remIdx * cell_idx_c(Field_T::F_SIZE);
+      };
+
+      // fill slices for comparing values
+      for(auto & block : *blocks) {
+         const bool atMin = blocks->atDomainMinBorder(normalDir, block);
+         const bool atMax = blocks->atDomainMaxBorder(normalDir, block);
+         if(!atMin && !atMax)
+            continue;
+
+         auto * field = block.getData< Field_T >(h_fieldID);
+         WALBERLA_ASSERT_NOT_NULLPTR(field)
+
+         // fill innerMin and glMin
+         if(atMin) {
+            Vector3< cell_idx_t > dirVector{};
+            dirVector[normalDir] = -1;
+            const auto dir = stencil::vectorToDirection(dirVector);
+
+            CellInterval innerMinCI;
+            CellInterval glMinCI;
+            field->getSliceBeforeGhostLayer(dir, innerMinCI, fieldGhostLayers, false);
+            field->getGhostRegion(dir, glMinCI, fieldGhostLayers, false);
+
+            // fill inner min
+            for(const auto & cell : innerMinCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               cell_idx_t idxShiftDir = globalCell[shiftDir] - shift[shiftDir];
+               if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize);
+               if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize);
+
+               const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, innerMin.size())
+
+                  innerMin[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+
+            // fill gl min
+            for(const auto & cell : glMinCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, glMin.size())
+
+                  glMin[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+         }
+
+         // fill innerMax and glMax
+         if(atMax) {
+            Vector3< cell_idx_t > dirVector{};
+            dirVector[normalDir] = 1;
+            const auto dir = stencil::vectorToDirection(dirVector);
+
+            CellInterval innerMaxCI;
+            CellInterval glMaxCI;
+            field->getSliceBeforeGhostLayer(dir, innerMaxCI, fieldGhostLayers, false);
+            field->getGhostRegion(dir, glMaxCI, fieldGhostLayers, false);
+
+            // fill inner max
+            for(const auto & cell : innerMaxCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               cell_idx_t idxShiftDir = globalCell[shiftDir] + shift[shiftDir];
+               if(idxShiftDir >= cell_idx_c(shiftSize)) idxShiftDir -= cell_idx_c(shiftSize);
+               if(idxShiftDir <= - int_c(fieldGhostLayers)) idxShiftDir += cell_idx_c(shiftSize);
+
+               const cell_idx_t idx = getIdx(idxShiftDir, cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, innerMax.size())
+
+                  innerMax[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+
+            // fill gl max
+            for(const auto & cell : glMaxCI){
+               Cell globalCell;
+               blocks->transformBlockLocalToGlobalCell(globalCell, block, cell);
+
+               const cell_idx_t idx = getIdx(cell_idx_c(globalCell[shiftDir]), cell_idx_c(globalCell[remDir]));
+
+               for(cell_idx_t f = 0; f < cell_idx_c(Field_T::F_SIZE); ++f) {
+                  WALBERLA_ASSERT(field->coordinatesValid(cell.x(), cell.y(), cell.z(), f))
+                  WALBERLA_ASSERT_LESS(idx + f, glMax.size())
+
+                  glMax[uint_c(idx + f)] = field->get(cell, f);
+               }
+            }
+         }
+
+
+      }
+
+      WALBERLA_MPI_SECTION() {
+
+         mpi::reduceInplace(innerMin, mpi::SUM);
+         mpi::reduceInplace(innerMax, mpi::SUM);
+         mpi::reduceInplace(glMin, mpi::SUM);
+         mpi::reduceInplace(glMax, mpi::SUM);
+
+      }
+
+      WALBERLA_ROOT_SECTION() {
+         for(uint_t i = 0; i < sizeSlice; ++i) {
+            WALBERLA_CHECK_FLOAT_EQUAL(innerMin[i], glMax[i])
+            WALBERLA_CHECK_FLOAT_EQUAL(innerMax[i], glMin[i])
+         }
+      }
+
+   }
+
+   return 0;
+}
+
+} // namespace walberla
+
+
+
+int main(int argc, char **argv) {
+
+   walberla::debug::enterTestMode();
+
+   return walberla::main(argc,argv);
+}
diff --git a/tests/gpu/TestShiftedPeriodicitySetupGPU.py b/tests/gpu/TestShiftedPeriodicitySetupGPU.py
new file mode 100644
index 00000000..06443685
--- /dev/null
+++ b/tests/gpu/TestShiftedPeriodicitySetupGPU.py
@@ -0,0 +1,40 @@
+import waLBerla as wlb
+
+
+class Scenario:
+    def __init__(self, normal_dir, shift_dir, shift_value, periodicity):
+        self.normal_dir = normal_dir
+        self.shift_dir = shift_dir
+        self.shift_value = shift_value
+        self.periodicity = tuple(periodicity)
+
+    @wlb.member_callback
+    def config(self):
+
+        return {
+            'DomainSetup': {
+                'blocks': (3, 3, 3),
+                'cellsPerBlock': (4, 4, 4),
+                'cartesianSetup': 0,
+                'periodic': self.periodicity,
+            },
+            'Boundaries': {
+                'ShiftedPeriodicity': {
+                    'shiftDir': self.shift_dir,
+                    'shiftValue': self.shift_value,
+                    'normalDir': self.normal_dir
+                }
+            }
+        }
+
+
+scenarios = wlb.ScenarioManager()
+
+for normal_dir in (0, 1, 2):
+    for shift_dir in (0, 1, 2):
+        if normal_dir == shift_dir:
+            continue
+        periodicity = 3 * [0]
+        periodicity[shift_dir] = 1
+        for shift_value in (-3, 0, 2, 5, 8, 15):
+            scenarios.add(Scenario(normal_dir, shift_dir, shift_value, periodicity))
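
For quick reference, the GPU variant is driven exactly like the CPU one shown in TestShiftedPeriodicity.cpp;
a minimal sketch (names like `blocks` and `d_fieldID` refer to objects created as in the GPU test above, and
the concrete numbers are illustrative only):

   // shift by 5 cells in y (shift direction 1) across the non-periodic x-border (normal direction 0),
   // for a field with one ghost layer
   gpu::ShiftedPeriodicityGPU< gpu::GPUField< walberla::real_t > > shiftedPeriodicity(
      blocks, d_fieldID, uint_t(1), uint_t(0), uint_t(1), 5 );
   shiftedPeriodicity(); // device-side packing, (MPI) exchange, device-side unpacking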