Skip to content

Commit

Permalink
Rename AMReX_CUDA_MAX_THREADS to AMReX_GPU_MAX_THREADS (AMReX-Codes#3115)
Browse files Browse the repository at this point in the history

Rename cmake option AMReX_CUDA_MAX_THREADS to AMReX_GPU_MAX_THREADS.
This option can now be used for all GPU backends.

GNU Make has also been updated for this.
  • Loading branch information
WeiqunZhang authored Feb 4, 2023
1 parent 8595daa commit e81a9c2
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 24 deletions.
10 changes: 7 additions & 3 deletions Docs/sphinx_documentation/source/GPU.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,12 @@ can run it and that will generate results like:
Building with CMake
-------------------

To build AMReX with GPU support in CMake, add
``-DAMReX_GPU_BACKEND=CUDA|HIP|SYCL`` to the ``cmake`` invocation, for CUDA,
HIP and SYCL, respectively. By default, AMReX uses 256 threads per GPU
block/group in most situations. This can be changed with
``-DAMReX_GPU_MAX_THREADS=N``, where ``N`` is the desired number of threads
(for example, 128).

Enabling CUDA support
^^^^^^^^^^^^^^^^^^^^^

Expand Down Expand Up @@ -226,8 +232,6 @@ check the :ref:`table <tab:cmakecudavar>` below.
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_CUDA_LTO | Enable CUDA link-time-optimization | NO | YES, NO |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_CUDA_MAX_THREADS | Max number of CUDA threads per block | 256 | User-defined |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_CUDA_MAXREGCOUNT | Limits the number of CUDA registers available | 255 | User-defined |
+------------------------------+-------------------------------------------------+-------------+-----------------+
| AMReX_CUDA_PTX_VERBOSE | Verbose code generation statistics in ptxas | NO | YES, NO |
Expand Down Expand Up @@ -336,7 +340,7 @@ for example ``CMAKE_CXX_FLAGS``, can be used for HIP as well.


Since CMake does not support autodetection of HIP compilers/target architectures
yet, ``CMAKE_CXX_COMPILER`` must be set to a valid HIP compiler, i.e. ``clang++`` or ``hipcc`` or ``nvcc``,
yet, ``CMAKE_CXX_COMPILER`` must be set to a valid HIP compiler, i.e. ``clang++`` or ``hipcc``,
and ``AMReX_AMD_ARCH`` to the target architecture you are building for.
Thus **AMReX_AMD_ARCH and CMAKE_CXX_COMPILER are required user-inputs when AMReX_GPU_BACKEND=HIP**.
As before, AMReX also reads an *environment variable*, ``AMREX_AMD_ARCH`` (note: all caps), and the C++ compiler can be hinted as usual, e.g. with ``export CXX=$(which clang++)``.
Expand Down
4 changes: 0 additions & 4 deletions Src/Base/AMReX_GpuControl.H
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@

#include <utility>

#ifndef AMREX_GPU_MAX_THREADS
#define AMREX_GPU_MAX_THREADS 256
#endif

#if defined(AMREX_USE_CUDA) && (__CUDACC_VER_MAJOR__ > 11 || ((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 2)))
#define AMREX_CUDA_GE_11_2 1
#endif
Expand Down
4 changes: 0 additions & 4 deletions Tools/CMake/AMReXCUDAOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,6 @@ set(AMReX_CUDA_ARCH ${AMReX_CUDA_ARCH_DEFAULT} CACHE STRING "CUDA architecture (
option(AMReX_CUDA_FASTMATH "Enable CUDA fastmath" ON)
cuda_print_option( AMReX_CUDA_FASTMATH )

set(AMReX_CUDA_MAX_THREADS "256" CACHE STRING
"Maximum number of CUDA threads per block" )
message( STATUS " AMReX_CUDA_MAX_THREADS = ${AMReX_CUDA_MAX_THREADS}")

set(AMReX_CUDA_MAXREGCOUNT "255" CACHE STRING
"Limit the maximum number of registers available" )
message( STATUS " AMReX_CUDA_MAXREGCOUNT = ${AMReX_CUDA_MAXREGCOUNT}")
Expand Down
6 changes: 6 additions & 0 deletions Tools/CMake/AMReXOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ endif ()

# GPU-only options: this block is processed only when a GPU backend other
# than NONE has been selected. It reports the chosen backend and declares the
# AMReX_GPU_MAX_THREADS cache entry (maximum number of GPU threads per block).
if (NOT AMReX_GPU_BACKEND STREQUAL NONE)
message( STATUS " AMReX_GPU_BACKEND = ${AMReX_GPU_BACKEND}")

# We might set different defaults for different GPU backends in the future,
# which is why the default lives in its own variable.
set(AMReX_GPU_MAX_THREADS_DEFAULT "256")
# Cache entry without FORCE: a value already in the cache (e.g. given on the
# command line with -DAMReX_GPU_MAX_THREADS=N) is kept; otherwise the default
# above is used.
set(AMReX_GPU_MAX_THREADS ${AMReX_GPU_MAX_THREADS_DEFAULT} CACHE STRING
"Maximum number of GPU threads per block" )
message( STATUS " AMReX_GPU_MAX_THREADS = ${AMReX_GPU_MAX_THREADS}")
endif ()

# Legacy variables for internal use only
Expand Down
10 changes: 5 additions & 5 deletions Tools/CMake/AMReXSetDefines.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ add_amrex_define( AMREX_USE_OMP IF AMReX_OMP )

# DPCPP
add_amrex_define( AMREX_USE_DPCPP NO_LEGACY IF AMReX_DPCPP )
add_amrex_define( AMREX_USE_GPU NO_LEGACY IF AMReX_DPCPP )
add_amrex_define( AMREX_USE_ONEDPL NO_LEGACY IF AMReX_DPCPP_ONEDPL )

# HIP
Expand Down Expand Up @@ -138,16 +137,17 @@ add_amrex_define( AMREX_USE_ASCENT NO_LEGACY IF AMReX_ASCENT )
#
add_amrex_define( AMREX_USE_CUDA NO_LEGACY IF AMReX_CUDA )
add_amrex_define( AMREX_USE_NVML NO_LEGACY IF AMReX_CUDA )
add_amrex_define( AMREX_GPU_MAX_THREADS=${AMReX_CUDA_MAX_THREADS} NO_LEGACY
IF AMReX_CUDA )

#
# General setup for any GPUs
#
if (AMReX_CUDA OR AMReX_HIP)
add_amrex_define( AMREX_USE_GPU NO_LEGACY )
if (NOT AMReX_GPU_BACKEND STREQUAL NONE)
add_amrex_define( AMREX_USE_GPU NO_LEGACY )
add_amrex_define( AMREX_GPU_MAX_THREADS=${AMReX_GPU_MAX_THREADS} NO_LEGACY )
add_amrex_define( BL_COALESCE_FABS )
endif()

if (AMReX_CUDA OR AMReX_HIP)
add_amrex_define( AMREX_GPUS_PER_SOCKET=${GPUS_PER_SOCKET}
NO_LEGACY IF GPUS_PER_SOCKET)

Expand Down
3 changes: 3 additions & 0 deletions Tools/CMake/AMReX_Config.H.in
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
#cmakedefine AMREX_USE_HIP
#cmakedefine AMREX_USE_NVML
#cmakedefine AMREX_GPU_MAX_THREADS @AMREX_GPU_MAX_THREADS@
#ifndef AMREX_GPU_MAX_THREADS
#define AMREX_GPU_MAX_THREADS 0
#endif
#cmakedefine AMREX_USE_ACC
#cmakedefine AMREX_USE_GPU
#cmakedefine BL_COALESCE_FABS
Expand Down
15 changes: 7 additions & 8 deletions Tools/GNUMake/Make.defs
Original file line number Diff line number Diff line change
Expand Up @@ -252,13 +252,14 @@ else
USE_CUPTI := FALSE
endif

# Maximum number of GPU threads per block.
CUDA_MAX_THREADS ?= 256
GPU_MAX_THREADS ?= $(CUDA_MAX_THREADS)

ifeq ($(USE_CUDA),TRUE)
# Set the default CUDA architecture version.
CUDA_ARCH ?= 70

# Maximum number of CUDA threads per block.
CUDA_MAX_THREADS ?= 256

# Limit the maximum number of registers available.
CUDA_MAXREGCOUNT ?= 255

Expand Down Expand Up @@ -790,13 +791,15 @@ else
endif

ifeq ($(USE_GPU),TRUE)
DEFINES += -DAMREX_USE_GPU -DBL_COALESCE_FABS
DEFINES += -DAMREX_USE_GPU -DBL_COALESCE_FABS -DAMREX_GPU_MAX_THREADS=$(GPU_MAX_THREADS)
ifeq ($(GPU_ERROR_CHECK),FALSE)
DEFINES += -DAMREX_GPU_NO_ERROR_CHECK
endif
ifeq ($(USE_GPU_RDC),TRUE)
DEFINES += -DAMREX_USE_GPU_RDC
endif
else
DEFINES += -DAMREX_GPU_MAX_THREADS=0
endif

ifeq ($(USE_SINGLE_PRECISION_PARTICLES), TRUE)
Expand Down Expand Up @@ -1128,10 +1131,6 @@ else ifeq ($(USE_CUDA),TRUE)
DEFINES += -DAMREX_GPUS_PER_NODE=$(GPUS_PER_NODE)
endif

# Set the CUDA threads define in case the user updated it.

DEFINES += -DAMREX_GPU_MAX_THREADS=$(CUDA_MAX_THREADS)

ifneq ($(LINK_WITH_FORTRAN_COMPILER),TRUE)
LINKFLAGS = $(NVCC_FLAGS) $(CXXFLAGS_FROM_HOST)
AMREX_LINKER = nvcc
Expand Down

0 comments on commit e81a9c2

Please sign in to comment.