Skip to content

Commit

Permalink
GNU Make: Add support for CUDA LTO (AMReX-Codes#3993)
Browse files Browse the repository at this point in the history
## Summary

This adds support for CUDA link-time optimization (LTO) to the GNU Make build
system. It is controlled by the `CUDA_LTO` makefile variable, which defaults to `FALSE`.

## Additional background

The equivalent support was added for CMake in AMReX-Codes#1095.
  • Loading branch information
mirenradia authored Jun 21, 2024
1 parent 0e3e39b commit 85bef47
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 4 deletions.
5 changes: 4 additions & 1 deletion Tools/GNUMake/Make.defs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,9 @@ ifeq ($(USE_CUDA),TRUE)
# Limit the maximum number of registers available.
CUDA_MAXREGCOUNT ?= 255

# Link-time optimization of CUDA device code. Off by default (`?=` keeps any
# value the user has already set). When TRUE, comps/nvcc.mak compiles with
# code=lto_<arch> targets and adds -dlto to the architecture link flags.
CUDA_LTO ?= FALSE

# Enable verbosity in the CUDA compilation.
CUDA_VERBOSE ?= TRUE
endif
Expand Down Expand Up @@ -1176,7 +1179,7 @@ else ifeq ($(USE_CUDA),TRUE)
endif

# Unless the Fortran compiler performs the final link, link with nvcc and pass
# it the nvcc flags together with the host C++ flags.
ifneq ($(LINK_WITH_FORTRAN_COMPILER),TRUE)
# NOTE(review): LINKFLAGS is assigned twice below; with `=` only the second
# assignment (the one that adds NVCC_ARCH_LINK_FLAGS) takes effect. The first
# looks like the pre-change line of this diff -- confirm before relying on it.
LINKFLAGS = $(NVCC_FLAGS) $(CXXFLAGS_FROM_HOST)
LINKFLAGS = $(NVCC_FLAGS) $(NVCC_ARCH_LINK_FLAGS) $(CXXFLAGS_FROM_HOST)
AMREX_LINKER = nvcc
endif

Expand Down
16 changes: 13 additions & 3 deletions Tools/GNUMake/comps/nvcc.mak
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,17 @@ else
CFLAGS_FROM_HOST := $(CXXFLAGS_FROM_HOST)
endif

NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 $(foreach arch,$(CUDA_ARCH),--generate-code arch=compute_$(arch),code=sm_$(arch)) -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda --forward-unknown-to-host-compiler
# Per-architecture code generation options: one --generate-code entry for each
# value listed in CUDA_ARCH, targeting sm_<arch> device code.
NVCC_ARCH_FLAGS = $(foreach arch,$(CUDA_ARCH),--generate-code arch=compute_$(arch),code=sm_$(arch))

# With CUDA_LTO=TRUE the compile step targets code=lto_<arch> instead of
# code=sm_<arch>, and the link step adds -dlto while still targeting
# sm_<arch>. Otherwise compiling and linking use the same architecture flags.
# NOTE(review): nvcc device LTO presumably requires relocatable device code
# (-dc); confirm whether CUDA_LTO=TRUE should also require USE_GPU_RDC=TRUE.
ifeq ($(CUDA_LTO),TRUE)
  # Rewrite only the "code=sm_" prefix so that an "sm" substring appearing
  # anywhere else in the flags is never touched.
  NVCC_ARCH_COMPILE_FLAGS = $(subst code=sm_,code=lto_,$(NVCC_ARCH_FLAGS))
  NVCC_ARCH_LINK_FLAGS = -dlto $(NVCC_ARCH_FLAGS)
else
  NVCC_ARCH_COMPILE_FLAGS = $(NVCC_ARCH_FLAGS)
  NVCC_ARCH_LINK_FLAGS = $(NVCC_ARCH_FLAGS)
endif

# Architecture-independent nvcc options. The --generate-code options are not
# part of this variable; they are held in NVCC_ARCH_COMPILE_FLAGS and
# NVCC_ARCH_LINK_FLAGS so the compile and link steps can use different ones.
# CUDA_MAXREGCOUNT sets the value passed to -maxrregcount.
NVCC_FLAGS = -Wno-deprecated-gpu-targets -m64 -maxrregcount=$(CUDA_MAXREGCOUNT) --expt-relaxed-constexpr --expt-extended-lambda --forward-unknown-to-host-compiler
# This is to work around a bug with nvcc, see: https://github.com/kokkos/kokkos/issues/1473
NVCC_FLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
# and another bug related to implicit returns with if constexpr, see: https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void-function-in-constexpr-if-fun
Expand Down Expand Up @@ -145,8 +155,8 @@ ifeq ($(nvcc_diag_error),1)
NVCC_FLAGS += --display-error-number --diag-error 20092
endif

CXXFLAGS = $(CXXFLAGS_FROM_HOST) $(NVCC_FLAGS) -x cu
CFLAGS = $(CFLAGS_FROM_HOST) $(NVCC_FLAGS) -x cu
# Compile flags for C++ and C sources: the host compiler flags, the common
# nvcc flags, and the per-architecture compile flags (which select lto_<arch>
# targets when CUDA_LTO is TRUE). Both end with `-x cu`.
# NOTE(review): `-x cu` presumably makes nvcc treat every input as CUDA
# source regardless of its file extension -- confirm against the nvcc manual.
CXXFLAGS = $(CXXFLAGS_FROM_HOST) $(NVCC_FLAGS) $(NVCC_ARCH_COMPILE_FLAGS) -x cu
CFLAGS = $(CFLAGS_FROM_HOST) $(NVCC_FLAGS) $(NVCC_ARCH_COMPILE_FLAGS) -x cu

ifeq ($(USE_GPU_RDC),TRUE)
CXXFLAGS += -dc
Expand Down

0 comments on commit 85bef47

Please sign in to comment.