Skip to content

Commit

Permalink
[flang][cuda] Allow to set the stack limit size (#124859)
Browse files Browse the repository at this point in the history
This patch adds a call to the CUFInit function just after `ProgramStart`
when CUDA Fortran is enabled to initialize the CUDA context. This allows
us to set up some context information like the stack limit that can be
defined by an environment variable `ACC_OFFLOAD_STACK_SIZE=<value>`.
  • Loading branch information
clementval authored Jan 29, 2025
1 parent 9052b37 commit 654b763
Show file tree
Hide file tree
Showing 9 changed files with 87 additions and 9 deletions.
13 changes: 7 additions & 6 deletions flang/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,13 @@ if (FLANG_INCLUDE_TESTS)
add_compile_definitions(FLANG_INCLUDE_TESTS=1)
endif()

# Optional CUDA Fortran runtime support.
# This block is placed ahead of add_subdirectory(lib) on purpose:
# add_compile_definitions() only reaches sub-directories that are added after
# it, and lib/Optimizer/Builder/Runtime/Main.cpp tests FLANG_CUDA_SUPPORT to
# decide whether the CUFInit call is generated.
option(FLANG_CUF_RUNTIME
"Compile CUDA Fortran runtime sources" OFF)
if (FLANG_CUF_RUNTIME)
find_package(CUDAToolkit REQUIRED)
add_compile_definitions(FLANG_CUDA_SUPPORT=1)
endif()

add_subdirectory(include)
add_subdirectory(lib)
add_subdirectory(cmake/modules)
Expand All @@ -481,12 +488,6 @@ if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()

option(FLANG_CUF_RUNTIME
"Compile CUDA Fortran runtime sources" OFF)
if (FLANG_CUF_RUNTIME)
find_package(CUDAToolkit REQUIRED)
endif()

add_subdirectory(runtime)

if (LLVM_INCLUDE_EXAMPLES)
Expand Down
3 changes: 2 additions & 1 deletion flang/include/flang/Optimizer/Builder/Runtime/Main.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class GlobalOp;
namespace fir::runtime {

void genMain(fir::FirOpBuilder &builder, mlir::Location loc,
const std::vector<Fortran::lower::EnvironmentDefault> &defs);
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
bool initCuda = false);
}

#endif // FORTRAN_OPTIMIZER_BUILDER_RUNTIME_MAIN_H
20 changes: 20 additions & 0 deletions flang/include/flang/Runtime/CUDA/init.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//===-- include/flang/Runtime/CUDA/init.h -----------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef FORTRAN_RUNTIME_CUDA_INIT_H_
#define FORTRAN_RUNTIME_CUDA_INIT_H_

#include "common.h"
#include "flang/Runtime/entry-names.h"

extern "C" {

/// Runtime entry point for CUDA Fortran initialization. Takes no arguments and
/// returns nothing; the definition lives in flang/runtime/CUDA/init.cpp.
void RTDECL(CUFInit)();
}

#endif // FORTRAN_RUNTIME_CUDA_INIT_H_
4 changes: 3 additions & 1 deletion flang/lib/Lower/Bridge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
if (hasMainProgram)
createGlobalOutsideOfFunctionLowering([&]() {
fir::runtime::genMain(*builder, toLocation(),
bridge.getEnvironmentDefaults());
bridge.getEnvironmentDefaults(),
getFoldingContext().languageFeatures().IsEnabled(
Fortran::common::LanguageFeature::CUDA));
});

finalizeOpenACCLowering();
Expand Down
15 changes: 14 additions & 1 deletion flang/lib/Optimizer/Builder/Runtime/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Runtime/main.h"
#include "flang/Runtime/stop.h"
#ifdef FLANG_CUDA_SUPPORT
#include "flang/Runtime/CUDA/init.h"
#endif

using namespace Fortran::runtime;

/// Create a `int main(...)` that calls the Fortran entry point
void fir::runtime::genMain(
fir::FirOpBuilder &builder, mlir::Location loc,
const std::vector<Fortran::lower::EnvironmentDefault> &defs) {
const std::vector<Fortran::lower::EnvironmentDefault> &defs,
bool initCuda) {
auto *context = builder.getContext();
auto argcTy = builder.getDefaultIntegerType();
auto ptrTy = mlir::LLVM::LLVMPointerType::get(context);
Expand Down Expand Up @@ -61,6 +65,15 @@ void fir::runtime::genMain(
args.push_back(env);

builder.create<fir::CallOp>(loc, startFn, args);

#ifdef FLANG_CUDA_SUPPORT
if (initCuda) {
auto initFn = builder.createFunction(
loc, RTNAME_STRING(CUFInit), mlir::FunctionType::get(context, {}, {}));
builder.create<fir::CallOp>(loc, initFn);
}
#endif

builder.create<fir::CallOp>(loc, qqMainFn);
builder.create<fir::CallOp>(loc, stopFn);

Expand Down
1 change: 1 addition & 0 deletions flang/runtime/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ add_flang_library(${CUFRT_LIBNAME}
allocator.cpp
allocatable.cpp
descriptor.cpp
init.cpp
kernel.cpp
memmove-function.cpp
memory.cpp
Expand Down
25 changes: 25 additions & 0 deletions flang/runtime/CUDA/init.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===-- runtime/CUDA/init.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Runtime/CUDA/init.h"
#include "../environment.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"

#include "cuda_runtime.h"

extern "C" {

/// CUDA Fortran initialization hook.
///
/// Reads `cudaStackLimit` from the global execution environment and, when it
/// is non-zero, passes it to cudaDeviceSetLimit(cudaLimitStackSize, ...).
/// A zero value means no limit was configured, and no CUDA call is made.
/// The result of the CUDA call is handed to CUDA_REPORT_IF_ERROR.
void RTDEF(CUFInit)() {
// Perform ctx initialization based on execution environment if necessary.
if (Fortran::runtime::executionEnvironment.cudaStackLimit) {
CUDA_REPORT_IF_ERROR(cudaDeviceSetLimit(cudaLimitStackSize,
Fortran::runtime::executionEnvironment.cudaStackLimit));
}
}
}
12 changes: 12 additions & 0 deletions flang/runtime/environment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
}
}

// ACC_OFFLOAD_STACK_SIZE=<n>: a positive decimal integer that is stored in
// cudaStackLimit. Any other value is reported on stderr and ignored.
if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) {
char *end;
auto n{std::strtoul(x, &end, 10)};
// strtoul() skips leading whitespace and accepts a sign; with a leading '-'
// it returns the negated value as a huge unsigned number. Require the text
// to start with a digit so that inputs such as "-5" are rejected.
bool startsWithDigit{*x >= '0' && *x <= '9'};
// On overflow strtoul() returns the maximum of its own return type, which
// can be narrower than std::size_t; reject that sentinel explicitly too.
if (startsWithDigit && n > 0 &&
n < std::numeric_limits<decltype(n)>::max() &&
n < std::numeric_limits<std::size_t>::max() && *end == '\0') {
cudaStackLimit = n;
} else {
std::fprintf(stderr,
"Fortran runtime: ACC_OFFLOAD_STACK_SIZE=%s is invalid; ignored\n",
x);
}
}

// TODO: Set RP/ROUND='PROCESSOR_DEFINED' from environment
}

Expand Down
3 changes: 3 additions & 0 deletions flang/runtime/environment.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ struct ExecutionEnvironment {
bool noStopMessage{false}; // NO_STOP_MESSAGE=1 inhibits "Fortran STOP"
bool defaultUTF8{false}; // DEFAULT_UTF8
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION

// CUDA related variables
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
};

RT_OFFLOAD_VAR_GROUP_BEGIN
Expand Down

0 comments on commit 654b763

Please sign in to comment.