Skip to content

Commit 1a844f6

Browse files
committed
[flang][runtime] Replace recursion with iterative work queue
Recursion, both direct and indirect, prevents accurate stack size calculation at link time for GPU device code. Restructure these recursive (often mutually recursive) routines in the Fortran runtime with new implementations based on an iterative work queue with suspendable/resumable work tickets: Assign, Initialize, InitializeClone, Finalize, Destroy, and DescriptorIO. Note that derived-type FINAL subroutine calls, defined assignments, and defined I/O procedures all perform callbacks into user code, which may well re-enter the runtime library. This kind of recursion is not handled by this change, although it may be possible to do so in the future using thread-local work queues. The effects of this restructuring on CPU performance are yet to be measured. The work queue implementation has a nominally fast mode (its benefit is likewise unmeasured) that causes new work items to be executed to completion immediately upon creation, saving the overhead of actually representing and managing the work queue. This mode can't be used on GPU devices, but it is enabled by default for CPU hosts. It can be disabled easily for debugging and performance testing.
1 parent 4b23d4c commit 1a844f6

31 files changed

+2225
-1118
lines changed

flang-rt/include/flang-rt/runtime/environment.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,9 @@ struct ExecutionEnvironment {
6464
bool defaultUTF8{false}; // DEFAULT_UTF8
6565
bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
6666

67+
enum InternalDebugging { WorkQueue = 1 };
68+
int internalDebugging{0}; // FLANG_RT_DEBUG
69+
6770
// CUDA related variables
6871
std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
6972
bool cudaDeviceIsManaged{false}; // NV_CUDAFOR_DEVICE_IS_MANAGED

flang-rt/include/flang-rt/runtime/stat.h

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class Terminator;
2424
enum Stat {
2525
StatOk = 0, // required to be zero by Fortran
2626

27-
// Interoperable STAT= codes
27+
// Interoperable STAT= codes (>= 11)
2828
StatBaseNull = CFI_ERROR_BASE_ADDR_NULL,
2929
StatBaseNotNull = CFI_ERROR_BASE_ADDR_NOT_NULL,
3030
StatInvalidElemLen = CFI_INVALID_ELEM_LEN,
@@ -36,7 +36,7 @@ enum Stat {
3636
StatMemAllocation = CFI_ERROR_MEM_ALLOCATION,
3737
StatOutOfBounds = CFI_ERROR_OUT_OF_BOUNDS,
3838

39-
// Standard STAT= values
39+
// Standard STAT= values (>= 101)
4040
StatFailedImage = FORTRAN_RUNTIME_STAT_FAILED_IMAGE,
4141
StatLocked = FORTRAN_RUNTIME_STAT_LOCKED,
4242
StatLockedOtherImage = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE,
@@ -49,10 +49,14 @@ enum Stat {
4949
// Additional "processor-defined" STAT= values
5050
StatInvalidArgumentNumber = FORTRAN_RUNTIME_STAT_INVALID_ARG_NUMBER,
5151
StatMissingArgument = FORTRAN_RUNTIME_STAT_MISSING_ARG,
52-
StatValueTooShort = FORTRAN_RUNTIME_STAT_VALUE_TOO_SHORT,
52+
StatValueTooShort = FORTRAN_RUNTIME_STAT_VALUE_TOO_SHORT, // -1
5353
StatMoveAllocSameAllocatable =
5454
FORTRAN_RUNTIME_STAT_MOVE_ALLOC_SAME_ALLOCATABLE,
5555
StatBadPointerDeallocation = FORTRAN_RUNTIME_STAT_BAD_POINTER_DEALLOCATION,
56+
57+
// Dummy status for work queue continuation, declared here to perhaps
58+
// avoid collisions
59+
StatContinue = 201
5660
};
5761

5862
RT_API_ATTRS const char *StatErrorString(int);

flang-rt/include/flang-rt/runtime/type-info.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,7 @@ class DerivedType {
240240
RT_API_ATTRS bool noFinalizationNeeded() const {
241241
return noFinalizationNeeded_;
242242
}
243+
RT_API_ATTRS bool noDefinedAssignment() const { return noDefinedAssignment_; }
243244

244245
RT_API_ATTRS std::size_t LenParameters() const {
245246
return lenParameterKind().Elements();
@@ -322,6 +323,7 @@ class DerivedType {
322323
bool noInitializationNeeded_{false};
323324
bool noDestructionNeeded_{false};
324325
bool noFinalizationNeeded_{false};
326+
bool noDefinedAssignment_{false};
325327
};
326328

327329
} // namespace Fortran::runtime::typeInfo

0 commit comments

Comments
 (0)