Skip to content

Commit cd0dd8e

Browse files
jhuber6jhuber-ornl
authored and committed
[OpenMP] Adding flags for disabling the following optimizations: deglobalization, SPMDization, state machine rewrites, and folding
This work provides four flags to disable four different sets of OpenMP optimizations. These flags take effect in llvm/lib/Transforms/IPO/OpenMPOpt.cpp and include the following:
- openmp-opt-disable-deglobalization: Defaults to false; adding this flag sets the variable DisableOpenMPOptDeglobalization to true. This prevents AA registration for HeapToStack and HeapToShared.
- openmp-opt-disable-spmdization: Defaults to false; adding this flag sets the variable DisableOpenMPOptSPMDization to true. This indicates a pessimistic fixpoint in changeToSPMDMode.
- openmp-opt-disable-folding: Defaults to false; adding this flag sets the variable DisableOpenMPOptFolding to true. This indicates a pessimistic fixpoint in the attributor init for AAFoldRuntimeCall.
- openmp-opt-disable-state-machine-rewrite: Defaults to false; adding this flag sets the variable DisableOpenMPOptStateMachineRewrite to true. This first prevents changes to the state machine in rewriteDeviceCodeStateMachine by returning before any changes are made and, if a custom state machine would be built in buildCustomStateMachine, stops by returning a pessimistic fixpoint.

Reviewed By: jhuber6

Differential Revision: https://reviews.llvm.org/D106802
1 parent 648844f commit cd0dd8e

File tree

5 files changed

+1193
-2
lines changed

5 files changed

+1193
-2
lines changed

llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,26 @@ static cl::opt<bool> HideMemoryTransferLatency(
7272
" transfers"),
7373
cl::Hidden, cl::init(false));
7474

75+
static cl::opt<bool> DisableOpenMPOptDeglobalization(
76+
"openmp-opt-disable-deglobalization", cl::ZeroOrMore,
77+
cl::desc("Disable OpenMP optimizations involving deglobalization."),
78+
cl::Hidden, cl::init(false));
79+
80+
static cl::opt<bool> DisableOpenMPOptSPMDization(
81+
"openmp-opt-disable-spmdization", cl::ZeroOrMore,
82+
cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
83+
cl::Hidden, cl::init(false));
84+
85+
static cl::opt<bool> DisableOpenMPOptFolding(
86+
"openmp-opt-disable-folding", cl::ZeroOrMore,
87+
cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
88+
cl::init(false));
89+
90+
static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
91+
"openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
92+
cl::desc("Disable OpenMP optimizations that replace the state machine."),
93+
cl::Hidden, cl::init(false));
94+
7595
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
7696
"Number of OpenMP runtime calls deduplicated");
7797
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -1918,6 +1938,10 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
19181938
if (!KernelParallelRFI)
19191939
return Changed;
19201940

1941+
// If we have disabled state machine changes, exit
1942+
if (DisableOpenMPOptStateMachineRewrite)
1943+
return Changed;
1944+
19211945
for (Function *F : SCC) {
19221946

19231947
// Check if the function is a use in a __kmpc_parallel_51 call at
@@ -2962,6 +2986,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
29622986
}
29632987

29642988
bool changeToSPMDMode(Attributor &A) {
2989+
// If we have disabled SPMD-ization, stop
2990+
if (DisableOpenMPOptSPMDization)
2991+
indicatePessimisticFixpoint();
2992+
29652993
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
29662994

29672995
if (!SPMDCompatibilityTracker.isAssumed()) {
@@ -3042,6 +3070,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
30423070
};
30433071

30443072
ChangeStatus buildCustomStateMachine(Attributor &A) {
3073+
// If we have disabled state machine rewrites, don't make a custom one
3074+
if (DisableOpenMPOptStateMachineRewrite)
3075+
return indicatePessimisticFixpoint();
3076+
30453077
assert(ReachedKnownParallelRegions.isValidState() &&
30463078
"Custom state machine with invalid parallel region states?");
30473079

@@ -3685,6 +3717,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
36853717
}
36863718

36873719
void initialize(Attributor &A) override {
3720+
if (DisableOpenMPOptFolding)
3721+
indicatePessimisticFixpoint();
3722+
36883723
Function *Callee = getAssociatedFunction();
36893724

36903725
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -4012,7 +4047,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
40124047
A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
40134048
return false;
40144049
};
4015-
GlobalizationRFI.foreachUse(SCC, CreateAA);
4050+
if (!DisableOpenMPOptDeglobalization)
4051+
GlobalizationRFI.foreachUse(SCC, CreateAA);
40164052

40174053
// Create an ExecutionDomain AA for every function and a HeapToStack AA for
40184054
// every function if there is a device kernel.
@@ -4024,7 +4060,8 @@ void OpenMPOpt::registerAAs(bool IsModulePass) {
40244060
continue;
40254061

40264062
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
4027-
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
4063+
if (!DisableOpenMPOptDeglobalization)
4064+
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
40284065

40294066
for (auto &I : instructions(*F)) {
40304067
if (auto *LI = dyn_cast<LoadInst>(&I)) {

0 commit comments

Comments
 (0)