Skip to content

Commit 4e3310a

Browse files
authored
[clang] Fix OMPT ident flag in combined distribute parallel for pragma (llvm#80987)
Authored-by: Raúl Peñacoba Veigas <rpenacob@bsc.es>
1 parent eb31970 commit 4e3310a

File tree

90 files changed

+1019
-1011
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

90 files changed

+1019
-1011
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2647,6 +2647,9 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
26472647
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
26482648
SourceLocation Loc,
26492649
OpenMPDirectiveKind DKind) {
2650+
assert(DKind == OMPD_distribute || DKind == OMPD_for ||
2651+
DKind == OMPD_sections &&
2652+
"Expected distribute, for, or sections directive kind");
26502653
if (!CGF.HaveInsertPoint())
26512654
return;
26522655
// Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2910,10 +2910,10 @@ void CodeGenFunction::EmitOMPOuterLoop(
29102910
EmitBlock(LoopExit.getBlock());
29112911

29122912
// Tell the runtime we are done.
2913-
auto &&CodeGen = [DynamicOrOrdered, &S](CodeGenFunction &CGF) {
2913+
auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
29142914
if (!DynamicOrOrdered)
29152915
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2916-
S.getDirectiveKind());
2916+
LoopArgs.DKind);
29172917
};
29182918
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
29192919
}
@@ -3019,6 +3019,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(
30193019
OuterLoopArgs.Cond = S.getCond();
30203020
OuterLoopArgs.NextLB = S.getNextLowerBound();
30213021
OuterLoopArgs.NextUB = S.getNextUpperBound();
3022+
OuterLoopArgs.DKind = LoopArgs.DKind;
30223023
EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
30233024
emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
30243025
}
@@ -3080,6 +3081,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
30803081
OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
30813082
? S.getCombinedNextUpperBound()
30823083
: S.getNextUpperBound();
3084+
OuterLoopArgs.DKind = OMPD_distribute;
30833085

30843086
EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
30853087
LoopScope, OuterLoopArgs, CodeGenLoopContent,
@@ -3452,15 +3454,16 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
34523454
// Tell the runtime we are done.
34533455
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
34543456
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3455-
S.getDirectiveKind());
3457+
OMPD_for);
34563458
};
34573459
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
34583460
} else {
34593461
// Emit the outer loop, which requests its work chunk [LB..UB] from
34603462
// runtime and runs the inner loop to process it.
3461-
const OMPLoopArguments LoopArguments(
3463+
OMPLoopArguments LoopArguments(
34623464
LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
34633465
IL.getAddress(*this), Chunk, EUB);
3466+
LoopArguments.DKind = OMPD_for;
34643467
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
34653468
LoopArguments, CGDispatchBounds);
34663469
}
@@ -4082,7 +4085,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
40824085
// Tell the runtime we are done.
40834086
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
40844087
CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
4085-
S.getDirectiveKind());
4088+
OMPD_sections);
40864089
};
40874090
CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
40884091
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
@@ -5782,7 +5785,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
57825785
});
57835786
EmitBlock(LoopExit.getBlock());
57845787
// Tell the runtime we are done.
5785-
RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
5788+
RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
57865789
} else {
57875790
// Emit the outer loop, which requests its work chunk [LB..UB] from
57885791
// runtime and runs the inner loop to process it.

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3831,6 +3831,8 @@ class CodeGenFunction : public CodeGenTypeCache {
38313831
Expr *NextLB = nullptr;
38323832
/// Update of UB after a whole chunk has been executed
38333833
Expr *NextUB = nullptr;
3834+
/// Directive kind, used to distinguish between the for, distribute and
/// sections directives
3835+
OpenMPDirectiveKind DKind = llvm::omp::OMPD_unknown;
38343836
OMPLoopArguments() = default;
38353837
OMPLoopArguments(Address LB, Address UB, Address ST, Address IL,
38363838
llvm::Value *Chunk = nullptr, Expr *EUB = nullptr,

clang/test/OpenMP/amdgcn_target_device_vla.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ int main() {
539539
// CHECK: omp.loop.exit:
540540
// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
541541
// CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
542-
// CHECK-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP35]])
542+
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP35]])
543543
// CHECK-NEXT: br label [[OMP_PRECOND_END]]
544544
// CHECK: omp.precond.end:
545545
// CHECK-NEXT: ret void

clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ void write_to_aligned_array(int *a, int N) {
301301
// CHECK-AMD: omp.loop.exit:
302302
// CHECK-AMD-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
303303
// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
304-
// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP18]])
304+
// CHECK-AMD-NEXT: call void @__kmpc_for_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr), i32 [[TMP18]])
305305
// CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]]
306306
// CHECK-AMD: omp.precond.end:
307307
// CHECK-AMD-NEXT: ret void

clang/test/OpenMP/bug60602.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) {
564564
// CHECK: omp.loop.exit:
565565
// CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
566566
// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
567-
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
567+
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP23]])
568568
// CHECK-NEXT: br label [[OMP_PRECOND_END]]
569569
// CHECK: omp.precond.end:
570570
// CHECK-NEXT: ret void

0 commit comments

Comments
 (0)