-
Notifications
You must be signed in to change notification settings - Fork 14.1k
Revert "[OpenMP][SIMD][FIX] Use conservative "omp simd ordered" lowering" #126079
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen Author: Alexey Bataev (alexey-bataev) ChangesReverts llvm/llvm-project#123867 to fix the test failures https://lab.llvm.org/buildbot/#/builders/144/builds/17521 Patch is 42.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126079.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 3542e939678cf4..8e694b95dc7e73 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2457,86 +2457,10 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
}
}
-// Check for the presence of an `OMPOrderedDirective`,
-// i.e., `ordered` in `#pragma omp ordered simd`.
-//
-// Consider the following source code:
-// ```
-// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
-// {
-// for (int r = 1; r < ARRAY_SIZE; ++r) {
-// for (int c = 1; c < ARRAY_SIZE; ++c) {
-// #pragma omp simd
-// for (int k = 2; k < ARRAY_SIZE; ++k) {
-// #pragma omp ordered simd
-// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
-// }
-// }
-// }
-// }
-// ```
-//
-// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
-// &D)`. By examining `D.dump()` we have the following AST containing
-// `OMPOrderedDirective`:
-//
-// ```
-// OMPSimdDirective 0x1c32950
-// `-CapturedStmt 0x1c32028
-// |-CapturedDecl 0x1c310e8
-// | |-ForStmt 0x1c31e30
-// | | |-DeclStmt 0x1c31298
-// | | | `-VarDecl 0x1c31208 used k 'int' cinit
-// | | | `-IntegerLiteral 0x1c31278 'int' 2
-// | | |-<<<NULL>>>
-// | | |-BinaryOperator 0x1c31308 'int' '<'
-// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
-// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
-// | | | `-IntegerLiteral 0x1c312d0 'int' 256
-// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
-// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
-// | | `-CompoundStmt 0x1c31e18
-// | | `-OMPOrderedDirective 0x1c31dd8
-// | | |-OMPSimdClause 0x1c31380
-// | | `-CapturedStmt 0x1c31cd0
-// ```
-//
-// Note the presence of `OMPOrderedDirective` above:
-// It's (transitively) nested in a `CapturedStmt` representing the pragma
-// annotated compound statement. Thus, we need to consider this nesting and
-// include checking the `getCapturedStmt` in this case.
-static bool hasOrderedDirective(const Stmt *S) {
- if (isa<OMPOrderedDirective>(S))
- return true;
-
- if (const auto *CS = dyn_cast<CapturedStmt>(S))
- return hasOrderedDirective(CS->getCapturedStmt());
-
- for (const Stmt *Child : S->children()) {
- if (Child && hasOrderedDirective(Child))
- return true;
- }
-
- return false;
-}
-
-static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
- LoopInfoStack &LoopStack) {
- // Check for the presence of an `OMPOrderedDirective`
- // i.e., `ordered` in `#pragma omp ordered simd`
- bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
- // If present then conservatively disable loop vectorization
- // analogously to how `emitSimdlenSafelenClause` does.
- if (HasOrderedDirective)
- LoopStack.setParallel(/*Enable=*/false);
-}
-
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
// Walk clauses and process safelen/lastprivate.
LoopStack.setParallel(/*Enable=*/true);
LoopStack.setVectorizeEnable();
- const Stmt *AssociatedStmt = D.getAssociatedStmt();
- applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
emitSimdlenSafelenClause(*this, D);
if (const auto *C = D.getSingleClause<OMPOrderClause>())
if (C->getKind() == OMPC_ORDER_concurrent)
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
index 5cd95f1927e5ce..67285cfaef34d5 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -572,30 +572,30 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1
// CHECK1-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1
// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -645,31 +645,31 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK1: omp.inner.for.cond29:
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK1: omp.inner.for.body32:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
// CHECK1-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]]
-// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4
-// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK1: omp.body.continue37:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK1: omp.inner.for.inc38:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
-// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1: omp.inner.for.end40:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
@@ -1201,32 +1201,32 @@ void foo_simd(int low, int up) {
// CHECK1-IRBUILDER-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.cond:
-// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
// CHECK1-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
// CHECK1-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body:
-// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1
// CHECK1-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1-IRBUILDER: omp.body.continue:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.inc:
-// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1-IRBUILDER: omp.inner.for.end:
// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -1278,34 +1278,34 @@ void foo_simd(int low, int up) {
// CHECK1-IRBUILDER-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.cond30:
-// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1
// CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]]
// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body33:
-// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1
// CHECK1-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]]
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]]
-// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4
-// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]]
// CHECK1-IRBUILDER: omp.body.continue38:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.inc39:
-// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]])
-// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
-// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1-IRBUILDER: omp.inner.for.end42:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1-IRBUILDER: omp.dispatch.inc:
@@ -1812,30 +1812,30 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1
// CHECK3-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1
// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
...
[truncated]
|
@llvm/pr-subscribers-clang Author: Alexey Bataev (alexey-bataev) ChangesReverts llvm/llvm-project#123867 to fix the test failures https://lab.llvm.org/buildbot/#/builders/144/builds/17521 Patch is 42.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126079.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 3542e939678cf4..8e694b95dc7e73 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2457,86 +2457,10 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
}
}
-// Check for the presence of an `OMPOrderedDirective`,
-// i.e., `ordered` in `#pragma omp ordered simd`.
-//
-// Consider the following source code:
-// ```
-// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
-// {
-// for (int r = 1; r < ARRAY_SIZE; ++r) {
-// for (int c = 1; c < ARRAY_SIZE; ++c) {
-// #pragma omp simd
-// for (int k = 2; k < ARRAY_SIZE; ++k) {
-// #pragma omp ordered simd
-// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
-// }
-// }
-// }
-// }
-// ```
-//
-// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
-// &D)`. By examining `D.dump()` we have the following AST containing
-// `OMPOrderedDirective`:
-//
-// ```
-// OMPSimdDirective 0x1c32950
-// `-CapturedStmt 0x1c32028
-// |-CapturedDecl 0x1c310e8
-// | |-ForStmt 0x1c31e30
-// | | |-DeclStmt 0x1c31298
-// | | | `-VarDecl 0x1c31208 used k 'int' cinit
-// | | | `-IntegerLiteral 0x1c31278 'int' 2
-// | | |-<<<NULL>>>
-// | | |-BinaryOperator 0x1c31308 'int' '<'
-// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
-// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
-// | | | `-IntegerLiteral 0x1c312d0 'int' 256
-// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
-// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
-// | | `-CompoundStmt 0x1c31e18
-// | | `-OMPOrderedDirective 0x1c31dd8
-// | | |-OMPSimdClause 0x1c31380
-// | | `-CapturedStmt 0x1c31cd0
-// ```
-//
-// Note the presence of `OMPOrderedDirective` above:
-// It's (transitively) nested in a `CapturedStmt` representing the pragma
-// annotated compound statement. Thus, we need to consider this nesting and
-// include checking the `getCapturedStmt` in this case.
-static bool hasOrderedDirective(const Stmt *S) {
- if (isa<OMPOrderedDirective>(S))
- return true;
-
- if (const auto *CS = dyn_cast<CapturedStmt>(S))
- return hasOrderedDirective(CS->getCapturedStmt());
-
- for (const Stmt *Child : S->children()) {
- if (Child && hasOrderedDirective(Child))
- return true;
- }
-
- return false;
-}
-
-static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
- LoopInfoStack &LoopStack) {
- // Check for the presence of an `OMPOrderedDirective`
- // i.e., `ordered` in `#pragma omp ordered simd`
- bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
- // If present then conservatively disable loop vectorization
- // analogously to how `emitSimdlenSafelenClause` does.
- if (HasOrderedDirective)
- LoopStack.setParallel(/*Enable=*/false);
-}
-
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
// Walk clauses and process safelen/lastprivate.
LoopStack.setParallel(/*Enable=*/true);
LoopStack.setVectorizeEnable();
- const Stmt *AssociatedStmt = D.getAssociatedStmt();
- applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
emitSimdlenSafelenClause(*this, D);
if (const auto *C = D.getSingleClause<OMPOrderClause>())
if (C->getKind() == OMPC_ORDER_concurrent)
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
index 5cd95f1927e5ce..67285cfaef34d5 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -572,30 +572,30 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1
// CHECK1-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1
// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -645,31 +645,31 @@ void foo_simd(int low, int up) {
// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]]
// CHECK1: omp.inner.for.cond29:
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
// CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
// CHECK1: omp.inner.for.body32:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1
// CHECK1-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]]
-// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-NEXT: store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4
-// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]]
// CHECK1: omp.body.continue37:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]]
// CHECK1: omp.inner.for.inc38:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1
-// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1: omp.inner.for.end40:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
@@ -1201,32 +1201,32 @@ void foo_simd(int low, int up) {
// CHECK1-IRBUILDER-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.cond:
-// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
// CHECK1-IRBUILDER-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
// CHECK1-IRBUILDER-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body:
-// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[MUL:%.*]] = mul i32 [[TMP10]], 1
// CHECK1-IRBUILDER-NEXT: [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body.ordered.after:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1-IRBUILDER: omp.body.continue:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.inc:
-// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK1-IRBUILDER-NEXT: [[ADD9:%.*]] = add i32 [[TMP12]], 1
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1-IRBUILDER: omp.inner.for.end:
// CHECK1-IRBUILDER-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-IRBUILDER-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -1278,34 +1278,34 @@ void foo_simd(int low, int up) {
// CHECK1-IRBUILDER-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.cond30:
-// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[ADD31:%.*]] = add i32 [[TMP28]], 1
// CHECK1-IRBUILDER-NEXT: [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]]
// CHECK1-IRBUILDER-NEXT: br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body33:
-// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[MUL34:%.*]] = mul i32 [[TMP30]], 1
// CHECK1-IRBUILDER-NEXT: [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]]
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4
-// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
// CHECK1-IRBUILDER-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]]
-// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4
-// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-IRBUILDER-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.body33.ordered.after:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_BODY_CONTINUE38:%.*]]
// CHECK1-IRBUILDER: omp.body.continue38:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_INC39:%.*]]
// CHECK1-IRBUILDER: omp.inner.for.inc39:
-// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[ADD40:%.*]] = add i32 [[TMP32]], 1
-// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT: store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
// CHECK1-IRBUILDER-NEXT: [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]])
-// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
-// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT: call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT: br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1-IRBUILDER: omp.inner.for.end42:
// CHECK1-IRBUILDER-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1-IRBUILDER: omp.dispatch.inc:
@@ -1812,30 +1812,30 @@ void foo_simd(int low, int up) {
// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1
// CHECK3-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1
// CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]])
+// CHECK3-NEXT: store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT: call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
// CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
...
[truncated]
|
…ing" (llvm#126079) Reverts llvm#123867 to fix the test failures https://lab.llvm.org/buildbot/#/builders/144/builds/17521
Reverts #123867 to fix the test failures https://lab.llvm.org/buildbot/#/builders/144/builds/17521