Skip to content

Revert "[OpenMP][SIMD][FIX] Use conservative "omp simd ordered" lowering" #126079

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 6, 2025

Conversation

alexey-bataev
Copy link
Member

@llvmbot llvmbot added clang Clang issues not falling into any other category clang:codegen IR generation bugs: mangling, exceptions, etc. clang:openmp OpenMP related changes to Clang labels Feb 6, 2025
@llvmbot
Copy link
Member

llvmbot commented Feb 6, 2025

@llvm/pr-subscribers-clang-codegen

Author: Alexey Bataev (alexey-bataev)

Changes

Reverts llvm/llvm-project#123867 to fix the test failures https://lab.llvm.org/buildbot/#/builders/144/builds/17521


Patch is 42.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126079.diff

3 Files Affected:

  • (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (-76)
  • (modified) clang/test/OpenMP/ordered_codegen.cpp (+116-116)
  • (removed) clang/test/OpenMP/simd_conservative_ordered.c (-110)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 3542e939678cf4..8e694b95dc7e73 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2457,86 +2457,10 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
   }
 }
 
-// Check for the presence of an `OMPOrderedDirective`,
-// i.e., `ordered` in `#pragma omp ordered simd`.
-//
-// Consider the following source code:
-// ```
-// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
-// {
-//     for (int r = 1; r < ARRAY_SIZE; ++r) {
-//         for (int c = 1; c < ARRAY_SIZE; ++c) {
-// #pragma omp simd
-//             for (int k = 2; k < ARRAY_SIZE; ++k) {
-// #pragma omp ordered simd
-//                 X[r][k] = X[r][k - 2] + sinf((float)(r / c));
-//             }
-//         }
-//     }
-// }
-// ```
-//
-// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
-// &D)`. By examining `D.dump()` we have the following AST containing
-// `OMPOrderedDirective`:
-//
-// ```
-// OMPSimdDirective 0x1c32950
-// `-CapturedStmt 0x1c32028
-//   |-CapturedDecl 0x1c310e8
-//   | |-ForStmt 0x1c31e30
-//   | | |-DeclStmt 0x1c31298
-//   | | | `-VarDecl 0x1c31208  used k 'int' cinit
-//   | | |   `-IntegerLiteral 0x1c31278 'int' 2
-//   | | |-<<<NULL>>>
-//   | | |-BinaryOperator 0x1c31308 'int' '<'
-//   | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
-//   | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
-//   | | | `-IntegerLiteral 0x1c312d0 'int' 256
-//   | | |-UnaryOperator 0x1c31348 'int' prefix '++'
-//   | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
-//   | | `-CompoundStmt 0x1c31e18
-//   | |   `-OMPOrderedDirective 0x1c31dd8
-//   | |     |-OMPSimdClause 0x1c31380
-//   | |     `-CapturedStmt 0x1c31cd0
-// ```
-//
-// Note the presence of `OMPOrderedDirective` above:
-// It's (transitively) nested in a `CapturedStmt` representing the pragma
-// annotated compound statement. Thus, we need to consider this nesting and
-// include checking the `getCapturedStmt` in this case.
-static bool hasOrderedDirective(const Stmt *S) {
-  if (isa<OMPOrderedDirective>(S))
-    return true;
-
-  if (const auto *CS = dyn_cast<CapturedStmt>(S))
-    return hasOrderedDirective(CS->getCapturedStmt());
-
-  for (const Stmt *Child : S->children()) {
-    if (Child && hasOrderedDirective(Child))
-      return true;
-  }
-
-  return false;
-}
-
-static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
-                                                  LoopInfoStack &LoopStack) {
-  // Check for the presence of an `OMPOrderedDirective`
-  // i.e., `ordered` in `#pragma omp ordered simd`
-  bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
-  // If present then conservatively disable loop vectorization
-  // analogously to how `emitSimdlenSafelenClause` does.
-  if (HasOrderedDirective)
-    LoopStack.setParallel(/*Enable=*/false);
-}
-
 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
   // Walk clauses and process safelen/lastprivate.
   LoopStack.setParallel(/*Enable=*/true);
   LoopStack.setVectorizeEnable();
-  const Stmt *AssociatedStmt = D.getAssociatedStmt();
-  applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
   emitSimdlenSafelenClause(*this, D);
   if (const auto *C = D.getSingleClause<OMPOrderClause>())
     if (C->getKind() == OMPC_ORDER_concurrent)
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
index 5cd95f1927e5ce..67285cfaef34d5 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -572,30 +572,30 @@ void foo_simd(int low, int up) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1:       omp.inner.for.cond:
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[ADD6:%.*]] = add i32 [[TMP9]], 1
 // CHECK1-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
 // CHECK1-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1:       omp.inner.for.body:
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP11]], 1
 // CHECK1-NEXT:    [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT:    call void @__captured_stmt(ptr [[I5]])
+// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1:       omp.inner.for.inc:
-// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -645,31 +645,31 @@ void foo_simd(int low, int up) {
 // CHECK1-NEXT:    store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
 // CHECK1:       omp.inner.for.cond29:
-// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK1-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK1-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
 // CHECK1:       omp.inner.for.body32:
-// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
 // CHECK1-NEXT:    [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]]
-// CHECK1-NEXT:    store i32 [[ADD34]], ptr [[I28]], align 4
-// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-NEXT:    store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]]
-// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4
-// CHECK1-NEXT:    call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
 // CHECK1:       omp.body.continue37:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
 // CHECK1:       omp.inner.for.inc38:
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
-// CHECK1-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
 // CHECK1:       omp.inner.for.end40:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1:       omp.dispatch.inc:
@@ -1201,32 +1201,32 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store i32 0, ptr [[DOTOMP_IV]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.cond:
-// CHECK1-IRBUILDER-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD6:%.*]] = add i32 [[TMP8]], 1
 // CHECK1-IRBUILDER-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
 // CHECK1-IRBUILDER-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body:
-// CHECK1-IRBUILDER-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[MUL:%.*]] = mul i32 [[TMP10]], 1
 // CHECK1-IRBUILDER-NEXT:    [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
 // CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt(ptr [[I5]])
+// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.inc:
-// CHECK1-IRBUILDER-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD9:%.*]] = add i32 [[TMP12]], 1
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // CHECK1-IRBUILDER:       omp.inner.for.end:
 // CHECK1-IRBUILDER-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -1278,34 +1278,34 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.cond30:
-// CHECK1-IRBUILDER-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD31:%.*]] = add i32 [[TMP28]], 1
 // CHECK1-IRBUILDER-NEXT:    [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]]
 // CHECK1-IRBUILDER-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body33:
-// CHECK1-IRBUILDER-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[MUL34:%.*]] = mul i32 [[TMP30]], 1
 // CHECK1-IRBUILDER-NEXT:    [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]]
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD35]], ptr [[I28]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP31:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
 // CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]]
-// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4
-// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body33.ordered.after:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE38:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue38:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC39:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.inc39:
-// CHECK1-IRBUILDER-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD40:%.*]] = add i32 [[TMP32]], 1
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]])
-// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
 // CHECK1-IRBUILDER:       omp.inner.for.end42:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1-IRBUILDER:       omp.dispatch.inc:
@@ -1812,30 +1812,30 @@ void foo_simd(int low, int up) {
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK3:       omp.inner.for.cond:
-// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[ADD6:%.*]] = add i32 [[TMP9]], 1
 // CHECK3-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
 // CHECK3-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3:       omp.inner.for.body:
-// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[MUL:%.*]] = mul i32 [[TMP11]], 1
 // CHECK3-NEXT:    [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK3-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK3-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT:    call void @__captured_stmt(ptr [[I5]])
+// CHECK3-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT:    call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3:       omp.body.continue:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3:       omp.inner.for.inc:
-// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK3-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Feb 6, 2025

@llvm/pr-subscribers-clang

Author: Alexey Bataev (alexey-bataev)

Changes

Reverts llvm/llvm-project#123867 to fix the test failures https://lab.llvm.org/buildbot/#/builders/144/builds/17521


Patch is 42.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/126079.diff

3 Files Affected:

  • (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (-76)
  • (modified) clang/test/OpenMP/ordered_codegen.cpp (+116-116)
  • (removed) clang/test/OpenMP/simd_conservative_ordered.c (-110)
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 3542e939678cf4..8e694b95dc7e73 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2457,86 +2457,10 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
   }
 }
 
-// Check for the presence of an `OMPOrderedDirective`,
-// i.e., `ordered` in `#pragma omp ordered simd`.
-//
-// Consider the following source code:
-// ```
-// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
-// {
-//     for (int r = 1; r < ARRAY_SIZE; ++r) {
-//         for (int c = 1; c < ARRAY_SIZE; ++c) {
-// #pragma omp simd
-//             for (int k = 2; k < ARRAY_SIZE; ++k) {
-// #pragma omp ordered simd
-//                 X[r][k] = X[r][k - 2] + sinf((float)(r / c));
-//             }
-//         }
-//     }
-// }
-// ```
-//
-// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
-// &D)`. By examining `D.dump()` we have the following AST containing
-// `OMPOrderedDirective`:
-//
-// ```
-// OMPSimdDirective 0x1c32950
-// `-CapturedStmt 0x1c32028
-//   |-CapturedDecl 0x1c310e8
-//   | |-ForStmt 0x1c31e30
-//   | | |-DeclStmt 0x1c31298
-//   | | | `-VarDecl 0x1c31208  used k 'int' cinit
-//   | | |   `-IntegerLiteral 0x1c31278 'int' 2
-//   | | |-<<<NULL>>>
-//   | | |-BinaryOperator 0x1c31308 'int' '<'
-//   | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
-//   | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
-//   | | | `-IntegerLiteral 0x1c312d0 'int' 256
-//   | | |-UnaryOperator 0x1c31348 'int' prefix '++'
-//   | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
-//   | | `-CompoundStmt 0x1c31e18
-//   | |   `-OMPOrderedDirective 0x1c31dd8
-//   | |     |-OMPSimdClause 0x1c31380
-//   | |     `-CapturedStmt 0x1c31cd0
-// ```
-//
-// Note the presence of `OMPOrderedDirective` above:
-// It's (transitively) nested in a `CapturedStmt` representing the pragma
-// annotated compound statement. Thus, we need to consider this nesting and
-// include checking the `getCapturedStmt` in this case.
-static bool hasOrderedDirective(const Stmt *S) {
-  if (isa<OMPOrderedDirective>(S))
-    return true;
-
-  if (const auto *CS = dyn_cast<CapturedStmt>(S))
-    return hasOrderedDirective(CS->getCapturedStmt());
-
-  for (const Stmt *Child : S->children()) {
-    if (Child && hasOrderedDirective(Child))
-      return true;
-  }
-
-  return false;
-}
-
-static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
-                                                  LoopInfoStack &LoopStack) {
-  // Check for the presence of an `OMPOrderedDirective`
-  // i.e., `ordered` in `#pragma omp ordered simd`
-  bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
-  // If present then conservatively disable loop vectorization
-  // analogously to how `emitSimdlenSafelenClause` does.
-  if (HasOrderedDirective)
-    LoopStack.setParallel(/*Enable=*/false);
-}
-
 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
   // Walk clauses and process safelen/lastprivate.
   LoopStack.setParallel(/*Enable=*/true);
   LoopStack.setVectorizeEnable();
-  const Stmt *AssociatedStmt = D.getAssociatedStmt();
-  applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
   emitSimdlenSafelenClause(*this, D);
   if (const auto *C = D.getSingleClause<OMPOrderClause>())
     if (C->getKind() == OMPC_ORDER_concurrent)
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
index 5cd95f1927e5ce..67285cfaef34d5 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -572,30 +572,30 @@ void foo_simd(int low, int up) {
 // CHECK1-NEXT:    store i32 0, ptr [[DOTOMP_IV]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1:       omp.inner.for.cond:
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[ADD6:%.*]] = add i32 [[TMP9]], 1
 // CHECK1-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
 // CHECK1-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1:       omp.inner.for.body:
-// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[MUL:%.*]] = mul i32 [[TMP11]], 1
 // CHECK1-NEXT:    [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT:    call void @__captured_stmt(ptr [[I5]])
+// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1:       omp.body.continue:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1:       omp.inner.for.inc:
-// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-NEXT:    [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // CHECK1:       omp.inner.for.end:
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -645,31 +645,31 @@ void foo_simd(int low, int up) {
 // CHECK1-NEXT:    store i32 [[TMP27]], ptr [[DOTOMP_IV16]], align 4
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29:%.*]]
 // CHECK1:       omp.inner.for.cond29:
-// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[ADD30:%.*]] = add i32 [[TMP29]], 1
 // CHECK1-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]]
 // CHECK1-NEXT:    br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]]
 // CHECK1:       omp.inner.for.body32:
-// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[MUL33:%.*]] = mul i32 [[TMP31]], 1
 // CHECK1-NEXT:    [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]]
-// CHECK1-NEXT:    store i32 [[ADD34]], ptr [[I28]], align 4
-// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-NEXT:    store i32 [[ADD34]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    [[TMP32:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64
 // CHECK1-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM35]]
-// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4
-// CHECK1-NEXT:    call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX36]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    br label [[OMP_BODY_CONTINUE37:%.*]]
 // CHECK1:       omp.body.continue37:
 // CHECK1-NEXT:    br label [[OMP_INNER_FOR_INC38:%.*]]
 // CHECK1:       omp.inner.for.inc38:
-// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-NEXT:    [[ADD39:%.*]] = add i32 [[TMP33]], 1
-// CHECK1-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4
-// CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]])
-// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT:    store i32 [[ADD39]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-NEXT:    br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]]
 // CHECK1:       omp.inner.for.end40:
 // CHECK1-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1:       omp.dispatch.inc:
@@ -1201,32 +1201,32 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store i32 0, ptr [[DOTOMP_IV]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.cond:
-// CHECK1-IRBUILDER-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD6:%.*]] = add i32 [[TMP8]], 1
 // CHECK1-IRBUILDER-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
 // CHECK1-IRBUILDER-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body:
-// CHECK1-IRBUILDER-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[MUL:%.*]] = mul i32 [[TMP10]], 1
 // CHECK1-IRBUILDER-NEXT:    [[ADD8:%.*]] = add i32 [[TMP9]], [[MUL]]
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
 // CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt(ptr [[I5]])
+// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body.ordered.after:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.inc:
-// CHECK1-IRBUILDER-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD9:%.*]] = add i32 [[TMP12]], 1
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // CHECK1-IRBUILDER:       omp.inner.for.end:
 // CHECK1-IRBUILDER-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
 // CHECK1-IRBUILDER-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
@@ -1278,34 +1278,34 @@ void foo_simd(int low, int up) {
 // CHECK1-IRBUILDER-NEXT:    store i32 [[TMP26]], ptr [[DOTOMP_IV16]], align 4
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.cond30:
-// CHECK1-IRBUILDER-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD31:%.*]] = add i32 [[TMP28]], 1
 // CHECK1-IRBUILDER-NEXT:    [[CMP32:%.*]] = icmp ult i32 [[TMP27]], [[ADD31]]
 // CHECK1-IRBUILDER-NEXT:    br i1 [[CMP32]], label [[OMP_INNER_FOR_BODY33:%.*]], label [[OMP_INNER_FOR_END42:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body33:
-// CHECK1-IRBUILDER-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[MUL34:%.*]] = mul i32 [[TMP30]], 1
 // CHECK1-IRBUILDER-NEXT:    [[ADD35:%.*]] = add i32 [[TMP29]], [[MUL34]]
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD35]], ptr [[I28]], align 4
-// CHECK1-IRBUILDER-NEXT:    [[TMP31:%.*]] = load i32, ptr [[I28]], align 4
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD35]], ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    [[TMP31:%.*]] = load i32, ptr [[I28]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[IDXPROM36:%.*]] = sext i32 [[TMP31]] to i64
 // CHECK1-IRBUILDER-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM36]]
-// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4
-// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt.1(ptr [[I28]])
+// CHECK1-IRBUILDER-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX37]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    call void @__captured_stmt.1(ptr [[I28]]), !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_BODY33_ORDERED_AFTER:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.body33.ordered.after:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_BODY_CONTINUE38:%.*]]
 // CHECK1-IRBUILDER:       omp.body.continue38:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_INC39:%.*]]
 // CHECK1-IRBUILDER:       omp.inner.for.inc39:
-// CHECK1-IRBUILDER-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[ADD40:%.*]] = add i32 [[TMP32]], 1
-// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4
+// CHECK1-IRBUILDER-NEXT:    store i32 [[ADD40]], ptr [[DOTOMP_IV16]], align 4, !llvm.access.group [[ACC_GRP7]]
 // CHECK1-IRBUILDER-NEXT:    [[OMP_GLOBAL_THREAD_NUM41:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB12]])
-// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]])
-// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-IRBUILDER-NEXT:    call void @__kmpc_dispatch_fini_4u(ptr @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM41]]), !llvm.access.group [[ACC_GRP7]]
+// CHECK1-IRBUILDER-NEXT:    br label [[OMP_INNER_FOR_COND30]], !llvm.loop [[LOOP8:![0-9]+]]
 // CHECK1-IRBUILDER:       omp.inner.for.end42:
 // CHECK1-IRBUILDER-NEXT:    br label [[OMP_DISPATCH_INC:%.*]]
 // CHECK1-IRBUILDER:       omp.dispatch.inc:
@@ -1812,30 +1812,30 @@ void foo_simd(int low, int up) {
 // CHECK3-NEXT:    store i32 0, ptr [[DOTOMP_IV]], align 4
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // CHECK3:       omp.inner.for.cond:
-// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[ADD6:%.*]] = add i32 [[TMP9]], 1
 // CHECK3-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]]
 // CHECK3-NEXT:    br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // CHECK3:       omp.inner.for.body:
-// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[MUL:%.*]] = mul i32 [[TMP11]], 1
 // CHECK3-NEXT:    [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]]
-// CHECK3-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4
-// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK3-NEXT:    store i32 [[ADD8]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT:    [[TMP12:%.*]] = load i32, ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64
 // CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr @f, i64 0, i64 [[IDXPROM]]
-// CHECK3-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT:    call void @__captured_stmt(ptr [[I5]])
+// CHECK3-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP3]]
+// CHECK3-NEXT:    call void @__captured_stmt(ptr [[I5]]), !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // CHECK3:       omp.body.continue:
 // CHECK3-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // CHECK3:       omp.inner.for.inc:
-// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP3]]
 // CHECK3-NEXT:    [[ADD9:%.*]] = add i32 [[TMP13]], 1
-// CHECK3-NEXT:    store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
...
[truncated]

@alexey-bataev alexey-bataev merged commit 3041dd5 into main Feb 6, 2025
9 of 10 checks passed
@alexey-bataev alexey-bataev deleted the revert-123867-simd_conservative_ordered branch February 6, 2025 15:04
Icohedron pushed a commit to Icohedron/llvm-project that referenced this pull request Feb 11, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang:codegen IR generation bugs: mangling, exceptions, etc. clang:openmp OpenMP related changes to Clang clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants