Skip to content

Commit db39bc2

Browse files
author
Matt P. Dziubinski
committed
[OpenMP][SIMD][FIX] Use conservative "omp simd ordered" lowering
A proposed fix for #95611 [OpenMP][SIMD] ordered has no effect in a loop SIMD region as of LLVM 18.1.0 Changes: - Implement new lowering behavior: Conservatively serialize "omp simd" loops that have `omp simd ordered` directive to prevent incorrect vectorization (which results in incorrect execution behavior of the miscompiled program). Implementation outline: - We start with the optimistic default initial value of `LoopStack.setParallel(/Enable=/true);` in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D)`. - We only disable the loop parallel memory access assumption with `if (HasOrderedDirective) LoopStack.setParallel(/Enable=/false);` using the `HasOrderedDirective` (which tests for the presence of an `OMPOrderedDirective`). - This results in no longer incorrectly vectorizing the loop when the `omp simd ordered` directive is present. Motivation: We'd like to prevent incorrect vectorization of the loops marked with the `#pragma omp ordered simd` directive which has previously resulted in miscompiled code. At the same time, we'd like the usage outside of the `#pragma omp ordered simd` context to remain unaffected: Note that in the test "clang/test/OpenMP/ordered_codegen.cpp" we only "lose" the `!llvm.access.group` metadata in `foo_simd` alone. This is a conservative, in that it's possible some of the loops would be possible to vectorize, but we prefer to avoid miscompilation of the loops that are currently illegal to vectorize. A concrete example follows: ```cpp // "test.c" #include <float.h> #include <math.h> #include <omp.h> #include <stdio.h> #include <stdlib.h> #include <time.h> int compare_float(float x1, float x2, float scalar) { const float diff = fabsf(x1 - x2); x1 = fabsf(x1); x2 = fabsf(x2); const float l = (x2 > x1) ? x2 : x1; if (diff <= l * scalar * FLT_EPSILON) return 1; else return 0; } #define ARRAY_SIZE 256 __attribute__((noinline)) void initialization_loop( float X[ARRAY_SIZE][ARRAY_SIZE], float Y[ARRAY_SIZE][ARRAY_SIZE]) { const float max = 1000.0; srand(time(NULL)); for (int r = 0; r < ARRAY_SIZE; r++) { for (int c = 0; c < ARRAY_SIZE; c++) { X[r][c] = ((float)rand() / (float)(RAND_MAX)) * max; Y[r][c] = X[r][c]; } } } __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE]) { for (int r = 1; r < ARRAY_SIZE; ++r) { for (int c = 1; c < ARRAY_SIZE; ++c) { #pragma omp simd for (int k = 2; k < ARRAY_SIZE; ++k) { #pragma omp ordered simd X[r][k] = X[r][k - 2] + sinf((float)(r / c)); } } } } __attribute__((noinline)) int comparison_loop(float X[ARRAY_SIZE][ARRAY_SIZE], float Y[ARRAY_SIZE][ARRAY_SIZE]) { int totalErrors_simd = 0; const float scalar = 1.0; for (int r = 1; r < ARRAY_SIZE; ++r) { for (int c = 1; c < ARRAY_SIZE; ++c) { for (int k = 2; k < ARRAY_SIZE; ++k) { Y[r][k] = Y[r][k - 2] + sinf((float)(r / c)); } } // check row for simd update for (int k = 0; k < ARRAY_SIZE; ++k) { if (!compare_float(X[r][k], Y[r][k], scalar)) { ++totalErrors_simd; } } } return totalErrors_simd; } int main(void) { float X[ARRAY_SIZE][ARRAY_SIZE]; float Y[ARRAY_SIZE][ARRAY_SIZE]; initialization_loop(X, Y); omp_simd_loop(X); const int totalErrors_simd = comparison_loop(X, Y); if (totalErrors_simd) { fprintf(stdout, "totalErrors_simd: %d \n", totalErrors_simd); fprintf(stdout, "%s : %d - FAIL: error in ordered simd computation.\n", __FILE__, __LINE__); } else { fprintf(stdout, "Success!\n"); } return totalErrors_simd; } ``` Before: ``` $ clang -fopenmp-simd -O3 -ffast-math -lm test.c -o test && ./test totalErrors_simd: 15408 test.c : 76 - FAIL: error in ordered simd computation. ``` clang 19.1.0: https://godbolt.org/z/6EvhxqEhe After: ``` $ clang -fopenmp-simd -O3 -ffast-math test.c -o test && ./test Success! ```
1 parent 07d4965 commit db39bc2

File tree

3 files changed

+275
-116
lines changed

3 files changed

+275
-116
lines changed

clang/lib/CodeGen/CGStmtOpenMP.cpp

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2457,10 +2457,86 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
24572457
}
24582458
}
24592459

2460+
// Check for the presence of an `OMPOrderedDirective`,
2461+
// i.e., `ordered` in `#pragma omp ordered simd`.
2462+
//
2463+
// Consider the following source code:
2464+
// ```
2465+
// __attribute__((noinline)) void omp_simd_loop(float X[ARRAY_SIZE][ARRAY_SIZE])
2466+
// {
2467+
// for (int r = 1; r < ARRAY_SIZE; ++r) {
2468+
// for (int c = 1; c < ARRAY_SIZE; ++c) {
2469+
// #pragma omp simd
2470+
// for (int k = 2; k < ARRAY_SIZE; ++k) {
2471+
// #pragma omp ordered simd
2472+
// X[r][k] = X[r][k - 2] + sinf((float)(r / c));
2473+
// }
2474+
// }
2475+
// }
2476+
// }
2477+
// ```
2478+
//
2479+
// Suppose we are in `CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective
2480+
// &D)`. By examining `D.dump()` we have the following AST containing
2481+
// `OMPOrderedDirective`:
2482+
//
2483+
// ```
2484+
// OMPSimdDirective 0x1c32950
2485+
// `-CapturedStmt 0x1c32028
2486+
// |-CapturedDecl 0x1c310e8
2487+
// | |-ForStmt 0x1c31e30
2488+
// | | |-DeclStmt 0x1c31298
2489+
// | | | `-VarDecl 0x1c31208 used k 'int' cinit
2490+
// | | | `-IntegerLiteral 0x1c31278 'int' 2
2491+
// | | |-<<<NULL>>>
2492+
// | | |-BinaryOperator 0x1c31308 'int' '<'
2493+
// | | | |-ImplicitCastExpr 0x1c312f0 'int' <LValueToRValue>
2494+
// | | | | `-DeclRefExpr 0x1c312b0 'int' lvalue Var 0x1c31208 'k' 'int'
2495+
// | | | `-IntegerLiteral 0x1c312d0 'int' 256
2496+
// | | |-UnaryOperator 0x1c31348 'int' prefix '++'
2497+
// | | | `-DeclRefExpr 0x1c31328 'int' lvalue Var 0x1c31208 'k' 'int'
2498+
// | | `-CompoundStmt 0x1c31e18
2499+
// | | `-OMPOrderedDirective 0x1c31dd8
2500+
// | | |-OMPSimdClause 0x1c31380
2501+
// | | `-CapturedStmt 0x1c31cd0
2502+
// ```
2503+
//
2504+
// Note the presence of `OMPOrderedDirective` above:
2505+
// It's (transitively) nested in a `CapturedStmt` representing the pragma
2506+
// annotated compound statement. Thus, we need to consider this nesting and
2507+
// include checking the `getCapturedStmt` in this case.
2508+
static bool hasOrderedDirective(const Stmt *S) {
2509+
if (isa<OMPOrderedDirective>(S))
2510+
return true;
2511+
2512+
if (const auto *CS = dyn_cast<CapturedStmt>(S))
2513+
return hasOrderedDirective(CS->getCapturedStmt());
2514+
2515+
for (const Stmt *Child : S->children()) {
2516+
if (Child && hasOrderedDirective(Child))
2517+
return true;
2518+
}
2519+
2520+
return false;
2521+
}
2522+
2523+
static void applyConservativeSimdOrderedDirective(const Stmt &AssociatedStmt,
2524+
LoopInfoStack &LoopStack) {
2525+
// Check for the presence of an `OMPOrderedDirective`
2526+
// i.e., `ordered` in `#pragma omp ordered simd`
2527+
bool HasOrderedDirective = hasOrderedDirective(&AssociatedStmt);
2528+
// If present then conservatively disable loop vectorization
2529+
// analogously to how `emitSimdlenSafelenClause` does.
2530+
if (HasOrderedDirective)
2531+
LoopStack.setParallel(/*Enable=*/false);
2532+
}
2533+
24602534
void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
24612535
// Walk clauses and process safelen/lastprivate.
24622536
LoopStack.setParallel(/*Enable=*/true);
24632537
LoopStack.setVectorizeEnable();
2538+
const Stmt *AssociatedStmt = D.getAssociatedStmt();
2539+
applyConservativeSimdOrderedDirective(*AssociatedStmt, LoopStack);
24642540
emitSimdlenSafelenClause(*this, D);
24652541
if (const auto *C = D.getSingleClause<OMPOrderClause>())
24662542
if (C->getKind() == OMPC_ORDER_concurrent)

0 commit comments

Comments
 (0)