[InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm #143020

thevar1able · 2025-06-05T18:55:07Z

Please suggest a better fix if I'm doing something wrong, I have no understanding of InstCombine optimization.
Missing optimization is a blocker for switching ClickHouse build to clang-20.

The issue is that the following snippet:

declare i8 @llvm.umin.i8(i8, i8)

define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) {
; CHECK-LABEL: @masked_min_fold_bug(
; CHECK:       %cond = icmp eq i8 %mask, 0
; CHECK:       %masked_val = select i1 %cond, i8 %val, i8 255
; CHECK:       call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
;
  %cond = icmp eq i8 %mask, 0
  %masked_val = select i1 %cond, i8 %val, i8 255
  %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
  ret i8 %res
}

is being optimized to the following code, which can not be vectorized later.

declare i8 @llvm.umin.i8(i8, i8) #0

define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) {
  %cond = icmp eq i8 %mask, 0
  %1 = call i8 @llvm.umin.i8(i8 %acc, i8 %val)
  %res = select i1 %cond, i8 %1, i8 %acc
  ret i8 %res
}

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

Expected:

declare i8 @llvm.umin.i8(i8, i8) #0

define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) {
  %cond = icmp eq i8 %mask, 0
  %masked_val = select i1 %cond, i8 %val, i8 -1
  %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
  ret i8 %res
}

attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

https://godbolt.org/z/cYMheKE5r

llvmbot · 2025-06-05T19:00:41Z

@llvm/pr-subscribers-llvm-transforms

Author: Konstantin Bogdanov (thevar1able)

Changes

Fixes #139050.

Please suggest a better fix if I'm doing something wrong, I have no understanding of InstCombine optimization.
Missing optimization is a blocker for switching ClickHouse build to clang-20.

The issue is that the following snippet:

declare i8 @<!-- -->llvm.umin.i8(i8, i8)

define i8 @<!-- -->masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) {
; CHECK-LABEL: @<!-- -->masked_min_fold_bug(
; CHECK:       %cond = icmp eq i8 %mask, 0
; CHECK:       %masked_val = select i1 %cond, i8 %val, i8 255
; CHECK:       call i8 @<!-- -->llvm.umin.i8(i8 %acc, i8 %masked_val)
;
  %cond = icmp eq i8 %mask, 0
  %masked_val = select i1 %cond, i8 %val, i8 255
  %res = call i8 @<!-- -->llvm.umin.i8(i8 %acc, i8 %masked_val)
  ret i8 %res
}

is being optimized to the following code, which can not be vectorized later.

declare i8 @<!-- -->llvm.umin.i8(i8, i8) #<!-- -->0

define i8 @<!-- -->masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) {
  %cond = icmp eq i8 %mask, 0
  %1 = call i8 @<!-- -->llvm.umin.i8(i8 %acc, i8 %val)
  %res = select i1 %cond, i8 %1, i8 %acc
  ret i8 %res
}

attributes #<!-- -->0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

Expected:

declare i8 @<!-- -->llvm.umin.i8(i8, i8) #<!-- -->0

define i8 @<!-- -->masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) {
  %cond = icmp eq i8 %mask, 0
  %masked_val = select i1 %cond, i8 %val, i8 -1
  %res = call i8 @<!-- -->llvm.umin.i8(i8 %acc, i8 %masked_val)
  ret i8 %res
}

attributes #<!-- -->0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

https://godbolt.org/z/cYMheKE5r

Full diff: https://github.com/llvm/llvm-project/pull/143020.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+19)
(modified) llvm/test/Transforms/InstCombine/select.ll (+15)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index cfb4af391b540..930819d24393a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1654,6 +1654,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   if (Value *FreedOp = getFreedOperand(&CI, &TLI))
     return visitFree(CI, FreedOp);
 
+  if (Function *F = CI.getCalledFunction()) {
+    if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) {
+      for (Value *Arg : CI.args()) {
+        auto *SI = dyn_cast<SelectInst>(Arg);
+        if (!SI)
+          continue;
+
+        auto *TrueC = dyn_cast<Constant>(SI->getTrueValue());
+        auto *FalseC = dyn_cast<Constant>(SI->getFalseValue());
+
+        // Block only if the select is masking, e.g. select(cond, val, -1)
+        if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) {
+          LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n");
+          return nullptr;
+        }
+      }
+    }
+  }
+
   // If the caller function (i.e. us, the function that contains this CallInst)
   // is nounwind, mark the call as nounwind, even if the callee isn't.
   if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index e16f6ad2cfc9b..09cb84cde07ca 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -5047,3 +5047,18 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt
   %sel = select i1 %cond, <2 x ptr> %y, <2 x ptr> %freeze
   ret <2 x ptr> %sel
 }
+
+declare i8 @llvm.umin.i8(i8, i8)
+
+define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) {
+; CHECK-LABEL: @no_fold_masked_min(
+; CHECK-NEXT:  [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0
+; CHECK-NEXT:  [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1
+; CHECK-NEXT:  [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]])
+; CHECK-NEXT:  ret i8 [[RES]]
+;
+  %cond = icmp eq i8 %mask, 0
+  %masked_val = select i1 %cond, i8 %val, i8 -1
+  %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val)
+  ret i8 %res
+}

nikic

Why does one vectorize and not the other?

github-actions · 2025-06-05T19:01:54Z

✅ With the latest revision this PR passed the C/C++ code formatter.

thevar1able · 2025-06-05T19:06:53Z

@nikic

remark: loop not vectorized: value that could not be identified as reduction is used outside the loop [-Rpass-analysis=loop-vectorize]
https://clang.godbolt.org/z/sqhYnrsYb

nikic · 2025-06-05T19:39:30Z

I see. The issue has no relation to the specific form of the select here, the problem is that we are breaking up a reduction pattern.

nikic · 2025-06-05T19:44:37Z

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

+        }
+      }
+    }
+  }


The fold you are actually interested in is located at

llvm-project/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines 3870 to 3879 in a19889c

// Try to fold intrinsic into select operands. This is legal if:

// * The intrinsic is speculatable.

// * The select condition is not a vector, or the intrinsic does not

// perform cross-lane operations.

if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&

isNotCrossLaneOperation(II))

for (Value *Op : II->args())

if (auto *Sel = dyn_cast<SelectInst>(Op))

if (Instruction *R = FoldOpIntoSelect(*II, Sel))

return R;

If you want to add extra checks, they should be added there or inside FoldOpIntoSelect.

Though I'm not entirely sure precisely what we should be restricting.

Maybe it'd be better to revert #116073?

Maybe it'd be better to revert #116073?

It may take more time to evaluate performance regression caused by the revert...

I'd like to disable this fold when the binary intrinsic is a reduction. We have a similar helper matchSimpleRecurrence in ValueTracking.

I'd like to disable this fold when the binary intrinsic is a reduction. We have a similar helper matchSimpleRecurrence in ValueTracking.

I agree that testing specifically for a recurrence would be best.

I think a simple check would look something like this (untested):

if (isa<MinMaxIntrinsic>(&Op)) for (Value *IntrinOp : Op.operands()) if (auto *PN = dyn_cast<PHINode>(IntrinOp)) for (Value *PhiOp = PN->operands()) if (PhiOp == &Op) return nullptr;

nikic · 2025-06-06T15:54:10Z

I've landed a phase ordering test in cef5a31. Please rebase & regenerate when updating this PR.

thevar1able · 2025-06-11T19:11:06Z

@nikic The test you added now fails, what's the next step?

nikic · 2025-06-11T20:06:34Z

@nikic The test you added now fails, what's the next step?

Regenerate it using update_test_checks.py and commit the result.

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

llvm/test/Transforms/InstCombine/select.ll

nikic · 2025-06-12T12:56:47Z

Why is this reverted back to the old code again?

thevar1able · 2025-06-12T13:04:19Z

@nikic I wasn't able to make your snippet work, sorry.

nikic · 2025-06-12T13:08:41Z

@thevar1able Doesn't exactly the code you had work? I tried it and the test was vectorized: https://gist.github.com/nikic/4cf44aac838daa41be953c8a7d06920c

thevar1able · 2025-06-12T13:10:13Z

@nikic No, it did not. I didn't realize at first that update_test_checks.py was picking up my opt-19 from system. Later ninja -C build check-llvm had failed assertions on finding vector body section.

thevar1able · 2025-06-13T10:49:57Z

@nikic Sorry, I was wrong.

dtcxzyw

LG

nikic

LGTM

thevar1able · 2025-06-13T21:03:41Z

Is it ready for merge?

thevar1able · 2025-06-14T16:55:19Z

Thank you!

I see 20.1.7 was released just a couple hours before merge; is 20.1.8 expected, or should I expect this to land in 21?

…es in false arm (llvm#143020) Fixes llvm#139050. This patch adds a check to avoid folding min/max reduction into select, which may block loop vectorization. The issue is that the following snippet: ``` declare i8 @llvm.umin.i8(i8, i8) define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { ; CHECK-LABEL: @masked_min_fold_bug( ; CHECK: %cond = icmp eq i8 %mask, 0 ; CHECK: %masked_val = select i1 %cond, i8 %val, i8 255 ; CHECK: call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ; %cond = icmp eq i8 %mask, 0 %masked_val = select i1 %cond, i8 %val, i8 255 %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ret i8 %res } ``` is being optimized to the following code, which can not be vectorized later. ``` declare i8 @llvm.umin.i8(i8, i8) #0 define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { %cond = icmp eq i8 %mask, 0 %1 = call i8 @llvm.umin.i8(i8 %acc, i8 %val) %res = select i1 %cond, i8 %1, i8 %acc ret i8 %res } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ``` Expected: ``` declare i8 @llvm.umin.i8(i8, i8) #0 define i8 @masked_min_fold_bug(i8 %acc, i8 %val, i8 %mask) { %cond = icmp eq i8 %mask, 0 %masked_val = select i1 %cond, i8 %val, i8 -1 %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) ret i8 %res } attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ``` https://godbolt.org/z/cYMheKE5r

thevar1able requested a review from nikic as a code owner June 5, 2025 18:55

thevar1able changed the title ~~[InstCombine] Avoid folding select(umin(X, Y), X) with non-constant mask~~ [InstCombine] Avoid folding select(umin(X, Y), X) with non-constant mask Jun 5, 2025

llvmbot added llvm:instcombine llvm:transforms labels Jun 5, 2025

nikic reviewed Jun 5, 2025

View reviewed changes

thevar1able changed the title ~~[InstCombine] Avoid folding select(umin(X, Y), X) with non-constant mask~~ [InstCombine] Avoid folding select(umin(X, Y), X) with min/max values in false arm Jun 5, 2025

thevar1able added 4 commits June 7, 2025 06:56

Prototype fix

084efd9

Fix formatting

1b3e662

Move checks to FoldOpIntoSelect

d87b193

Fix formatting

69eaf81

thevar1able force-pushed the instrcombine-avoid-folding-select-icmpinstr-nonconstant-arm branch from bfeceb2 to 69eaf81 Compare June 7, 2025 05:03

Apply suggested fix

8240c6c

nikic mentioned this pull request Jun 11, 2025

Task submission dtcxzyw/llvm-opt-benchmark#1312

Open

dtcxzyw mentioned this pull request Jun 11, 2025

pre-commit: PR143020 dtcxzyw/llvm-opt-benchmark#2426

Open

nikic reviewed Jun 11, 2025

View reviewed changes

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Show resolved Hide resolved

llvm/test/Transforms/InstCombine/select.ll Outdated Show resolved Hide resolved

thevar1able added 7 commits June 11, 2025 22:29

Update expected output

8a7663b

Add a comment

7929ed1

Add a test

22ef252

Update tests assertions

d424540

Revert suggested fix

304cf20

Update tests assertions

1165581

Reformat

0d48fc4

dtcxzyw mentioned this pull request Jun 13, 2025

[InstCombine] Combine interleaved PHI reduction chains. #143878

Open

Reapply suggested fix

22183ad

dtcxzyw approved these changes Jun 13, 2025

View reviewed changes

nikic approved these changes Jun 13, 2025

View reviewed changes

dtcxzyw merged commit 07fa6d1 into llvm:main Jun 14, 2025
6 of 7 checks passed

thevar1able deleted the instrcombine-avoid-folding-select-icmpinstr-nonconstant-arm branch June 14, 2025 16:55

thevar1able mentioned this pull request Jun 14, 2025

Use clang-20 ClickHouse/ClickHouse#77352

Open

	// Try to fold intrinsic into select operands. This is legal if:
	// * The intrinsic is speculatable.
	// * The select condition is not a vector, or the intrinsic does not
	// perform cross-lane operations.
	if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
	isNotCrossLaneOperation(II))
	for (Value *Op : II->args())
	if (auto *Sel = dyn_cast<SelectInst>(Op))
	if (Instruction R = FoldOpIntoSelect(II, Sel))
	return R;

[InstCombine] Avoid folding select(umin(X, Y), X) with min/max values in false arm #143020

[InstCombine] Avoid folding select(umin(X, Y), X) with min/max values in false arm #143020

Conversation

thevar1able commented Jun 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jun 5, 2025

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented Jun 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

thevar1able commented Jun 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

nikic commented Jun 5, 2025

Uh oh!

nikic Jun 5, 2025

Choose a reason for hiding this comment

Uh oh!

thevar1able Jun 5, 2025

Choose a reason for hiding this comment

Uh oh!

dtcxzyw Jun 6, 2025

Choose a reason for hiding this comment

Uh oh!

nikic Jun 6, 2025

Choose a reason for hiding this comment

Uh oh!

nikic commented Jun 6, 2025

Uh oh!

thevar1able commented Jun 11, 2025

Uh oh!

nikic commented Jun 11, 2025

Uh oh!

Uh oh!

Uh oh!

nikic commented Jun 12, 2025

Uh oh!

thevar1able commented Jun 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

nikic commented Jun 12, 2025

Uh oh!

thevar1able commented Jun 12, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

thevar1able commented Jun 13, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

dtcxzyw left a comment

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

thevar1able commented Jun 13, 2025

Uh oh!

Uh oh!

thevar1able commented Jun 14, 2025

Uh oh!

Uh oh!

[InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm #143020

[InstCombine] Avoid folding `select(umin(X, Y), X)` with min/max values in false arm #143020

thevar1able commented Jun 5, 2025 •

edited

Loading

github-actions bot commented Jun 5, 2025 •

edited

Loading

thevar1able commented Jun 5, 2025 •

edited

Loading

thevar1able commented Jun 12, 2025 •

edited

Loading

thevar1able commented Jun 12, 2025 •

edited

Loading

thevar1able commented Jun 13, 2025 •

edited

Loading