[InstCombine] Fold rotate patterns with ZExt/Trunc at different Points #142578
Conversation
Rotation patterns now fold when the shift amount is truncated, or when the width change happens at different points on the left and right sides. https://alive2.llvm.org/ce/z/RkALLB
Fixes #138334
The previous approach would have needed more than ten patterns, since L and R are independent and each has multiple possible variants.
For cases where L is ZExt(And(Shamt, 32)), my solution reuses the ZExt as the rotate amount, which leaves an additional unnecessary AND. Alternatively, one could create a new ZExt(Shamt), since the And is a no-op; that would add an extra instruction whenever the ZExt(And(...)) has a second use. I was unsure which approach would be preferred here.
As a side effect, this now recognizes patterns equivalent to fsh(X, X, And(Shamt, 99)) and fsh(X, X, And(Shamt, 5)), i.e. with a mask that is too large or too small.
@RKSimon
PS: Sorry for the delay, I was sick the past two weeks.
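To make the new behavior concrete, here is a sketch of the kind of IR that now folds (the function name is made up; it mirrors the rotateright_64_zext_double_conversion test added below, where the widening happens at different points on the two sides):

```llvm
define i64 @rotr_zext_at_different_points(i64 %x, i32 %y) {
  ; Right-shift amount: %y widened directly.
  %z = zext i32 %y to i64
  ; Left-shift amount: %y negated and masked first, then widened.
  %neg = sub nsw i32 0, %y
  %and = and i32 %neg, 63
  %conv = zext i32 %and to i64
  %shl = shl i64 %x, %conv
  %shr = lshr i64 %x, %z
  %or = or i64 %shr, %shl
  ret i64 %or
}
; Expected result after this patch (per the new test):
;   %z = zext nneg i32 %y to i64
;   %or = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
```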
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms
Author: Thomas Saller (sallto)
Full diff: https://github.com/llvm/llvm-project/pull/142578.diff 4 Files Affected:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 6242a686e7bc0..b3061e6d4ccdc 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -2160,6 +2160,13 @@ m_ZExtOrSelf(const OpTy &Op) {
return m_CombineOr(m_ZExt(Op), Op);
}
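+/// Matches Trunc, ZExt, or the value itself (i.e. looks through an optional
+/// trunc or zext).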
+template <typename OpTy>
+inline match_combine_or<CastInst_match<OpTy, TruncInst>,
+ match_combine_or<CastInst_match<OpTy, ZExtInst>, OpTy>>
+m_TruncOrZExtOrSelf(const OpTy &Op) {
+ return m_CombineOr(m_Trunc(Op), m_ZExtOrSelf(Op));
+}
+
template <typename OpTy>
inline match_combine_or<CastInst_match<OpTy, SExtInst>, OpTy>
m_SExtOrSelf(const OpTy &Op) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 59b46ebdb72e2..65614d0c23720 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2969,31 +2969,53 @@ InstCombinerImpl::convertOrOfShiftsToFunnelShift(Instruction &Or) {
if (!isPowerOf2_32(Width))
return nullptr;
- // The shift amount may be masked with negation:
- // (shl ShVal, (X & (Width - 1))) | (lshr ShVal, ((-X) & (Width - 1)))
+ // Check that L and R operate on the same value X. Since the bitwidth of X
+ // can differ from that of L and R, there are multiple possible locations
+ // for a ZExt or Trunc.
Value *X;
+ const APInt *LMask = nullptr;
+ const APInt *RMask = nullptr;
unsigned Mask = Width - 1;
- if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
- match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
- return X;
+ // L is essentially a no-op except for changing the type of X. There are
+ // multiple possible patterns, such as (X & LMask) or a ZExt/Trunc of X.
+ match(L, m_TruncOrZExtOrSelf(m_CombineOr(
+ m_And(m_TruncOrZExtOrSelf(m_Value(X)), m_APInt(LMask)),
+ m_Value(X))));
+
+ // R should be -X; sometimes (-X) & RMask is used instead, which is
+ // equivalent if RMask >= Width - 1.
+ const Value *ValueToNegate = nullptr;
+ if (!match(R, m_TruncOrZExtOrSelf(m_CombineOr(
+ m_And(m_Neg(m_Value(ValueToNegate)), m_APInt(RMask)),
+ m_Neg(m_Value(ValueToNegate))))) ||
+ (RMask && RMask->ult(Mask)))
+ return nullptr;
- // (shl ShVal, X) | (lshr ShVal, ((-X) & (Width - 1)))
- if (match(R, m_And(m_Neg(m_Specific(L)), m_SpecificInt(Mask))))
- return L;
+ // ValueToNegate can be L itself if the shift amount was masked with a
+ // bitwise-and before the rotate pattern.
+ if (!match(ValueToNegate, m_TruncOrZExtOrSelf(
+ m_CombineOr(m_Specific(X), m_Specific(L)))))
+ return nullptr;
- // Similar to above, but the shift amount may be extended after masking,
- // so return the extended value as the parameter for the intrinsic.
- if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
- match(R,
- m_And(m_Neg(m_ZExt(m_And(m_Specific(X), m_SpecificInt(Mask)))),
- m_SpecificInt(Mask))))
+ // L is a no-op, and L is guaranteed to be the same type as the rotate.
+ // We reuse the existing Zext/Trunc.
+ if (!LMask)
return L;
- if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
- match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
- return L;
+ // We can still fold with an LMask < Mask if R solely depends on L (not on
+ // X directly).
+ if (LMask->ult(Mask))
+ return (match(ValueToNegate, m_TruncOrZExtOrSelf(m_Specific(L))))
+ ? L
+ : nullptr;
- return nullptr;
+ // X has the same width as L and LMask >= Width - 1, so the And is a no-op.
+ Value *MatchedX;
+ if (match(L, m_And(m_Value(MatchedX), m_Value())))
+ return MatchedX;
+
+ // L wraps the And in a ZExt/Trunc; reuse L (keeping the redundant And).
+ return L;
};
Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, Width);
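For context on why matchShiftAmount looks for a negated (and possibly masked) shift amount: a rotate left by A in width W is a shift left by A combined with a shift right by (-A) & (W-1). A worked sketch with concrete numbers (hypothetical function, not part of the patch):

```llvm
; rotl(x, 5) for W = 32, written as the shl/lshr pair this code generalizes.
; (-5) & 31 == 27, so the lshr amount is the negated shl amount masked to W-1.
define i32 @rotl_i32_by_5(i32 %x) {
  %shl = shl i32 %x, 5
  %shr = lshr i32 %x, 27
  %or = or i32 %shl, %shr   ; same as call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 5)
  ret i32 %or
}
```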
diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll
index a4d4ec375954f..3c416699f8dd7 100644
--- a/llvm/test/Transforms/InstCombine/rotate.ll
+++ b/llvm/test/Transforms/InstCombine/rotate.ll
@@ -698,6 +698,104 @@ define i64 @rotateleft_64_zext_neg_mask_amount(i64 %0, i32 %1) {
ret i64 %10
}
+define i64 @rotateright_64_zext_double_conversion(i64 %x, i32 %y) {
+; CHECK-LABEL: @rotateright_64_zext_double_conversion(
+; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %z = zext i32 %y to i64
+ %neg = sub nsw i32 0, %y
+ %and2 = and i32 %neg, 63
+ %conv = zext i32 %and2 to i64
+ %shl = shl i64 %x, %conv
+ %shr = lshr i64 %x, %z
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+define i32 @rotateright_32_trunc_early(i32 %x, i64 %y) {
+; CHECK-LABEL: @rotateright_32_trunc_early(
+; CHECK-NEXT: [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %z = trunc i64 %y to i32
+ %neg = sub nsw i32 0, %z
+ %and2 = and i32 %neg, 31
+ %shl = shl i32 %x, %and2
+ %shr = lshr i32 %x, %z
+ %or = or i32 %shr, %shl
+ ret i32 %or
+}
+
+define i32 @rotateright_32_trunc_neg_mask_amount(i32 %x, i64 %y) {
+; CHECK-LABEL: @rotateright_32_trunc_neg_mask_amount(
+; CHECK-NEXT: [[Z:%.*]] = trunc i64 [[Y:%.*]] to i32
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshr.i32(i32 [[X:%.*]], i32 [[X]], i32 [[Z]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %z = trunc i64 %y to i32
+ %neg = sub i64 0, %y
+ %and2 = and i64 %neg, 31
+ %conv = trunc i64 %and2 to i32
+ %shl = shl i32 %x, %conv
+ %shr = lshr i32 %x, %z
+ %or = or i32 %shr, %shl
+ ret i32 %or
+}
+
+; restrict the shift amount before rotating
+
+define i32 @rotateleft_32_restricted_shamt(i32 %x, i32 %shAmt) {
+; CHECK-LABEL: @rotateleft_32_restricted_shamt(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], 30
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X]], i32 [[X]], i32 [[AND]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %x, 30
+ %shl = shl i32 %x, %and
+ %sub = sub i32 0, %and
+ %shr = lshr i32 %x, %sub
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+; unnecessarily large 'and' masks
+
+define i32 @rotateleft_32_non_restricted_shamt(i32 %x, i32 %t) {
+; CHECK-LABEL: @rotateleft_32_non_restricted_shamt(
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[T:%.*]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %t, 31
+ %shl = shl i32 %x, %and
+ %sub = sub nsw i32 0, %and
+ %and2 = and i32 %sub, 31
+ %shr = lshr i32 %x, %and2
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+; negative test - the mask on the right shift amount is too small (should be >= 31)
+
+define i32 @rotateleft_32_incorrect_right_mask(i32 %x, i32 %t) {
+; CHECK-LABEL: @rotateleft_32_incorrect_right_mask(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X:%.*]], [[T:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 0, [[T]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SUB]], 30
+; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[X]], [[AND]]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]]
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %shl = shl i32 %x, %t
+ %sub = sub nsw i32 0, %t
+ %and = and i32 %sub, 30
+ %shr = lshr i32 %x, %and
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
; Non-power-of-2 types. This could be transformed, but it's not a typical rotate pattern.
define i9 @rotateleft_9_neg_mask_wide_amount_commute(i9 %v, i33 %shamt) {
@@ -1086,3 +1184,42 @@ define i32 @not_rotl_i32_add_less(i32 %x, i32 %y) {
%r = add i32 %shr, %shl
ret i32 %r
}
+
+; multi-use tests
+define i32 @rotateleft_32_use_zext(i32 %x, i16 %shAmt) {
+; CHECK-LABEL: @rotateleft_32_use_zext(
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 [[SHAMT:%.*]] to i32
+; CHECK-NEXT: call void @use(i32 [[CONV]])
+; CHECK-NEXT: [[OR:%.*]] = call i32 @llvm.fshl.i32(i32 [[X:%.*]], i32 [[X]], i32 [[CONV]])
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %conv = zext i16 %shAmt to i32
+ call void @use(i32 %conv)
+ %shl = shl i32 %x, %conv
+ %sub = sub i32 0, %conv
+ %shr = lshr i32 %x, %sub
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+define i64 @rotateleft_64_use_and(i64 %x, i32 %y) {
+; CHECK-LABEL: @rotateleft_64_use_and(
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], 63
+; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[AND]] to i64
+; CHECK-NEXT: call void @use(i64 [[Z]])
+; CHECK-NEXT: [[OR:%.*]] = call i64 @llvm.fshr.i64(i64 [[X:%.*]], i64 [[X]], i64 [[Z]])
+; CHECK-NEXT: ret i64 [[OR]]
+;
+ %and = and i32 %y, 63
+ %z = zext i32 %and to i64
+ call void @use(i64 %z)
+ %neg = sub nsw i32 0, %y
+ %and2 = and i32 %neg, 63
+ %conv = zext i32 %and2 to i64
+ %shl = shl i64 %x, %conv
+ %shr = lshr i64 %x, %z
+ %or = or i64 %shr, %shl
+ ret i64 %or
+}
+
+declare void @use(i32)
diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp
index 7b3a4ce365453..a5687b92e8310 100644
--- a/llvm/unittests/IR/PatternMatch.cpp
+++ b/llvm/unittests/IR/PatternMatch.cpp
@@ -635,6 +635,39 @@ TEST_F(PatternMatchTest, ZExtSExtSelf) {
EXPECT_TRUE(m_ZExtOrSExtOrSelf(m_One()).match(One64S));
}
+TEST_F(PatternMatchTest, TruncZextSelf) {
+ LLVMContext &Ctx = IRB.getContext();
+
+ Value *One32 = IRB.getInt32(1);
+ Value *One64 = IRB.getInt64(1);
+ Value *One32T = IRB.CreateTrunc(One64, IRB.getInt32Ty());
+ Value *One64Z = IRB.CreateZExt(One32, IntegerType::getInt64Ty(Ctx));
+ Value *One64S = IRB.CreateSExt(One32, IntegerType::getInt64Ty(Ctx));
+
+ EXPECT_TRUE(m_One().match(One32));
+ EXPECT_TRUE(m_One().match(One64));
+ EXPECT_FALSE(m_One().match(One32T));
+ EXPECT_FALSE(m_One().match(One64Z));
+ EXPECT_FALSE(m_One().match(One64S));
+
+ EXPECT_FALSE(m_Trunc(m_One()).match(One32));
+ EXPECT_TRUE(m_Trunc(m_One()).match(One32T));
+ EXPECT_FALSE(m_Trunc(m_One()).match(One64Z));
+ EXPECT_FALSE(m_Trunc(m_One()).match(One64S));
+
+ EXPECT_FALSE(m_ZExt(m_One()).match(One32));
+ EXPECT_FALSE(m_ZExt(m_One()).match(One64));
+ EXPECT_FALSE(m_ZExt(m_One()).match(One32T));
+ EXPECT_TRUE(m_ZExt(m_One()).match(One64Z));
+ EXPECT_FALSE(m_ZExt(m_One()).match(One64S));
+
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32));
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64));
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One32T));
+ EXPECT_TRUE(m_TruncOrZExtOrSelf(m_One()).match(One64Z));
+ EXPECT_FALSE(m_TruncOrZExtOrSelf(m_One()).match(One64S));
+}
+
TEST_F(PatternMatchTest, BitCast) {
Value *OneDouble = ConstantFP::get(IRB.getDoubleTy(), APFloat(1.0));
Value *ScalableDouble = ConstantFP::get(