Skip to content

Commit e2f011e

Browse files
committed
[InstCombine] Extend bitmask->select combine to match and->mul
Change-Id: I1cc2acd3804dde50636518f3ef2c9581848ae9f6
1 parent 5145549 commit e2f011e

File tree

2 files changed

+164
-56
lines changed

2 files changed

+164
-56
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 79 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -3560,6 +3560,72 @@ static Value *foldOrOfInversions(BinaryOperator &I,
35603560
return nullptr;
35613561
}
35623562

3563+
// Result of matchBitmaskMul: describes a value as (X & Mask) * Factor.
struct DecomposedBitMaskMul {
3564+
// The value whose bits are selected by Mask.
Value *X;
3565+
// The constant that the masked value is multiplied by.
APInt Factor;
3566+
// The constant bitmask applied to X.
APInt Mask;
3567+
};
3568+
3569+
/// Try to view \p V as a masked multiply of the form (X & Mask) * Factor.
///
/// Two instruction shapes are recognized:
///   - mul (and X, Mask), Factor, where both constants are strictly positive;
///   - select Cond, C0, C1, where Cond decomposes (via decomposeBitTest) into
///     an equality test of a single non-sign bit of X against zero, the arm
///     chosen when that bit is clear is 0, and the other arm is a strictly
///     positive multiple of the tested bit.
///
/// \returns the decomposition, or std::nullopt when \p V fits neither shape.
static std::optional<DecomposedBitMaskMul> matchBitmaskMul(Value *V) {
  auto *Op = dyn_cast<Instruction>(V);
  if (!Op)
    return std::nullopt;

  // Shape 1: (X & Mask) * Factor.
  Value *MulOp = nullptr;
  const APInt *MulConst = nullptr;
  if (match(Op, m_Mul(m_Value(MulOp), m_APInt(MulConst)))) {
    if (!MulConst->isStrictlyPositive())
      return std::nullopt;

    Value *Original = nullptr;
    const APInt *Mask = nullptr;
    if (!match(MulOp, m_And(m_Value(Original), m_APInt(Mask))) ||
        !Mask->isStrictlyPositive())
      return std::nullopt;

    DecomposedBitMaskMul Ret;
    Ret.X = Original;
    Ret.Mask = *Mask;
    Ret.Factor = *MulConst;
    return Ret;
  }

  // Shape 2: ((X & Bit) == 0 ? 0 : Bit * Factor), or the same select with the
  // arms exchanged under an ICMP_NE test.
  Value *Cond = nullptr;
  const APInt *EqZero = nullptr, *NeZero = nullptr;
  if (!match(Op, m_Select(m_Value(Cond), m_APInt(EqZero), m_APInt(NeZero))))
    return std::nullopt;

  auto ICmpDecompose =
      decomposeBitTest(Cond, /*LookThruTrunc=*/true,
                       /*AllowNonZeroC=*/false, /*DecomposeBitMask=*/true);
  if (!ICmpDecompose.has_value())
    return std::nullopt;

  // Only a test of exactly one non-sign bit against zero is usable.
  if (!ICmpInst::isEquality(ICmpDecompose->Pred) ||
      !ICmpDecompose->C.isZero() || !ICmpDecompose->Mask.isPowerOf2() ||
      ICmpDecompose->Mask.isNegative())
    return std::nullopt;

  // The bit test is decomposed with trunc look-through enabled, so X and Mask
  // may presumably be wider than the select itself (NOTE(review): confirm
  // against decomposeBitTest). The APInt division below needs both operands to
  // have the same width, and the caller rebuilds the expression in X's type,
  // so reject any width mismatch.
  if (NeZero->getBitWidth() != ICmpDecompose->Mask.getBitWidth())
    return std::nullopt;

  // After this, EqZero is the arm taken when the tested bit is clear.
  if (ICmpDecompose->Pred == ICmpInst::ICMP_NE)
    std::swap(EqZero, NeZero);

  if (!EqZero->isZero() || !NeZero->isStrictlyPositive())
    return std::nullopt;

  // The "bit set" arm must be an exact multiple of the tested bit; the
  // quotient is the multiplication factor.
  if (!NeZero->urem(ICmpDecompose->Mask).isZero())
    return std::nullopt;

  DecomposedBitMaskMul Ret;
  Ret.X = ICmpDecompose->X;
  Ret.Mask = ICmpDecompose->Mask;
  Ret.Factor = NeZero->udiv(ICmpDecompose->Mask);
  return Ret;
}
3628+
35633629
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
35643630
// here. We should standardize that construct where it is needed or choose some
35653631
// other way to ensure that commutated variants of patterns are not missed.
@@ -3642,51 +3708,19 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
36423708
/*NSW=*/true, /*NUW=*/true))
36433709
return R;
36443710

3645-
Value *Cond0 = nullptr, *Cond1 = nullptr;
3646-
const APInt *Op0Eq = nullptr, *Op0Ne = nullptr;
3647-
const APInt *Op1Eq = nullptr, *Op1Ne = nullptr;
3648-
3649-
// (!(A & N) ? 0 : N * C) + (!(A & M) ? 0 : M * C) -> A & (N + M) * C
3650-
if (match(I.getOperand(0),
3651-
m_Select(m_Value(Cond0), m_APInt(Op0Eq), m_APInt(Op0Ne))) &&
3652-
match(I.getOperand(1),
3653-
m_Select(m_Value(Cond1), m_APInt(Op1Eq), m_APInt(Op1Ne)))) {
3654-
CmpPredicate Pred0, Pred1;
3655-
3656-
auto LHSDecompose =
3657-
decomposeBitTest(Cond0, /*LookThruTrunc=*/true,
3658-
/*AllowNonZeroC=*/false, /*DecomposeAnd=*/true);
3659-
auto RHSDecompose =
3660-
decomposeBitTest(Cond1, /*LookThruTrunc=*/true,
3661-
/*AllowNonZeroC=*/false, /*DecomposeAnd=*/true);
3662-
3663-
if (LHSDecompose && RHSDecompose && LHSDecompose->X == RHSDecompose->X &&
3664-
(ICmpInst::isEquality(LHSDecompose->Pred)) &&
3665-
!RHSDecompose->Mask.isNegative() &&
3666-
!LHSDecompose->Mask.isNegative() && RHSDecompose->Mask.isPowerOf2() &&
3667-
LHSDecompose->Mask.isPowerOf2() &&
3668-
LHSDecompose->Mask != RHSDecompose->Mask &&
3669-
LHSDecompose->C.isZero() && RHSDecompose->C.isZero()) {
3670-
if (LHSDecompose->Pred == ICmpInst::ICMP_NE)
3671-
std::swap(Op0Eq, Op0Ne);
3672-
if (RHSDecompose->Pred == ICmpInst::ICMP_NE)
3673-
std::swap(Op1Eq, Op1Ne);
3674-
3675-
if (Op0Ne->isStrictlyPositive() && Op1Ne->isStrictlyPositive() &&
3676-
Op0Eq->isZero() && Op1Eq->isZero() &&
3677-
Op0Ne->urem(LHSDecompose->Mask).isZero() &&
3678-
Op1Ne->urem(RHSDecompose->Mask).isZero() &&
3679-
Op0Ne->udiv(LHSDecompose->Mask) ==
3680-
Op1Ne->udiv(RHSDecompose->Mask)) {
3681-
auto NewAnd = Builder.CreateAnd(
3682-
LHSDecompose->X,
3683-
ConstantInt::get(LHSDecompose->X->getType(),
3684-
(LHSDecompose->Mask + RHSDecompose->Mask)));
3685-
3686-
return BinaryOperator::CreateMul(
3687-
NewAnd, ConstantInt::get(NewAnd->getType(),
3688-
Op0Ne->udiv(LHSDecompose->Mask)));
3689-
}
3711+
auto Decomp0 = matchBitmaskMul(I.getOperand(0));
3712+
auto Decomp1 = matchBitmaskMul(I.getOperand(1));
3713+
3714+
if (Decomp0 && Decomp1) {
3715+
if (Decomp0->X == Decomp1->X &&
3716+
(Decomp0->Mask & Decomp1->Mask).isZero() &&
3717+
Decomp0->Factor == Decomp1->Factor) {
3718+
auto NewAnd = Builder.CreateAnd(
3719+
Decomp0->X, ConstantInt::get(Decomp0->X->getType(),
3720+
(Decomp0->Mask + Decomp1->Mask)));
3721+
3722+
return BinaryOperator::CreateMul(
3723+
NewAnd, ConstantInt::get(NewAnd->getType(), Decomp1->Factor));
36903724
}
36913725
}
36923726
}

llvm/test/Transforms/InstCombine/or-bitmask.ll

Lines changed: 85 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,9 @@ define i32 @add_select_cmp_and2(i32 %in) {
3636

3737
define i32 @add_select_cmp_and3(i32 %in) {
3838
; CHECK-LABEL: @add_select_cmp_and3(
39-
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
39+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 7
4040
; CHECK-NEXT: [[TEMP:%.*]] = mul nuw nsw i32 [[TMP1]], 72
41-
; CHECK-NEXT: [[BITOP2:%.*]] = and i32 [[IN]], 4
42-
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[BITOP2]], 0
43-
; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
44-
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[TEMP]], [[SEL2]]
45-
; CHECK-NEXT: ret i32 [[OUT]]
41+
; CHECK-NEXT: ret i32 [[TEMP]]
4642
;
4743
%bitop0 = and i32 %in, 1
4844
%cmp0 = icmp eq i32 %bitop0, 0
@@ -60,12 +56,9 @@ define i32 @add_select_cmp_and3(i32 %in) {
6056

6157
define i32 @add_select_cmp_and4(i32 %in) {
6258
; CHECK-LABEL: @add_select_cmp_and4(
63-
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
64-
; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
65-
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN]], 12
59+
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[IN:%.*]], 15
6660
; CHECK-NEXT: [[TEMP3:%.*]] = mul nuw nsw i32 [[TMP2]], 72
67-
; CHECK-NEXT: [[OUT1:%.*]] = or disjoint i32 [[OUT]], [[TEMP3]]
68-
; CHECK-NEXT: ret i32 [[OUT1]]
61+
; CHECK-NEXT: ret i32 [[TEMP3]]
6962
;
7063
%bitop0 = and i32 %in, 1
7164
%cmp0 = icmp eq i32 %bitop0, 0
@@ -323,6 +316,87 @@ define <2 x i32> @add_select_cmp_vec_nonunique(<2 x i32> %in) {
323316
%out = or disjoint <2 x i32> %sel0, %sel1
324317
ret <2 x i32> %out
325318
}
319+
320+
; Mixed operands: the first 'or' operand is and+mul (bit 1, factor 72), the
; second is icmp+select (bit 2, value 144 = 2 * 72). Expected to fold into a
; single and (mask 3) + mul (72).
define i32 @add_select_cmp_mixed1(i32 %in) {
321+
; CHECK-LABEL: @add_select_cmp_mixed1(
322+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
323+
; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
324+
; CHECK-NEXT: ret i32 [[OUT]]
325+
;
326+
%mask = and i32 %in, 1
327+
%sel0 = mul i32 %mask, 72
328+
%bitop1 = and i32 %in, 2
329+
%cmp1 = icmp eq i32 %bitop1, 0
330+
%sel1 = select i1 %cmp1, i32 0, i32 144
331+
%out = or disjoint i32 %sel0, %sel1
332+
ret i32 %out
333+
}
334+
335+
; Mixed operands in the opposite order: the first 'or' operand is icmp+select
; (bit 1, value 72), the second is and+mul (bit 2, factor 72). Expected to fold
; into a single and (mask 3) + mul (72).
define i32 @add_select_cmp_mixed2(i32 %in) {
336+
; CHECK-LABEL: @add_select_cmp_mixed2(
337+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
338+
; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
339+
; CHECK-NEXT: ret i32 [[OUT]]
340+
;
341+
%bitop0 = and i32 %in, 1
342+
%cmp0 = icmp eq i32 %bitop0, 0
343+
%mask = and i32 %in, 2
344+
%sel0 = select i1 %cmp0, i32 0, i32 72
345+
%sel1 = mul i32 %mask, 72
346+
%out = or disjoint i32 %sel0, %sel1
347+
ret i32 %out
348+
}
349+
350+
; Both 'or' operands are and+mul with the same factor (72) on different bits
; (1 and 2). Expected to fold into a single and (mask 3) + mul (72).
define i32 @add_select_cmp_and_mul(i32 %in) {
351+
; CHECK-LABEL: @add_select_cmp_and_mul(
352+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IN:%.*]], 3
353+
; CHECK-NEXT: [[OUT:%.*]] = mul nuw nsw i32 [[TMP1]], 72
354+
; CHECK-NEXT: ret i32 [[OUT]]
355+
;
356+
%mask0 = and i32 %in, 1
357+
%sel0 = mul i32 %mask0, 72
358+
%mask1 = and i32 %in, 2
359+
%sel1 = mul i32 %mask1, 72
360+
%out = or disjoint i32 %sel0, %sel1
361+
ret i32 %out
362+
}
363+
364+
; Negative test: the select contributes 73 for bit 1 while the and+mul operand
; uses factor 72, so the factors differ and the 'or' is expected to remain.
define i32 @add_select_cmp_mixed2_mismatch(i32 %in) {
365+
; CHECK-LABEL: @add_select_cmp_mixed2_mismatch(
366+
; CHECK-NEXT: [[BITOP0:%.*]] = and i32 [[IN:%.*]], 1
367+
; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i32 [[BITOP0]], 0
368+
; CHECK-NEXT: [[MASK:%.*]] = and i32 [[IN]], 2
369+
; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 73
370+
; CHECK-NEXT: [[SEL1:%.*]] = mul nuw nsw i32 [[MASK]], 72
371+
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
372+
; CHECK-NEXT: ret i32 [[OUT]]
373+
;
374+
%bitop0 = and i32 %in, 1
375+
%cmp0 = icmp eq i32 %bitop0, 0
376+
%mask = and i32 %in, 2
377+
%sel0 = select i1 %cmp0, i32 0, i32 73
378+
%sel1 = mul i32 %mask, 72
379+
%out = or disjoint i32 %sel0, %sel1
380+
ret i32 %out
381+
}
382+
383+
; Negative test: both 'or' operands are and+mul but with different factors
; (73 vs 72), so the 'or' is expected to remain.
define i32 @add_select_cmp_and_mul_mismatch(i32 %in) {
384+
; CHECK-LABEL: @add_select_cmp_and_mul_mismatch(
385+
; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[IN:%.*]] to i1
386+
; CHECK-NEXT: [[SEL0:%.*]] = select i1 [[TMP1]], i32 73, i32 0
387+
; CHECK-NEXT: [[MASK1:%.*]] = and i32 [[IN]], 2
388+
; CHECK-NEXT: [[SEL1:%.*]] = mul nuw nsw i32 [[MASK1]], 72
389+
; CHECK-NEXT: [[OUT:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
390+
; CHECK-NEXT: ret i32 [[OUT]]
391+
;
392+
%mask0 = and i32 %in, 1
393+
%sel0 = mul i32 %mask0, 73
394+
%mask1 = and i32 %in, 2
395+
%sel1 = mul i32 %mask1, 72
396+
%out = or disjoint i32 %sel0, %sel1
397+
ret i32 %out
398+
}
399+
326400
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
327401
; CONSTSPLAT: {{.*}}
328402
; CONSTVEC: {{.*}}

0 commit comments

Comments
 (0)