Skip to content

Commit 96336b2

Browse files
authored
[AggressiveInstCombine] Improve popcount matching if the input has known zero bits (#142501)
If the input has known zero bits, InstCombine may have simplified one of the expected And masks. Teach AggressiveInstCombine to use MaskedValueIsZero to recover these missing bits. Fixes #142042.
1 parent a144f58 commit 96336b2

File tree

2 files changed

+73
-7
lines changed

2 files changed

+73
-7
lines changed

llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -328,15 +328,33 @@ static bool tryToRecognizePopCount(Instruction &I) {
328328
m_SpecificInt(Mask33))))) {
329329
Value *Root, *SubOp1;
330330
// Matching "i - ((i >> 1) & 0x55555555...)".
331+
const APInt *AndMask;
331332
if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
332333
match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
333-
m_SpecificInt(Mask55)))) {
334-
LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
335-
IRBuilder<> Builder(&I);
336-
I.replaceAllUsesWith(
337-
Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
338-
++NumPopCountRecognized;
339-
return true;
334+
m_APInt(AndMask)))) {
335+
auto CheckAndMask = [&]() {
336+
if (*AndMask == Mask55)
337+
return true;
338+
339+
// Exact match failed, see if any bits are known to be 0 where we
340+
// expect a 1 in the mask.
341+
if (!AndMask->isSubsetOf(Mask55))
342+
return false;
343+
344+
APInt NeededMask = Mask55 & ~*AndMask;
345+
return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
346+
NeededMask,
347+
SimplifyQuery(I.getDataLayout()));
348+
};
349+
350+
if (CheckAndMask()) {
351+
LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
352+
IRBuilder<> Builder(&I);
353+
I.replaceAllUsesWith(
354+
Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
355+
++NumPopCountRecognized;
356+
return true;
357+
}
340358
}
341359
}
342360
}

llvm/test/Transforms/AggressiveInstCombine/popcount.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,51 @@ define <4 x i32> @popcount32vec(<4 x i32> %0) {
191191
%13 = lshr <4 x i32> %12, <i32 24, i32 24, i32 24, i32 24>
192192
ret <4 x i32> %13
193193
}
194+
195+
define i32 @popcount64_zext(i32 %x) {
196+
; CHECK-LABEL: @popcount64_zext(
197+
; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[X:%.*]] to i64
198+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[ZEXT]])
199+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
200+
; CHECK-NEXT: ret i32 [[TMP13]]
201+
;
202+
%zext = zext i32 %x to i64
203+
%1 = lshr i64 %zext, 1
204+
%2 = and i64 %1, 1431655765
205+
%3 = sub nsw i64 %zext, %2
206+
%4 = and i64 %3, 3689348814741910323
207+
%5 = lshr i64 %3, 2
208+
%6 = and i64 %5, 3689348814741910323
209+
%7 = add nuw nsw i64 %6, %4
210+
%8 = lshr i64 %7, 4
211+
%9 = add nuw nsw i64 %8, %7
212+
%10 = and i64 %9, 1085102592571150095
213+
%11 = mul i64 %10, 72340172838076673
214+
%12 = lshr i64 %11, 56
215+
%13 = trunc nuw nsw i64 %12 to i32
216+
ret i32 %13
217+
}
218+
219+
define i32 @popcount64_mask(i64 %x) {
220+
; CHECK-LABEL: @popcount64_mask(
221+
; CHECK-NEXT: [[MASK:%.*]] = and i64 [[X:%.*]], -281470681808896
222+
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.ctpop.i64(i64 [[MASK]])
223+
; CHECK-NEXT: [[TMP13:%.*]] = trunc nuw nsw i64 [[TMP12]] to i32
224+
; CHECK-NEXT: ret i32 [[TMP13]]
225+
;
226+
%mask = and i64 %x, -281470681808896 ; 0xffff0000ffff0000
227+
%1 = lshr i64 %mask, 1
228+
%2 = and i64 %1, 6148820867675914240 ; 0x5555000055550000
229+
%3 = sub nsw i64 %mask, %2
230+
%4 = and i64 %3, 3689348814741910323
231+
%5 = lshr i64 %3, 2
232+
%6 = and i64 %5, 3689348814741910323
233+
%7 = add nuw nsw i64 %6, %4
234+
%8 = lshr i64 %7, 4
235+
%9 = add nuw nsw i64 %8, %7
236+
%10 = and i64 %9, 1085102592571150095
237+
%11 = mul i64 %10, 72340172838076673
238+
%12 = lshr i64 %11, 56
239+
%13 = trunc nuw nsw i64 %12 to i32
240+
ret i32 %13
241+
}

0 commit comments

Comments
 (0)