Skip to content

Commit c6cb302

Browse files
committed
[X86] Distribute Certain Bitwise Operations over SELECT
InstCombine canonicalizes `(select P (and X (- X)) X)` to `(and (select P (- X) umax) X)`. This is counterproductive for the X86 backend when BMI is available because we can encode `(and X (- X))` using the `BLSI` instruction. A similar situation arises if we have `(select P (and X (sub X 1)) X)` (prevents use of `BLSR` instruction) or `(select P (xor X (sub X 1)) X)` (prevents use of `BLSMSK` instruction). Trigger the inverse transformation in the X86 backend if BMI is available and we can use the mentioned BMI instructions. This is done by overriding the appropriate `shouldFoldSelectWithIdentityConstant()` overload. In this way, we get `(select P (and X (- X)) X)` again, which enables the use of `BLSI` (similar for the other cases described above). Alive proofs: https://alive2.llvm.org/ce/z/MT_pKi Fixes #131587, fixes #133848.
1 parent 733d9d4 commit c6cb302

File tree

3 files changed

+118
-219
lines changed

3 files changed

+118
-219
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "llvm/Analysis/BlockFrequencyInfo.h"
2828
#include "llvm/Analysis/ProfileSummaryInfo.h"
2929
#include "llvm/Analysis/VectorUtils.h"
30+
#include "llvm/CodeGen/ISDOpcodes.h"
3031
#include "llvm/CodeGen/IntrinsicLowering.h"
3132
#include "llvm/CodeGen/LivePhysRegs.h"
3233
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -35385,18 +35386,48 @@ bool X86TargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT,
3538535386

3538635387
/// Target hook: decide from the binary opcode and value type alone whether
/// the select-with-identity-constant fold should be attempted.
///
/// Scalar types are accepted only when BMI is available, the type is i32 or
/// i64, and the opcode is AND or XOR. Vector types are accepted only with
/// AVX512 (and, without VLX, only when the vector is 512 bits wide), and never
/// when the element type is i1.
bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
3538735388
EVT VT) const {
35389+
// Scalar path. Every branch in this block returns, so the code after it only
// ever sees vector types.
if (!VT.isVector()) {
35390+
if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
35391+
return false;
35392+
// Only AND and XOR are accepted for scalars.
if (Opcode != ISD::AND && Opcode != ISD::XOR)
35393+
return false;
35394+
return true;
35395+
}
35396+
3538835397
// TODO: This is too general. There are cases where pre-AVX512 codegen would
3538935398
// benefit. The transform may also be profitable for scalar code.
3539035399
if (!Subtarget.hasAVX512())
3539135400
return false;
3539235401
// Without VLX, only 512-bit vectors are accepted.
if (!Subtarget.hasVLX() && !VT.is512BitVector())
3539335402
return false;
// Diff note: the `!VT.isVector()` test is dropped from the condition below
// because non-vector types have already returned in the scalar path above.
35394-
if (!VT.isVector() || VT.getScalarType() == MVT::i1)
35403+
if (VT.getScalarType() == MVT::i1)
3539535404
return false;
3539635405

3539735406
return true;
3539835407
}
3539935408

35409+
/// Operand-aware overload of the hook: decide from the select opcode and the
/// non-identity operand whether the fold should be performed.
///
/// \param BinOpcode       opcode of the binary operation (only ISD::AND and
///                        ISD::XOR are inspected here).
/// \param SelectOpcode    opcode of the select; ISD::VSELECT is always accepted.
/// \param X               the value that \p NonIdConstNode is matched against.
/// \param NonIdConstNode  the select operand that is not the identity constant.
/// \returns true for VSELECT, or when the operands form one of the shapes
///          that the commit description maps to BLSI, BLSR or BLSMSK.
///
/// NOTE(review): this overload performs no BMI or value-type check itself;
/// presumably the (Opcode, VT) overload gates it -- confirm against the caller.
bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(
35410+
unsigned BinOpcode, unsigned SelectOpcode, SDValue X,
35411+
SDValue NonIdConstNode) const {
35412+
using namespace llvm::SDPatternMatch;
35413+
35414+
// Vector selects are accepted unconditionally by this overload.
if (SelectOpcode == ISD::VSELECT)
35415+
return true;
35416+
// BLSI: (and X (- X)) -- NonIdConstNode is the negation of X.
35417+
if (BinOpcode == ISD::AND && sd_match(NonIdConstNode, m_Neg(m_Specific(X))))
35418+
return true;
35419+
// BLSR: (and X (sub X 1)) -- matched here as X + all-ones, i.e. X - 1.
35420+
if (BinOpcode == ISD::AND &&
35421+
sd_match(NonIdConstNode, m_Add(m_Specific(X), m_AllOnes())))
35422+
return true;
35423+
// BLSMSK: (xor X (sub X 1)) -- same X + all-ones operand, combined with XOR.
35424+
if (BinOpcode == ISD::XOR &&
35425+
sd_match(NonIdConstNode, m_Add(m_Specific(X), m_AllOnes())))
35426+
return true;
35427+
35428+
// Any other scalar shape is rejected.
return false;
35429+
}
35430+
3540035431
/// Targets can use this to indicate that they only support *some*
3540135432
/// VECTOR_SHUFFLE operations, those with specific masks.
3540235433
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,11 @@ namespace llvm {
14631463
/// Opcode/type-level query: whether the select-with-identity-constant fold
/// should be attempted for a binary op \p BinOpcode producing type \p VT.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
14641464
EVT VT) const override;
14651465

1466+
/// Operand-aware query: whether the select-with-identity-constant fold should
/// be performed given the select opcode \p SelectOpcode, the value \p X and
/// the non-identity select operand \p NonIdConstNode.
bool
1467+
shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1468+
unsigned SelectOpcode, SDValue X,
1469+
SDValue NonIdConstNode) const override;
1470+
14661471
/// Given an intrinsic, checks if on the target the intrinsic will need to map
14671472
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
14681473
/// true and stores the intrinsic information into the IntrinsicInfo that was

0 commit comments

Comments
 (0)