Skip to content

Commit 5336d0b

Browse files
committed
[SelectionDAG] Handle more opcodes in isGuaranteedNotToBeUndefOrPoison
Add special handling of EXTRACT_SUBVECTOR, INSERT_SUBVECTOR, EXTRACT_VECTOR_ELT, INSERT_VECTOR_ELT and SCALAR_TO_VECTOR in isGuaranteedNotToBeUndefOrPoison. Make use of DemandedElts to improve the analysis and only check relevant elements for each operand. Also start using DemandedElts in the recursive calls that check isGuaranteedNotToBeUndefOrPoison for all operands for operations that do not create undef/poison. We can do that for a number of elementwise operations for which the DemandedElts can be applied to every operand (e.g. ADD, OR, BITREVERSE, TRUNCATE).
1 parent b1aece9 commit 5336d0b

File tree

2 files changed: +135 −10 lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 123 additions & 0 deletions
@@ -5458,6 +5458,83 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     }
     return true;
 
+  case ISD::EXTRACT_SUBVECTOR: {
+    SDValue Src = Op.getOperand(0);
+    if (Src.getValueType().isScalableVector())
+      break;
+    uint64_t Idx = Op.getConstantOperandVal(1);
+    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+    return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
+                                            Depth + 1);
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    if (Op.getValueType().isScalableVector())
+      break;
+    SDValue Src = Op.getOperand(0);
+    SDValue Sub = Op.getOperand(1);
+    uint64_t Idx = Op.getConstantOperandVal(2);
+    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+    APInt DemandedSrcElts = DemandedElts;
+    DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
+
+    if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
+                                 Sub, DemandedSubElts, PoisonOnly, Depth + 1))
+      return false;
+    if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
+                                 Src, DemandedSrcElts, PoisonOnly, Depth + 1))
+      return false;
+    return true;
+  }
+
+  case ISD::EXTRACT_VECTOR_ELT: {
+    SDValue Src = Op.getOperand(0);
+    auto *IndexC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+    EVT SrcVT = Src.getValueType();
+    if (SrcVT.isFixedLengthVector() && IndexC &&
+        IndexC->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+      APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+                                                  IndexC->getZExtValue());
+      return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
+                                              Depth + 1);
+    }
+    break;
+  }
+
+  case ISD::INSERT_VECTOR_ELT: {
+    SDValue InVec = Op.getOperand(0);
+    SDValue InVal = Op.getOperand(1);
+    SDValue EltNo = Op.getOperand(2);
+    EVT VT = InVec.getValueType();
+    auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+    if (IndexC && VT.isFixedLengthVector() &&
+        IndexC->getZExtValue() < VT.getVectorNumElements()) {
+      if (DemandedElts[IndexC->getZExtValue()] &&
+          !isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
+        return false;
+      APInt InVecDemandedElts = DemandedElts;
+      InVecDemandedElts.clearBit(IndexC->getZExtValue());
+      if (!!InVecDemandedElts &&
+          !isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
+                                            PoisonOnly, Depth + 1))
+        return false;
+      return true;
+    }
+    break;
+  }
+
+  case ISD::SCALAR_TO_VECTOR:
+    // Check upper (known undef) elements.
+    if (DemandedElts.ugt(1) && !PoisonOnly)
+      return false;
+    // Check element zero.
+    if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison(
+                               Op.getOperand(0), PoisonOnly, Depth + 1))
+      return false;
+    return true;
+
   case ISD::SPLAT_VECTOR:
     return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
                                             Depth + 1);
@@ -5480,6 +5557,52 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
     return true;
   }
 
+  case ISD::SHL:
+  case ISD::SRL:
+  case ISD::SRA:
+    // Shift amount operand is checked by canCreateUndefOrPoison. So it is
+    // enough to check operand 0 if Op can't create undef/poison.
+    return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
+                                   /*ConsiderFlags*/ true, Depth) &&
+           isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
+                                            PoisonOnly, Depth + 1);
+
+  case ISD::BSWAP:
+  case ISD::CTPOP:
+  case ISD::BITREVERSE:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SADDSAT:
+  case ISD::UADDSAT:
+  case ISD::SSUBSAT:
+  case ISD::USUBSAT:
+  case ISD::SSHLSAT:
+  case ISD::USHLSAT:
+  case ISD::SMIN:
+  case ISD::SMAX:
+  case ISD::UMIN:
+  case ISD::UMAX:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::VSELECT: {
+    // If Op can't create undef/poison and none of its operands are undef/poison
+    // then Op is never undef/poison. A difference from the more common check
+    // below, outside the switch, is that we handle elementwise operations for
+    // which the DemandedElts mask is valid for all operands here.
+    return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
+                                   /*ConsiderFlags*/ true, Depth) &&
+           all_of(Op->ops(), [&](SDValue V) {
+             return isGuaranteedNotToBeUndefOrPoison(V, DemandedElts,
+                                                     PoisonOnly, Depth + 1);
+           });
+  }
+
   // TODO: Search for noundef attributes from library functions.
 
   // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.

llvm/test/CodeGen/X86/pr62286.ll

Lines changed: 12 additions & 10 deletions
@@ -28,8 +28,9 @@ define i64 @PR62286(i32 %a) {
 ; AVX1-NEXT:    vmovd %edi, %xmm0
 ; AVX1-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
 ; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
 ; AVX1-NEXT:    vpmovsxdq %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
 ; AVX1-NEXT:    vpmovsxdq %xmm1, %xmm1
@@ -42,10 +43,10 @@ define i64 @PR62286(i32 %a) {
 ; AVX2-LABEL: PR62286:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vmovd %edi, %xmm0
-; AVX2-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
-; AVX2-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; AVX2-NEXT:    vpaddd %xmm0, %xmm0, %xmm1
+; AVX2-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3]
+; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
@@ -58,12 +59,13 @@ define i64 @PR62286(i32 %a) {
 ; AVX512-LABEL: PR62286:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vmovd %edi, %xmm0
-; AVX512-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,1,0]
-; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
-; AVX512-NEXT:    movw $4369, %ax # imm = 0x1111
+; AVX512-NEXT:    movb $8, %al
 ; AVX512-NEXT:    kmovd %eax, %k1
-; AVX512-NEXT:    vpaddd %zmm0, %zmm0, %zmm1 {%k1}
-; AVX512-NEXT:    vpmovsxdq %ymm1, %zmm0
+; AVX512-NEXT:    vpexpandd %ymm0, %ymm1 {%k1} {z}
+; AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; AVX512-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
+; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
+; AVX512-NEXT:    vpmovsxdq %ymm0, %zmm0
 ; AVX512-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
 ; AVX512-NEXT:    vextracti128 $1, %ymm0, %xmm1

0 commit comments

Comments
 (0)