Skip to content

[NVPTX] support packed f32 instructions for sm_100+ #126337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 30 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
ac2ea8e
legalize v2f32 as i64 reg and add test cases
Prince781 Feb 8, 2025
35e3fc6
support fadd, fsub, fmul, fma and load on v2f32
Prince781 Feb 9, 2025
2e8b29a
set proxyreg for v2f32 = bitcast i64
Prince781 Feb 9, 2025
7b5e336
handle fdiv and other instructions where v2f32 is illegal
Prince781 Feb 9, 2025
43969f1
ProxyReg v2f32 -> ProxyRegI64
Prince781 Feb 9, 2025
7601e31
support select v2f32
Prince781 Feb 9, 2025
6f6cc43
support v2f32 = bitconvert f64
Prince781 Feb 9, 2025
7eb428f
support extract_vector_elt with dynamic indices
Prince781 Feb 9, 2025
0770b80
promote extract_vector_elt nodes to unpacking mov
Prince781 Feb 9, 2025
6ab0f76
[NVPTX] add combiner rule for v2[b]f16 = fp_round v2f32
Prince781 Feb 12, 2025
1d6143f
[NVPTX] expand fp_extend v2f32
Prince781 Feb 12, 2025
763cbc4
[NVPTX] expand fexp2 and flog2 for v2f32
Prince781 Feb 12, 2025
f5f9e67
[NVPTX] handle v2f32 for LDU/LDG
Prince781 Feb 12, 2025
7bf2122
[NVPTX] only legalize fadd, fsub, fmul, fma for v2f32 on sm_100+
Prince781 Feb 12, 2025
5bee5cf
[NVPTX] lower store v2f32 to st.b64
Prince781 Feb 12, 2025
805f4ea
[NVPTX] expand vector_shuffle, insertelt for v2f32 and lower i64 bitcast
Prince781 Feb 12, 2025
b8afe76
[NVPTX] add combiner rule to peek through bitcast of BUILD_VECTOR
Prince781 Feb 12, 2025
3685776
[NVPTX] loads, stores of v2f32 are untyped
Prince781 Feb 13, 2025
4b44953
[NVPTX] add combiner rule for expanding StoreRetval vector parameters
Prince781 Feb 25, 2025
43a120a
[NVPTX] add combiner rule for expanding LOAD, LoadV2, LoadParam, Load…
Prince781 Feb 28, 2025
6b7c127
[NVPTX] update combiner rule for more types of loads
Prince781 Mar 6, 2025
d090ca2
[NVPTX] support generic LDG/LDU for packed data types
Prince781 Mar 6, 2025
65581cf
[NVPTX] fold v2f32 = bitcast (i64,i64,... = NVPTXISD::Load*)
Prince781 Mar 7, 2025
d88b40b
[NVPTX] handle more cases for loads and stores
Prince781 Mar 12, 2025
e8f37e1
[NVPTX] add coverage for v2f32 in ldg-invariant and fp-contract
Prince781 Mar 15, 2025
f72fa6d
[NVPTX] expand v2f32 SELECT_CC and BR_CC
Prince781 Apr 1, 2025
b5d296e
[NVPTX] update tests for mov.b32 canonicalization
Prince781 Apr 1, 2025
447c952
[NVPTX] add f32x2 version of fp-contract test
Prince781 Apr 1, 2025
94796ba
[NVPTX] use sink symbol for single-element unpacking of v2f32s
Prince781 Apr 9, 2025
2b31795
[tests] update f32x2-instructions selp.b16 and sink moves
Prince781 Apr 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[NVPTX] add combiner rule to peek through bitcast of BUILD_VECTOR
  • Loading branch information
Prince781 committed Apr 11, 2025
commit b8afe76ad250e46a1b6d390b99a54e0a8b9ea0f2
48 changes: 47 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -832,7 +832,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// We have some custom DAG combine patterns for these nodes
setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::EXTRACT_VECTOR_ELT, ISD::FADD,
ISD::MUL, ISD::SHL, ISD::SREM, ISD::UREM, ISD::VSELECT,
ISD::BUILD_VECTOR, ISD::ADDRSPACECAST, ISD::FP_ROUND});
ISD::BUILD_VECTOR, ISD::ADDRSPACECAST, ISD::FP_ROUND,
ISD::TRUNCATE});

// setcc for f16x2 and bf16x2 needs special handling to prevent
// legalizer's attempt to scalarize it due to v2i1 not being legal.
Expand Down Expand Up @@ -5732,6 +5733,49 @@ static SDValue PerformFP_ROUNDCombine(SDNode *N,
return SDValue();
}

/// Fold an i64 -> i32 TRUNCATE that reads one half of a bitcast v2f32
/// BUILD_VECTOR into a direct bitcast of the matching f32 operand:
///
///   i32 = truncate (i64 = bitcast (v2f32 = BUILD_VECTOR (f32 A, f32 B)))
///     -> i32 = bitcast (f32 A)
///
///   i32 = truncate (i64 = srl
///                     (i64 = bitcast (v2f32 = BUILD_VECTOR (f32 A, f32 B))),
///                     32)
///     -> i32 = bitcast (f32 B)
///
/// \param N   the ISD::TRUNCATE node being combined.
/// \param DCI combiner state; its DAG is used to create the replacement node.
/// \returns the replacement value, or an empty SDValue if neither pattern
///          matches.
static SDValue PerformTRUNCATECombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  const SDValue Src = N->getOperand(0);
  const EVT DstVT = N->getValueType(0);

  // Only the i64 -> i32 truncation is handled.
  if (Src.getValueType() != MVT::i64 || DstVT != MVT::i32)
    return SDValue();

  const SDLoc DL(N);

  // If V is `bitcast (v2f32 = BUILD_VECTOR ...)`, produce a bitcast of the
  // BUILD_VECTOR's operand number Elt to DstVT; otherwise produce nothing.
  auto FoldToElement = [&](SDValue V, unsigned Elt) -> SDValue {
    if (V.getOpcode() != ISD::BITCAST)
      return SDValue();
    SDValue Vec = V.getOperand(0);
    if (Vec.getOpcode() != ISD::BUILD_VECTOR ||
        Vec.getValueType() != MVT::v2f32)
      return SDValue();
    return DCI.DAG.getNode(ISD::BITCAST, DL, DstVT, Vec.getOperand(Elt));
  };

  switch (Src.getOpcode()) {
  case ISD::BITCAST:
    // Truncating the packed value directly selects the lower element.
    return FoldToElement(Src, 0);
  case ISD::SRL: {
    // A logical shift right by exactly 32 selects the upper element.
    auto *Amount = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (Amount && Amount->getAsAPIntVal() == 32)
      return FoldToElement(Src.getOperand(0), 1);
    return SDValue();
  }
  default:
    return SDValue();
  }
}

SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
CodeGenOptLevel OptLevel = getTargetMachine().getOptLevel();
Expand Down Expand Up @@ -5770,6 +5814,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return combineADDRSPACECAST(N, DCI);
case ISD::FP_ROUND:
return PerformFP_ROUNDCombine(N, DCI);
case ISD::TRUNCATE:
return PerformTRUNCATECombine(N, DCI);
}
return SDValue();
}
Expand Down