Skip to content

Commit c40d3a4

Browse files
authored
[PowerPC] Add dense math bfloat16 floating-point outer-product accumulate to DMR instructions (#133109)
This patch adds the following Dense Math Facility bfloat16 floating-point calculation instructions: dmxvbf16gerx2, dmxvbf16gerx2pp, dmxvbf16gerx2pn, dmxvbf16gerx2np, dmxvbf16gerx2nn, pmdmxvbf16gerx2, pmdmxvbf16gerx2pp, pmdmxvbf16gerx2pn, pmdmxvbf16gerx2np, pmdmxvbf16gerx2nn, along with their corresponding intrinsics and tests.
1 parent 3ca2fa7 commit c40d3a4

File tree

6 files changed

+779
-2
lines changed

6 files changed

+779
-2
lines changed

llvm/include/llvm/IR/IntrinsicsPowerPC.td

+23
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,22 @@ multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
280280
[IntrNoMem]>;
281281
}
282282

283+
// Defines a family of five intrinsics that all return llvm_v1024i1_ty and are
// marked IntrNoMem:
//   NAME           - takes exactly `args`.
//   pp, pn, np, nn - take an additional leading llvm_v1024i1_ty operand
//                    followed by `args`.
// NOTE(review): the pp/pn/np/nn suffixes presumably select the sign of the
// product and of the accumulator input - confirm against the ISA description.
multiclass PowerPC_MMA_DMR_Intrinsic<list<LLVMType> args> {
284+
// Base form: no incoming v1024i1 operand.
def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>;
285+
// The four suffixed forms share one signature: v1024i1 prepended to `args`.
def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
286+
!listconcat([llvm_v1024i1_ty], args),
287+
[IntrNoMem]>;
288+
def pn : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
289+
!listconcat([llvm_v1024i1_ty], args),
290+
[IntrNoMem]>;
291+
def np : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
292+
!listconcat([llvm_v1024i1_ty], args),
293+
[IntrNoMem]>;
294+
def nn : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
295+
!listconcat([llvm_v1024i1_ty], args),
296+
[IntrNoMem]>;
297+
}
298+
283299
multiclass PowerPC_MMA_DMR_PP_Intrinsic<list<LLVMType> args> {
284300
def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>;
285301
def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
@@ -1732,6 +1748,13 @@ let TargetPrefix = "ppc" in {
17321748
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty,
17331749
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
17341750
[IntrNoMem]>;
1751+
1752+
// MMA+ Reduced-Precision: bfloat16 Outer Product Intrinsic Definitions.
// Each defm expands PowerPC_MMA_DMR_Intrinsic, so it yields the base intrinsic
// plus its pp/pn/np/nn variants. The non-prefixed family takes
// (v256i1, v16i8); the pm-prefixed family takes three further i32 operands,
// which the selection patterns in PPCInstrFutureMMA.td bind as
// XMSK, YMSK and PMSK.
1753+
defm int_ppc_mma_dmxvbf16gerx2 :
1754+
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
1755+
defm int_ppc_mma_pmdmxvbf16gerx2 :
1756+
PowerPC_MMA_DMR_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
1757+
llvm_i32_ty, llvm_i32_ty]>;
17351758
}
17361759

17371760
// XL Compat intrinsics.

llvm/lib/Target/PowerPC/PPCInstrFutureMMA.td

+161-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
9595
list<dag> pattern>
9696
: PI<1, opcode, OOL, IOL, asmstr, itin> {
9797
bits<3> AT;
98-
bits<6> XAp;
98+
bits<5> XAp;
9999
bits<6> XB;
100100
bits<8> XMSK;
101101
bits<4> YMSK;
@@ -123,6 +123,40 @@ class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
123123
let Inst{63} = 0;
124124
}
125125

126+
// Instruction format built on PI<1, opcode, ...> with a 3-bit AT, a 5-bit XAp,
// a 6-bit XB and three mask operands: 8-bit XMSK, 4-bit YMSK and 2-bit PMSK.
// Bits 0-31 of Inst hold the prefix fields and bits 32-63 hold the instruction
// fields, as laid out below.
// NOTE(review): Inst bits not assigned here (e.g. 0-5 and 32-37) are presumably
// filled in by the PI base class - confirm against its definition.
class MMIRR_XX3Form_X8Y4P2_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
127+
string asmstr, InstrItinClass itin,
128+
list<dag> pattern>
129+
: PI<1, opcode, OOL, IOL, asmstr, itin> {
130+
bits<3> AT;
131+
bits<5> XAp;
132+
bits<6> XB;
133+
bits<8> XMSK;
134+
bits<4> YMSK;
135+
bits<2> PMSK;
136+
137+
let Pattern = pattern;
138+
139+
// The prefix.
140+
let Inst{6-7} = 3;
141+
let Inst{8-11} = 9;
142+
let Inst{12-15} = 0;
143+
// PMSK is only two bits wide here; the two bits that follow it are zero.
let Inst{16-17} = PMSK;
144+
let Inst{18-19} = 0;
145+
let Inst{20-27} = XMSK;
146+
let Inst{28-31} = YMSK;
147+
148+
// The instruction.
149+
let Inst{38-40} = AT;
150+
let Inst{41-42} = 0;
151+
// XAp is split: its low four bits go here, its top bit goes to Inst{61}.
let Inst{43-46} = XAp{3-0};
152+
let Inst{47} = 0;
153+
// XB is split: its low five bits go here, its top bit goes to Inst{62}.
let Inst{48-52} = XB{4-0};
154+
let Inst{53-60} = xo;
155+
let Inst{61} = XAp{4};
156+
let Inst{62} = XB{5};
157+
let Inst{63} = 0;
158+
}
159+
126160
multiclass DMR_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
127161
string asmstr> {
128162
let Predicates = [MMA, IsISAFuture] in {
@@ -159,6 +193,83 @@ multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
159193
}
160194
}
161195

196+
// Defines the two non-prefixed instruction records of a family, both guarded
// by the [MMA, IsISAFuture] predicates and using the XX3Form_AT3_XAp5B6 format:
//   NAME - extended opcode is xo OR 0x11; $AT is output-only and marked
//          @earlyclobber.
//   PP   - extended opcode is xo unchanged; takes an extra dmr:$ATi input that
//          is tied to $AT and excluded from the encoding (NoEncode).
// The assembly string is asmbase (plus "pp" for PP), a space, then asmstr.
multiclass DMR_BF16_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
197+
string asmstr> {
198+
let Predicates = [MMA, IsISAFuture] in {
199+
def NAME :
200+
XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x11), (outs dmr:$AT), IOL,
201+
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
202+
RegConstraint<"@earlyclobber $AT">;
203+
def PP :
204+
XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
205+
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
206+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
207+
}
208+
}
209+
210+
// Extends DMR_BF16_UM_XOEO with the "pm"-prefixed masked forms.
// Besides the NAME and PP records inherited through the defm, it defines
// PM#NAME and PM#NAME#PP using the MMIRR_XX3Form_X8Y4P2_XAp5B6 format, guarded
// by [MMA, PrefixInstrs, IsISAFuture]. The prefixed forms append three
// immediate operands to IOL (u8imm $XMSK, u4imm $YMSK, u2imm $PMSK) and append
// ", $XMSK, $YMSK, $PMSK" to the assembly string. Extended opcodes mirror the
// non-prefixed forms: xo OR 0x11 for the base, xo for PP.
multiclass DMR_UM_M284_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
211+
string asmstr> {
212+
defm NAME : DMR_BF16_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
213+
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
214+
// Base prefixed form: output-only $AT, marked @earlyclobber.
def PM#NAME :
215+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
216+
opcode, !or(xo, 0x11), (outs dmr:$AT),
217+
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
218+
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
219+
IIC_VecFP, []>,
220+
RegConstraint<"@earlyclobber $AT">;
221+
// PP prefixed form: $ATi is tied to $AT and not encoded.
def PM#NAME#PP :
222+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
223+
opcode, xo, (outs dmr:$AT),
224+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
225+
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
226+
IIC_VecFP, []>,
227+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
228+
}
229+
}
230+
231+
// Extends DMR_UM_M284_XOEO with the PN, NP and NN forms, in both non-prefixed
// (XX3Form_AT3_XAp5B6) and "pm"-prefixed (MMIRR_XX3Form_X8Y4P2_XAp5B6) flavours.
// Their extended opcodes are derived from xo by XOR with the constants that
// appear in the multiclass name:
//   PN: xo XOR 0xF9    NP: xo XOR 0x39    NN: xo XOR 0xA0
// Every record here takes dmr:$ATi tied to $AT and excluded from the encoding.
multiclass DMR_NEG_UM_M284_XOXORf939a0<bits<6> opcode, bits<8> xo, dag IOL,
232+
string asmbase, string asmstr> {
233+
// Brings in NAME, PP, PM#NAME and PM#NAME#PP.
defm NAME : DMR_UM_M284_XOEO<opcode, xo, IOL, asmbase, asmstr>;
234+
// Non-prefixed PN / NP / NN forms.
let Predicates = [MMA, IsISAFuture] in {
235+
def PN : XX3Form_AT3_XAp5B6<
236+
opcode, !xor(xo, 0xF9), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
237+
!strconcat(asmbase#"pn ", asmstr), IIC_VecFP, []>,
238+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
239+
def NP : XX3Form_AT3_XAp5B6<
240+
opcode, !xor(xo, 0x39), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
241+
!strconcat(asmbase#"np ", asmstr), IIC_VecFP, []>,
242+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
243+
def NN : XX3Form_AT3_XAp5B6<
244+
opcode, !xor(xo, 0xA0), (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
245+
!strconcat(asmbase#"nn ", asmstr), IIC_VecFP, []>,
246+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
247+
}
248+
// Prefixed PN / NP / NN forms: same opcode derivation, plus the
// XMSK / YMSK / PMSK immediates appended to the operands and asm string.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
249+
def PM#NAME#PN :
250+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
251+
opcode, !xor(xo, 0xF9), (outs dmr:$AT),
252+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
253+
!strconcat("pm"#asmbase#"pn ", asmstr#", $XMSK, $YMSK, $PMSK"),
254+
IIC_VecFP, []>,
255+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
256+
def PM#NAME#NP :
257+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
258+
opcode, !xor(xo, 0x39), (outs dmr:$AT),
259+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
260+
!strconcat("pm"#asmbase#"np ", asmstr#", $XMSK, $YMSK, $PMSK"),
261+
IIC_VecFP, []>,
262+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
263+
def PM#NAME#NN :
264+
MMIRR_XX3Form_X8Y4P2_XAp5B6<
265+
opcode, !xor(xo, 0xA0), (outs dmr:$AT),
266+
!con((ins dmr:$ATi), !con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
267+
!strconcat("pm"#asmbase#"nn ", asmstr#", $XMSK, $YMSK, $PMSK"),
268+
IIC_VecFP, []>,
269+
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
270+
}
271+
}
272+
162273
let Predicates = [IsISAFuture] in {
163274
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
164275
(outs vsrprc:$XAp, vsrprc:$XBp),
@@ -231,6 +342,11 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
231342
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
232343
}
233344

345+
// DMXVBF16GERX2, DMXVBF16GERX2PP, DMXVBF16GERX2PN, DMXVBF16GERX2NP, DMXVBF16GERX2NN
346+
// PMDMXVBF16GERX2, PMDMXVBF16GERX2PP, PMDMXVBF16GERX2PN, PMDMXVBF16GERX2NP, PMDMXVBF16GERX2NN
// Primary opcode 59, base xo 74 (0x4A). Following the multiclass rules, the
// extended opcodes work out to: base 74|0x11 = 91, PP = 74, PN 74^0xF9 = 179,
// NP 74^0x39 = 115, NN 74^0xA0 = 234 (the prefixed forms use the same values).
347+
defm DMXVBF16GERX2 : DMR_NEG_UM_M284_XOXORf939a0<59, 74, (ins vsrprc:$XAp, vsrc:$XB),
348+
"dmxvbf16gerx2", "$AT, $XAp, $XB">;
349+
234350
// MMA+ Intrinsics
235351
let Predicates = [MMA, IsISAFuture] in {
236352
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
@@ -240,6 +356,21 @@ let Predicates = [MMA, IsISAFuture] in {
240356

241357
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
242358
(DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>;
359+
360+
// Selection patterns mapping each non-prefixed bfloat16 outer-product intrinsic
// to the instruction record of the same name. The base form has no $ATi
// operand; the pp/pn/np/nn forms forward $ATi as the first operand.
// NOTE(review): RCCp.BToVSRC is defined outside this view; it presumably
// re-classes the v16i8 $XB operand to a VSRC register - confirm.
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2 v256i1:$XAp, v16i8:$XB)),
361+
(DMXVBF16GERX2 $XAp, RCCp.BToVSRC)>;
362+
363+
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
364+
(DMXVBF16GERX2PP $ATi, $XAp, RCCp.BToVSRC)>;
365+
366+
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
367+
(DMXVBF16GERX2PN $ATi, $XAp, RCCp.BToVSRC)>;
368+
369+
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
370+
(DMXVBF16GERX2NP $ATi, $XAp, RCCp.BToVSRC)>;
371+
372+
def : Pat<(v1024i1 (int_ppc_mma_dmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
373+
(DMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC)>;
243374
}
244375

245376
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
@@ -259,4 +390,33 @@ let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
259390
Msk4Imm:$PMSK)),
260391
(PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
261392
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
393+
394+
// Selection patterns mapping each pm-prefixed bfloat16 outer-product intrinsic
// to the instruction record of the same name. The three trailing intrinsic
// operands are matched as Msk8Imm $XMSK, Msk4Imm $YMSK and Msk2Imm $PMSK and
// passed through unchanged. The base form has no $ATi operand; the
// pp/pn/np/nn forms forward $ATi as the first operand.
// NOTE(review): Msk*Imm and RCCp.BToVSRC are defined outside this view; the
// Msk widths presumably match the u8imm/u4imm/u2imm instruction operands.
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
395+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
396+
(PMDMXVBF16GERX2 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
397+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
398+
399+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
400+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
401+
Msk2Imm:$PMSK)),
402+
(PMDMXVBF16GERX2PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
403+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
404+
405+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2pn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
406+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
407+
Msk2Imm:$PMSK)),
408+
(PMDMXVBF16GERX2PN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
409+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
410+
411+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2np v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
412+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
413+
Msk2Imm:$PMSK)),
414+
(PMDMXVBF16GERX2NP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
415+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
416+
417+
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvbf16gerx2nn v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
418+
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
419+
Msk2Imm:$PMSK)),
420+
(PMDMXVBF16GERX2NN $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
421+
Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
262422
}

0 commit comments

Comments
 (0)