@@ -207,33 +207,39 @@ class ValueToRegClass<ValueType T> {
207207// Some Common Instruction Class Templates
208208//===----------------------------------------------------------------------===//
209209
210+ // Utility class that bundles a DAG value type with its register class and immediate operand, for more
211+ // convenient iteration and parameterization (e.g. the foreach over I16RT/I32RT/I64RT in multiclass I3).
212+ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm> {
213+ ValueType Ty = ty; // DAG value type
214+ NVPTXRegClass RC = rc; // register class paired with Ty
215+ Operand Imm = imm; // immediate operand paired with Ty
216+ int Size = ty.Size; // copied from the value type's Size field; I3 pastes it into the mnemonic
217+ }
218+
219+ def I16RT : RegTyInfo<i16, Int16Regs, i16imm>; // 16-bit integer bundle
220+ def I32RT : RegTyInfo<i32, Int32Regs, i32imm>; // 32-bit integer bundle
221+ def I64RT : RegTyInfo<i64, Int64Regs, i64imm>; // 64-bit integer bundle
222+
210223// Template for integer instructions with one destination and two sources, instantiated for i16, i32, and i64.
211224// The instructions are named "<OpcStr><Width>" (e.g. "add.s64"). Each width gets "rr" and "ri" forms; the "ir" (immediate-first) form is emitted only when commutative is false.
212- multiclass I3<string OpcStr, SDNode OpNode> {
213- def i64rr :
214- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
215- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
216- [(set i64:$dst, (OpNode i64:$a, i64:$b))]>;
217- def i64ri :
218- NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
219- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),
220- [(set i64:$dst, (OpNode i64:$a, imm:$b))]>;
221- def i32rr :
222- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
223- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
224- [(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
225- def i32ri :
226- NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
227- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),
228- [(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
229- def i16rr :
230- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
231- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
232- [(set i16:$dst, (OpNode i16:$a, i16:$b))]>;
233- def i16ri :
234- NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
235- !strconcat(OpcStr, "16 \t$dst, $a, $b;"),
236- [(set i16:$dst, (OpNode i16:$a, (imm):$b))]>;
225+ multiclass I3<string OpcStr, SDNode OpNode, bit commutative> {
226+ foreach t = [I16RT, I32RT, I64RT] in { // one group of defs per RegTyInfo record
227+ defvar asmstr = OpcStr # t.Size # " \t$dst, $a, $b;"; // mnemonic is OpcStr followed by t.Size
228+
229+ def t.Ty # rr : // register, register
230+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b),
231+ asmstr,
232+ [(set t.Ty:$dst, (OpNode t.Ty:$a, t.Ty:$b))]>;
233+ def t.Ty # ri : // register, immediate
234+ NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
235+ asmstr,
236+ [(set t.Ty:$dst, (OpNode t.Ty:$a, imm:$b))]>; // register operand typed with t.Ty, as in the rr pattern
237+ if !not(commutative) then // immediate-first form only for non-commutative ops; presumably ri already covers a constant operand of a commutative node - TODO confirm
238+ def t.Ty # ir : // immediate, register
239+ NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
240+ asmstr,
241+ [(set t.Ty:$dst, (OpNode imm:$a, t.Ty:$b))]>; // register operand typed with t.Ty, as in the rr pattern
242+ }
237243}
238244
239245class I16x2<string OpcStr, SDNode OpNode> :
@@ -870,8 +876,8 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
870876
871877// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
872878// also use these for unsigned arithmetic.
873- defm ADD : I3<"add.s", add>;
874- defm SUB : I3<"sub.s", sub>;
879+ defm ADD : I3<"add.s", add, /*commutative=*/ true >; // commutative: I3 emits only the rr and ri forms
880+ defm SUB : I3<"sub.s", sub, /*commutative=*/ false >; // non-commutative: I3 also emits the ir (immediate-first) form
875881
876882def ADD16x2 : I16x2<"add.s", add>;
877883
@@ -883,18 +889,18 @@ defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc>;
883889defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde>;
884890defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube>;
885891
886- defm MULT : I3<"mul.lo.s", mul>;
892+ defm MULT : I3<"mul.lo.s", mul, /*commutative=*/ true >; // rr and ri forms only
887893
888- defm MULTHS : I3<"mul.hi.s", mulhs>;
889- defm MULTHU : I3<"mul.hi.u", mulhu>;
894+ defm MULTHS : I3<"mul.hi.s", mulhs, /*commutative=*/ true >; // rr and ri forms only
895+ defm MULTHU : I3<"mul.hi.u", mulhu, /*commutative=*/ true >; // rr and ri forms only
890896
891- defm SDIV : I3<"div.s", sdiv>;
892- defm UDIV : I3<"div.u", udiv>;
897+ defm SDIV : I3<"div.s", sdiv, /*commutative=*/ false >; // rr, ri, and ir forms
898+ defm UDIV : I3<"div.u", udiv, /*commutative=*/ false >; // rr, ri, and ir forms
893899
894900// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
895901// will lower it. NOTE(review): with commutative=false I3 also defines ir forms here - confirm whether this note applies to them as well.
896- defm SREM : I3<"rem.s", srem>;
897- defm UREM : I3<"rem.u", urem>;
902+ defm SREM : I3<"rem.s", srem, /*commutative=*/ false >; // rr, ri, and ir forms
903+ defm UREM : I3<"rem.u", urem, /*commutative=*/ false >; // rr, ri, and ir forms
898904
899905// Integer absolute value. NumBits should be one less than the bit width of RC.
900906// This idiom implements the algorithm at
@@ -909,10 +915,10 @@ defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
909915defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
910916
911917// Integer min/max.
912- defm SMAX : I3<"max.s", smax>;
913- defm UMAX : I3<"max.u", umax>;
914- defm SMIN : I3<"min.s", smin>;
915- defm UMIN : I3<"min.u", umin>;
918+ defm SMAX : I3<"max.s", smax, /*commutative=*/ true >; // min/max are passed commutative=true, so only rr and ri forms are emitted
919+ defm UMAX : I3<"max.u", umax, /*commutative=*/ true >;
920+ defm SMIN : I3<"min.s", smin, /*commutative=*/ true >;
921+ defm UMIN : I3<"min.u", umin, /*commutative=*/ true >;
916922
917923def SMAX16x2 : I16x2<"max.s", smax>;
918924def UMAX16x2 : I16x2<"max.u", umax>;
@@ -1392,25 +1398,32 @@ def FDIV32ri_prec :
13921398//
13931399
13941400multiclass FMA<string OpcStr, RegisterClass RC, Operand ImmCls, Predicate Pred> { // fma variants; the def suffix letters say whether $a, $b, $c are register (r) or immediate (i); every def requires Pred
1395- def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
1396- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1397- [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
1398- Requires<[Pred]>;
1399- def rri : NVPTXInst<(outs RC:$dst),
1400- (ins RC:$a, RC:$b, ImmCls:$c),
1401- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1402- [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
1403- Requires<[Pred]>;
1404- def rir : NVPTXInst<(outs RC:$dst),
1405- (ins RC:$a, ImmCls:$b, RC:$c),
1406- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1407- [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
1408- Requires<[Pred]>;
1409- def rii : NVPTXInst<(outs RC:$dst),
1410- (ins RC:$a, ImmCls:$b, ImmCls:$c),
1411- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
1412- [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
1413- Requires<[Pred]>;
1401+ defvar asmstr = OpcStr # " \t$dst, $a, $b, $c;"; // assembly string shared by every variant below
1402+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), // $a, $b, $c all registers
1403+ asmstr,
1404+ [(set RC:$dst, (fma RC:$a, RC:$b, RC:$c))]>,
1405+ Requires<[Pred]>;
1406+ def rri : NVPTXInst<(outs RC:$dst), // $c is a floating-point immediate
1407+ (ins RC:$a, RC:$b, ImmCls:$c),
1408+ asmstr,
1409+ [(set RC:$dst, (fma RC:$a, RC:$b, fpimm:$c))]>,
1410+ Requires<[Pred]>;
1411+ def rir : NVPTXInst<(outs RC:$dst), // $b is a floating-point immediate
1412+ (ins RC:$a, ImmCls:$b, RC:$c),
1413+ asmstr,
1414+ [(set RC:$dst, (fma RC:$a, fpimm:$b, RC:$c))]>,
1415+ Requires<[Pred]>;
1416+ def rii : NVPTXInst<(outs RC:$dst), // $b and $c are floating-point immediates
1417+ (ins RC:$a, ImmCls:$b, ImmCls:$c),
1418+ asmstr,
1419+ [(set RC:$dst, (fma RC:$a, fpimm:$b, fpimm:$c))]>,
1420+ Requires<[Pred]>;
1421+ def iir : NVPTXInst<(outs RC:$dst), // $a and $b are floating-point immediates
1422+ (ins ImmCls:$a, ImmCls:$b, RC:$c),
1423+ asmstr,
1424+ [(set RC:$dst, (fma fpimm:$a, fpimm:$b, RC:$c))]>,
1425+ Requires<[Pred]>;
1426+
14141427}
14151428
14161429multiclass FMA_F16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> {
0 commit comments