Skip to content

Commit b4a1fa2

Browse files
authored
Handle more than 64 registers - The finale (#103387)
* Add predicate registers
* Increase REGNUM_BITS to 7
* Assign float registers if node is mask
* Remove some TODO-SVE present for predicate register implementation
* Make sure to use vector registers if there is no mask
* handle some more printing of predicate registers
* jit format
* try to fix gcc failure
* Revert "try to fix gcc failure"
  This reverts commit 5452f6a.
* proper gcc-14 build error fix
1 parent 0d60428 commit b4a1fa2

11 files changed

69 additions (+69) and 65 deletions (-65) lines changed

src/coreclr/jit/codegenarm64test.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6254,7 +6254,7 @@ void CodeGen::genArm64EmitterUnitTestsSve()
62546254

62556255
// IF_SVE_CW_4A
62566256
theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_H,
6257-
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV); // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T>
6257+
INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV <Zd>.<T>, <Pv>/M, <Zn>.<T>
62586258
theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D,
62596259
INS_SCALABLE_OPTS_UNPREDICATED); // SEL <Zd>.<T>, <Pv>, <Zn>.<T>, <Zm>.<T>
62606260
theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S,

src/coreclr/jit/emit.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,7 @@ class emitter
758758
// x86: 38 bits
759759
// amd64: 38 bits
760760
// arm: 32 bits
761-
// arm64: 44 bits
761+
// arm64: 46 bits
762762
// loongarch64: 28 bits
763763
// risc-v: 28 bits
764764

@@ -828,7 +828,7 @@ class emitter
828828
// x86: 48 bits
829829
// amd64: 48 bits
830830
// arm: 48 bits
831-
// arm64: 53 bits
831+
// arm64: 55 bits
832832
// loongarch64: 46 bits
833833
// risc-v: 46 bits
834834

@@ -840,7 +840,7 @@ class emitter
840840
#if defined(TARGET_ARM)
841841
#define ID_EXTRA_BITFIELD_BITS (16)
842842
#elif defined(TARGET_ARM64)
843-
#define ID_EXTRA_BITFIELD_BITS (21)
843+
#define ID_EXTRA_BITFIELD_BITS (23)
844844
#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
845845
#define ID_EXTRA_BITFIELD_BITS (14)
846846
#elif defined(TARGET_XARCH)
@@ -881,7 +881,7 @@ class emitter
881881
// x86: 54/50 bits
882882
// amd64: 55/50 bits
883883
// arm: 54/50 bits
884-
// arm64: 60/55 bits
884+
// arm64: 62/57 bits
885885
// loongarch64: 53/48 bits
886886
// risc-v: 53/48 bits
887887

@@ -897,7 +897,7 @@ class emitter
897897
// x86: 10/14 bits
898898
// amd64: 9/14 bits
899899
// arm: 10/14 bits
900-
// arm64: 4/9 bits
900+
// arm64: 2/7 bits
901901
// loongarch64: 11/16 bits
902902
// risc-v: 11/16 bits
903903

src/coreclr/jit/emitarm64.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4250,11 +4250,9 @@ void emitter::emitIns_Mov(
42504250

42514251
case INS_sve_mov:
42524252
{
4253-
// TODO-SVE: Remove check for insOptsNone() when predicate registers
4254-
// are present.
4255-
if (insOptsNone(opt) && isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
4253+
if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg))
42564254
{
4257-
// assert(insOptsNone(opt));
4255+
assert(insOptsNone(opt));
42584256

42594257
opt = INS_OPTS_SCALABLE_B;
42604258
attr = EA_SCALABLE;

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3786,9 +3786,7 @@ void emitter::emitInsSve_R_R_R(instruction ins,
37863786
// MOV is an alias for CPY, and is always the preferred disassembly.
37873787
ins = INS_sve_mov;
37883788
}
3789-
// TODO-SVE: Change the below check to INS_SCALABLE_OPTS_PREDICATE_MERGE
3790-
// once predicate registers are present.
3791-
else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV)
3789+
else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)
37923790
{
37933791
assert(isVectorRegister(reg1));
37943792
assert(isPredicateRegister(reg2));
@@ -5909,7 +5907,7 @@ void emitter::emitInsSve_R_R_R_R(instruction ins,
59095907
{
59105908
// mov is a preferred alias for sel
59115909
return emitInsSve_R_R_R(INS_sve_mov, attr, reg1, reg2, reg3, opt,
5912-
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV);
5910+
INS_SCALABLE_OPTS_PREDICATE_MERGE);
59135911
}
59145912

59155913
assert(insOptsScalableStandard(opt));

src/coreclr/jit/instr.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,6 @@ enum insScalableOpts : unsigned
386386
INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov)
387387
INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov)
388388
INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both
389-
INS_SCALABLE_OPTS_PREDICATE_MERGE_MOV, // Use to distinguish mov (predicated) from other variants
390389
};
391390

392391
// Maps directly to the pattern used in SVE instructions such as cntb.

src/coreclr/jit/lsra.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11518,7 +11518,13 @@ void LinearScan::dumpRegRecordTitleIfNeeded()
1151811518
if ((lastDumpedRegisters != registersToDump) || (rowCountSinceLastTitle > MAX_ROWS_BETWEEN_TITLES))
1151911519
{
1152011520
lastUsedRegNumIndex = 0;
11521-
int lastRegNumIndex = compiler->compFloatingPointUsed ? REG_FP_LAST : REG_INT_LAST;
11521+
int lastRegNumIndex = compiler->compFloatingPointUsed ?
11522+
#ifdef HAS_MORE_THAN_64_REGISTERS
11523+
REG_MASK_LAST
11524+
#else
11525+
REG_FP_LAST
11526+
#endif
11527+
: REG_INT_LAST;
1152211528
for (int regNumIndex = 0; regNumIndex <= lastRegNumIndex; regNumIndex++)
1152311529
{
1152411530
if (registersToDump.IsRegNumInMask((regNumber)regNumIndex))
@@ -12129,7 +12135,7 @@ void LinearScan::verifyFinalAllocation()
1212912135

1213012136
case RefTypeKill:
1213112137
dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, currentBlock, NONE,
12132-
currentRefPosition.registerAssignment);
12138+
currentRefPosition.getKillRegisterAssignment());
1213312139
break;
1213412140

1213512141
case RefTypeFixedReg:

src/coreclr/jit/lsraarm64.cpp

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,30 +1588,37 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
15881588
}
15891589
else if (HWIntrinsicInfo::IsMaskedOperation(intrin.id))
15901590
{
1591-
SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet();
1592-
if (intrin.id == NI_Sve_ConditionalSelect)
1591+
if (!varTypeIsMask(intrin.op1->TypeGet()) && !HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
15931592
{
1594-
// If this is conditional select, make sure to check the embedded
1595-
// operation to determine the predicate mask.
1596-
assert(intrinsicTree->GetOperandCount() == 3);
1597-
assert(!HWIntrinsicInfo::IsLowMaskedOperation(intrin.id));
1598-
1599-
if (intrin.op2->OperIs(GT_HWINTRINSIC))
1593+
srcCount += BuildOperandUses(intrin.op1);
1594+
}
1595+
else
1596+
{
1597+
SingleTypeRegSet predMask = RBM_ALLMASK.GetPredicateRegSet();
1598+
if (intrin.id == NI_Sve_ConditionalSelect)
16001599
{
1601-
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
1602-
const HWIntrinsic intrinEmb(embOp2Node);
1603-
if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id))
1600+
// If this is conditional select, make sure to check the embedded
1601+
// operation to determine the predicate mask.
1602+
assert(intrinsicTree->GetOperandCount() == 3);
1603+
assert(!HWIntrinsicInfo::IsLowMaskedOperation(intrin.id));
1604+
1605+
if (intrin.op2->OperIs(GT_HWINTRINSIC))
16041606
{
1605-
predMask = RBM_LOWMASK.GetPredicateRegSet();
1607+
GenTreeHWIntrinsic* embOp2Node = intrin.op2->AsHWIntrinsic();
1608+
const HWIntrinsic intrinEmb(embOp2Node);
1609+
if (HWIntrinsicInfo::IsLowMaskedOperation(intrinEmb.id))
1610+
{
1611+
predMask = RBM_LOWMASK.GetPredicateRegSet();
1612+
}
16061613
}
16071614
}
1608-
}
1609-
else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id))
1610-
{
1611-
predMask = RBM_LOWMASK.GetPredicateRegSet();
1612-
}
1615+
else if (HWIntrinsicInfo::IsLowMaskedOperation(intrin.id))
1616+
{
1617+
predMask = RBM_LOWMASK.GetPredicateRegSet();
1618+
}
16131619

1614-
srcCount += BuildOperandUses(intrin.op1, predMask);
1620+
srcCount += BuildOperandUses(intrin.op1, predMask);
1621+
}
16151622
}
16161623
else if (intrinsicTree->OperIsMemoryLoadOrStore())
16171624
{

src/coreclr/jit/registerarm64.h

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -94,28 +94,31 @@ REGDEF(V29, 29+VBASE, VMASK(29), "d29", "s29")
9494
REGDEF(V30, 30+VBASE, VMASK(30), "d30", "s30")
9595
REGDEF(V31, 31+VBASE, VMASK(31), "d31", "s31")
9696

97-
// TODO-SVE: Fix once we add predicate registers
98-
REGALIAS(P0, V0)
99-
REGALIAS(P1, V1)
100-
REGALIAS(P2, V2)
101-
REGALIAS(P3, V3)
102-
REGALIAS(P4, V4)
103-
REGALIAS(P5, V5)
104-
REGALIAS(P6, V6)
105-
REGALIAS(P7, V7)
106-
REGALIAS(P8, V8)
107-
REGALIAS(P9, V9)
108-
REGALIAS(P10, V10)
109-
REGALIAS(P11, V11)
110-
REGALIAS(P12, V12)
111-
REGALIAS(P13, V13)
112-
REGALIAS(P14, V14)
113-
REGALIAS(P15, V15)
97+
#define PBASE 64
98+
#define PMASK(x) (1ULL << x)
11499

100+
/*
101+
REGDEF(name, rnum, mask, xname, wname) */
102+
REGDEF(P0, 0+PBASE, PMASK(0), "p0" , "na")
103+
REGDEF(P1, 1+PBASE, PMASK(1), "p1" , "na")
104+
REGDEF(P2, 2+PBASE, PMASK(2), "p2" , "na")
105+
REGDEF(P3, 3+PBASE, PMASK(3), "p3" , "na")
106+
REGDEF(P4, 4+PBASE, PMASK(4), "p4" , "na")
107+
REGDEF(P5, 5+PBASE, PMASK(5), "p5" , "na")
108+
REGDEF(P6, 6+PBASE, PMASK(6), "p6" , "na")
109+
REGDEF(P7, 7+PBASE, PMASK(7), "p7" , "na")
110+
REGDEF(P8, 8+PBASE, PMASK(8), "p8" , "na")
111+
REGDEF(P9, 9+PBASE, PMASK(9), "p9" , "na")
112+
REGDEF(P10, 10+PBASE, PMASK(10), "p10", "na")
113+
REGDEF(P11, 11+PBASE, PMASK(11), "p11", "na")
114+
REGDEF(P12, 12+PBASE, PMASK(12), "p12", "na")
115+
REGDEF(P13, 13+PBASE, PMASK(13), "p13", "na")
116+
REGDEF(P14, 14+PBASE, PMASK(14), "p14", "na")
117+
REGDEF(P15, 15+PBASE, PMASK(15), "p15", "na")
115118

116119

117-
// The registers with values 64 (NBASE) and above are not real register numbers
118-
#define NBASE 64
120+
// The registers with values 80 (NBASE) and above are not real register numbers
121+
#define NBASE 80
119122

120123
REGDEF(SP, 0+NBASE, 0x0000, "sp", "wsp?")
121124
// This must be last!

src/coreclr/jit/target.h

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ typedef uint64_t regMaskSmall;
230230
#endif
231231

232232
#ifdef TARGET_ARM64
233-
// #define HAS_MORE_THAN_64_REGISTERS 1
233+
#define HAS_MORE_THAN_64_REGISTERS 1
234234
#endif // TARGET_ARM64
235235

236236
// TODO: Rename regMaskSmall as RegSet64 (at least for 64-bit)
@@ -267,14 +267,7 @@ struct regMaskTP
267267
static constexpr regMaskTP CreateFromRegNum(regNumber reg, regMaskSmall mask)
268268
{
269269
#ifdef HAS_MORE_THAN_64_REGISTERS
270-
if (reg < 64)
271-
{
272-
return regMaskTP(mask, RBM_NONE);
273-
}
274-
else
275-
{
276-
return regMaskTP(RBM_NONE, mask);
277-
}
270+
return (reg < 64) ? regMaskTP(mask, RBM_NONE) : regMaskTP(RBM_NONE, mask);
278271
#else
279272
return regMaskTP(mask, RBM_NONE);
280273
#endif

src/coreclr/jit/targetarm64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060

6161
static_assert_no_msg(REG_PREDICATE_HIGH_LAST == REG_PREDICATE_LAST);
6262

63-
#define REGNUM_BITS 6 // number of bits in a REG_*
63+
#define REGNUM_BITS 7 // number of bits in a REG_*
6464
#define REGSIZE_BYTES 8 // number of bytes in one general purpose register
6565
#define FP_REGSIZE_BYTES 16 // number of bytes in one FP/SIMD register
6666
#define FPSAVE_REGSIZE_BYTES 8 // number of bytes in one FP/SIMD register that are saved/restored, for callee-saved registers

src/coreclr/jit/utils.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,7 @@ const char* dspRegRange(regMaskTP regMask, size_t& minSiz, const char* sep, regN
308308
{
309309
regMaskTP regBit = genRegMask(regNum);
310310

311-
if ((regMask & regBit) != 0)
311+
if ((regMask & regBit).IsNonEmpty())
312312
{
313313
// We have a register to display. It gets displayed now if:
314314
// 1. This is the first register to display of a new range of registers (possibly because

0 commit comments

Comments
 (0)