Skip to content

Commit 9561bed

Browse files
Add ARM64 encodings for group IF_SVE_GQ_3A (#98352)
* Add ARM64 encodings for group IF_SVE_GQ_3A * Address review comments
1 parent 13e63af commit 9561bed

File tree

4 files changed

+157
-4
lines changed

4 files changed

+157
-4
lines changed

src/coreclr/jit/codegenarm64test.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5035,6 +5035,18 @@ void CodeGen::genArm64EmitterUnitTestsSve()
50355035
theEmitter->emitIns_R_R_R_R(INS_sve_nmatch, EA_SCALABLE, REG_P0, REG_P7, REG_V11, REG_V31,
50365036
INS_OPTS_SCALABLE_H); // NMATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
50375037

5038+
// IF_SVE_GQ_3A
5039+
theEmitter->emitIns_R_R_R(INS_sve_bfcvtnt, EA_SCALABLE, REG_V3, REG_P0, REG_V4); // BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S
5040+
theEmitter->emitIns_R_R_R(INS_sve_fcvtlt, EA_SCALABLE, REG_V0, REG_P7, REG_V1,
5041+
INS_OPTS_S_TO_D); // FCVTLT <Zd>.D, <Pg>/M, <Zn>.S
5042+
theEmitter->emitIns_R_R_R(INS_sve_fcvtlt, EA_SCALABLE, REG_V14, REG_P7, REG_V20,
5043+
INS_OPTS_H_TO_S); // FCVTLT <Zd>.S, <Pg>/M, <Zn>.H
5044+
theEmitter->emitIns_R_R_R(INS_sve_fcvtnt, EA_SCALABLE, REG_V18, REG_P3, REG_V9,
5045+
INS_OPTS_S_TO_H); // FCVTNT <Zd>.H, <Pg>/M, <Zn>.S
5046+
theEmitter->emitIns_R_R_R(INS_sve_fcvtnt, EA_SCALABLE, REG_V12, REG_P3, REG_V5,
5047+
INS_OPTS_D_TO_S); // FCVTNT <Zd>.S, <Pg>/M, <Zn>.D
5048+
theEmitter->emitIns_R_R_R(INS_sve_fcvtxnt, EA_SCALABLE, REG_V1, REG_P2, REG_V3); // FCVTXNT <Zd>.S, <Pg>/M, <Zn>.D
5049+
50385050
// IF_SVE_GR_3A
50395051
theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19,
50405052
INS_OPTS_SCALABLE_H); // FADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>

src/coreclr/jit/emitarm64.cpp

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1343,6 +1343,25 @@ void emitter::emitInsSanityCheck(instrDesc* id)
13431343
assert(isVectorRegister(id->idReg4())); // mmmmm
13441344
break;
13451345

1346+
case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
1347+
switch (id->idIns())
1348+
{
1349+
case INS_sve_fcvtnt:
1350+
case INS_sve_fcvtlt:
1351+
assert(insOptsConvertFloatStepwise(id->idInsOpt()));
1352+
FALLTHROUGH;
1353+
case INS_sve_fcvtxnt:
1354+
case INS_sve_bfcvtnt:
1355+
assert(isVectorRegister(id->idReg1())); // ddddd
1356+
assert(isLowPredicateRegister(id->idReg2())); // ggg
1357+
assert(isVectorRegister(id->idReg3())); // nnnnn
1358+
break;
1359+
default:
1360+
assert(!"unreachable");
1361+
break;
1362+
}
1363+
break;
1364+
13461365
case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
13471366
elemsize = id->idOpSize();
13481367
assert(isScalableVectorSize(elemsize));
@@ -6551,6 +6570,53 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt)
65516570
return registerListSize;
65526571
}
65536572

6573+
/*****************************************************************************
6574+
*
6575+
* Expands an option that has different size operands (INS_OPTS_*_TO_*) into
6576+
* a pair of scalable options where the first describes the size of the
6577+
* destination operand and the second describes the size of the source operand.
6578+
*/
6579+
6580+
// opt - an INS_OPTS_<src>_TO_<dst> conversion option (H, S and D combinations are handled below).
// dst - [out] receives the INS_OPTS_SCALABLE_* option for the destination operand.
// src - [out] receives the INS_OPTS_SCALABLE_* option for the source operand.
/*static*/ void emitter::optExpandConversionPair(insOpts opt, insOpts& dst, insOpts& src)
6581+
{
6582+
// Start both outputs at INS_OPTS_NONE so the assert after the switch can detect an unhandled option.
dst = INS_OPTS_NONE;
6583+
src = INS_OPTS_NONE;
6584+
6585+
// The option name reads <source>_TO_<destination>; each case assigns the matching scalable options.
switch (opt)
6586+
{
6587+
case INS_OPTS_H_TO_S:
6588+
dst = INS_OPTS_SCALABLE_S;
6589+
src = INS_OPTS_SCALABLE_H;
6590+
break;
6591+
case INS_OPTS_S_TO_H:
6592+
dst = INS_OPTS_SCALABLE_H;
6593+
src = INS_OPTS_SCALABLE_S;
6594+
break;
6595+
case INS_OPTS_S_TO_D:
6596+
dst = INS_OPTS_SCALABLE_D;
6597+
src = INS_OPTS_SCALABLE_S;
6598+
break;
6599+
case INS_OPTS_D_TO_S:
6600+
dst = INS_OPTS_SCALABLE_S;
6601+
src = INS_OPTS_SCALABLE_D;
6602+
break;
6603+
case INS_OPTS_H_TO_D:
6604+
dst = INS_OPTS_SCALABLE_D;
6605+
src = INS_OPTS_SCALABLE_H;
6606+
break;
6607+
case INS_OPTS_D_TO_H:
6608+
dst = INS_OPTS_SCALABLE_H;
6609+
src = INS_OPTS_SCALABLE_D;
6610+
break;
6611+
default:
// Any option other than the six conversions above is treated as a caller error.
6612+
noway_assert(!"unreachable");
6613+
break;
6614+
}
6615+
6616+
// Both outputs must have been assigned by one of the cases above.
assert(dst != INS_OPTS_NONE && src != INS_OPTS_NONE);
6617+
return;
6618+
}
6619+
65546620
// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
65556621
// asserts and returns EA_UNKNOWN if an invalid 'arrangement' value is passed
65566622
//
@@ -10890,6 +10956,18 @@ void emitter::emitIns_R_R_R(instruction ins,
1089010956
fmt = IF_SVE_EU_3A;
1089110957
break;
1089210958

10959+
case INS_sve_fcvtnt:
10960+
case INS_sve_fcvtlt:
10961+
assert(insOptsConvertFloatStepwise(opt));
10962+
FALLTHROUGH;
10963+
case INS_sve_fcvtxnt:
10964+
case INS_sve_bfcvtnt:
10965+
assert(isVectorRegister(reg1)); // ddddd
10966+
assert(isLowPredicateRegister(reg2)); // ggg
10967+
assert(isVectorRegister(reg3)); // nnnnn
10968+
fmt = IF_SVE_GQ_3A;
10969+
break;
10970+
1089310971
case INS_sve_faddp:
1089410972
case INS_sve_fmaxnmp:
1089510973
case INS_sve_fmaxp:
@@ -21284,6 +21362,24 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
2128421362
dst += emitOutput_Instr(dst, code);
2128521363
break;
2128621364

21365+
case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
21366+
code = emitInsCodeSve(ins, fmt);
21367+
21368+
if (ins == INS_sve_fcvtnt && id->idInsOpt() == INS_OPTS_D_TO_S)
21369+
{
21370+
code |= (1 << 22 | 1 << 17);
21371+
}
21372+
else if (ins == INS_sve_fcvtlt && id->idInsOpt() == INS_OPTS_S_TO_D)
21373+
{
21374+
code |= (1 << 22 | 1 << 17);
21375+
}
21376+
21377+
code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
21378+
code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
21379+
code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn
21380+
dst += emitOutput_Instr(dst, code);
21381+
break;
21382+
2128721383
// Scalable to general register.
2128821384
case IF_SVE_CO_3A: // ........xx...... ...gggmmmmmddddd -- SVE conditionally extract element to general register
2128921385
case IF_SVE_CS_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
@@ -25287,6 +25383,37 @@ void emitter::emitDispInsHelp(
2528725383
break;
2528825384
}
2528925385

25386+
// <Zd>.H, <Pg>/M, <Zn>.S
25387+
// <Zd>.S, <Pg>/M, <Zn>.D
25388+
// <Zd>.D, <Pg>/M, <Zn>.S
25389+
// <Zd>.S, <Pg>/M, <Zn>.H
25390+
case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
25391+
{
25392+
insOpts opt = id->idInsOpt();
25393+
25394+
switch (ins)
25395+
{
25396+
// These cases have only one combination of operands so the option may be omitted.
25397+
case INS_sve_fcvtxnt:
25398+
opt = INS_OPTS_D_TO_S;
25399+
break;
25400+
case INS_sve_bfcvtnt:
25401+
opt = INS_OPTS_S_TO_H;
25402+
break;
25403+
default:
25404+
break;
25405+
}
25406+
25407+
insOpts dst = INS_OPTS_NONE;
25408+
insOpts src = INS_OPTS_NONE;
25409+
optExpandConversionPair(opt, dst, src);
25410+
25411+
emitDispSveReg(id->idReg1(), dst, true); // ddddd
25412+
emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg
25413+
emitDispSveReg(id->idReg3(), src, false); // nnnnn
25414+
break;
25415+
}
25416+
2529025417
// { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>, MUL VL}]
2529125418
// Some of these formats may allow changing the element size instead of using 'D' for all instructions.
2529225419
case IF_SVE_IH_3A: // ............iiii ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus
@@ -28396,6 +28523,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
2839628523
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
2839728524
break;
2839828525

28526+
case IF_SVE_GQ_3A: // ................ ...gggnnnnnddddd -- SVE floating-point convert precision odd elements
28527+
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
28528+
result.insLatency = PERFSCORE_LATENCY_3C;
28529+
break;
28530+
2839928531
// Floating point arithmetic
2840028532
// Floating point min/max pairwise
2840128533
case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations

src/coreclr/jit/emitarm64.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,10 @@ static insOpts optMakeArrangement(emitAttr datasize, emitAttr elemsize);
848848
// For the given 'datasize' and 'opt' returns true if it specifies a valid vector register arrangement
849849
static bool isValidArrangement(emitAttr datasize, insOpts opt);
850850

851+
// Expands an option that has different size operands (INS_OPTS_*_TO_*) into a pair of scalable options where
852+
// the first describes the size of the destination operand and the second describes the size of the source operand.
853+
static void optExpandConversionPair(insOpts opt, insOpts& dst, insOpts& src);
854+
851855
// For the given 'arrangement' returns the 'datasize' specified by the vector register arrangement
852856
static emitAttr optGetDatasize(insOpts arrangement);
853857

@@ -1156,6 +1160,11 @@ inline static bool insOptsAnyArrangement(insOpts opt)
11561160
return ((opt >= INS_OPTS_8B) && (opt <= INS_OPTS_2D));
11571161
}
11581162

1163+
// Returns true if 'opt' is one of INS_OPTS_H_TO_S, INS_OPTS_S_TO_H, INS_OPTS_D_TO_S or INS_OPTS_S_TO_D,
// i.e. a conversion between H and S or between S and D. Any other option, including
// INS_OPTS_H_TO_D and INS_OPTS_D_TO_H, yields false.
inline static bool insOptsConvertFloatStepwise(insOpts opt)
1164+
{
1165+
return (opt == INS_OPTS_H_TO_S || opt == INS_OPTS_S_TO_H || opt == INS_OPTS_D_TO_S || opt == INS_OPTS_S_TO_D);
1166+
}
1167+
11591168
inline static bool insOptsConvertFloatToFloat(insOpts opt)
11601169
{
11611170
return ((opt >= INS_OPTS_S_TO_D) && (opt <= INS_OPTS_D_TO_H));

src/coreclr/jit/instrsarm64sve.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1312,8 +1312,8 @@ INST2(pmullt, "pmullt", 0, IF_SV
13121312

13131313

13141314
// enum name info SVE_GQ_3A SVE_HG_2A
1315-
INST2(fcvtnt, "fcvtnt", 0, IF_SVE_2BJ, 0x64CAA000, 0x650A3C00 )
1316-
// FCVTNT <Zd>.S, <Pg>/M, <Zn>.D SVE_GQ_3A 0110010011001010 101gggnnnnnddddd 64CA A000
1315+
INST2(fcvtnt, "fcvtnt", 0, IF_SVE_2BJ, 0x6488A000, 0x650A3C00 )
1316+
// FCVTNT <Zd>.H, <Pg>/M, <Zn>.S SVE_GQ_3A 0110010010001000 101gggnnnnnddddd 6488 A000
13171317
// FCVTNT <Zd>.B, {<Zn1>.S-<Zn2>.S } SVE_HG_2A 0110010100001010 001111nnnn0ddddd 650A 3C00
13181318

13191319

@@ -2669,8 +2669,8 @@ INST1(histcnt, "histcnt", 0, IF_SV
26692669
INST1(bfcvtnt, "bfcvtnt", 0, IF_SVE_GQ_3A, 0x648AA000 )
26702670
// BFCVTNT <Zd>.H, <Pg>/M, <Zn>.S SVE_GQ_3A 0110010010001010 101gggnnnnnddddd 648A A000
26712671

2672-
INST1(fcvtlt, "fcvtlt", 0, IF_SVE_GQ_3A, 0x64CBA000 )
2673-
// FCVTLT <Zd>.D, <Pg>/M, <Zn>.S SVE_GQ_3A 0110010011001011 101gggnnnnnddddd 64CB A000
2672+
INST1(fcvtlt, "fcvtlt", 0, IF_SVE_GQ_3A, 0x6489A000 )
2673+
// FCVTLT <Zd>.S, <Pg>/M, <Zn>.H SVE_GQ_3A 0110010010001001 101gggnnnnnddddd 6489 A000
26742674

26752675
INST1(fcvtxnt, "fcvtxnt", 0, IF_SVE_GQ_3A, 0x640AA000 )
26762676
// FCVTXNT <Zd>.S, <Pg>/M, <Zn>.D SVE_GQ_3A 0110010000001010 101gggnnnnnddddd 640A A000

0 commit comments

Comments
 (0)