Skip to content

Commit b937677

Browse files
authored
- Enable full float32 support (#56111)
  -- Add instructions for FP operations: abs[f]/ceil[f]/floor[f]/round[f]/sqrt[f]/trunc[f]
  -- Enable these instructions in mono_arch_emit_inst_for_method()
  -- Handle return of float32 results
  -- Correct rounding mode for OP_LCONV_TO_R_UN processing
- Enable MONO_OPT_LINEAR optimizations
  -- Correct prolog processing of structure returned variable
- Rework OP_LOCALLOC for cases where alloc size > 4k
- Add OP_POPCNTxx support
- Fix minor typo (missing tab)
1 parent 57fa2fc commit b937677

File tree

4 files changed

+406
-104
lines changed

4 files changed

+406
-104
lines changed

src/mono/mono/arch/s390x/s390x-codegen.h

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -896,11 +896,18 @@ typedef struct {
896896

897897
#define S390_RIE_4(c,opc,g1,i2,m3) do \
898898
{ \
899-
s390_emit16(c, ((opc & 0xff00) | (g1) << 4); \
899+
s390_emit16(c, ((opc & 0xff00) | (g1) << 4)); \
900900
s390_emit16(c, (i2)); \
901901
s390_emit16(c, ((m3) << 12 | (opc & 0xff))); \
902902
} while (0)
903903

904+
#define S390_RIE_6(c,opc,g1,g2,i3,i4,i5) do \
905+
{ \
906+
s390_emit16(c, ((opc & 0xff00) | ((g1) << 4) | g2)); \
907+
s390_emit16(c, ((i3) << 8) | i4); \
908+
s390_emit16(c, ((i5) << 8 | (opc & 0xff))); \
909+
} while (0)
910+
904911
#define S390_RIL_1(c,opc,g1,m2) do \
905912
{ \
906913
s390_emit16(c, ((opc >> 4) << 8 | (g1) << 4 | (opc & 0xf))); \
@@ -918,17 +925,24 @@ typedef struct {
918925
s390_emit16(c, ((opc, & 0xff00) | (r1) << 4) | (r2)); \
919926
s390_emit16(c, ((b) << 12) | (d)); \
920927
s390_emit16(c, ((i) << 4) | ((opc) & 0xff)); \
921-
}
928+
} while (0)
922929

923930
#define S390_RRS(c,opc,r1,r2,m3,b,d) do \
924931
{ \
925932
s390_emit16(c, ((opc, & 0xff00) | (r1) << 4) | (r2)); \
926933
s390_emit16(c, ((b) << 12) | (d)); \
927934
s390_emit16(c, ((m3) << 12) | ((opc) & 0xff)); \
928-
}
935+
} while (0)
929936

930937
#define S390_SI(c,opc,s1,p1,m2) s390_emit32(c, (opc << 24 | (m2) << 16 | (s1) << 12 | ((p1) & 0xfff)));
931938

939+
#define S390_SIL(c, opc, b, d, i) do \
940+
{ \
941+
s390_emit16(c, opc); \
942+
s390_emit16(c, ((b) << 12) | ((d) & 0x0fff)); \
943+
s390_emit16(c, ((i) & 0xffff)); \
944+
} while (0)
945+
932946
#define S390_SIY(c,opc,s1,p1,m2) do \
933947
{ \
934948
s390_emit16(c, ((opc & 0xff00) | m2)); \
@@ -1242,7 +1256,7 @@ typedef struct {
12421256
/* NOTE(review): the expansion references `m` and `d`, which are not among this
 * macro's parameters (c, r, i, b), and `b` is never used. The parameter list
 * and the S390_RIE_3 arguments look out of sync -- confirm against the
 * S390_RIE_3 definition before using this macro. */
#define s390_clgij(c, r, i, b) S390_RIE_3(c, 0xec7d, r, i, m, d)
12431257
#define s390_clgr(c, r1, r2) S390_RRE(c, 0xb921, r1, r2)
12441258
#define s390_clgdbr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb3ad, r1, m3, r2, m4)
1245-
#define s390_clgebr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb39c, r1, m3, r2, m4)
1259+
#define s390_clgebr(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb3ac, r1, m3, r2, m4)
12461260
#define s390_clgrj(c, r1, r2, m, v) S390_RIE_2(c, 0xec65, r1, r2, m, v)
12471261
#define s390_clgrb(c, r1, r2, m3, b, d) S390_RRS(c, 0xece5, r1, r2, m3, b, d)
12481262
#define s390_cli(c, b, d, v) S390_SI(c, 0x95, b, d, v)
@@ -1257,6 +1271,7 @@ typedef struct {
12571271
#define s390_crl(c, r, v) S390_RIL_1(c, 0xc6d, r, v)
12581272
#define s390_crt(c, r1, r2, m3) S390_RRF_2(c, 0xb972, r1, r2, m3);
12591273
#define s390_cgrt(c, r1, r2, m3) S390_RRF_2(c, 0xb960, r1, r2, m3);
1274+
#define s390_cpsdr(c, r1, r2, r3) S390_RRF_2(c, 0xb372, r1, r2, r3);
12601275
#define s390_cs(c, r1, r2, b, d) S390_RX(c, 0xba, r1, r2, b, d)
12611276
#define s390_csg(c, r1, r2, b, d) S390_RSY_1(c, 0xeb30, r1, r2, b, d)
12621277
#define s390_csst(c, d1, b1, d2, b2, r) S390_SSF(c, 0xc82, b1, d1, b2, d2, r)
@@ -1272,6 +1287,9 @@ typedef struct {
12721287
#define s390_dsgfr(c, r1, r2) S390_RRE(c, 0xb91d, r1, r2)
12731288
#define s390_dsgr(c, r1, r2) S390_RRE(c, 0xb90d, r1, r2)
12741289
#define s390_ear(c, r1, r2) S390_RRE(c, 0xb24f, r1, r2)
1290+
#define s390_fidbra(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb35f, r1, m3, r2, m4)
1291+
#define s390_fiebra(c, r1, m3, r2, m4) S390_RRF_4(c, 0xb357, r1, m3, r2, m4)
1292+
#define s390_flogr(c, r1, r2) S390_RRE(c, 0xb983, r1, r2)
12751293
#define s390_ic(c, r, x, b, d) S390_RX(c, 0x43, r, x, b, d)
12761294
#define s390_icm(c, r, m, b, d) S390_RX(c, 0xbf, r, m, b, d)
12771295
#define s390_icmy(c, r, x, b, d) S390_RXY(c, 0xeb81, r, x, b, d)
@@ -1388,6 +1406,19 @@ typedef struct {
13881406
#define s390_lnebr(c, r1, r2) S390_RRE(c, 0xb301, r1, r2)
13891407
#define s390_lngr(c, r1, r2) S390_RRE(c, 0xb901, r1, r2)
13901408
#define s390_lnr(c, r1, r2) S390_RR(c, 0x11, r1, r2)
1409+
#define s390_loc(c, r, m, b, d) S390_RSY_2(c, 0xebf2, r, m, b, d)
1410+
#define s390_locg(c, r, m, b, d) S390_RSY_2(c, 0xebe2, r, m, b, d)
1411+
#define s390_locr(c, r1, m, r2) S390_RRF_2(c, 0xb9f2, r1, m, r2)
1412+
#define s390_locgr(c, r1, m, r2) S390_RRF_2(c, 0xb9e2, r1, m, r2)
1413+
/* Emit LOCFH (opcode 0xebe0) with a register, a mask and a base/displacement
 * storage operand. The operands are forwarded in the same order and through
 * the same S390_RSY_2 encoder as s390_loc and s390_locg.
 * Fixes: the third parameter was declared as `n` while the body used `m`, and
 * the six arguments were handed to S390_RRF_2, which the register forms in
 * this file invoke with five. */
#define s390_locfh(c, r, m, b, d) S390_RSY_2(c, 0xebe0, r, m, b, d)
1414+
#define s390_locfhr(c, r1, m, r2) S390_RRF_2(c, 0xb9e0, r1, m, r2)
1415+
#define s390_lpdbr(c, r1, r2) S390_RRE(c, 0xb310, r1, r2)
1416+
#define s390_lpebr(c, r1, r2) S390_RRE(c, 0xb300, r1, r2)
1417+
#define s390_lpgr(c, r1, r2) S390_RRE(c, 0xb900, r1, r2)
1418+
#define s390_lpr(c, r1, r2) S390_RR(c, 0x10, r1, r2)
1419+
#define s390_lr(c, r1, r2) S390_RR(c, 0x18, r1, r2)
1420+
#define s390_lrl(c, r1, d) S390_RIL_1(c, 0xc4d, r1, d)
1421+
#define s390_lt(c, r, x, b, d) S390_RXY(c, 0xe312, r, x, b, d)
13911422
#define s390_lpdbr(c, r1, r2) S390_RRE(c, 0xb310, r1, r2)
13921423
#define s390_lpgr(c, r1, r2) S390_RRE(c, 0xb900, r1, r2)
13931424
#define s390_lpr(c, r1, r2) S390_RR(c, 0x10, r1, r2)
@@ -1419,6 +1450,7 @@ typedef struct {
14191450
#define s390_msr(c, r1, r2) S390_RRE(c, 0xb252, r1, r2)
14201451
#define s390_msrkc(c, r1, r2, r3) S390_RRF_1(c, 0xb9fd, r1, r2, r3)
14211452
#define s390_mvc(c, l, b1, d1, b2, d2) S390_SS_1(c, 0xd2, l, b1, d1, b2, d2)
1453+
#define s390_mvghi(c, b1, d1, i2) S390_SIL(c, 0xe548, b1, d1, i2)
14221454
#define s390_mvcl(c, r1, r2) S390_RR(c, 0x0e, r1, r2)
14231455
#define s390_mvcle(c, r1, r3, d2, b2) S390_RS_1(c, 0xa8, r1, r3, d2, b2)
14241456
#define s390_mvi(c, b, d, v) S390_SI(c, 0x92, b, d, v)
@@ -1439,7 +1471,7 @@ typedef struct {
14391471
#define s390_mem(c) S390_RR(c, 0x07, 0xe, 0)
14401472
#define s390_nr(c, r1, r2) S390_RR(c, 0x14, r1, r2)
14411473
/* NOTE(review): s390_msrkc and s390_ogrk in this file pass three register
 * operands (r1, r2, r3) to S390_RRF_1, but only r1 and r2 are passed here.
 * Presumably an r3 parameter is missing -- confirm against the S390_RRF_1
 * definition. */
#define s390_nrk(c, r1, r2) S390_RRF_1(c, 0xb9f4, r1, r2)
1442-
#define s390_ny(c, r, x, b, d) S390_RRY(c, 0xe354, r1, r2)
1474+
#define s390_ny(c, r, x, b, d) S390_RXY(c, 0xe354, r, x, b, d)
14431475
#define s390_o(c, r, x, b, d) S390_RX(c, 0x56, r, x, b, d)
14441476
#define s390_oihf(c, r, v) S390_RIL_1(c, 0xc0c, r, v)
14451477
#define s390_oihh(c, r, v) S390_RI(c, 0xa58, r, v)
@@ -1452,6 +1484,9 @@ typedef struct {
14521484
#define s390_ogr(c, r1, r2) S390_RRE(c, 0xb981, r1, r2)
14531485
#define s390_ogrk(c, r1, r2, r3) S390_RRF_1(c, 0xb9e6, r1, r2, r3)
14541486
#define s390_or(c, r1, r2) S390_RR(c, 0x16, r1, r2)
1487+
#define s390_oy(c, r, x, b, d) S390_RXY(c, 0xe356, r, x, b, d)
1488+
#define s390_popcnt(c, r1, m, r2) S390_RRF_2(c, 0xb9e1, r1, m, r2)
1489+
#define s390_risbg(c,r1,r2,i3,i4,i5) S390_RIE_6(c, 0xec55, r1, r2, i3, i4, i5)
14551490
#define s390_s(c, r, x, b, d) S390_RX(c, 0x5b, r, x, b, d)
14561491
#define s390_sdb(c, r, x, b, d) S390_RXE(c, 0xed1b, r, x, b, d)
14571492
#define s390_sdbr(c, r1, r2) S390_RRE(c, 0xb31b, r1, r2)

src/mono/mono/mini/cpu-s390x.md

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,11 +147,13 @@ r4_conv_to_i4: dest:i src1:f len:16
147147
r4_conv_to_u4: dest:i src1:f len:32
148148
r4_conv_to_i8: dest:i src1:f len:32
149149
r4_conv_to_r8: dest:f src1:f len:17
150+
r4_conv_to_u8: dest:i src1:f len:17
150151
r4_conv_to_r4: dest:f src1:f len:17
151152
r4_add: dest:f src1:f src2:f clob:1 len:5
152153
r4_sub: dest:f src1:f src2:f clob:1 len:5
153154
r4_mul: dest:f src1:f src2:f clob:1 len:5
154155
r4_div: dest:f src1:f src2:f clob:1 len:5
156+
r4_rem: dest:f src1:f src2:f clob:1 len:12
155157
r4_neg: dest:f src1:f clob:1 len:23
156158
r4_ceq: dest:i src1:f src2:f len:35
157159
r4_cgt: dest:i src1:f src2:f len:35
@@ -187,11 +189,13 @@ loadu1_membase: dest:i src1:b len:30
187189
loadu2_membase: dest:i src1:b len:30
188190
loadu4_mem: dest:i len:8
189191
loadu4_membase: dest:i src1:b len:30
190-
localloc: dest:i src1:i len:110
192+
localloc: dest:i src1:i len:180
191193
memory_barrier: len:10
192194
move: dest:i src1:i len:4
193195
mul_imm: dest:i src1:i len:24
194196
nop: len:4
197+
popcnt32: dest:i src1:i len:38
198+
popcnt64: dest:i src1:i len:34
195199
relaxed_nop: len:4
196200
arglist: src1:i len:28
197201
bigmul: len:2 dest:i src1:a src2:i
@@ -218,7 +222,18 @@ zext_i4: dest:i src1:i len:4
218222
shl_imm: dest:i src1:i len:10
219223
shr_imm: dest:i src1:i len:10
220224
shr_un_imm: dest:i src1:i len:10
225+
abs: dest:f src1:f len:4
226+
absf: dest:f src1:f len:4
227+
ceil: dest:f src1:f len:4
228+
ceilf: dest:f src1:f len:4
229+
floor: dest:f src1:f len:4
230+
floorf: dest:f src1:f len:4
231+
round: dest:f src1:f len:4
221232
sqrt: dest:f src1:f len:4
233+
sqrtf: dest:f src1:f len:4
234+
trunc: dest:f src1:f len:4
235+
truncf: dest:f src1:f len:4
236+
fcopysign: dest:f src1:f src2:f len:4
222237
start_handler: len:26
223238
store_membase_imm: dest:b len:46
224239
store_membase_reg: dest:b src1:i len:26

0 commit comments

Comments
 (0)