Skip to content

Commit 4de58bf

Browse files
authored
[mono][jit] Implement JIT support for the arm64 Crc and Dp intrinsics sets. (#86106)
Also implement hardware capability detection for Apple arm64 platforms.
1 parent 83f71b5 commit 4de58bf

File tree

9 files changed

+196
-8
lines changed

9 files changed

+196
-8
lines changed

src/mono/mono/arch/arm64/arm64-codegen.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,6 +950,20 @@ arm_encode_arith_imm (int imm, guint32 *shift)
950950

951951
#define arm_autibsp(p) arm_format_autib ((p), 0b0011, 0b111)
952952

953+
/*
 * CRC32
 *
 * arm_format_crc32 assembles one 32 bit instruction word from these fields:
 *   [31]    sf  - 1 only for the 'x' forms below, 0 otherwise
 *   [28:21] fixed 0b11010110
 *   [20:16] rm  - second source register
 *   [15:13] fixed 0b010
 *   [12]    C   - 1 for the crc32c* forms, 0 for the crc32* forms
 *   [11:10] sz  - 0b00 (b), 0b01 (h), 0b10 (w), 0b11 (x)
 *   [9:5]   rn  - first source register
 *   [4:0]   rd  - destination register
 *
 * Every field is converted to guint32 before shifting, so that placing sf in
 * bit 31 never shifts a signed int into its sign bit.
 */
#define arm_format_crc32(p, sf, C, sz, rm, rn, rd) arm_emit ((p), (((guint32)(sf)) << 31) | (((guint32)0b11010110) << 21) | (((guint32)(rm)) << 16) | (((guint32)0b010) << 13) | (((guint32)(C)) << 12) | (((guint32)(sz)) << 10) | (((guint32)(rn)) << 5) | (((guint32)(rd)) << 0))

/* crc32 forms (C == 0); the suffix selects the sz field */
#define arm_crc32b(p, rd, rn, rm) arm_format_crc32 ((p), 0, 0, 0b00, (rm), (rn), (rd))
#define arm_crc32h(p, rd, rn, rm) arm_format_crc32 ((p), 0, 0, 0b01, (rm), (rn), (rd))
#define arm_crc32w(p, rd, rn, rm) arm_format_crc32 ((p), 0, 0, 0b10, (rm), (rn), (rd))
#define arm_crc32x(p, rd, rn, rm) arm_format_crc32 ((p), 1, 0, 0b11, (rm), (rn), (rd))

/* crc32c forms (C == 1); the suffix selects the sz field */
#define arm_crc32cb(p, rd, rn, rm) arm_format_crc32 ((p), 0, 1, 0b00, (rm), (rn), (rd))
#define arm_crc32ch(p, rd, rn, rm) arm_format_crc32 ((p), 0, 1, 0b01, (rm), (rn), (rd))
#define arm_crc32cw(p, rd, rn, rm) arm_format_crc32 ((p), 0, 1, 0b10, (rm), (rn), (rd))
#define arm_crc32cx(p, rd, rn, rm) arm_format_crc32 ((p), 1, 1, 0b11, (rm), (rn), (rd))
966+
953967
/* C4.1.69 NEON vector ISA */
954968

955969
// Opcode naming convention is arm_neon_<operation>_[<op>_]<elem_count><type>

src/mono/mono/arch/arm64/codegen-test.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,16 @@ main (int argc, char *argv [])
482482
arm_neon_addp (code, VREG_FULL, TYPE_I8, ARMREG_R0, ARMREG_R1, ARMREG_R2);
483483
arm_neon_faddp (code, VREG_FULL, TYPE_F32, ARMREG_R0, ARMREG_R1, ARMREG_R2);
484484

485+
// crc32
486+
arm_crc32b (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
487+
arm_crc32h (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
488+
arm_crc32w (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
489+
arm_crc32x (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
490+
arm_crc32cb (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
491+
arm_crc32ch (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
492+
arm_crc32cw (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
493+
arm_crc32cx (code, ARMREG_R1, ARMREG_R2, ARMREG_R3);
494+
485495
for (i = 0; i < code - buf; ++i)
486496
printf (".byte %d\n", buf [i]);
487497
printf ("\n");

src/mono/mono/mini/cpu-arm64.mdesc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,9 @@ lscnt32: dest:i src1:i len:4
473473
lscnt64: dest:i src1:i len:4
474474
xop_i8_i8: dest:i src1:i len:4
475475
xop_i4_i4: dest:i src1:i len:4
476+
xop_i4_i4_i4: dest:i src1:i src2:i len:4
477+
xop_i4_i4_i8: dest:i src1:i src2:i len:4
478+
xop_ovr_x_x_x_x: dest:x src1:x src2:x src3:x len:4 clob:1
476479
arm64_smulh: dest:i src1:i src2:i len:4
477480
arm64_umulh: dest:i src1:i src2:i len:4
478481
arm64_hint: len:4
@@ -554,6 +557,7 @@ arm64_ushl: dest:x src1:x src2:x len:4
554557
arm64_ext_imm: dest:x src1:x src2:x len:4
555558
xinsert_i8: dest:x src1:x src2:i src3:i len:20
556559
xinsert_r8: dest:x src1:x src2:f src3:i len:20
560+
arm64_broadcast_elem: dest:x src1:x len:16
557561

558562
generic_class_init: src1:a len:44 clob:c
559563
gc_safe_point: src1:i len:12 clob:c

src/mono/mono/mini/mini-arm64.c

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include <mono/arch/arm64/arm64-codegen.h>
2626
#include <mono/utils/mono-mmap.h>
2727
#include <mono/utils/mono-memory-model.h>
28+
#include <mono/utils/mono-hwcap.h>
2829
#include <mono/metadata/abi-details.h>
2930
#include <mono/metadata/tokentype.h>
3031
#include "llvm-intrinsics-types.h"
@@ -3835,6 +3836,28 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
38353836
}
38363837
break;
38373838
}
3839+
case OP_XOP_OVR_X_X_X_X: {
3840+
IntrinsicId iid = (IntrinsicId) ins->inst_c0;
3841+
g_assert (dreg == sreg1);
3842+
g_assert (mono_class_value_size (ins->klass, NULL) == 16);
3843+
switch (iid) {
3844+
case INTRINS_AARCH64_ADV_SIMD_SDOT:
3845+
arm_neon_sdot_4s (code, dreg, sreg2, sreg3);
3846+
break;
3847+
case INTRINS_AARCH64_ADV_SIMD_UDOT:
3848+
arm_neon_udot_4s (code, dreg, sreg2, sreg3);
3849+
break;
3850+
default:
3851+
g_assert_not_reached ();
3852+
break;
3853+
}
3854+
break;
3855+
}
3856+
case OP_ARM64_BROADCAST_ELEM:
3857+
arm_neon_smov (code, TYPE_I32, ARMREG_IP0, sreg1, ins->inst_c0);
3858+
arm_neon_dup_g_4s (code, dreg, ARMREG_IP0);
3859+
break;
3860+
38383861
case OP_XZERO:
38393862
arm_neon_eor_16b (code, dreg, dreg, dreg);
38403863
break;
@@ -5383,7 +5406,46 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
53835406
g_assert (ins->inst_c0 == INTRINS_BITREVERSE_I32);
53845407
arm_rbitw (code, dreg, sreg1);
53855408
break;
5386-
5409+
case OP_XOP_I4_I4_I4: {
5410+
switch (ins->inst_c0) {
5411+
case INTRINS_AARCH64_CRC32B:
5412+
arm_crc32b (code, dreg, sreg1, sreg2);
5413+
break;
5414+
case INTRINS_AARCH64_CRC32H:
5415+
arm_crc32h (code, dreg, sreg1, sreg2);
5416+
break;
5417+
case INTRINS_AARCH64_CRC32W:
5418+
arm_crc32w (code, dreg, sreg1, sreg2);
5419+
break;
5420+
case INTRINS_AARCH64_CRC32CB:
5421+
arm_crc32cb (code, dreg, sreg1, sreg2);
5422+
break;
5423+
case INTRINS_AARCH64_CRC32CH:
5424+
arm_crc32ch (code, dreg, sreg1, sreg2);
5425+
break;
5426+
case INTRINS_AARCH64_CRC32CW:
5427+
arm_crc32cw (code, dreg, sreg1, sreg2);
5428+
break;
5429+
default:
5430+
g_assert_not_reached ();
5431+
break;
5432+
}
5433+
break;
5434+
}
5435+
case OP_XOP_I4_I4_I8: {
5436+
switch (ins->inst_c0) {
5437+
case INTRINS_AARCH64_CRC32X:
5438+
arm_crc32x (code, dreg, sreg1, sreg2);
5439+
break;
5440+
case INTRINS_AARCH64_CRC32CX:
5441+
arm_crc32cx (code, dreg, sreg1, sreg2);
5442+
break;
5443+
default:
5444+
g_assert_not_reached ();
5445+
break;
5446+
}
5447+
break;
5448+
}
53875449
case OP_ARM64_HINT:
53885450
g_assert (ins->inst_c0 <= ARMHINT_SEVL);
53895451
arm_hint (code, ins->inst_c0);
@@ -6382,3 +6444,20 @@ mono_arm_emit_brx (guint8 *code, int reg)
63826444
{
63836445
return emit_brx (code, reg);
63846446
}
6447+
6448+
MonoCPUFeatures
6449+
mono_arch_get_cpu_features (void)
6450+
{
6451+
guint64 features = MONO_CPU_INITED;
6452+
6453+
if (mono_hwcap_arm64_has_crc32)
6454+
features |= MONO_CPU_ARM64_CRC;
6455+
if (mono_hwcap_arm64_has_dot)
6456+
features |= MONO_CPU_ARM64_DP;
6457+
if (mono_hwcap_arm64_has_rdm)
6458+
features |= MONO_CPU_ARM64_RDM;
6459+
if (mono_hwcap_arm64_has_sha1 && mono_hwcap_arm64_has_sha256 && mono_hwcap_arm64_has_aes)
6460+
features |= MONO_CPU_ARM64_CRYPTO;
6461+
6462+
return features;
6463+
}

src/mono/mono/mini/mini-ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1722,6 +1722,8 @@ MINI_OP(OP_ARM64_SQXTUN2, "arm64_sqxtun2", XREG, XREG, XREG)
17221722

17231723
MINI_OP(OP_ARM64_SELECT_SCALAR, "arm64_select_scalar", XREG, XREG, IREG)
17241724
MINI_OP(OP_ARM64_SELECT_QUAD, "arm64_select_quad", XREG, XREG, IREG)
1725+
/* Take a word elem of sreg1 identified by inst_c0 and broadcast it to all elements of dreg */
1726+
MINI_OP(OP_ARM64_BROADCAST_ELEM, "arm64_broadcast_elem", XREG, XREG, NONE)
17251727

17261728
MINI_OP(OP_ARM64_FCVTN, "arm64_fcvtn", XREG, XREG, NONE)
17271729
MINI_OP(OP_ARM64_FCVTN2, "arm64_fcvtn2", XREG, XREG, XREG)

src/mono/mono/mini/mini.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4455,9 +4455,11 @@ mini_get_cpu_features (MonoCompile* cfg)
44554455
#if !defined(MONO_CROSS_COMPILE)
44564456
if (!cfg->compile_aot || cfg->use_current_cpu) {
44574457
// detect current CPU features if we are in JIT mode or AOT with use_current_cpu flag.
4458-
#if defined(ENABLE_LLVM)
4459-
features = mono_llvm_get_cpu_features (); // llvm has a nice built-in API to detect features
4460-
#elif defined(TARGET_AMD64) || defined(TARGET_X86)
4458+
#if defined(ENABLE_LLVM) && !(defined(TARGET_ARM64) && defined(TARGET_OSX))
4459+
// llvm has a nice built-in API to detect features
4460+
// it is not implemented on some platforms like apple arm64
4461+
features = mono_llvm_get_cpu_features ();
4462+
#elif defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
44614463
features = mono_arch_get_cpu_features ();
44624464
#endif
44634465
}

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3589,8 +3589,8 @@ static const IntrinGroup supported_arm_intrinsics [] = {
35893589
{ "AdvSimd", MONO_CPU_ARM64_NEON, advsimd_methods, sizeof (advsimd_methods) },
35903590
{ "Aes", MONO_CPU_ARM64_CRYPTO, crypto_aes_methods, sizeof (crypto_aes_methods) },
35913591
{ "ArmBase", MONO_CPU_ARM64_BASE, armbase_methods, sizeof (armbase_methods), TRUE },
3592-
{ "Crc32", MONO_CPU_ARM64_CRC, crc32_methods, sizeof (crc32_methods) },
3593-
{ "Dp", MONO_CPU_ARM64_DP, dp_methods, sizeof (dp_methods) },
3592+
{ "Crc32", MONO_CPU_ARM64_CRC, crc32_methods, sizeof (crc32_methods), TRUE },
3593+
{ "Dp", MONO_CPU_ARM64_DP, dp_methods, sizeof (dp_methods), TRUE },
35943594
{ "Rdm", MONO_CPU_ARM64_RDM, rdm_methods, sizeof (rdm_methods) },
35953595
{ "Sha1", MONO_CPU_ARM64_CRYPTO, sha1_methods, sizeof (sha1_methods) },
35963596
{ "Sha256", MONO_CPU_ARM64_CRYPTO, sha256_methods, sizeof (sha256_methods) },
@@ -3976,8 +3976,24 @@ emit_arm64_intrinsics (
39763976
MonoClass *quad_klass = mono_class_from_mono_type_internal (fsig->params [2]);
39773977
gboolean is_unsigned = type_is_unsigned (fsig->ret);
39783978
int iid = is_unsigned ? INTRINS_AARCH64_ADV_SIMD_UDOT : INTRINS_AARCH64_ADV_SIMD_SDOT;
3979-
MonoInst *quad = emit_simd_ins (cfg, arg_klass, OP_ARM64_SELECT_QUAD, args [2]->dreg, args [3]->dreg);
3980-
quad->data.op [1].klass = quad_klass;
3979+
3980+
MonoInst *quad;
3981+
if (!COMPILE_LLVM (cfg)) {
3982+
if (mono_class_value_size (arg_klass, NULL) != 16 || mono_class_value_size (quad_klass, NULL) != 16)
3983+
return NULL;
3984+
// FIXME: The c# api has ConstantExpected(Max = (byte)(15)), but the hw only supports
3985+
// selecting one of the 4 32 bit words
3986+
if (args [3]->opcode != OP_ICONST || args [3]->inst_c0 < 0 || args [3]->inst_c0 > 3) {
3987+
// FIXME: Throw the right exception ?
3988+
mono_emit_jit_icall (cfg, mono_throw_platform_not_supported, NULL);
3989+
return NULL;
3990+
}
3991+
quad = emit_simd_ins (cfg, klass, OP_ARM64_BROADCAST_ELEM, args [2]->dreg, -1);
3992+
quad->inst_c0 = args [3]->inst_c0;
3993+
} else {
3994+
quad = emit_simd_ins (cfg, arg_klass, OP_ARM64_SELECT_QUAD, args [2]->dreg, args [3]->dreg);
3995+
quad->data.op [1].klass = quad_klass;
3996+
}
39813997
MonoInst *ret = emit_simd_ins (cfg, ret_klass, OP_XOP_OVR_X_X_X_X, args [0]->dreg, args [1]->dreg);
39823998
ret->sreg3 = quad->dreg;
39833999
ret->inst_c0 = iid;

src/mono/mono/utils/mono-hwcap-arm64.c

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,63 @@
66
* Licensed under the MIT license. See LICENSE file in the project root for full license information.
77
*/
88

9+
#ifdef __APPLE__
10+
#include <sys/types.h>
11+
#include <sys/sysctl.h>
12+
#endif
13+
914
#include "mono/utils/mono-hwcap.h"
1015

1116
#ifdef __APPLE__
/*
 * apple_sysctl_flag:
 *
 *   Query the integer sysctl called NAME and return whether it holds a
 * non-zero value.
 *
 * Returns FALSE when the query fails (for example because the running OS does
 * not know the key) or when the value is not int sized. An unreadable
 * capability is treated as "not available" rather than aborting the runtime.
 */
static gboolean
apple_sysctl_flag (const char *name)
{
	int val = 0;
	size_t val_len = sizeof (val);

	if (sysctlbyname (name, &val, &val_len, NULL, 0) != 0)
		return FALSE;
	if (val_len != sizeof (val))
		return FALSE;

	return val != 0;
}
#endif

/*
 * mono_hwcap_arch_init:
 *
 *   Fill in the mono_hwcap_arm64_* flags. Detection is only implemented for
 * Apple platforms, through the hw.optional.* sysctl keys; elsewhere this
 * function leaves the flags untouched.
 */
void
mono_hwcap_arch_init (void)
{
#ifdef __APPLE__
	mono_hwcap_arm64_has_crc32 = apple_sysctl_flag ("hw.optional.armv8_crc32");
	mono_hwcap_arm64_has_rdm = apple_sysctl_flag ("hw.optional.arm.FEAT_RDM");
	mono_hwcap_arm64_has_dot = apple_sysctl_flag ("hw.optional.arm.FEAT_DotProd");
	mono_hwcap_arm64_has_sha1 = apple_sysctl_flag ("hw.optional.arm.FEAT_SHA1");
	mono_hwcap_arm64_has_sha256 = apple_sysctl_flag ("hw.optional.arm.FEAT_SHA256");
	mono_hwcap_arm64_has_aes = apple_sysctl_flag ("hw.optional.arm.FEAT_AES");
#endif
}

src/mono/mono/utils/mono-hwcap-vars.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,13 @@ MONO_HWCAP_VAR(arm_has_thumb2)
1717

1818
#elif defined (TARGET_ARM64)
1919

20+
MONO_HWCAP_VAR(arm64_has_crc32)
21+
MONO_HWCAP_VAR(arm64_has_dot)
22+
MONO_HWCAP_VAR(arm64_has_rdm)
23+
MONO_HWCAP_VAR(arm64_has_sha1)
24+
MONO_HWCAP_VAR(arm64_has_sha256)
25+
MONO_HWCAP_VAR(arm64_has_aes)
26+
2027
// Only filled in on Apple platforms so far, see mono_hwcap_arch_init () in mono-hwcap-arm64.c.
2128

2229
#elif defined (TARGET_POWERPC) || defined (TARGET_POWERPC64)

0 commit comments

Comments
 (0)