Skip to content

Commit 7abea9e

Browse files
authored
[mono][jit] Arm64 SIMD regs are now zeroed with movi instead of eor (#92882)
* SIMD regs are now zeroed with movi instead of eor. * Simplified vector length selection.
1 parent 3b0cd57 commit 7abea9e

File tree

3 files changed

+9
-11
lines changed

3 files changed

+9
-11
lines changed

src/mono/mono/arch/arm64/arm64-codegen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2289,6 +2289,7 @@ arm_encode_arith_imm (int imm, guint32 *shift)
22892289

22902290
/* NEON :: modified immediate */
22912291
/*
 * Emit a NEON "modified immediate" instruction through arm_neon_opcode_1reg.
 *   p      - code pointer handed on to arm_neon_opcode_1reg
 *   q      - vector width selector handed on to arm_neon_opcode_1reg
 *   op     - placed at bit 29
 *   cmode  - placed at bits 12 and up
 *   o2     - placed at bit 11
 *   imm    - 8-bit immediate: its top three bits (7:5) land in bits 18:16,
 *            its low five bits (4:0) land in bits 9:5
 *   rd     - destination register, handed on to arm_neon_opcode_1reg
 * Every argument is parenthesized so that expression arguments (for example
 * `a | b` as imm) are masked as a whole rather than by operator precedence.
 */
#define arm_neon_mimm_opcode(p, q, op, cmode, o2, imm, rd) \
	arm_neon_opcode_1reg ((p), (q), \
		0b00001111000000000000010000000000 | (op) << 29 | (cmode) << 12 | (o2) << 11 | \
		((imm) & 0b11100000) << 11 | ((imm) & 0b11111) << 5, \
		(rd))
2292+
/*
 * MOVI, byte form: emits a modified-immediate instruction with op 0,
 * cmode 0b1110 and o2 0, loading `imm` (8 bits) into vector register `rd`.
 * `width` is forwarded as the vector width selector.
 * Arguments are parenthesized when forwarded, matching arm_neon_fmov_imm,
 * so expression arguments survive the inner macro's masking intact.
 */
#define arm_neon_movi_b(p, width, rd, imm) arm_neon_mimm_opcode ((p), (width), 0, 0b1110, 0, (imm), (rd))
22922293

22932294
/* NOTE(review): going by the name, this looks like the 8-bit immediate pattern for floating-point 1.0 — confirm against callers. */
#define ARM_IMM_FONE (0b01110000)
22942295
/*
 * Floating-point move-immediate into a vector register: a modified-immediate
 * instruction with cmode 0b1111 and o2 0. `type` is forwarded as the op
 * field, `width` as the vector width selector, `imm` as the 8-bit immediate.
 */
#define arm_neon_fmov_imm(p, width, type, rd, imm) \
	arm_neon_mimm_opcode ((p), (width), (type), 0b1111, 0, (imm), (rd))

src/mono/mono/mini/mini-arm64.c

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3932,14 +3932,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
39323932
break;
39333933

39343934
case OP_XZERO:
3935-
if (ins->klass && mono_class_value_size (ins->klass, NULL) == 8)
3936-
arm_neon_eor_8b (code, dreg, dreg, dreg);
3937-
else
3938-
arm_neon_eor_16b (code, dreg, dreg, dreg);
3935+
arm_neon_movi_b (code, get_vector_size_macro (ins), dreg, 0);
39393936
break;
39403937
case OP_XONES:
3941-
arm_neon_eor_16b (code, dreg, dreg, dreg);
3942-
arm_neon_not_16b (code, dreg, dreg);
3938+
arm_neon_movi_b (code, get_vector_size_macro (ins), dreg, 0xff);
39433939
break;
39443940
case OP_XEXTRACT:
39453941
code = emit_xextract (code, (ins->inst_c1 == 8) ? VREG_LOW : VREG_FULL, GTMREG_TO_INT (ins->inst_c0), dreg, sreg1);
@@ -4133,7 +4129,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
41334129
}
41344130
case OP_CREATE_SCALAR_INT: {
41354131
const int t = get_type_size_macro (ins->inst_c1);
4136-
arm_neon_eor_16b (code, dreg, dreg, dreg);
4132+
arm_neon_movi_b (code, VREG_FULL, dreg, 0);
41374133
arm_neon_ins_g(code, t, dreg, sreg1, 0);
41384134
break;
41394135
}
@@ -4148,7 +4144,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
41484144
break;
41494145
}
41504146
// Use a temp register for zero op, as sreg1 and dreg share the same register here
4151-
arm_neon_eor_16b (code, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
4147+
arm_neon_movi_b (code, VREG_FULL, NEON_TMP_REG, 0);
41524148
arm_neon_ins_e(code, t, NEON_TMP_REG, sreg1, 0, 0);
41534149
arm_neon_mov (code, dreg, NEON_TMP_REG);
41544150
break;
@@ -4183,17 +4179,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
41834179
case OP_XLOWER: {
41844180
if (dreg == sreg1) {
41854181
// clean the upper half
4186-
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
4182+
arm_neon_movi_b (code, VREG_FULL, NEON_TMP_REG, 0);
41874183
arm_neon_ins_e (code, SIZE_8, dreg, NEON_TMP_REG, 1, 0);
41884184
} else {
4189-
arm_neon_eor (code, VREG_FULL, dreg, dreg, dreg);
4185+
arm_neon_movi_b (code, VREG_FULL, dreg, 0);
41904186
arm_neon_mov_8b (code, dreg, sreg1);
41914187
}
41924188
break;
41934189
}
41944190
case OP_XUPPER:
41954191
// shift in 64 zeros from the left
4196-
arm_neon_eor (code, VREG_FULL, NEON_TMP_REG, NEON_TMP_REG, NEON_TMP_REG);
4192+
arm_neon_movi_b (code, VREG_FULL, NEON_TMP_REG, 0);
41974193
arm_neon_ext_16b (code, dreg, sreg1, NEON_TMP_REG, 8);
41984194
break;
41994195

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,7 @@ static MonoInst*
12911291
emit_msb_vector_mask (MonoCompile *cfg, MonoClass *arg_class, MonoTypeEnum arg_type)
12921292
{
12931293
guint64 msb_mask_value[2];
1294+
// TODO: with mini, movi could be emitted to broadcast an i8/i16/i32 immediate
12941295

12951296
switch (arg_type) {
12961297
case MONO_TYPE_I1:

0 commit comments

Comments
 (0)