Skip to content

Commit f465d33

Browse files
authored
[mono] Adding support for Vector128::ExtractMostSignificantBits intrinsics on amd64 (#89997)
* Extract MSB amd64
* add SSSE3 check
1 parent 23886f1 commit f465d33

File tree

4 files changed

+69
-1
lines changed

4 files changed

+69
-1
lines changed

src/mono/mono/arch/amd64/amd64-codegen.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -895,6 +895,7 @@ typedef union {
895895

896896
/* MOVSLDUP dreg, reg: expands to emit_sse_reg_reg with the byte sequence F3 0F 12. */
#define amd64_sse_movsldup_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0xf3, 0x0f, 0x12)
897897

898+
/*
 * PSHUFB dreg, reg (an SSSE3 instruction per the Intel SDM).
 * The encoding is four bytes long (66 0F 38 00), hence the _op4 emitter
 * instead of the three-byte emit_sse_reg_reg used by most macros here.
 */
#define amd64_sse_pshufb_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op4((inst), (dreg), (reg), 0x66, 0x0f, 0x38, 0x00)
898899

899900
/* PSHUFHW dreg, reg, imm: byte sequence F3 0F 70, with (imm) forwarded to emit_sse_reg_reg_imm as the last argument. */
#define amd64_sse_pshufhw_reg_reg_imm(inst,dreg,reg,imm) emit_sse_reg_reg_imm((inst), (dreg), (reg), 0xf3, 0x0f, 0x70, (imm))
900901

@@ -947,6 +948,10 @@ typedef union {
947948

948949
/* PMOVMSKB dreg, reg: byte sequence 66 0F D7. */
#define amd64_sse_pmovmskb_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xd7)
949950

951+
/*
 * MOVMSKPS dreg, reg: byte sequence 0F 50.
 * Only two opcode bytes are passed (no leading prefix byte), which is why
 * this goes through emit_sse_reg_reg_op2 rather than emit_sse_reg_reg.
 */
#define amd64_sse_movmskps_reg_reg(inst,dreg,reg) emit_sse_reg_reg_op2((inst), (dreg), (reg), 0x0f, 0x50)
952+
953+
/* MOVMSKPD dreg, reg: byte sequence 66 0F 50 (same opcode as MOVMSKPS, plus the 0x66 prefix). */
#define amd64_sse_movmskpd_reg_reg(inst,dreg,reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0x50)
954+
950955

951956
/* PAND dreg, reg: byte sequence 66 0F DB. */
#define amd64_sse_pand_reg_reg(inst, dreg, reg) emit_sse_reg_reg((inst), (dreg), (reg), 0x66, 0x0f, 0xdb)
952957

src/mono/mono/mini/cpu-amd64.mdesc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,8 @@ expand_r4: dest:x src1:f len:16
827827
expand_r8: dest:x src1:f len:13
828828
xop_x_x_x: dest:x src1:x src2:x len:16 clob:1
829829
xop_x_x: dest:x src1:x len:16 clob:1
830+
sse_movmsk: dest:i src1:x len:5
831+
ssse3_shuffle: dest:x src1:x src2:x len:6 clob:1
830832
sse41_dpps_imm: dest:x src1:x src2:x len:7 clob:1
831833
sse41_dppd_imm: dest:x src1:x src2:x len:7 clob:1
832834
vector_andnot: dest:x src1:x src2:x len:7 clob:1

src/mono/mono/mini/mini-amd64.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7521,6 +7521,23 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
75217521
amd64_sse_movsd_reg_reg (code, ins->dreg, ins->sreg1);
75227522
amd64_sse_pshufd_reg_reg_imm (code, ins->dreg, ins->dreg, 0x44);
75237523
break;
7524+
case OP_SSE_MOVMSK: {
7525+
switch (ins->inst_c1) {
7526+
case MONO_TYPE_R4:
7527+
amd64_sse_movmskps_reg_reg (code, ins->dreg, ins->sreg1);
7528+
break;
7529+
case MONO_TYPE_R8:
7530+
amd64_sse_movmskpd_reg_reg (code, ins->dreg, ins->sreg1);
7531+
break;
7532+
default:
7533+
amd64_sse_pmovmskb_reg_reg (code, ins->dreg, ins->sreg1);
7534+
break;
7535+
}
7536+
break;
7537+
}
7538+
case OP_SSSE3_SHUFFLE:
7539+
amd64_sse_pshufb_reg_reg (code, ins->dreg, ins->sreg2);
7540+
break;
75247541
case OP_SSE41_ROUNDP: {
75257542
if (ins->inst_c1 == MONO_TYPE_R8)
75267543
amd64_sse_roundpd_reg_reg_imm (code, ins->dreg, ins->sreg1, ins->inst_c0);

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1839,7 +1839,51 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
18391839
}
18401840
return result_ins;
18411841
#elif defined(TARGET_AMD64)
1842-
return NULL;
1842+
int type = MONO_TYPE_I1;
1843+
1844+
switch (arg0_type) {
1845+
case MONO_TYPE_U2:
1846+
case MONO_TYPE_I2: {
1847+
if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSSE3))
1848+
return NULL;
1849+
1850+
type = type_enum_is_unsigned (arg0_type) ? MONO_TYPE_U1 : MONO_TYPE_I1;
1851+
MonoClass* arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
1852+
1853+
guint64 shuffle_mask[2];
1854+
shuffle_mask[0] = 0x0F0D0B0907050301; // Place odd bytes in the lower half of vector
1855+
shuffle_mask[1] = 0x8080808080808080; // Zero the upper half
1856+
1857+
MonoInst* shuffle_vec = emit_xconst_v128 (cfg, arg_class, (guint8*)shuffle_mask);
1858+
shuffle_vec->klass = arg_class;
1859+
1860+
args [0] = emit_simd_ins (cfg, klass, OP_SSSE3_SHUFFLE, args [0]->dreg, shuffle_vec->dreg);
1861+
args [0]->inst_c1 = type;
1862+
break;
1863+
}
1864+
#if TARGET_SIZEOF_VOID_P == 4
1865+
case MONO_TYPE_I:
1866+
case MONO_TYPE_U:
1867+
#endif
1868+
case MONO_TYPE_U4:
1869+
case MONO_TYPE_I4:
1870+
case MONO_TYPE_R4: {
1871+
type = MONO_TYPE_R4;
1872+
break;
1873+
}
1874+
#if TARGET_SIZEOF_VOID_P == 8
1875+
case MONO_TYPE_I:
1876+
case MONO_TYPE_U:
1877+
#endif
1878+
case MONO_TYPE_U8:
1879+
case MONO_TYPE_I8:
1880+
case MONO_TYPE_R8: {
1881+
type = MONO_TYPE_R8;
1882+
break;
1883+
}
1884+
}
1885+
1886+
return emit_simd_ins_for_sig (cfg, klass, OP_SSE_MOVMSK, -1, type, fsig, args);
18431887
#endif
18441888
}
18451889
case SN_GetElement: {

0 commit comments

Comments
 (0)