@@ -3717,6 +3717,48 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3717
3717
}
3718
3718
break ;
3719
3719
}
3720
+ /* SIMD that is not table-generated */
3721
+ /* TODO: once https://github.com/dotnet/runtime/issues/83252 is done,
3722
+ * move the following two to the codegen table in simd-arm64.h
3723
+ */
3724
+ case OP_ONES_COMPLEMENT :
3725
+ arm_neon_not (code , get_vector_size_macro (ins ), dreg , sreg1 );
3726
+ break ;
3727
+ case OP_NEGATION :
3728
+ if (is_type_float_macro (ins -> inst_c1 )) {
3729
+ arm_neon_fneg (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 );
3730
+ } else {
3731
+ arm_neon_neg (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 );
3732
+ }
3733
+ break ;
3734
+ case OP_XBINOP :
3735
+ switch (ins -> inst_c0 ) {
3736
+ case OP_IMAX :
3737
+ code = emit_smax_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3738
+ break ;
3739
+ case OP_IMAX_UN :
3740
+ code = emit_umax_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3741
+ break ;
3742
+ case OP_IMIN :
3743
+ code = emit_smin_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3744
+ break ;
3745
+ case OP_IMIN_UN :
3746
+ code = emit_umin_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3747
+ break ;
3748
+ default :
3749
+ g_assert_not_reached ();
3750
+ }
3751
+ break ;
3752
+ case OP_XZERO :
3753
+ arm_neon_eor_16b (code , dreg , dreg , dreg );
3754
+ break ;
3755
+ case OP_XONES :
3756
+ arm_neon_eor_16b (code , dreg , dreg , dreg );
3757
+ arm_neon_not_16b (code , dreg , dreg );
3758
+ break ;
3759
+ case OP_XEXTRACT :
3760
+ code = emit_xextract (code , VREG_FULL , ins -> inst_c0 , dreg , sreg1 );
3761
+ break ;
3720
3762
case OP_STOREX_MEMBASE :
3721
3763
code = emit_strfpq (code , sreg1 , dreg , ins -> inst_offset );
3722
3764
break ;
@@ -3730,10 +3772,10 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3730
3772
if (cfg -> compile_aot && cfg -> code_exec_only ) {
3731
3773
mono_add_patch_info (cfg , offset , MONO_PATCH_INFO_X128_GOT , ins -> inst_p0 );
3732
3774
arm_ldrx_lit (code , ARMREG_IP0 , 0 );
3733
- arm_ldrfpq (code , ins -> dreg , ARMREG_IP0 , 0 );
3775
+ arm_ldrfpq (code , dreg , ARMREG_IP0 , 0 );
3734
3776
} else {
3735
3777
mono_add_patch_info (cfg , offset , MONO_PATCH_INFO_X128 , ins -> inst_p0 );
3736
- arm_neon_ldrq_lit (code , ins -> dreg , 0 );
3778
+ arm_neon_ldrq_lit (code , dreg , 0 );
3737
3779
}
3738
3780
break ;
3739
3781
}
@@ -3744,13 +3786,13 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3744
3786
case OP_EXPAND_I4 :
3745
3787
case OP_EXPAND_I8 : {
3746
3788
const int t = get_type_size_macro (ins -> inst_c1 );
3747
- arm_neon_dup_g (code , VREG_FULL , t , ins -> dreg , ins -> sreg1 );
3789
+ arm_neon_dup_g (code , VREG_FULL , t , dreg , sreg1 );
3748
3790
break ;
3749
3791
}
3750
3792
case OP_EXPAND_R4 :
3751
3793
case OP_EXPAND_R8 : {
3752
3794
const int t = get_type_size_macro (ins -> inst_c1 );
3753
- arm_neon_fdup_e (code , VREG_FULL , t , ins -> dreg , ins -> sreg1 , 0 );
3795
+ arm_neon_fdup_e (code , VREG_FULL , t , dreg , sreg1 , 0 );
3754
3796
break ;
3755
3797
}
3756
3798
case OP_EXTRACT_I1 :
@@ -3760,9 +3802,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3760
3802
const int t = get_type_size_macro (ins -> inst_c1 );
3761
3803
// smov is not defined for i64
3762
3804
if (is_type_unsigned_macro (ins -> inst_c1 ) || t == TYPE_I64 ) {
3763
- arm_neon_umov (code , t , ins -> dreg , ins -> sreg1 , ins -> inst_c0 );
3805
+ arm_neon_umov (code , t , dreg , sreg1 , ins -> inst_c0 );
3764
3806
} else {
3765
- arm_neon_smov (code , t , ins -> dreg , ins -> sreg1 , ins -> inst_c0 );
3807
+ arm_neon_smov (code , t , dreg , sreg1 , ins -> inst_c0 );
3766
3808
}
3767
3809
break ;
3768
3810
}
@@ -3773,17 +3815,39 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3773
3815
// Technically, this broadcasts element #inst_c0 to all dest XREG elements; whereas it should
3774
3816
// set the FREG to the said element. Since FREG and XREG pool is the same on arm64 and the rest
3775
3817
// of the F/XREG is ignored in FREG mode, this operation remains valid.
3776
- arm_neon_fdup_e (code , VREG_FULL , t , ins -> dreg , ins -> sreg1 , ins -> inst_c0 );
3818
+ arm_neon_fdup_e (code , VREG_FULL , t , dreg , sreg1 , ins -> inst_c0 );
3777
3819
}
3778
3820
break ;
3821
+ case OP_INSERT_I1 :
3822
+ case OP_INSERT_I2 :
3823
+ case OP_INSERT_I4 :
3824
+ case OP_INSERT_I8 : {
3825
+ const int t = get_type_size_macro (ins -> inst_c1 );
3826
+ arm_neon_ins_g (code , t , dreg , sreg1 , ins -> inst_c0 );
3827
+ break ;
3828
+ }
3829
+ case OP_INSERT_R4 :
3830
+ case OP_INSERT_R8 : {
3831
+ int t = 0 ;
3832
+ switch (ins -> inst_c1 ) {
3833
+ case MONO_TYPE_R4 :
3834
+ t = SIZE_4 ;
3835
+ break ;
3836
+ case MONO_TYPE_R8 :
3837
+ t = SIZE_8 ;
3838
+ break ;
3839
+ }
3840
+ arm_neon_ins_e (code , t , dreg , sreg1 , ins -> inst_c0 , 0 );
3841
+ break ;
3842
+ }
3779
3843
case OP_ARM64_XADDV : {
3780
3844
switch (ins -> inst_c0 ) {
3781
3845
case INTRINS_AARCH64_ADV_SIMD_FADDV :
3782
3846
if (ins -> inst_c1 == MONO_TYPE_R8 ) {
3783
- arm_neon_faddp (code , VREG_FULL , TYPE_F64 , ins -> dreg , ins -> sreg1 , ins -> sreg1 );
3847
+ arm_neon_faddp (code , VREG_FULL , TYPE_F64 , dreg , sreg1 , sreg1 );
3784
3848
} else if (ins -> inst_c1 == MONO_TYPE_R4 ) {
3785
- arm_neon_faddp (code , VREG_FULL , TYPE_F32 , ins -> dreg , ins -> sreg1 , ins -> sreg1 );
3786
- arm_neon_faddp (code , VREG_FULL , TYPE_F32 , ins -> dreg , ins -> dreg , ins -> dreg );
3849
+ arm_neon_faddp (code , VREG_FULL , TYPE_F32 , dreg , sreg1 , sreg1 );
3850
+ arm_neon_faddp (code , VREG_FULL , TYPE_F32 , dreg , dreg , dreg );
3787
3851
} else {
3788
3852
g_assert_not_reached ();
3789
3853
}
@@ -3792,7 +3856,7 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3792
3856
case INTRINS_AARCH64_ADV_SIMD_UADDV :
3793
3857
case INTRINS_AARCH64_ADV_SIMD_SADDV :
3794
3858
if (get_type_size_macro (ins -> inst_c1 ) == TYPE_I64 )
3795
- arm_neon_addp (code , VREG_FULL , TYPE_I64 , ins -> dreg , ins -> sreg1 , ins -> sreg1 );
3859
+ arm_neon_addp (code , VREG_FULL , TYPE_I64 , dreg , sreg1 , sreg1 );
3796
3860
else
3797
3861
g_assert_not_reached (); // remaining int types are handled through the codegen table
3798
3862
break ;
@@ -3802,6 +3866,52 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3802
3866
}
3803
3867
break ;
3804
3868
}
3869
+ case OP_CREATE_SCALAR_INT : {
3870
+ const int t = get_type_size_macro (ins -> inst_c1 );
3871
+ arm_neon_eor_16b (code , dreg , dreg , dreg );
3872
+ arm_neon_ins_g (code , t , dreg , sreg1 , 0 );
3873
+ break ;
3874
+ }
3875
+ case OP_CREATE_SCALAR_FLOAT : {
3876
+ int t = 0 ;
3877
+ switch (ins -> inst_c1 ) {
3878
+ case MONO_TYPE_R4 :
3879
+ t = SIZE_4 ;
3880
+ break ;
3881
+ case MONO_TYPE_R8 :
3882
+ t = SIZE_8 ;
3883
+ break ;
3884
+ }
3885
+ // Use a temp register for zero op, as sreg1 and dreg share the same register here
3886
+ arm_neon_eor_16b (code , NEON_TMP_REG , NEON_TMP_REG , NEON_TMP_REG );
3887
+ arm_neon_ins_e (code , t , NEON_TMP_REG , sreg1 , 0 , 0 );
3888
+ arm_neon_mov (code , dreg , NEON_TMP_REG );
3889
+ break ;
3890
+ }
3891
+ case OP_CREATE_SCALAR_UNSAFE_INT : {
3892
+ const int t = get_type_size_macro (ins -> inst_c1 );
3893
+ arm_neon_ins_g (code , t , dreg , sreg1 , 0 );
3894
+ break ;
3895
+ }
3896
+ case OP_CREATE_SCALAR_UNSAFE_FLOAT : {
3897
+ if (dreg != sreg1 ) {
3898
+ int t = 0 ;
3899
+ switch (ins -> inst_c1 ) {
3900
+ case MONO_TYPE_R4 :
3901
+ t = SIZE_4 ;
3902
+ break ;
3903
+ case MONO_TYPE_R8 :
3904
+ t = SIZE_8 ;
3905
+ break ;
3906
+ }
3907
+ arm_neon_ins_e (code , t , dreg , sreg1 , 0 , 0 );
3908
+ }
3909
+ break ;
3910
+ }
3911
+ // Enable this case when adding support for Narrow, and enable support for Create at the same time.
3912
+ // case OP_XCONCAT:
3913
+ // arm_neon_ext_16b(code, dreg, sreg1, sreg2, 8);
3914
+ // break;
3805
3915
3806
3916
/* BRANCH */
3807
3917
case OP_BR :
@@ -3875,49 +3985,6 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
3875
3985
arm_cbnzx (code , sreg1 , 0 );
3876
3986
break ;
3877
3987
3878
- /* SIMD that is not table-generated */
3879
- /* TODO: once https://github.com/dotnet/runtime/issues/83252 is done,
3880
- * move the following two to the codegen table in simd-arm64.h
3881
- */
3882
- case OP_ONES_COMPLEMENT :
3883
- arm_neon_not (code , get_vector_size_macro (ins ), dreg , sreg1 );
3884
- break ;
3885
- case OP_NEGATION :
3886
- if (is_type_float_macro (ins -> inst_c1 )) {
3887
- arm_neon_fneg (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 );
3888
- } else {
3889
- arm_neon_neg (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 );
3890
- }
3891
- break ;
3892
- case OP_XBINOP :
3893
- switch (ins -> inst_c0 ) {
3894
- case OP_IMAX :
3895
- code = emit_smax_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3896
- break ;
3897
- case OP_IMAX_UN :
3898
- code = emit_umax_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3899
- break ;
3900
- case OP_IMIN :
3901
- code = emit_smin_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3902
- break ;
3903
- case OP_IMIN_UN :
3904
- code = emit_umin_i8 (code , get_vector_size_macro (ins ), get_type_size_macro (ins -> inst_c1 ), dreg , sreg1 , sreg2 );
3905
- break ;
3906
- default :
3907
- g_assert_not_reached ();
3908
- }
3909
- break ;
3910
- case OP_XZERO :
3911
- arm_neon_eor_16b (code , dreg , dreg , dreg );
3912
- break ;
3913
- case OP_XONES :
3914
- arm_neon_eor_16b (code , dreg , dreg , dreg );
3915
- arm_neon_not_16b (code , dreg , dreg );
3916
- break ;
3917
- case OP_XEXTRACT :
3918
- code = emit_xextract (code , VREG_FULL , ins -> inst_c0 , dreg , sreg1 );
3919
- break ;
3920
-
3921
3988
/* ALU */
3922
3989
case OP_IADD :
3923
3990
arm_addw (code , dreg , sreg1 , sreg2 );
0 commit comments