Skip to content

Commit 1b237a2

Browse files
committed
Implement complete PackedSimd feature set in interpreter
Custom PackedSimd.Shuffle in jiterp. Simplify definition of intrinsics with custom C implementations. Clean up dummy interp dregs. Many other commits squashed.
1 parent 1db4357 commit 1b237a2

9 files changed

+966
-314
lines changed

src/mono/mono/mini/interp/interp-simd-intrins.def

+293-94
Large diffs are not rendered by default.

src/mono/mono/mini/interp/interp-simd.c

+249-12
Original file line numberDiff line numberDiff line change
@@ -585,42 +585,276 @@ _interp_wasm_simd_assert_not_reached (v128_t lhs, v128_t rhs) {
585585
g_assert_not_reached ();
586586
}
587587

588-
#define INTERP_WASM_SIMD_INTRINSIC_V_P(id, c_intrinsic, wasm_opcode) \
588+
// Number of lanes of type `lane_type` that fit in one v128_t.
#define LANE_COUNT(lane_type) (sizeof(v128_t) / sizeof(lane_type))

// Ensure the lane is valid by wrapping it (in AOT it would fail to compile).
// `lane_ptr` points at the lane index; only its first byte is read. The mask
// wraps correctly only when LANE_COUNT(lane_type) is a power of two.
// Both the argument and the whole expansion are parenthesized so the macro
// can be used inside a larger expression (comparison, arithmetic, ...).
#define WRAP_LANE(lane_type, lane_ptr) \
	(*((unsigned char *)(lane_ptr)) & (LANE_COUNT(lane_type) - 1))
593+
594+
// Reads one lane of the vector at `vec`, viewed as lanes of `lane_type`,
// converts it to `result_type` and stores it through `res`. The lane index
// is read from `lane` and wrapped into range by WRAP_LANE.
// Expects pointers named `res`, `vec` and `lane` at the expansion site.
// The do/while wrapper makes the expansion a single statement with its own
// scope, so `_lane` cannot clash with caller locals or a second expansion.
#define EXTRACT_LANE(result_type, lane_type) \
	do { \
		int _lane = WRAP_LANE(lane_type, lane); \
		*((result_type *)res) = ((lane_type *)vec)[_lane]; \
	} while (0)

// Copies the vector at `vec`, overwrites one `lane_type` lane with the value
// read from `value`, and stores the whole vector through `res`.
// The copy goes through a temporary, so `res` may alias any of the inputs.
// Expects pointers named `res`, `vec`, `lane` and `value` at the expansion site.
#define REPLACE_LANE(lane_type) \
	do { \
		int _lane = WRAP_LANE(lane_type, lane); \
		v128_t temp = *((v128_t *)vec); \
		((lane_type *)&temp)[_lane] = *(lane_type *)value; \
		*((v128_t *)res) = temp; \
	} while (0)
603+
604+
static void
605+
interp_packedsimd_extractlane_i1 (gpointer res, gpointer vec, gpointer lane) {
606+
EXTRACT_LANE(gint32, gint8);
607+
}
608+
609+
static void
610+
interp_packedsimd_extractlane_u1 (gpointer res, gpointer vec, gpointer lane) {
611+
EXTRACT_LANE(gint32, guint8);
612+
}
613+
614+
static void
615+
interp_packedsimd_extractlane_i2 (gpointer res, gpointer vec, gpointer lane) {
616+
EXTRACT_LANE(gint32, gint16);
617+
}
618+
619+
static void
620+
interp_packedsimd_extractlane_u2 (gpointer res, gpointer vec, gpointer lane) {
621+
EXTRACT_LANE(gint32, guint16);
622+
}
623+
624+
static void
625+
interp_packedsimd_extractlane_i4 (gpointer res, gpointer vec, gpointer lane) {
626+
EXTRACT_LANE(gint32, gint32);
627+
}
628+
629+
static void
630+
interp_packedsimd_extractlane_i8 (gpointer res, gpointer vec, gpointer lane) {
631+
EXTRACT_LANE(gint64, gint64);
632+
}
633+
634+
static void
635+
interp_packedsimd_extractlane_r4 (gpointer res, gpointer vec, gpointer lane) {
636+
EXTRACT_LANE(float, float);
637+
}
638+
639+
static void
640+
interp_packedsimd_extractlane_r8 (gpointer res, gpointer vec, gpointer lane) {
641+
EXTRACT_LANE(double, double);
642+
}
643+
644+
static void
645+
interp_packedsimd_replacelane_i1 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
646+
REPLACE_LANE(gint8);
647+
}
648+
649+
static void
650+
interp_packedsimd_replacelane_i2 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
651+
REPLACE_LANE(gint16);
652+
}
653+
654+
static void
655+
interp_packedsimd_replacelane_i4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
656+
REPLACE_LANE(gint32);
657+
}
658+
659+
static void
660+
interp_packedsimd_replacelane_i8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
661+
REPLACE_LANE(gint64);
662+
}
663+
664+
static void
665+
interp_packedsimd_replacelane_r4 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
666+
REPLACE_LANE(float);
667+
}
668+
669+
static void
670+
interp_packedsimd_replacelane_r8 (gpointer res, gpointer vec, gpointer lane, gpointer value) {
671+
REPLACE_LANE(double);
672+
}
673+
674+
static void
675+
interp_packedsimd_shuffle (gpointer res, gpointer _lower, gpointer _upper, gpointer _indices) {
676+
v128_i1 indices = *((v128_i1 *)_indices),
677+
lower = *((v128_i1 *)_lower),
678+
upper = *((v128_i1 *)_upper),
679+
result = { 0 };
680+
681+
for (int i = 0; i < 16; i++) {
682+
int index = indices[i] & 31;
683+
if (index > 15)
684+
result[i] = upper[index - 16];
685+
else
686+
result[i] = lower[index];
687+
}
688+
689+
*((v128_i1 *)res) = result;
690+
}
691+
692+
#define INDIRECT_LOAD(fn) \
693+
*(v128_t*)res = fn(*(void **)addr_of_addr);
694+
695+
static void
696+
interp_packedsimd_load128 (gpointer res, gpointer addr_of_addr) {
697+
INDIRECT_LOAD(wasm_v128_load);
698+
}
699+
700+
static void
701+
interp_packedsimd_load32_zero (gpointer res, gpointer addr_of_addr) {
702+
INDIRECT_LOAD(wasm_v128_load32_zero);
703+
}
704+
705+
static void
706+
interp_packedsimd_load64_zero (gpointer res, gpointer addr_of_addr) {
707+
INDIRECT_LOAD(wasm_v128_load64_zero);
708+
}
709+
710+
static void
711+
interp_packedsimd_load8_splat (gpointer res, gpointer addr_of_addr) {
712+
INDIRECT_LOAD(wasm_v128_load8_splat);
713+
}
714+
715+
static void
716+
interp_packedsimd_load16_splat (gpointer res, gpointer addr_of_addr) {
717+
INDIRECT_LOAD(wasm_v128_load16_splat);
718+
}
719+
720+
static void
721+
interp_packedsimd_load32_splat (gpointer res, gpointer addr_of_addr) {
722+
INDIRECT_LOAD(wasm_v128_load32_splat);
723+
}
724+
725+
static void
726+
interp_packedsimd_load64_splat (gpointer res, gpointer addr_of_addr) {
727+
INDIRECT_LOAD(wasm_v128_load64_splat);
728+
}
729+
730+
static void
731+
interp_packedsimd_load8x8_s (gpointer res, gpointer addr_of_addr) {
732+
INDIRECT_LOAD(wasm_i16x8_load8x8);
733+
}
734+
735+
static void
736+
interp_packedsimd_load8x8_u (gpointer res, gpointer addr_of_addr) {
737+
INDIRECT_LOAD(wasm_u16x8_load8x8);
738+
}
739+
740+
static void
741+
interp_packedsimd_load16x4_s (gpointer res, gpointer addr_of_addr) {
742+
INDIRECT_LOAD(wasm_i32x4_load16x4);
743+
}
744+
745+
static void
746+
interp_packedsimd_load16x4_u (gpointer res, gpointer addr_of_addr) {
747+
INDIRECT_LOAD(wasm_u32x4_load16x4);
748+
}
749+
750+
static void
751+
interp_packedsimd_load32x2_s (gpointer res, gpointer addr_of_addr) {
752+
INDIRECT_LOAD(wasm_i64x2_load32x2);
753+
}
754+
755+
static void
756+
interp_packedsimd_load32x2_u (gpointer res, gpointer addr_of_addr) {
757+
INDIRECT_LOAD(wasm_u64x2_load32x2);
758+
}
759+
760+
static void
761+
interp_packedsimd_store (gpointer res, gpointer addr_of_addr, gpointer vec) {
762+
// HACK: Result is unused because Store has a void return value
763+
**(v128_t **)addr_of_addr = *(v128_t *)vec;
764+
}
765+
766+
// Stores one `lane_type` lane of the vector at `vec` to the address held in
// `addr_of_addr` (the argument is the address of the address). The lane index
// is read from `lane` and wrapped into range by WRAP_LANE.
// Expects pointers named `addr_of_addr`, `vec` and `lane` at the expansion site.
// The destination is written with memcpy because it comes from the caller and
// may not be aligned for `lane_type`. The do/while wrapper keeps the
// expansion a single statement with its own scope.
#define INDIRECT_STORE_LANE(lane_type) \
	do { \
		int _lane = WRAP_LANE(lane_type, lane); \
		lane_type _value = ((lane_type *)vec)[_lane]; \
		memcpy (*(void **)addr_of_addr, &_value, sizeof (lane_type)); \
	} while (0)
769+
770+
static void
771+
interp_packedsimd_store8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
772+
INDIRECT_STORE_LANE(guint8);
773+
}
774+
775+
static void
776+
interp_packedsimd_store16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
777+
INDIRECT_STORE_LANE(guint16);
778+
}
779+
780+
static void
781+
interp_packedsimd_store32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
782+
INDIRECT_STORE_LANE(guint32);
783+
}
784+
785+
static void
786+
interp_packedsimd_store64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
787+
INDIRECT_STORE_LANE(guint64);
788+
}
789+
790+
// Copies the vector at `vec`, replaces one `lane_type` lane with the value
// read from the address held in `addr_of_addr`, and writes the result through
// `res`. The lane index is read from `lane` and wrapped by WRAP_LANE.
// Expects pointers named `res`, `addr_of_addr`, `vec` and `lane` at the
// expansion site. The source is read with memcpy because it comes from the
// caller and may not be aligned for `lane_type`; sizes come from sizeof
// rather than a literal 16.
#define INDIRECT_LOAD_LANE(lane_type) \
	do { \
		int _lane = WRAP_LANE(lane_type, lane); \
		/* we need temporary storage to do this since res may be the same as vec, addr_of_addr, or lane */ \
		lane_type _lanes[LANE_COUNT(lane_type)]; \
		memcpy (_lanes, vec, sizeof (_lanes)); \
		memcpy (&_lanes[_lane], *(void **)addr_of_addr, sizeof (lane_type)); \
		memcpy (res, _lanes, sizeof (_lanes)); \
	} while (0)
797+
798+
static void
799+
interp_packedsimd_load8_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
800+
INDIRECT_LOAD_LANE(guint8);
801+
}
802+
803+
static void
804+
interp_packedsimd_load16_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
805+
INDIRECT_LOAD_LANE(guint16);
806+
}
807+
808+
static void
809+
interp_packedsimd_load32_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
810+
INDIRECT_LOAD_LANE(guint32);
811+
}
812+
813+
static void
814+
interp_packedsimd_load64_lane (gpointer res, gpointer addr_of_addr, gpointer vec, gpointer lane) {
815+
INDIRECT_LOAD_LANE(guint64);
816+
}
817+
818+
// Wrapper generators used while including interp-simd-intrins.def. Each macro
// emits a static function named _mono_interp_simd_<c_intrinsic> that unpacks
// its pointer arguments, calls `c_intrinsic`, and stores the result through
// `res`. The `name`, `arg1` and `wasm_opcode` parameters are not used here.
// The suffix encodes result_arguments: V = v128_t, I = 32-bit int, P = pointer.

// v128_t result from one pointer argument, passed through as-is.
#define INTERP_WASM_SIMD_INTRINSIC_V_P(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*(v128_t *)res = c_intrinsic (v1); \
}

// v128_t result from one v128_t argument.
#define INTERP_WASM_SIMD_INTRINSIC_V_V(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*(v128_t *)res = c_intrinsic (*(v128_t *)v1); \
}

// int32_t result from one v128_t argument.
#define INTERP_WASM_SIMD_INTRINSIC_I_V(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1) { \
	*(int32_t *)res = c_intrinsic (*(v128_t *)v1); \
}

// v128_t result from two v128_t arguments.
#define INTERP_WASM_SIMD_INTRINSIC_V_VV(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
	*(v128_t *)res = c_intrinsic (*(v128_t *)v1, *(v128_t *)v2); \
}

// v128_t result from a v128_t argument and an int argument.
#define INTERP_WASM_SIMD_INTRINSIC_V_VI(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2) { \
	*(v128_t *)res = c_intrinsic (*(v128_t *)v1, *(int *)v2); \
}

// v128_t result from three v128_t arguments.
#define INTERP_WASM_SIMD_INTRINSIC_V_VVV(name, arg1, c_intrinsic, wasm_opcode) \
static void \
_mono_interp_simd_ ## c_intrinsic (gpointer res, gpointer v1, gpointer v2, gpointer v3) { \
	*(v128_t *)res = c_intrinsic (*(v128_t *)v1, *(v128_t *)v2, *(v128_t *)v3); \
}

// The V_C1/V_C2/V_C3 entries expand to nothing in this pass, so no wrapper is
// generated for them. NOTE(review): presumably these entries name hand-written
// C functions directly via `c_function` -- confirm against the .def file.
#define INTERP_WASM_SIMD_INTRINSIC_V_C1(name, arg1, c_function, wasm_opcode)
#define INTERP_WASM_SIMD_INTRINSIC_V_C2(name, arg1, c_function, wasm_opcode)
#define INTERP_WASM_SIMD_INTRINSIC_V_C3(name, arg1, c_function, wasm_opcode)
857+
624858
#include "interp-simd-intrins.def"
625859

626860
#undef INTERP_WASM_SIMD_INTRINSIC_V_P
@@ -629,6 +863,9 @@ _mono_interp_simd_ ## id (gpointer res, gpointer v1, gpointer v2, gpointer v3) {
629863
#undef INTERP_WASM_SIMD_INTRINSIC_V_VV
630864
#undef INTERP_WASM_SIMD_INTRINSIC_V_VI
631865
#undef INTERP_WASM_SIMD_INTRINSIC_V_VVV
866+
#undef INTERP_WASM_SIMD_INTRINSIC_V_C1
867+
#undef INTERP_WASM_SIMD_INTRINSIC_V_C2
868+
#undef INTERP_WASM_SIMD_INTRINSIC_V_C3
632869

633870
// Now generate the wasm opcode tables for the intrinsics
634871

0 commit comments

Comments
 (0)