Skip to content

Commit 4da62e4

Browse files
authored
Fix vector's Narrow intrinsics (#81843)
The BCL Vector classes have non-saturating Narrow methods, while the wasm narrow instructions are saturating. AFAIK wasm does not have non-saturating narrow instructions. So instead of `i8x16.narrow_i16x8_s`, `i8x16.narrow_i16x8_u`, `i16x8.narrow_i32x4_s`, and `i16x8.narrow_i32x4_u`, use the `v8x16.shuffle` instruction to implement the extract-narrow operation. This fixes the `System.Numerics.Tests.GenericVectorTests.Narrow[U]Int*` tests.
1 parent 6726915 commit 4da62e4

File tree

3 files changed

+35
-12
lines changed

3 files changed

+35
-12
lines changed

src/mono/mono/mini/mini-llvm.c

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9833,6 +9833,37 @@ MONO_RESTORE_WARNING
98339833
values [ins->dreg] = LLVMBuildShuffleVector (builder, lhs, LLVMGetUndef (LLVMTypeOf (lhs)), shuffle_val, "");
98349834
break;
98359835
}
9836+
case OP_WASM_EXTRACT_NARROW: {
/*
 * Narrow lhs and rhs into a single vector by truncation: for every source
 * lane, keep only its first `bytes` bytes and drop the rest. This is done
 * with one byte-level shuffle over both operands rather than a narrowing
 * intrinsic. The first bytes of a lane are its low-order half, assuming
 * little-endian lane layout -- NOTE(review): confirm for the target.
 */
9837+
/* Number of lanes in each source operand. */
int nelems = LLVMGetVectorSize (LLVMTypeOf (lhs));
9838+
/* Byte width of one output lane: 16 bytes shared by nelems * 2 lanes. */
int bytes = 16 / (nelems * 2);
9839+
/* Element type of the narrowed result, chosen from the source lane count. */
LLVMTypeRef itype;
9840+
9841+
/*
 * 2, 4 or 8 source lanes map to i4_t, i2_t or i1_t, which matches the
 * 4-, 2- and 1-byte output lane widths computed in `bytes` (presumably
 * these are the integer types of those byte sizes -- TODO confirm).
 */
switch(nelems) {
9842+
case 2:
9843+
itype = i4_t;
9844+
break;
9845+
case 4:
9846+
itype = i2_t;
9847+
break;
9848+
case 8:
9849+
itype = i1_t;
9850+
break;
9851+
default:
9852+
/* Any other lane count is not expected here. */
g_assert_not_reached();
9853+
}
9854+
9855+
/* 16-entry byte shuffle mask, zero-initialized and filled in below. */
LLVMValueRef mask = LLVMConstNull (LLVMVectorType (i1_t, 16));
9856+
/*
 * For source lane i, its first `bytes` bytes start at byte offset
 * i * bytes * 2 and are written to output byte offset i * bytes.
 * nelems * bytes is always 8, so each operand fills 8 output bytes.
 */
for (int i = 0; i < nelems; ++i) {
9857+
for (int j = 0; j < bytes; ++j) {
9858+
/* Output bytes 0..7: selected from the first shuffle operand (lhs). */
mask = LLVMBuildInsertElement (builder, mask, const_int8 (i * bytes * 2 + j), const_int32 (i * bytes + j), "");
9859+
/* Output bytes 8..15: indices 16 and up select from the second operand (rhs). */
mask = LLVMBuildInsertElement (builder, mask, const_int8 (16 + i * bytes * 2 + j), const_int32 (8 + i * bytes + j), "");
9860+
}
9861+
}
9862+
9863+
/* View both operands as 16 x i1_t so the mask can address single bytes. */
LLVMValueRef shuffle = LLVMBuildShuffleVector (builder, LLVMBuildBitCast (builder, lhs, LLVMVectorType (i1_t, 16), ""), LLVMBuildBitCast (builder, rhs, LLVMVectorType (i1_t, 16), ""), mask, "");
9864+
/* Reinterpret the shuffled bytes as nelems * 2 lanes of the narrowed type. */
values [ins->dreg] = LLVMBuildBitCast (builder, shuffle, LLVMVectorType (itype, nelems * 2), "");
9865+
break;
9866+
}
98369867
#endif
98379868
#if defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM)
98389869
case OP_XEQUAL: {

src/mono/mono/mini/mini-ops.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,7 @@ MINI_OP(OP_WASM_SIMD_BITMASK, "wasm_bitmask", IREG, XREG, NONE)
853853
MINI_OP3(OP_WASM_SIMD_SHUFFLE, "wasm_shuffle", XREG, XREG, XREG, XREG)
854854
MINI_OP(OP_WASM_SIMD_SUM, "wasm_sum", XREG, XREG, NONE)
855855
MINI_OP(OP_WASM_SIMD_SWIZZLE, "wasm_swizzle", XREG, XREG, XREG)
856+
MINI_OP(OP_WASM_EXTRACT_NARROW, "wasm_extract_narrow", XREG, XREG, XREG)
856857
#endif
857858

858859
#if defined(TARGET_X86) || defined(TARGET_AMD64) || defined(TARGET_WASM)

src/mono/mono/mini/simd-intrinsics.c

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1621,25 +1621,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
16211621
if (size != 16)
16221622
return NULL;
16231623

1624-
int intrins = -1;
16251624
switch (arg0_type) {
16261625
case MONO_TYPE_I2:
1627-
intrins = INTRINS_WASM_NARROW_SIGNED_V16;
1628-
break;
16291626
case MONO_TYPE_I4:
1630-
intrins = INTRINS_WASM_NARROW_SIGNED_V8;
1631-
break;
1627+
case MONO_TYPE_I8:
16321628
case MONO_TYPE_U2:
1633-
intrins = INTRINS_WASM_NARROW_UNSIGNED_V16;
1634-
break;
16351629
case MONO_TYPE_U4:
1636-
intrins = INTRINS_WASM_NARROW_UNSIGNED_V8;
1637-
break;
1630+
case MONO_TYPE_U8:
1631+
return emit_simd_ins_for_sig (cfg, klass, OP_WASM_EXTRACT_NARROW, -1, -1, fsig, args);
16381632
}
16391633

1640-
if (intrins != -1)
1641-
return emit_simd_ins_for_sig (cfg, klass, OP_XOP_X_X_X, intrins, arg0_type, fsig, args);
1642-
16431634
return NULL;
16441635
#else
16451636
return NULL;

0 commit comments

Comments
 (0)