wasmi_core: add support for the Wasm simd proposal #1395
Codecov Report

Attention: Patch coverage is …

```
@@            Coverage Diff            @@
##             main    #1395     +/-  ##
=========================================
- Coverage   70.52%   69.35%   -1.18%
=========================================
  Files         157      158       +1
  Lines       14414    14695     +281
=========================================
+ Hits        10165    10191      +26
- Misses       4249     4504     +255
```
This is the skeleton that will make it possible to implement most of the V128 API in an efficient way.
This will make it easier to remove the lint again later.
This now exposes `load`, `load_at`, `store`, and `store_at` functions in addition to the `load_extend` and `store_wrap` ones. This way we no longer require the weird `WrapInto` and `ExtendInto` impls for `T -> T`.
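A minimal sketch of the distinction, using hypothetical free functions rather than the actual `wasmi_core` signatures: a plain load reads a value of the target type unchanged, while an extending load reads a narrower type and widens it, so the identity `T -> T` conversion is no longer needed.

```rust
/// Plain load (sketch): reads a `u32` unchanged from little-endian memory.
fn load_u32(memory: &[u8], offset: usize) -> Option<u32> {
    let bytes = memory.get(offset..offset + 4)?;
    Some(u32::from_le_bytes(bytes.try_into().ok()?))
}

/// Extending load (sketch): reads an `i8` and sign-extends it to `i32`,
/// which is where an `ExtendInto`-style conversion actually does work.
fn load_i8_extend_i32(memory: &[u8], offset: usize) -> Option<i32> {
    let byte = *memory.get(offset)? as i8;
    Some(i32::from(byte))
}

fn main() {
    let memory = [0x78, 0x56, 0x34, 0x12, 0xFF];
    assert_eq!(load_u32(&memory, 0), Some(0x1234_5678));
    assert_eq!(load_i8_extend_i32(&memory, 4), Some(-1));
}
```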
This allows the `LaneIdx` type to be inferred from more types, such as `i8`, `u32`, `f64`, etc.
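For illustration only (the trait and names below are assumptions, not the `wasmi_core` design): the lane count, and hence the fitting lane-index range, can be derived from the element type's size.

```rust
/// Sketch: derive the number of 128-bit-vector lanes from the element type,
/// so the fitting lane-index type can be inferred from `i8`, `u32`, `f64`, …
trait Lane: Sized {
    const LANES: usize = 16 / core::mem::size_of::<Self>();
}

impl Lane for i8 {}  // 16 lanes -> ImmLaneIdx16
impl Lane for u32 {} // 4 lanes  -> ImmLaneIdx4
impl Lane for f64 {} // 2 lanes  -> ImmLaneIdx2

fn main() {
    assert_eq!(<i8 as Lane>::LANES, 16);
    assert_eq!(<u32 as Lane>::LANES, 4);
    assert_eq!(<f64 as Lane>::LANES, 2);
}
```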
The next step is to add Wasm `simd` support to the rest of Wasmi.
The new `simd` crate feature is a full replacement for it.
- Fixes an overflow issue in the avgr_u SIMD instructions (see the sketch below).
- Now uses div_ceil as suggested by clippy.
- Deduplicated logic via a macro.
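A sketch of the lane-wise rounding average (an assumed helper, not the exact `wasmi_core` code): widening to `u16` before the addition avoids the overflow that `(a + b + 1) / 2` has in `u8`, and `div_ceil` (stable since Rust 1.73) expresses the rounding as suggested by clippy.

```rust
/// Rounding average of two unsigned 8-bit lanes, as in i8x16.avgr_u.
fn avgr_u8(a: u8, b: u8) -> u8 {
    (u16::from(a) + u16::from(b)).div_ceil(2) as u8
}

fn main() {
    assert_eq!(avgr_u8(255, 255), 255); // (255 + 255 + 1) / 2 would overflow in u8
    assert_eq!(avgr_u8(1, 2), 2);       // rounds up
}
```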
Implements the `wasmi_core` part of #1364.

This PR implements the `simd` submodule in `wasmi_core`, which provides basic types and functionality for Wasm `simd` proposal support in Wasmi. This includes the `V128` type, several lane types, and the entire Wasm `simd` proposal API, which can then be used in Wasmi in const-evaluation, execution, and initializer expressions.
- Removes the `value128` crate feature from `wasmi_core`.
- Adds the `simd` crate feature to `wasmi_core`.
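As an illustration of the kind of functionality the new `simd` submodule provides, here is a minimal, self-contained sketch of an i32x4.splat over a byte-backed 128-bit value; the struct and method names are assumptions for this example, not the actual `wasmi_core` API.

```rust
/// Illustrative 128-bit value stored as 16 little-endian bytes.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
struct V128([u8; 16]);

impl V128 {
    /// Broadcasts a single `i32` into all four 32-bit lanes (i32x4.splat).
    fn splat_i32(value: i32) -> Self {
        let mut bytes = [0u8; 16];
        for lane in 0..4 {
            bytes[lane * 4..][..4].copy_from_slice(&value.to_le_bytes());
        }
        Self(bytes)
    }
}

fn main() {
    let v = V128::splat_i32(-1);
    assert_eq!(v.0, [0xFF; 16]);
}
```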
ToDo: Instructions
v128.const(imm: ImmByte[16]) -> v128
i8x16.splat(x: i32) -> v128
i16x8.splat(x: i32) -> v128
i32x4.splat(x: i32) -> v128
i64x2.splat(x: i64) -> v128
f32x4.splat(x: f32) -> v128
f64x2.splat(x: f64) -> v128
i8x16.extract_lane_s(a: v128, imm: ImmLaneIdx16) -> i32
i8x16.extract_lane_u(a: v128, imm: ImmLaneIdx16) -> i32
i16x8.extract_lane_s(a: v128, imm: ImmLaneIdx8) -> i32
i16x8.extract_lane_u(a: v128, imm: ImmLaneIdx8) -> i32
i32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> i32
i64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> i64
f32x4.extract_lane(a: v128, imm: ImmLaneIdx4) -> f32
f64x2.extract_lane(a: v128, imm: ImmLaneIdx2) -> f64
i8x16.replace_lane(a: v128, imm: ImmLaneIdx16, x: i32) -> v128
i16x8.replace_lane(a: v128, imm: ImmLaneIdx8, x: i32) -> v128
i32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: i32) -> v128
i64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: i64) -> v128
f32x4.replace_lane(a: v128, imm: ImmLaneIdx4, x: f32) -> v128
f64x2.replace_lane(a: v128, imm: ImmLaneIdx2, x: f64) -> v128
i8x16.shuffle(a: v128, b: v128, imm: ImmLaneIdx32[16]) -> v128
i8x16.swizzle(a: v128, s: v128) -> v128
i8x16.add(a: v128, b: v128) -> v128
i16x8.add(a: v128, b: v128) -> v128
i32x4.add(a: v128, b: v128) -> v128
i64x2.add(a: v128, b: v128) -> v128
i8x16.sub(a: v128, b: v128) -> v128
i16x8.sub(a: v128, b: v128) -> v128
i32x4.sub(a: v128, b: v128) -> v128
i64x2.sub(a: v128, b: v128) -> v128
i16x8.mul(a: v128, b: v128) -> v128
i32x4.mul(a: v128, b: v128) -> v128
i64x2.mul(a: v128, b: v128) -> v128
i32x4.dot_i16x8_s(a: v128, b: v128) -> v128
i8x16.neg(a: v128) -> v128
i16x8.neg(a: v128) -> v128
i32x4.neg(a: v128) -> v128
i64x2.neg(a: v128) -> v128
i16x8.extmul_low_i8x16_s(a: v128, b: v128) -> v128
i16x8.extmul_high_i8x16_s(a: v128, b: v128) -> v128
i16x8.extmul_low_i8x16_u(a: v128, b: v128) -> v128
i16x8.extmul_high_i8x16_u(a: v128, b: v128) -> v128
i32x4.extmul_low_i16x8_s(a: v128, b: v128) -> v128
i32x4.extmul_high_i16x8_s(a: v128, b: v128) -> v128
i32x4.extmul_low_i16x8_u(a: v128, b: v128) -> v128
i32x4.extmul_high_i16x8_u(a: v128, b: v128) -> v128
i64x2.extmul_low_i32x4_s(a: v128, b: v128) -> v128
i64x2.extmul_high_i32x4_s(a: v128, b: v128) -> v128
i64x2.extmul_low_i32x4_u(a: v128, b: v128) -> v128
i64x2.extmul_high_i32x4_u(a: v128, b: v128) -> v128
i16x8.extadd_pairwise_i8x16_s(a: v128) -> v128
i16x8.extadd_pairwise_i8x16_u(a: v128) -> v128
i32x4.extadd_pairwise_i16x8_s(a: v128) -> v128
i32x4.extadd_pairwise_i16x8_u(a: v128) -> v128
i8x16.add_sat_s(a: v128, b: v128) -> v128
i8x16.add_sat_u(a: v128, b: v128) -> v128
i16x8.add_sat_s(a: v128, b: v128) -> v128
i16x8.add_sat_u(a: v128, b: v128) -> v128
i8x16.sub_sat_s(a: v128, b: v128) -> v128
i8x16.sub_sat_u(a: v128, b: v128) -> v128
i16x8.sub_sat_s(a: v128, b: v128) -> v128
i16x8.sub_sat_u(a: v128, b: v128) -> v128
i16x8.q15mulr_sat_s(a: v128, b: v128) -> v128
i8x16.min_s(a: v128, b: v128) -> v128
i8x16.min_u(a: v128, b: v128) -> v128
i16x8.min_s(a: v128, b: v128) -> v128
i16x8.min_u(a: v128, b: v128) -> v128
i32x4.min_s(a: v128, b: v128) -> v128
i32x4.min_u(a: v128, b: v128) -> v128
i8x16.max_s(a: v128, b: v128) -> v128
i8x16.max_u(a: v128, b: v128) -> v128
i16x8.max_s(a: v128, b: v128) -> v128
i16x8.max_u(a: v128, b: v128) -> v128
i32x4.max_s(a: v128, b: v128) -> v128
i32x4.max_u(a: v128, b: v128) -> v128
i8x16.avgr_u(a: v128, b: v128) -> v128
i16x8.avgr_u(a: v128, b: v128) -> v128
i8x16.abs(a: v128) -> v128
i16x8.abs(a: v128) -> v128
i32x4.abs(a: v128) -> v128
i64x2.abs(a: v128) -> v128
i8x16.shl(a: v128, y: i32) -> v128
i16x8.shl(a: v128, y: i32) -> v128
i32x4.shl(a: v128, y: i32) -> v128
i64x2.shl(a: v128, y: i32) -> v128
i8x16.shr_s(a: v128, y: i32) -> v128
i8x16.shr_u(a: v128, y: i32) -> v128
i16x8.shr_s(a: v128, y: i32) -> v128
i16x8.shr_u(a: v128, y: i32) -> v128
i32x4.shr_s(a: v128, y: i32) -> v128
i32x4.shr_u(a: v128, y: i32) -> v128
i64x2.shr_s(a: v128, y: i32) -> v128
i64x2.shr_u(a: v128, y: i32) -> v128
v128.and(a: v128, b: v128) -> v128
v128.or(a: v128, b: v128) -> v128
v128.xor(a: v128, b: v128) -> v128
v128.not(a: v128) -> v128
v128.andnot(a: v128, b: v128) -> v128
v128.bitselect(v1: v128, v2: v128, c: v128) -> v128
i8x16.popcnt(v: v128) -> v128
v128.any_true(a: v128) -> i32
i8x16.all_true(a: v128) -> i32
i16x8.all_true(a: v128) -> i32
i32x4.all_true(a: v128) -> i32
i64x2.all_true(a: v128) -> i32
i8x16.bitmask(a: v128) -> i32
i16x8.bitmask(a: v128) -> i32
i32x4.bitmask(a: v128) -> i32
i64x2.bitmask(a: v128) -> i32
i8x16.eq(a: v128, b: v128) -> v128
i16x8.eq(a: v128, b: v128) -> v128
i32x4.eq(a: v128, b: v128) -> v128
i64x2.eq(a: v128, b: v128) -> v128
f32x4.eq(a: v128, b: v128) -> v128
f64x2.eq(a: v128, b: v128) -> v128
i8x16.ne(a: v128, b: v128) -> v128
i16x8.ne(a: v128, b: v128) -> v128
i32x4.ne(a: v128, b: v128) -> v128
i64x2.ne(a: v128, b: v128) -> v128
f32x4.ne(a: v128, b: v128) -> v128
f64x2.ne(a: v128, b: v128) -> v128
i8x16.lt_s(a: v128, b: v128) -> v128
i8x16.lt_u(a: v128, b: v128) -> v128
i16x8.lt_s(a: v128, b: v128) -> v128
i16x8.lt_u(a: v128, b: v128) -> v128
i32x4.lt_s(a: v128, b: v128) -> v128
i32x4.lt_u(a: v128, b: v128) -> v128
i64x2.lt_s(a: v128, b: v128) -> v128
f32x4.lt(a: v128, b: v128) -> v128
f64x2.lt(a: v128, b: v128) -> v128
i8x16.le_s(a: v128, b: v128) -> v128
i8x16.le_u(a: v128, b: v128) -> v128
i16x8.le_s(a: v128, b: v128) -> v128
i16x8.le_u(a: v128, b: v128) -> v128
i32x4.le_s(a: v128, b: v128) -> v128
i32x4.le_u(a: v128, b: v128) -> v128
i64x2.le_s(a: v128, b: v128) -> v128
f32x4.le(a: v128, b: v128) -> v128
f64x2.le(a: v128, b: v128) -> v128
i8x16.gt_s(a: v128, b: v128) -> v128
i8x16.gt_u(a: v128, b: v128) -> v128
i16x8.gt_s(a: v128, b: v128) -> v128
i16x8.gt_u(a: v128, b: v128) -> v128
i32x4.gt_s(a: v128, b: v128) -> v128
i32x4.gt_u(a: v128, b: v128) -> v128
i64x2.gt_s(a: v128, b: v128) -> v128
f32x4.gt(a: v128, b: v128) -> v128
f64x2.gt(a: v128, b: v128) -> v128
i8x16.ge_s(a: v128, b: v128) -> v128
i8x16.ge_u(a: v128, b: v128) -> v128
i16x8.ge_s(a: v128, b: v128) -> v128
i16x8.ge_u(a: v128, b: v128) -> v128
i32x4.ge_s(a: v128, b: v128) -> v128
i32x4.ge_u(a: v128, b: v128) -> v128
i64x2.ge_s(a: v128, b: v128) -> v128
f32x4.ge(a: v128, b: v128) -> v128
f64x2.ge(a: v128, b: v128) -> v128
v128.load(m: memarg) -> v128
v128.load32_zero(m: memarg) -> v128
v128.load64_zero(m: memarg) -> v128
v128.load8_splat(m: memarg) -> v128
v128.load16_splat(m: memarg) -> v128
v128.load32_splat(m: memarg) -> v128
v128.load64_splat(m: memarg) -> v128
v128.load8_lane(m: memarg, x: v128, imm: ImmLaneIdx16) -> v128
v128.load16_lane(m: memarg, x: v128, imm: ImmLaneIdx8) -> v128
v128.load32_lane(m: memarg, x: v128, imm: ImmLaneIdx4) -> v128
v128.load64_lane(m: memarg, x: v128, imm: ImmLaneIdx2) -> v128
v128.load8x8_s(m: memarg)
v128.load8x8_u(m: memarg)
v128.load16x4_s(m: memarg)
v128.load16x4_u(m: memarg)
v128.load32x2_s(m: memarg)
v128.load32x2_u(m: memarg)
v128.store(m: memarg, data: v128)
v128.store8_lane(m: memarg, data: v128, imm: ImmLaneIdx16)
v128.store16_lane(m: memarg, data: v128, imm: ImmLaneIdx8)
v128.store32_lane(m: memarg, data: v128, imm: ImmLaneIdx4)
v128.store64_lane(m: memarg, data: v128, imm: ImmLaneIdx2)
f32x4.neg(a: v128) -> v128
f64x2.neg(a: v128) -> v128
f32x4.abs(a: v128) -> v128
f64x2.abs(a: v128) -> v128
f32x4.min(a: v128, b: v128) -> v128
f64x2.min(a: v128, b: v128) -> v128
f32x4.max(a: v128, b: v128) -> v128
f64x2.max(a: v128, b: v128) -> v128
f32x4.pmin(a: v128, b: v128) -> v128
f64x2.pmin(a: v128, b: v128) -> v128
f32x4.pmax(a: v128, b: v128) -> v128
f64x2.pmax(a: v128, b: v128) -> v128
f32x4.add(a: v128, b: v128) -> v128
f64x2.add(a: v128, b: v128) -> v128
f32x4.sub(a: v128, b: v128) -> v128
f64x2.sub(a: v128, b: v128) -> v128
f32x4.div(a: v128, b: v128) -> v128
f64x2.div(a: v128, b: v128) -> v128
f32x4.mul(a: v128, b: v128) -> v128
f64x2.mul(a: v128, b: v128) -> v128
f32x4.sqrt(a: v128) -> v128
f64x2.sqrt(a: v128) -> v128
f32x4.ceil(a: v128) -> v128
f64x2.ceil(a: v128) -> v128
f32x4.floor(a: v128) -> v128
f64x2.floor(a: v128) -> v128
f32x4.trunc(a: v128) -> v128
f64x2.trunc(a: v128) -> v128
f32x4.nearest(a: v128) -> v128
f64x2.nearest(a: v128) -> v128
f32x4.convert_i32x4_s(a: v128) -> v128
f32x4.convert_i32x4_u(a: v128) -> v128
f64x2.convert_low_i32x4_s(a: v128) -> v128
f64x2.convert_low_i32x4_u(a: v128) -> v128
i32x4.trunc_sat_f32x4_s(a: v128) -> v128
i32x4.trunc_sat_f32x4_u(a: v128) -> v128
i32x4.trunc_sat_f64x2_s_zero(a: v128) -> v128
i32x4.trunc_sat_f64x2_u_zero(a: v128) -> v128
f32x4.demote_f64x2_zero(a: v128) -> v128
f64x2.promote_low_f32x4(a: v128) -> v128
i8x16.narrow_i16x8_s(a: v128, b: v128) -> v128
i8x16.narrow_i16x8_u(a: v128, b: v128) -> v128
i16x8.narrow_i32x4_s(a: v128, b: v128) -> v128
i16x8.narrow_i32x4_u(a: v128, b: v128) -> v128
i16x8.extend_low_i8x16_s(a: v128) -> v128
i16x8.extend_high_i8x16_s(a: v128) -> v128
i16x8.extend_low_i8x16_u(a: v128) -> v128
i16x8.extend_high_i8x16_u(a: v128) -> v128
i32x4.extend_low_i16x8_s(a: v128) -> v128
i32x4.extend_high_i16x8_s(a: v128) -> v128
i32x4.extend_low_i16x8_u(a: v128) -> v128
i32x4.extend_high_i16x8_u(a: v128) -> v128
i64x2.extend_low_i32x4_s(a: v128) -> v128
i64x2.extend_high_i32x4_s(a: v128) -> v128
i64x2.extend_low_i32x4_u(a: v128) -> v128
i64x2.extend_high_i32x4_u(a: v128) -> v128
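For reference, the signatures above follow the proposal's little-endian lane ordering: lane 0 occupies the lowest-addressed bytes of the v128 value. A small illustrative sketch of i32x4.extract_lane over a raw 16-byte value (the helper name is an assumption for the example, not the `wasmi_core` API):

```rust
/// Extracts the `lane`-th 32-bit lane of a 16-byte v128 representation.
fn i32x4_extract_lane(v: [u8; 16], lane: usize) -> i32 {
    assert!(lane < 4, "ImmLaneIdx4 must be in 0..4");
    let mut bytes = [0u8; 4];
    bytes.copy_from_slice(&v[lane * 4..][..4]);
    i32::from_le_bytes(bytes)
}

fn main() {
    let mut v = [0u8; 16];
    v[4..8].copy_from_slice(&0x1234_5678i32.to_le_bytes());
    assert_eq!(i32x4_extract_lane(v, 1), 0x1234_5678);
    assert_eq!(i32x4_extract_lane(v, 0), 0);
}
```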