Skip to content

Commit 44f526f

Browse files
committed
Implement reinterpret ops without bytemuck
1 parent 6ac9665 commit 44f526f

File tree

9 files changed

+96
-270
lines changed

9 files changed

+96
-270
lines changed

Cargo.lock

Lines changed: 0 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

fearless_simd/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -38,5 +38,4 @@ force_support_fallback = []
3838
workspace = true
3939

4040
[dependencies]
41-
bytemuck = "1.23.0"
4241
libm = { version = "0.2.15", optional = true }

fearless_simd/src/generated/avx2.rs

Lines changed: 30 additions & 120 deletions
Original file line number | Diff line number | Diff line change
@@ -179,31 +179,19 @@ impl Simd for Avx2 {
179179
}
180180
#[inline(always)]
181181
fn reinterpret_f64_f32x4(self, a: f32x4<Self>) -> f64x2<Self> {
182-
f64x2 {
183-
val: bytemuck::cast(a.val),
184-
simd: a.simd,
185-
}
182+
unsafe { _mm_castps_pd(a.into()).simd_into(self) }
186183
}
187184
#[inline(always)]
188185
fn reinterpret_i32_f32x4(self, a: f32x4<Self>) -> i32x4<Self> {
189-
i32x4 {
190-
val: bytemuck::cast(a.val),
191-
simd: a.simd,
192-
}
186+
unsafe { _mm_castps_si128(a.into()).simd_into(self) }
193187
}
194188
#[inline(always)]
195189
fn reinterpret_u8_f32x4(self, a: f32x4<Self>) -> u8x16<Self> {
196-
u8x16 {
197-
val: bytemuck::cast(a.val),
198-
simd: a.simd,
199-
}
190+
unsafe { _mm_castps_si128(a.into()).simd_into(self) }
200191
}
201192
#[inline(always)]
202193
fn reinterpret_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
203-
u32x4 {
204-
val: bytemuck::cast(a.val),
205-
simd: a.simd,
206-
}
194+
unsafe { _mm_castps_si128(a.into()).simd_into(self) }
207195
}
208196
#[inline(always)]
209197
fn cvt_u32_f32x4(self, a: f32x4<Self>) -> u32x4<Self> {
@@ -352,17 +340,11 @@ impl Simd for Avx2 {
352340
}
353341
#[inline(always)]
354342
fn reinterpret_u8_i8x16(self, a: i8x16<Self>) -> u8x16<Self> {
355-
u8x16 {
356-
val: bytemuck::cast(a.val),
357-
simd: a.simd,
358-
}
343+
__m128i::from(a).simd_into(self)
359344
}
360345
#[inline(always)]
361346
fn reinterpret_u32_i8x16(self, a: i8x16<Self>) -> u32x4<Self> {
362-
u32x4 {
363-
val: bytemuck::cast(a.val),
364-
simd: a.simd,
365-
}
347+
__m128i::from(a).simd_into(self)
366348
}
367349
#[inline(always)]
368350
fn splat_u8x16(self, val: u8) -> u8x16<Self> {
@@ -511,10 +493,7 @@ impl Simd for Avx2 {
511493
}
512494
#[inline(always)]
513495
fn reinterpret_u32_u8x16(self, a: u8x16<Self>) -> u32x4<Self> {
514-
u32x4 {
515-
val: bytemuck::cast(a.val),
516-
simd: a.simd,
517-
}
496+
__m128i::from(a).simd_into(self)
518497
}
519498
#[inline(always)]
520499
fn splat_mask8x16(self, val: i8) -> mask8x16<Self> {
@@ -665,17 +644,11 @@ impl Simd for Avx2 {
665644
}
666645
#[inline(always)]
667646
fn reinterpret_u8_i16x8(self, a: i16x8<Self>) -> u8x16<Self> {
668-
u8x16 {
669-
val: bytemuck::cast(a.val),
670-
simd: a.simd,
671-
}
647+
__m128i::from(a).simd_into(self)
672648
}
673649
#[inline(always)]
674650
fn reinterpret_u32_i16x8(self, a: i16x8<Self>) -> u32x4<Self> {
675-
u32x4 {
676-
val: bytemuck::cast(a.val),
677-
simd: a.simd,
678-
}
651+
__m128i::from(a).simd_into(self)
679652
}
680653
#[inline(always)]
681654
fn splat_u16x8(self, val: u16) -> u16x8<Self> {
@@ -795,17 +768,11 @@ impl Simd for Avx2 {
795768
}
796769
#[inline(always)]
797770
fn reinterpret_u8_u16x8(self, a: u16x8<Self>) -> u8x16<Self> {
798-
u8x16 {
799-
val: bytemuck::cast(a.val),
800-
simd: a.simd,
801-
}
771+
__m128i::from(a).simd_into(self)
802772
}
803773
#[inline(always)]
804774
fn reinterpret_u32_u16x8(self, a: u16x8<Self>) -> u32x4<Self> {
805-
u32x4 {
806-
val: bytemuck::cast(a.val),
807-
simd: a.simd,
808-
}
775+
__m128i::from(a).simd_into(self)
809776
}
810777
#[inline(always)]
811778
fn splat_mask16x8(self, val: i16) -> mask16x8<Self> {
@@ -954,17 +921,11 @@ impl Simd for Avx2 {
954921
}
955922
#[inline(always)]
956923
fn reinterpret_u8_i32x4(self, a: i32x4<Self>) -> u8x16<Self> {
957-
u8x16 {
958-
val: bytemuck::cast(a.val),
959-
simd: a.simd,
960-
}
924+
__m128i::from(a).simd_into(self)
961925
}
962926
#[inline(always)]
963927
fn reinterpret_u32_i32x4(self, a: i32x4<Self>) -> u32x4<Self> {
964-
u32x4 {
965-
val: bytemuck::cast(a.val),
966-
simd: a.simd,
967-
}
928+
__m128i::from(a).simd_into(self)
968929
}
969930
#[inline(always)]
970931
fn cvt_f32_i32x4(self, a: i32x4<Self>) -> f32x4<Self> {
@@ -1086,10 +1047,7 @@ impl Simd for Avx2 {
10861047
}
10871048
#[inline(always)]
10881049
fn reinterpret_u8_u32x4(self, a: u32x4<Self>) -> u8x16<Self> {
1089-
u8x16 {
1090-
val: bytemuck::cast(a.val),
1091-
simd: a.simd,
1092-
}
1050+
__m128i::from(a).simd_into(self)
10931051
}
10941052
#[inline(always)]
10951053
fn cvt_f32_u32x4(self, a: u32x4<Self>) -> f32x4<Self> {
@@ -1253,10 +1211,7 @@ impl Simd for Avx2 {
12531211
}
12541212
#[inline(always)]
12551213
fn reinterpret_f32_f64x2(self, a: f64x2<Self>) -> f32x4<Self> {
1256-
f32x4 {
1257-
val: bytemuck::cast(a.val),
1258-
simd: a.simd,
1259-
}
1214+
unsafe { _mm_castpd_ps(a.into()).simd_into(self) }
12601215
}
12611216
#[inline(always)]
12621217
fn splat_mask64x2(self, val: i64) -> mask64x2<Self> {
@@ -1450,31 +1405,19 @@ impl Simd for Avx2 {
14501405
}
14511406
#[inline(always)]
14521407
fn reinterpret_f64_f32x8(self, a: f32x8<Self>) -> f64x4<Self> {
1453-
f64x4 {
1454-
val: bytemuck::cast(a.val),
1455-
simd: a.simd,
1456-
}
1408+
unsafe { _mm256_castps_pd(a.into()).simd_into(self) }
14571409
}
14581410
#[inline(always)]
14591411
fn reinterpret_i32_f32x8(self, a: f32x8<Self>) -> i32x8<Self> {
1460-
i32x8 {
1461-
val: bytemuck::cast(a.val),
1462-
simd: a.simd,
1463-
}
1412+
unsafe { _mm256_castps_si256(a.into()).simd_into(self) }
14641413
}
14651414
#[inline(always)]
14661415
fn reinterpret_u8_f32x8(self, a: f32x8<Self>) -> u8x32<Self> {
1467-
u8x32 {
1468-
val: bytemuck::cast(a.val),
1469-
simd: a.simd,
1470-
}
1416+
unsafe { _mm256_castps_si256(a.into()).simd_into(self) }
14711417
}
14721418
#[inline(always)]
14731419
fn reinterpret_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
1474-
u32x8 {
1475-
val: bytemuck::cast(a.val),
1476-
simd: a.simd,
1477-
}
1420+
unsafe { _mm256_castps_si256(a.into()).simd_into(self) }
14781421
}
14791422
#[inline(always)]
14801423
fn cvt_u32_f32x8(self, a: f32x8<Self>) -> u32x8<Self> {
@@ -1663,17 +1606,11 @@ impl Simd for Avx2 {
16631606
}
16641607
#[inline(always)]
16651608
fn reinterpret_u8_i8x32(self, a: i8x32<Self>) -> u8x32<Self> {
1666-
u8x32 {
1667-
val: bytemuck::cast(a.val),
1668-
simd: a.simd,
1669-
}
1609+
__m256i::from(a).simd_into(self)
16701610
}
16711611
#[inline(always)]
16721612
fn reinterpret_u32_i8x32(self, a: i8x32<Self>) -> u32x8<Self> {
1673-
u32x8 {
1674-
val: bytemuck::cast(a.val),
1675-
simd: a.simd,
1676-
}
1613+
__m256i::from(a).simd_into(self)
16771614
}
16781615
#[inline(always)]
16791616
fn splat_u8x32(self, val: u8) -> u8x32<Self> {
@@ -1863,10 +1800,7 @@ impl Simd for Avx2 {
18631800
}
18641801
#[inline(always)]
18651802
fn reinterpret_u32_u8x32(self, a: u8x32<Self>) -> u32x8<Self> {
1866-
u32x8 {
1867-
val: bytemuck::cast(a.val),
1868-
simd: a.simd,
1869-
}
1803+
__m256i::from(a).simd_into(self)
18701804
}
18711805
#[inline(always)]
18721806
fn splat_mask8x32(self, val: i8) -> mask8x32<Self> {
@@ -2067,17 +2001,11 @@ impl Simd for Avx2 {
20672001
}
20682002
#[inline(always)]
20692003
fn reinterpret_u8_i16x16(self, a: i16x16<Self>) -> u8x32<Self> {
2070-
u8x32 {
2071-
val: bytemuck::cast(a.val),
2072-
simd: a.simd,
2073-
}
2004+
__m256i::from(a).simd_into(self)
20742005
}
20752006
#[inline(always)]
20762007
fn reinterpret_u32_i16x16(self, a: i16x16<Self>) -> u32x8<Self> {
2077-
u32x8 {
2078-
val: bytemuck::cast(a.val),
2079-
simd: a.simd,
2080-
}
2008+
__m256i::from(a).simd_into(self)
20812009
}
20822010
#[inline(always)]
20832011
fn splat_u16x16(self, val: u16) -> u16x16<Self> {
@@ -2247,17 +2175,11 @@ impl Simd for Avx2 {
22472175
}
22482176
#[inline(always)]
22492177
fn reinterpret_u8_u16x16(self, a: u16x16<Self>) -> u8x32<Self> {
2250-
u8x32 {
2251-
val: bytemuck::cast(a.val),
2252-
simd: a.simd,
2253-
}
2178+
__m256i::from(a).simd_into(self)
22542179
}
22552180
#[inline(always)]
22562181
fn reinterpret_u32_u16x16(self, a: u16x16<Self>) -> u32x8<Self> {
2257-
u32x8 {
2258-
val: bytemuck::cast(a.val),
2259-
simd: a.simd,
2260-
}
2182+
__m256i::from(a).simd_into(self)
22612183
}
22622184
#[inline(always)]
22632185
fn splat_mask16x16(self, val: i16) -> mask16x16<Self> {
@@ -2446,17 +2368,11 @@ impl Simd for Avx2 {
24462368
}
24472369
#[inline(always)]
24482370
fn reinterpret_u8_i32x8(self, a: i32x8<Self>) -> u8x32<Self> {
2449-
u8x32 {
2450-
val: bytemuck::cast(a.val),
2451-
simd: a.simd,
2452-
}
2371+
__m256i::from(a).simd_into(self)
24532372
}
24542373
#[inline(always)]
24552374
fn reinterpret_u32_i32x8(self, a: i32x8<Self>) -> u32x8<Self> {
2456-
u32x8 {
2457-
val: bytemuck::cast(a.val),
2458-
simd: a.simd,
2459-
}
2375+
__m256i::from(a).simd_into(self)
24602376
}
24612377
#[inline(always)]
24622378
fn cvt_f32_i32x8(self, a: i32x8<Self>) -> f32x8<Self> {
@@ -2606,10 +2522,7 @@ impl Simd for Avx2 {
26062522
}
26072523
#[inline(always)]
26082524
fn reinterpret_u8_u32x8(self, a: u32x8<Self>) -> u8x32<Self> {
2609-
u8x32 {
2610-
val: bytemuck::cast(a.val),
2611-
simd: a.simd,
2612-
}
2525+
__m256i::from(a).simd_into(self)
26132526
}
26142527
#[inline(always)]
26152528
fn cvt_f32_u32x8(self, a: u32x8<Self>) -> f32x8<Self> {
@@ -2819,10 +2732,7 @@ impl Simd for Avx2 {
28192732
}
28202733
#[inline(always)]
28212734
fn reinterpret_f32_f64x4(self, a: f64x4<Self>) -> f32x8<Self> {
2822-
f32x8 {
2823-
val: bytemuck::cast(a.val),
2824-
simd: a.simd,
2825-
}
2735+
unsafe { _mm256_castpd_ps(a.into()).simd_into(self) }
28262736
}
28272737
#[inline(always)]
28282738
fn splat_mask64x4(self, val: i64) -> mask64x4<Self> {

0 commit comments

Comments
 (0)