Skip to content

Commit 59904b7

Browse files
bjorn3 authored and gnzlbg committed
Use simd_saturating_* in x86/sse2.rs where possible
1 parent: 76824d7 · commit: 59904b7

File tree

1 file changed: 8 additions and 24 deletions.

crates/core_arch/src/x86/sse2.rs

Lines changed: 8 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -121,7 +121,7 @@ pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
121121
#[cfg_attr(test, assert_instr(paddsb))]
122122
#[stable(feature = "simd_x86", since = "1.27.0")]
123123
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
124-
transmute(paddsb(a.as_i8x16(), b.as_i8x16()))
124+
transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
125125
}
126126

127127
/// Adds packed 16-bit integers in `a` and `b` using saturation.
@@ -132,7 +132,7 @@ pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
132132
#[cfg_attr(test, assert_instr(paddsw))]
133133
#[stable(feature = "simd_x86", since = "1.27.0")]
134134
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
135-
transmute(paddsw(a.as_i16x8(), b.as_i16x8()))
135+
transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
136136
}
137137

138138
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
@@ -143,7 +143,7 @@ pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
143143
#[cfg_attr(test, assert_instr(paddusb))]
144144
#[stable(feature = "simd_x86", since = "1.27.0")]
145145
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
146-
transmute(paddsub(a.as_u8x16(), b.as_u8x16()))
146+
transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
147147
}
148148

149149
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
@@ -154,7 +154,7 @@ pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
154154
#[cfg_attr(test, assert_instr(paddusw))]
155155
#[stable(feature = "simd_x86", since = "1.27.0")]
156156
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
157-
transmute(paddsuw(a.as_u16x8(), b.as_u16x8()))
157+
transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
158158
}
159159

160160
/// Averages packed unsigned 8-bit integers in `a` and `b`.
@@ -367,7 +367,7 @@ pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
367367
#[cfg_attr(test, assert_instr(psubsb))]
368368
#[stable(feature = "simd_x86", since = "1.27.0")]
369369
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
370-
transmute(psubsb(a.as_i8x16(), b.as_i8x16()))
370+
transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
371371
}
372372

373373
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
@@ -379,7 +379,7 @@ pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
379379
#[cfg_attr(test, assert_instr(psubsw))]
380380
#[stable(feature = "simd_x86", since = "1.27.0")]
381381
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
382-
transmute(psubsw(a.as_i16x8(), b.as_i16x8()))
382+
transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
383383
}
384384

385385
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
@@ -391,7 +391,7 @@ pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
391391
#[cfg_attr(test, assert_instr(psubusb))]
392392
#[stable(feature = "simd_x86", since = "1.27.0")]
393393
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
394-
transmute(psubusb(a.as_u8x16(), b.as_u8x16()))
394+
transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
395395
}
396396

397397
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
@@ -403,7 +403,7 @@ pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
403403
#[cfg_attr(test, assert_instr(psubusw))]
404404
#[stable(feature = "simd_x86", since = "1.27.0")]
405405
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
406-
transmute(psubusw(a.as_u16x8(), b.as_u16x8()))
406+
transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
407407
}
408408

409409
/// Shifts `a` left by `imm8` bytes while shifting in zeros.
@@ -3021,14 +3021,6 @@ extern "C" {
30213021
fn lfence();
30223022
#[link_name = "llvm.x86.sse2.mfence"]
30233023
fn mfence();
3024-
#[link_name = "llvm.x86.sse2.padds.b"]
3025-
fn paddsb(a: i8x16, b: i8x16) -> i8x16;
3026-
#[link_name = "llvm.x86.sse2.padds.w"]
3027-
fn paddsw(a: i16x8, b: i16x8) -> i16x8;
3028-
#[link_name = "llvm.x86.sse2.paddus.b"]
3029-
fn paddsub(a: u8x16, b: u8x16) -> u8x16;
3030-
#[link_name = "llvm.x86.sse2.paddus.w"]
3031-
fn paddsuw(a: u16x8, b: u16x8) -> u16x8;
30323024
#[link_name = "llvm.x86.sse2.pavg.b"]
30333025
fn pavgb(a: u8x16, b: u8x16) -> u8x16;
30343026
#[link_name = "llvm.x86.sse2.pavg.w"]
@@ -3051,14 +3043,6 @@ extern "C" {
30513043
fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
30523044
#[link_name = "llvm.x86.sse2.psad.bw"]
30533045
fn psadbw(a: u8x16, b: u8x16) -> u64x2;
3054-
#[link_name = "llvm.x86.sse2.psubs.b"]
3055-
fn psubsb(a: i8x16, b: i8x16) -> i8x16;
3056-
#[link_name = "llvm.x86.sse2.psubs.w"]
3057-
fn psubsw(a: i16x8, b: i16x8) -> i16x8;
3058-
#[link_name = "llvm.x86.sse2.psubus.b"]
3059-
fn psubusb(a: u8x16, b: u8x16) -> u8x16;
3060-
#[link_name = "llvm.x86.sse2.psubus.w"]
3061-
fn psubusw(a: u16x8, b: u16x8) -> u16x8;
30623046
#[link_name = "llvm.x86.sse2.pslli.w"]
30633047
fn pslliw(a: i16x8, imm8: i32) -> i16x8;
30643048
#[link_name = "llvm.x86.sse2.psll.w"]

0 commit comments

Comments
 (0)