rust-lang · RalfJung · Nov 6, 2025 · Nov 4, 2025
diff --git a/src/shims/x86/avx.rs b/src/shims/x86/avx.rs
@@ -1,14 +1,12 @@
 use rustc_abi::CanonAbi;
 use rustc_apfloat::ieee::{Double, Single};
-use rustc_middle::mir;
 use rustc_middle::ty::Ty;
 use rustc_span::Symbol;
 use rustc_target::callconv::FnAbi;
 
 use super::{
     FloatBinOp, FloatUnaryOp, bin_op_simd_float_all, conditional_dot_product, convert_float_to_int,
-    horizontal_bin_op, mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked,
-    unary_op_ps,
+    mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
 };
 use crate::*;
 
@@ -93,21 +91,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
                 conditional_dot_product(this, left, right, imm, dest)?;
             }
-            // Used to implement the _mm256_h{add,sub}_p{s,d} functions.
-            // Horizontally add/subtract adjacent floating point values
-            // in `left` and `right`.
-            "hadd.ps.256" | "hadd.pd.256" | "hsub.ps.256" | "hsub.pd.256" => {
-                let [left, right] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                let which = match unprefixed_name {
-                    "hadd.ps.256" | "hadd.pd.256" => mir::BinOp::Add,
-                    "hsub.ps.256" | "hsub.pd.256" => mir::BinOp::Sub,
-                    _ => unreachable!(),
-                };
-
-                horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
-            }
             // Used to implement the _mm256_cmp_ps function.
             // Performs a comparison operation on each component of `left`
             // and `right`. For each component, returns 0 if false or u32::MAX
@@ -251,40 +234,31 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // Unaligned copy, which is what we want.
                 this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
             }
-            // Used to implement the _mm256_testz_si256, _mm256_testc_si256 and
-            // _mm256_testnzc_si256 functions.
-            // Tests `op & mask == 0`, `op & mask == mask` or
-            // `op & mask != 0 && op & mask != mask`
-            "ptestz.256" | "ptestc.256" | "ptestnzc.256" => {
+            // Used to implement the _mm256_testnzc_si256 function.
+            // Tests `op & mask != 0 && op & mask != mask`
+            "ptestnzc.256" => {
                 let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
 
                 let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
-                let res = match unprefixed_name {
-                    "ptestz.256" => all_zero,
-                    "ptestc.256" => masked_set,
-                    "ptestnzc.256" => !all_zero && !masked_set,
-                    _ => unreachable!(),
-                };
+                let res = !all_zero && !masked_set;
 
                 this.write_scalar(Scalar::from_i32(res.into()), dest)?;
             }
             // Used to implement the _mm256_testz_pd, _mm256_testc_pd, _mm256_testnzc_pd
-            // _mm_testz_pd, _mm_testc_pd, _mm_testnzc_pd, _mm256_testz_ps,
-            // _mm256_testc_ps, _mm256_testnzc_ps, _mm_testz_ps, _mm_testc_ps and
+            // _mm_testnzc_pd, _mm256_testz_ps, _mm256_testc_ps, _mm256_testnzc_ps and
             // _mm_testnzc_ps functions.
             // Calculates two booleans:
             // `direct`, which is true when the highest bit of each element of `op & mask` is zero.
             // `negated`, which is true when the highest bit of each element of `!op & mask` is zero.
             // Return `direct` (testz), `negated` (testc) or `!direct & !negated` (testnzc)
-            "vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestz.pd" | "vtestc.pd"
-            | "vtestnzc.pd" | "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256"
-            | "vtestz.ps" | "vtestc.ps" | "vtestnzc.ps" => {
+            "vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestnzc.pd"
+            | "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256" | "vtestnzc.ps" => {
                 let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
 
                 let (direct, negated) = test_high_bits_masked(this, op, mask)?;
                 let res = match unprefixed_name {
-                    "vtestz.pd.256" | "vtestz.pd" | "vtestz.ps.256" | "vtestz.ps" => direct,
-                    "vtestc.pd.256" | "vtestc.pd" | "vtestc.ps.256" | "vtestc.ps" => negated,
+                    "vtestz.pd.256" | "vtestz.ps.256" => direct,
+                    "vtestc.pd.256" | "vtestc.ps.256" => negated,
                     "vtestnzc.pd.256" | "vtestnzc.pd" | "vtestnzc.ps.256" | "vtestnzc.ps" =>
                         !direct && !negated,
                     _ => unreachable!(),

diff --git a/src/shims/x86/avx2.rs b/src/shims/x86/avx2.rs
@@ -5,8 +5,8 @@ use rustc_span::Symbol;
 use rustc_target::callconv::FnAbi;
 
 use super::{
-    ShiftOp, horizontal_bin_op, int_abs, mask_load, mask_store, mpsadbw, packssdw, packsswb,
-    packusdw, packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd,
+    ShiftOp, horizontal_bin_op, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw,
+    packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd,
 };
 use crate::*;
 
@@ -25,29 +25,20 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx2.").unwrap();
 
         match unprefixed_name {
-            // Used to implement the _mm256_abs_epi{8,16,32} functions.
-            // Calculates the absolute value of packed 8/16/32-bit integers.
-            "pabs.b" | "pabs.w" | "pabs.d" => {
-                let [op] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                int_abs(this, op, dest)?;
-            }
-            // Used to implement the _mm256_h{add,adds,sub}_epi{16,32} functions.
-            // Horizontally add / add with saturation / subtract adjacent 16/32-bit
+            // Used to implement the _mm256_h{adds,subs}_epi16 functions.
+            // Horizontally add / subtract with saturation adjacent 16-bit
             // integer values in `left` and `right`.
-            "phadd.w" | "phadd.sw" | "phadd.d" | "phsub.w" | "phsub.sw" | "phsub.d" => {
+            "phadd.sw" | "phsub.sw" => {
                 let [left, right] =
                     this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
 
-                let (which, saturating) = match unprefixed_name {
-                    "phadd.w" | "phadd.d" => (mir::BinOp::Add, false),
-                    "phadd.sw" => (mir::BinOp::Add, true),
-                    "phsub.w" | "phsub.d" => (mir::BinOp::Sub, false),
-                    "phsub.sw" => (mir::BinOp::Sub, true),
+                let which = match unprefixed_name {
+                    "phadd.sw" => mir::BinOp::Add,
+                    "phsub.sw" => mir::BinOp::Sub,
                     _ => unreachable!(),
                 };
 
-                horizontal_bin_op(this, which, saturating, left, right, dest)?;
+                horizontal_bin_op(this, which, /*saturating*/ true, left, right, dest)?;
             }
             // Used to implement `_mm{,_mask}_{i32,i64}gather_{epi32,epi64,pd,ps}` functions
             // Gathers elements from `slice` using `offsets * scale` as indices.
@@ -110,42 +101,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
                 }
             }
-            // Used to implement the _mm256_madd_epi16 function.
-            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
-            // intermediate signed 32-bit integers. Horizontally add adjacent pairs of
-            // intermediate 32-bit integers, and pack the results in `dest`.
-            "pmadd.wd" => {
-                let [left, right] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                let (left, left_len) = this.project_to_simd(left)?;
-                let (right, right_len) = this.project_to_simd(right)?;
-                let (dest, dest_len) = this.project_to_simd(dest)?;
-
-                assert_eq!(left_len, right_len);
-                assert_eq!(dest_len.strict_mul(2), left_len);
-
-                for i in 0..dest_len {
-                    let j1 = i.strict_mul(2);
-                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
-                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
-
-                    let j2 = j1.strict_add(1);
-                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
-                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
-
-                    let dest = this.project_index(&dest, i)?;
-
-                    // Multiplications are i16*i16->i32, which will not overflow.
-                    let mul1 = i32::from(left1).strict_mul(right1.into());
-                    let mul2 = i32::from(left2).strict_mul(right2.into());
-                    // However, this addition can overflow in the most extreme case
-                    // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
-                    let res = mul1.wrapping_add(mul2);
-
-                    this.write_scalar(Scalar::from_i32(res), &dest)?;
-                }
-            }
             // Used to implement the _mm256_maddubs_epi16 function.
             // Multiplies packed 8-bit unsigned integers from `left` and packed
             // signed 8-bit integers from `right` into 16-bit signed integers. Then,
@@ -285,39 +240,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                     this.copy_op(&left, &dest)?;
                 }
             }
-            // Used to implement the _mm256_permute2x128_si256 function.
-            // Shuffles 128-bit blocks of `a` and `b` using `imm` as pattern.
-            "vperm2i128" => {
-                let [left, right, imm] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                assert_eq!(left.layout.size.bits(), 256);
-                assert_eq!(right.layout.size.bits(), 256);
-                assert_eq!(dest.layout.size.bits(), 256);
-
-                // Transmute to `[i128; 2]`
-
-                let array_layout =
-                    this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.i128, 2))?;
-                let left = left.transmute(array_layout, this)?;
-                let right = right.transmute(array_layout, this)?;
-                let dest = dest.transmute(array_layout, this)?;
-
-                let imm = this.read_scalar(imm)?.to_u8()?;
-
-                for i in 0..2 {
-                    let dest = this.project_index(&dest, i)?;
-                    let src = match (imm >> i.strict_mul(4)) & 0b11 {
-                        0 => this.project_index(&left, 0)?,
-                        1 => this.project_index(&left, 1)?,
-                        2 => this.project_index(&right, 0)?,
-                        3 => this.project_index(&right, 1)?,
-                        _ => unreachable!(),
-                    };
-
-                    this.copy_op(&src, &dest)?;
-                }
-            }
             // Used to implement the _mm256_sad_epu8 function.
             // Compute the absolute differences of packed unsigned 8-bit integers
             // in `left` and `right`, then horizontally sum each consecutive 8

diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs
@@ -59,28 +59,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 this.write_immediate(*sum, &this.project_field(dest, FieldIdx::ONE)?)?;
             }
 
-            // Used to implement the `_addcarryx_u{32, 64}` functions. They are semantically identical with the `_addcarry_u{32, 64}` functions,
-            // except for a slightly different type signature and the requirement for the "adx" target feature.
-            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarryx-u32-addcarryx-u64.html
-            "addcarryx.u32" | "addcarryx.u64" => {
-                this.expect_target_feature_for_intrinsic(link_name, "adx")?;
-
-                let is_u64 = unprefixed_name.ends_with("64");
-                if is_u64 && this.tcx.sess.target.arch != Arch::X86_64 {
-                    return interp_ok(EmulateItemResult::NotSupported);
-                }
-                let [c_in, a, b, out] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-                let out = this.deref_pointer_as(
-                    out,
-                    if is_u64 { this.machine.layouts.u64 } else { this.machine.layouts.u32 },
-                )?;
-
-                let (sum, c_out) = carrying_add(this, c_in, a, b, mir::BinOp::AddWithOverflow)?;
-                this.write_scalar(c_out, dest)?;
-                this.write_immediate(*sum, &out)?;
-            }
-
             // Used to implement the `_mm_pause` function.
             // The intrinsic is used to hint the processor that the code is in a spin-loop.
             // It is compiled down to a `pause` instruction. When SSE2 is not available,
@@ -719,36 +697,6 @@ fn convert_float_to_int<'tcx>(
     interp_ok(())
 }
 
-/// Calculates absolute value of integers in `op` and stores the result in `dest`.
-///
-/// In case of overflow (when the operand is the minimum value), the operation
-/// will wrap around.
-fn int_abs<'tcx>(
-    ecx: &mut crate::MiriInterpCx<'tcx>,
-    op: &OpTy<'tcx>,
-    dest: &MPlaceTy<'tcx>,
-) -> InterpResult<'tcx, ()> {
-    let (op, op_len) = ecx.project_to_simd(op)?;
-    let (dest, dest_len) = ecx.project_to_simd(dest)?;
-
-    assert_eq!(op_len, dest_len);
-
-    let zero = ImmTy::from_int(0, op.layout.field(ecx, 0));
-
-    for i in 0..dest_len {
-        let op = ecx.read_immediate(&ecx.project_index(&op, i)?)?;
-        let dest = ecx.project_index(&dest, i)?;
-
-        let lt_zero = ecx.binary_op(mir::BinOp::Lt, &op, &zero)?;
-        let res =
-            if lt_zero.to_scalar().to_bool()? { ecx.unary_op(mir::UnOp::Neg, &op)? } else { op };
-
-        ecx.write_immediate(*res, &dest)?;
-    }
-
-    interp_ok(())
-}
-
 /// Splits `op` (which must be a SIMD vector) into 128-bit chunks.
 ///
 /// Returns a tuple where:

diff --git a/src/shims/x86/sse.rs b/src/shims/x86/sse.rs
@@ -180,29 +180,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
                 this.write_immediate(*res, dest)?;
             }
-            // Used to implement the _mm_cvtsi32_ss and _mm_cvtsi64_ss functions.
-            // Converts `right` from i32/i64 to f32. Returns a SIMD vector with
-            // the result in the first component and the remaining components
-            // are copied from `left`.
-            // https://www.felixcloutier.com/x86/cvtsi2ss
-            "cvtsi2ss" | "cvtsi642ss" => {
-                let [left, right] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                let (left, left_len) = this.project_to_simd(left)?;
-                let (dest, dest_len) = this.project_to_simd(dest)?;
-
-                assert_eq!(dest_len, left_len);
-
-                let right = this.read_immediate(right)?;
-                let dest0 = this.project_index(&dest, 0)?;
-                let res0 = this.int_to_int_or_float(&right, dest0.layout)?;
-                this.write_immediate(*res0, &dest0)?;
-
-                for i in 1..dest_len {
-                    this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
-                }
-            }
             _ => return interp_ok(EmulateItemResult::NotSupported),
         }
         interp_ok(EmulateItemResult::NeedsReturn)

diff --git a/src/shims/x86/sse2.rs b/src/shims/x86/sse2.rs
@@ -36,42 +36,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
         // Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
         // vectors.
         match unprefixed_name {
-            // Used to implement the _mm_madd_epi16 function.
-            // Multiplies packed signed 16-bit integers in `left` and `right`, producing
-            // intermediate signed 32-bit integers. Horizontally add adjacent pairs of
-            // intermediate 32-bit integers, and pack the results in `dest`.
-            "pmadd.wd" => {
-                let [left, right] =
-                    this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
-
-                let (left, left_len) = this.project_to_simd(left)?;
-                let (right, right_len) = this.project_to_simd(right)?;
-                let (dest, dest_len) = this.project_to_simd(dest)?;
-
-                assert_eq!(left_len, right_len);
-                assert_eq!(dest_len.strict_mul(2), left_len);
-
-                for i in 0..dest_len {
-                    let j1 = i.strict_mul(2);
-                    let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
-                    let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
-
-                    let j2 = j1.strict_add(1);
-                    let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
-                    let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
-
-                    let dest = this.project_index(&dest, i)?;
-
-                    // Multiplications are i16*i16->i32, which will not overflow.
-                    let mul1 = i32::from(left1).strict_mul(right1.into());
-                    let mul2 = i32::from(left2).strict_mul(right2.into());
-                    // However, this addition can overflow in the most extreme case
-                    // (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
-                    let res = mul1.wrapping_add(mul2);
-
-                    this.write_scalar(Scalar::from_i32(res), &dest)?;
-                }
-            }
             // Used to implement the _mm_sad_epu8 function.
             // Computes the absolute differences of packed unsigned 8-bit integers in `a`
             // and `b`, then horizontally sum each consecutive 8 differences to produce
@@ -320,10 +284,10 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
 
                 this.write_immediate(*res, dest)?;
             }
-            // Used to implement the _mm_cvtsd_ss and _mm_cvtss_sd functions.
-            // Converts the first f64/f32 from `right` to f32/f64 and copies
-            // the remaining elements from `left`
-            "cvtsd2ss" | "cvtss2sd" => {
+            // Used to implement the _mm_cvtsd_ss function.
+            // Converts the first f64 from `right` to f32 and copies the remaining
+            // elements from `left`
+            "cvtsd2ss" => {
                 let [left, right] =
                     this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
 
@@ -336,8 +300,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 // Convert first element of `right`
                 let right0 = this.read_immediate(&this.project_index(&right, 0)?)?;
                 let dest0 = this.project_index(&dest, 0)?;
-                // `float_to_float_or_int` here will convert from f64 to f32 (cvtsd2ss) or
-                // from f32 to f64 (cvtss2sd).
                 let res0 = this.float_to_float_or_int(&right0, dest0.layout)?;
                 this.write_immediate(*res0, &dest0)?;