Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 10 additions & 36 deletions src/shims/x86/avx.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
use rustc_abi::CanonAbi;
use rustc_apfloat::ieee::{Double, Single};
use rustc_middle::mir;
use rustc_middle::ty::Ty;
use rustc_span::Symbol;
use rustc_target::callconv::FnAbi;

use super::{
FloatBinOp, FloatUnaryOp, bin_op_simd_float_all, conditional_dot_product, convert_float_to_int,
horizontal_bin_op, mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked,
unary_op_ps,
mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
};
use crate::*;

Expand Down Expand Up @@ -93,21 +91,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

conditional_dot_product(this, left, right, imm, dest)?;
}
// Used to implement the _mm256_h{add,sub}_p{s,d} functions.
// Horizontally add/subtract adjacent floating point values
// in `left` and `right`.
"hadd.ps.256" | "hadd.pd.256" | "hsub.ps.256" | "hsub.pd.256" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let which = match unprefixed_name {
"hadd.ps.256" | "hadd.pd.256" => mir::BinOp::Add,
"hsub.ps.256" | "hsub.pd.256" => mir::BinOp::Sub,
_ => unreachable!(),
};

horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
}
// Used to implement the _mm256_cmp_ps function.
// Performs a comparison operation on each component of `left`
// and `right`. For each component, returns 0 if false or u32::MAX
Expand Down Expand Up @@ -251,40 +234,31 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Unaligned copy, which is what we want.
this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
}
// Used to implement the _mm256_testz_si256, _mm256_testc_si256 and
// _mm256_testnzc_si256 functions.
// Tests `op & mask == 0`, `op & mask == mask` or
// `op & mask != 0 && op & mask != mask`
"ptestz.256" | "ptestc.256" | "ptestnzc.256" => {
// Used to implement the _mm256_testnzc_si256 function.
// Tests `op & mask != 0 && op & mask != mask`
"ptestnzc.256" => {
let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
let res = match unprefixed_name {
"ptestz.256" => all_zero,
"ptestc.256" => masked_set,
"ptestnzc.256" => !all_zero && !masked_set,
_ => unreachable!(),
};
let res = !all_zero && !masked_set;

this.write_scalar(Scalar::from_i32(res.into()), dest)?;
}
// Used to implement the _mm256_testz_pd, _mm256_testc_pd, _mm256_testnzc_pd
// _mm_testz_pd, _mm_testc_pd, _mm_testnzc_pd, _mm256_testz_ps,
// _mm256_testc_ps, _mm256_testnzc_ps, _mm_testz_ps, _mm_testc_ps and
// _mm_testnzc_pd, _mm256_testz_ps, _mm256_testc_ps, _mm256_testnzc_ps and
// _mm_testnzc_ps functions.
// Calculates two booleans:
// `direct`, which is true when the highest bit of each element of `op & mask` is zero.
// `negated`, which is true when the highest bit of each element of `!op & mask` is zero.
// Return `direct` (testz), `negated` (testc) or `!direct & !negated` (testnzc)
"vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestz.pd" | "vtestc.pd"
| "vtestnzc.pd" | "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256"
| "vtestz.ps" | "vtestc.ps" | "vtestnzc.ps" => {
"vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestnzc.pd"
| "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256" | "vtestnzc.ps" => {
let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let (direct, negated) = test_high_bits_masked(this, op, mask)?;
let res = match unprefixed_name {
"vtestz.pd.256" | "vtestz.pd" | "vtestz.ps.256" | "vtestz.ps" => direct,
"vtestc.pd.256" | "vtestc.pd" | "vtestc.ps.256" | "vtestc.ps" => negated,
"vtestz.pd.256" | "vtestz.ps.256" => direct,
"vtestc.pd.256" | "vtestc.ps.256" => negated,
"vtestnzc.pd.256" | "vtestnzc.pd" | "vtestnzc.ps.256" | "vtestnzc.ps" =>
!direct && !negated,
_ => unreachable!(),
Expand Down
96 changes: 9 additions & 87 deletions src/shims/x86/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ use rustc_span::Symbol;
use rustc_target::callconv::FnAbi;

use super::{
ShiftOp, horizontal_bin_op, int_abs, mask_load, mask_store, mpsadbw, packssdw, packsswb,
packusdw, packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd,
ShiftOp, horizontal_bin_op, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw,
packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd,
};
use crate::*;

Expand All @@ -25,29 +25,20 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx2.").unwrap();

match unprefixed_name {
// Used to implement the _mm256_abs_epi{8,16,32} functions.
// Calculates the absolute value of packed 8/16/32-bit integers.
"pabs.b" | "pabs.w" | "pabs.d" => {
let [op] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

int_abs(this, op, dest)?;
}
// Used to implement the _mm256_h{add,adds,sub}_epi{16,32} functions.
// Horizontally add / add with saturation / subtract adjacent 16/32-bit
// Used to implement the _mm256_h{adds,subs}_epi16 functions.
// Horizontally add / subtract with saturation adjacent 16-bit
// integer values in `left` and `right`.
"phadd.w" | "phadd.sw" | "phadd.d" | "phsub.w" | "phsub.sw" | "phsub.d" => {
"phadd.sw" | "phsub.sw" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let (which, saturating) = match unprefixed_name {
"phadd.w" | "phadd.d" => (mir::BinOp::Add, false),
"phadd.sw" => (mir::BinOp::Add, true),
"phsub.w" | "phsub.d" => (mir::BinOp::Sub, false),
"phsub.sw" => (mir::BinOp::Sub, true),
let which = match unprefixed_name {
"phadd.sw" => mir::BinOp::Add,
"phsub.sw" => mir::BinOp::Sub,
_ => unreachable!(),
};

horizontal_bin_op(this, which, saturating, left, right, dest)?;
horizontal_bin_op(this, which, /*saturating*/ true, left, right, dest)?;
}
// Used to implement `_mm{,_mask}_{i32,i64}gather_{epi32,epi64,pd,ps}` functions
// Gathers elements from `slice` using `offsets * scale` as indices.
Expand Down Expand Up @@ -110,42 +101,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
}
}
// Used to implement the _mm256_madd_epi16 function.
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
// intermediate 32-bit integers, and pack the results in `dest`.
"pmadd.wd" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let (left, left_len) = this.project_to_simd(left)?;
let (right, right_len) = this.project_to_simd(right)?;
let (dest, dest_len) = this.project_to_simd(dest)?;

assert_eq!(left_len, right_len);
assert_eq!(dest_len.strict_mul(2), left_len);

for i in 0..dest_len {
let j1 = i.strict_mul(2);
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;

let j2 = j1.strict_add(1);
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;

let dest = this.project_index(&dest, i)?;

// Multiplications are i16*i16->i32, which will not overflow.
let mul1 = i32::from(left1).strict_mul(right1.into());
let mul2 = i32::from(left2).strict_mul(right2.into());
// However, this addition can overflow in the most extreme case
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
let res = mul1.wrapping_add(mul2);

this.write_scalar(Scalar::from_i32(res), &dest)?;
}
}
// Used to implement the _mm256_maddubs_epi16 function.
// Multiplies packed 8-bit unsigned integers from `left` and packed
// signed 8-bit integers from `right` into 16-bit signed integers. Then,
Expand Down Expand Up @@ -285,39 +240,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this.copy_op(&left, &dest)?;
}
}
// Used to implement the _mm256_permute2x128_si256 function.
// Shuffles 128-bit blocks of `a` and `b` using `imm` as pattern.
"vperm2i128" => {
let [left, right, imm] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

assert_eq!(left.layout.size.bits(), 256);
assert_eq!(right.layout.size.bits(), 256);
assert_eq!(dest.layout.size.bits(), 256);

// Transmute to `[i128; 2]`

let array_layout =
this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.i128, 2))?;
let left = left.transmute(array_layout, this)?;
let right = right.transmute(array_layout, this)?;
let dest = dest.transmute(array_layout, this)?;

let imm = this.read_scalar(imm)?.to_u8()?;

for i in 0..2 {
let dest = this.project_index(&dest, i)?;
let src = match (imm >> i.strict_mul(4)) & 0b11 {
0 => this.project_index(&left, 0)?,
1 => this.project_index(&left, 1)?,
2 => this.project_index(&right, 0)?,
3 => this.project_index(&right, 1)?,
_ => unreachable!(),
};

this.copy_op(&src, &dest)?;
}
}
// Used to implement the _mm256_sad_epu8 function.
// Compute the absolute differences of packed unsigned 8-bit integers
// in `left` and `right`, then horizontally sum each consecutive 8
Expand Down
52 changes: 0 additions & 52 deletions src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,28 +59,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this.write_immediate(*sum, &this.project_field(dest, FieldIdx::ONE)?)?;
}

// Used to implement the `_addcarryx_u{32, 64}` functions. They are semantically identical with the `_addcarry_u{32, 64}` functions,
// except for a slightly different type signature and the requirement for the "adx" target feature.
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarryx-u32-addcarryx-u64.html
"addcarryx.u32" | "addcarryx.u64" => {
this.expect_target_feature_for_intrinsic(link_name, "adx")?;

let is_u64 = unprefixed_name.ends_with("64");
if is_u64 && this.tcx.sess.target.arch != Arch::X86_64 {
return interp_ok(EmulateItemResult::NotSupported);
}
let [c_in, a, b, out] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
let out = this.deref_pointer_as(
out,
if is_u64 { this.machine.layouts.u64 } else { this.machine.layouts.u32 },
)?;

let (sum, c_out) = carrying_add(this, c_in, a, b, mir::BinOp::AddWithOverflow)?;
this.write_scalar(c_out, dest)?;
this.write_immediate(*sum, &out)?;
}

// Used to implement the `_mm_pause` function.
// The intrinsic is used to hint the processor that the code is in a spin-loop.
// It is compiled down to a `pause` instruction. When SSE2 is not available,
Expand Down Expand Up @@ -719,36 +697,6 @@ fn convert_float_to_int<'tcx>(
interp_ok(())
}

/// Calculates absolute value of integers in `op` and stores the result in `dest`.
///
/// In case of overflow (when the operand is the minimum value), the operation
/// will wrap around.
fn int_abs<'tcx>(
ecx: &mut crate::MiriInterpCx<'tcx>,
op: &OpTy<'tcx>,
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx, ()> {
let (op, op_len) = ecx.project_to_simd(op)?;
let (dest, dest_len) = ecx.project_to_simd(dest)?;

assert_eq!(op_len, dest_len);

let zero = ImmTy::from_int(0, op.layout.field(ecx, 0));

for i in 0..dest_len {
let op = ecx.read_immediate(&ecx.project_index(&op, i)?)?;
let dest = ecx.project_index(&dest, i)?;

let lt_zero = ecx.binary_op(mir::BinOp::Lt, &op, &zero)?;
let res =
if lt_zero.to_scalar().to_bool()? { ecx.unary_op(mir::UnOp::Neg, &op)? } else { op };

ecx.write_immediate(*res, &dest)?;
}

interp_ok(())
}

/// Splits `op` (which must be a SIMD vector) into 128-bit chunks.
///
/// Returns a tuple where:
Expand Down
23 changes: 0 additions & 23 deletions src/shims/x86/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,29 +180,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

this.write_immediate(*res, dest)?;
}
// Used to implement the _mm_cvtsi32_ss and _mm_cvtsi64_ss functions.
// Converts `right` from i32/i64 to f32. Returns a SIMD vector with
// the result in the first component and the remaining components
// are copied from `left`.
// https://www.felixcloutier.com/x86/cvtsi2ss
"cvtsi2ss" | "cvtsi642ss" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let (left, left_len) = this.project_to_simd(left)?;
let (dest, dest_len) = this.project_to_simd(dest)?;

assert_eq!(dest_len, left_len);

let right = this.read_immediate(right)?;
let dest0 = this.project_index(&dest, 0)?;
let res0 = this.int_to_int_or_float(&right, dest0.layout)?;
this.write_immediate(*res0, &dest0)?;

for i in 1..dest_len {
this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
}
}
_ => return interp_ok(EmulateItemResult::NotSupported),
}
interp_ok(EmulateItemResult::NeedsReturn)
Expand Down
46 changes: 4 additions & 42 deletions src/shims/x86/sse2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,42 +36,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
// vectors.
match unprefixed_name {
// Used to implement the _mm_madd_epi16 function.
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
// intermediate 32-bit integers, and pack the results in `dest`.
"pmadd.wd" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

let (left, left_len) = this.project_to_simd(left)?;
let (right, right_len) = this.project_to_simd(right)?;
let (dest, dest_len) = this.project_to_simd(dest)?;

assert_eq!(left_len, right_len);
assert_eq!(dest_len.strict_mul(2), left_len);

for i in 0..dest_len {
let j1 = i.strict_mul(2);
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;

let j2 = j1.strict_add(1);
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;

let dest = this.project_index(&dest, i)?;

// Multiplications are i16*i16->i32, which will not overflow.
let mul1 = i32::from(left1).strict_mul(right1.into());
let mul2 = i32::from(left2).strict_mul(right2.into());
// However, this addition can overflow in the most extreme case
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
let res = mul1.wrapping_add(mul2);

this.write_scalar(Scalar::from_i32(res), &dest)?;
}
}
// Used to implement the _mm_sad_epu8 function.
// Computes the absolute differences of packed unsigned 8-bit integers in `a`
// and `b`, then horizontally sum each consecutive 8 differences to produce
Expand Down Expand Up @@ -320,10 +284,10 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {

this.write_immediate(*res, dest)?;
}
// Used to implement the _mm_cvtsd_ss and _mm_cvtss_sd functions.
// Converts the first f64/f32 from `right` to f32/f64 and copies
// the remaining elements from `left`
"cvtsd2ss" | "cvtss2sd" => {
// Used to implement the _mm_cvtsd_ss function.
// Converts the first f64 from `right` to f32 and copies the remaining
// elements from `left`
"cvtsd2ss" => {
let [left, right] =
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

Expand All @@ -336,8 +300,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// Convert first element of `right`
let right0 = this.read_immediate(&this.project_index(&right, 0)?)?;
let dest0 = this.project_index(&dest, 0)?;
// `float_to_float_or_int` here will convert from f64 to f32 (cvtsd2ss) or
// from f32 to f64 (cvtss2sd).
let res0 = this.float_to_float_or_int(&right0, dest0.layout)?;
this.write_immediate(*res0, &dest0)?;

Expand Down
Loading