Skip to content

Commit 885a2ac

Browse files
committed
Remove implementation of LLVM SIMD intrinsics that are not needed anymore
1 parent fb6a453 commit 885a2ac

File tree

8 files changed

+27
-278
lines changed

8 files changed

+27
-278
lines changed

src/shims/x86/avx.rs

Lines changed: 10 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
use rustc_abi::CanonAbi;
22
use rustc_apfloat::ieee::{Double, Single};
3-
use rustc_middle::mir;
43
use rustc_middle::ty::Ty;
54
use rustc_span::Symbol;
65
use rustc_target::callconv::FnAbi;
76

87
use super::{
98
FloatBinOp, FloatUnaryOp, bin_op_simd_float_all, conditional_dot_product, convert_float_to_int,
10-
horizontal_bin_op, mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked,
11-
unary_op_ps,
9+
mask_load, mask_store, round_all, test_bits_masked, test_high_bits_masked, unary_op_ps,
1210
};
1311
use crate::*;
1412

@@ -93,21 +91,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
9391

9492
conditional_dot_product(this, left, right, imm, dest)?;
9593
}
96-
// Used to implement the _mm256_h{add,sub}_p{s,d} functions.
97-
// Horizontally add/subtract adjacent floating point values
98-
// in `left` and `right`.
99-
"hadd.ps.256" | "hadd.pd.256" | "hsub.ps.256" | "hsub.pd.256" => {
100-
let [left, right] =
101-
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
102-
103-
let which = match unprefixed_name {
104-
"hadd.ps.256" | "hadd.pd.256" => mir::BinOp::Add,
105-
"hsub.ps.256" | "hsub.pd.256" => mir::BinOp::Sub,
106-
_ => unreachable!(),
107-
};
108-
109-
horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
110-
}
11194
// Used to implement the _mm256_cmp_ps function.
11295
// Performs a comparison operation on each component of `left`
11396
// and `right`. For each component, returns 0 if false or u32::MAX
@@ -251,40 +234,31 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
251234
// Unaligned copy, which is what we want.
252235
this.mem_copy(src_ptr, dest.ptr(), dest.layout.size, /*nonoverlapping*/ true)?;
253236
}
254-
// Used to implement the _mm256_testz_si256, _mm256_testc_si256 and
255-
// _mm256_testnzc_si256 functions.
256-
// Tests `op & mask == 0`, `op & mask == mask` or
257-
// `op & mask != 0 && op & mask != mask`
258-
"ptestz.256" | "ptestc.256" | "ptestnzc.256" => {
237+
// Used to implement the _mm256_testnzc_si256 function.
238+
// Tests `op & mask != 0 && op & mask != mask`
239+
"ptestnzc.256" => {
259240
let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
260241

261242
let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
262-
let res = match unprefixed_name {
263-
"ptestz.256" => all_zero,
264-
"ptestc.256" => masked_set,
265-
"ptestnzc.256" => !all_zero && !masked_set,
266-
_ => unreachable!(),
267-
};
243+
let res = !all_zero && !masked_set;
268244

269245
this.write_scalar(Scalar::from_i32(res.into()), dest)?;
270246
}
271247
// Used to implement the _mm256_testz_pd, _mm256_testc_pd, _mm256_testnzc_pd
272-
// _mm_testz_pd, _mm_testc_pd, _mm_testnzc_pd, _mm256_testz_ps,
273-
// _mm256_testc_ps, _mm256_testnzc_ps, _mm_testz_ps, _mm_testc_ps and
248+
// _mm_testnzc_pd, _mm256_testz_ps, _mm256_testc_ps, _mm256_testnzc_ps and
274249
// _mm_testnzc_ps functions.
275250
// Calculates two booleans:
276251
// `direct`, which is true when the highest bit of each element of `op & mask` is zero.
277252
// `negated`, which is true when the highest bit of each element of `!op & mask` is zero.
278253
// Return `direct` (testz), `negated` (testc) or `!direct & !negated` (testnzc)
279-
"vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestz.pd" | "vtestc.pd"
280-
| "vtestnzc.pd" | "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256"
281-
| "vtestz.ps" | "vtestc.ps" | "vtestnzc.ps" => {
254+
"vtestz.pd.256" | "vtestc.pd.256" | "vtestnzc.pd.256" | "vtestnzc.pd"
255+
| "vtestz.ps.256" | "vtestc.ps.256" | "vtestnzc.ps.256" | "vtestnzc.ps" => {
282256
let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
283257

284258
let (direct, negated) = test_high_bits_masked(this, op, mask)?;
285259
let res = match unprefixed_name {
286-
"vtestz.pd.256" | "vtestz.pd" | "vtestz.ps.256" | "vtestz.ps" => direct,
287-
"vtestc.pd.256" | "vtestc.pd" | "vtestc.ps.256" | "vtestc.ps" => negated,
260+
"vtestz.pd.256" | "vtestz.ps.256" => direct,
261+
"vtestc.pd.256" | "vtestc.ps.256" => negated,
288262
"vtestnzc.pd.256" | "vtestnzc.pd" | "vtestnzc.ps.256" | "vtestnzc.ps" =>
289263
!direct && !negated,
290264
_ => unreachable!(),

src/shims/x86/avx2.rs

Lines changed: 5 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ use rustc_span::Symbol;
55
use rustc_target::callconv::FnAbi;
66

77
use super::{
8-
ShiftOp, horizontal_bin_op, int_abs, mask_load, mask_store, mpsadbw, packssdw, packsswb,
9-
packusdw, packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd,
8+
ShiftOp, horizontal_bin_op, mask_load, mask_store, mpsadbw, packssdw, packsswb, packusdw,
9+
packuswb, pmulhrsw, psign, shift_simd_by_scalar, shift_simd_by_simd,
1010
};
1111
use crate::*;
1212

@@ -25,24 +25,15 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
2525
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx2.").unwrap();
2626

2727
match unprefixed_name {
28-
// Used to implement the _mm256_abs_epi{8,16,32} functions.
29-
// Calculates the absolute value of packed 8/16/32-bit integers.
30-
"pabs.b" | "pabs.w" | "pabs.d" => {
31-
let [op] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
32-
33-
int_abs(this, op, dest)?;
34-
}
35-
// Used to implement the _mm256_h{add,adds,sub}_epi{16,32} functions.
36-
// Horizontally add / add with saturation / subtract adjacent 16/32-bit
28+
// Used to implement the _mm256_h{adds,subs}_epi16 functions.
29+
// Horizontally add / subtract with saturation adjacent 16-bit
3730
// integer values in `left` and `right`.
38-
"phadd.w" | "phadd.sw" | "phadd.d" | "phsub.w" | "phsub.sw" | "phsub.d" => {
31+
"phadd.sw" | "phsub.sw" => {
3932
let [left, right] =
4033
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
4134

4235
let (which, saturating) = match unprefixed_name {
43-
"phadd.w" | "phadd.d" => (mir::BinOp::Add, false),
4436
"phadd.sw" => (mir::BinOp::Add, true),
45-
"phsub.w" | "phsub.d" => (mir::BinOp::Sub, false),
4637
"phsub.sw" => (mir::BinOp::Sub, true),
4738
_ => unreachable!(),
4839
};
@@ -110,42 +101,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
110101
this.write_scalar(Scalar::from_int(0, dest.layout.size), &dest)?;
111102
}
112103
}
113-
// Used to implement the _mm256_madd_epi16 function.
114-
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
115-
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
116-
// intermediate 32-bit integers, and pack the results in `dest`.
117-
"pmadd.wd" => {
118-
let [left, right] =
119-
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
120-
121-
let (left, left_len) = this.project_to_simd(left)?;
122-
let (right, right_len) = this.project_to_simd(right)?;
123-
let (dest, dest_len) = this.project_to_simd(dest)?;
124-
125-
assert_eq!(left_len, right_len);
126-
assert_eq!(dest_len.strict_mul(2), left_len);
127-
128-
for i in 0..dest_len {
129-
let j1 = i.strict_mul(2);
130-
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
131-
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
132-
133-
let j2 = j1.strict_add(1);
134-
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
135-
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
136-
137-
let dest = this.project_index(&dest, i)?;
138-
139-
// Multiplications are i16*i16->i32, which will not overflow.
140-
let mul1 = i32::from(left1).strict_mul(right1.into());
141-
let mul2 = i32::from(left2).strict_mul(right2.into());
142-
// However, this addition can overflow in the most extreme case
143-
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
144-
let res = mul1.wrapping_add(mul2);
145-
146-
this.write_scalar(Scalar::from_i32(res), &dest)?;
147-
}
148-
}
149104
// Used to implement the _mm256_maddubs_epi16 function.
150105
// Multiplies packed 8-bit unsigned integers from `left` and packed
151106
// signed 8-bit integers from `right` into 16-bit signed integers. Then,
@@ -285,39 +240,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
285240
this.copy_op(&left, &dest)?;
286241
}
287242
}
288-
// Used to implement the _mm256_permute2x128_si256 function.
289-
// Shuffles 128-bit blocks of `a` and `b` using `imm` as pattern.
290-
"vperm2i128" => {
291-
let [left, right, imm] =
292-
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
293-
294-
assert_eq!(left.layout.size.bits(), 256);
295-
assert_eq!(right.layout.size.bits(), 256);
296-
assert_eq!(dest.layout.size.bits(), 256);
297-
298-
// Transmute to `[i128; 2]`
299-
300-
let array_layout =
301-
this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.i128, 2))?;
302-
let left = left.transmute(array_layout, this)?;
303-
let right = right.transmute(array_layout, this)?;
304-
let dest = dest.transmute(array_layout, this)?;
305-
306-
let imm = this.read_scalar(imm)?.to_u8()?;
307-
308-
for i in 0..2 {
309-
let dest = this.project_index(&dest, i)?;
310-
let src = match (imm >> i.strict_mul(4)) & 0b11 {
311-
0 => this.project_index(&left, 0)?,
312-
1 => this.project_index(&left, 1)?,
313-
2 => this.project_index(&right, 0)?,
314-
3 => this.project_index(&right, 1)?,
315-
_ => unreachable!(),
316-
};
317-
318-
this.copy_op(&src, &dest)?;
319-
}
320-
}
321243
// Used to implement the _mm256_sad_epu8 function.
322244
// Compute the absolute differences of packed unsigned 8-bit integers
323245
// in `left` and `right`, then horizontally sum each consecutive 8

src/shims/x86/mod.rs

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -58,28 +58,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
5858
this.write_immediate(*sum, &this.project_field(dest, FieldIdx::ONE)?)?;
5959
}
6060

61-
// Used to implement the `_addcarryx_u{32, 64}` functions. They are semantically identical with the `_addcarry_u{32, 64}` functions,
62-
// except for a slightly different type signature and the requirement for the "adx" target feature.
63-
// https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarryx-u32-addcarryx-u64.html
64-
"addcarryx.u32" | "addcarryx.u64" => {
65-
this.expect_target_feature_for_intrinsic(link_name, "adx")?;
66-
67-
let is_u64 = unprefixed_name.ends_with("64");
68-
if is_u64 && this.tcx.sess.target.arch != "x86_64" {
69-
return interp_ok(EmulateItemResult::NotSupported);
70-
}
71-
let [c_in, a, b, out] =
72-
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
73-
let out = this.deref_pointer_as(
74-
out,
75-
if is_u64 { this.machine.layouts.u64 } else { this.machine.layouts.u32 },
76-
)?;
77-
78-
let (sum, c_out) = carrying_add(this, c_in, a, b, mir::BinOp::AddWithOverflow)?;
79-
this.write_scalar(c_out, dest)?;
80-
this.write_immediate(*sum, &out)?;
81-
}
82-
8361
// Used to implement the `_mm_pause` function.
8462
// The intrinsic is used to hint the processor that the code is in a spin-loop.
8563
// It is compiled down to a `pause` instruction. When SSE2 is not available,
@@ -718,36 +696,6 @@ fn convert_float_to_int<'tcx>(
718696
interp_ok(())
719697
}
720698

721-
/// Calculates absolute value of integers in `op` and stores the result in `dest`.
722-
///
723-
/// In case of overflow (when the operand is the minimum value), the operation
724-
/// will wrap around.
725-
fn int_abs<'tcx>(
726-
ecx: &mut crate::MiriInterpCx<'tcx>,
727-
op: &OpTy<'tcx>,
728-
dest: &MPlaceTy<'tcx>,
729-
) -> InterpResult<'tcx, ()> {
730-
let (op, op_len) = ecx.project_to_simd(op)?;
731-
let (dest, dest_len) = ecx.project_to_simd(dest)?;
732-
733-
assert_eq!(op_len, dest_len);
734-
735-
let zero = ImmTy::from_int(0, op.layout.field(ecx, 0));
736-
737-
for i in 0..dest_len {
738-
let op = ecx.read_immediate(&ecx.project_index(&op, i)?)?;
739-
let dest = ecx.project_index(&dest, i)?;
740-
741-
let lt_zero = ecx.binary_op(mir::BinOp::Lt, &op, &zero)?;
742-
let res =
743-
if lt_zero.to_scalar().to_bool()? { ecx.unary_op(mir::UnOp::Neg, &op)? } else { op };
744-
745-
ecx.write_immediate(*res, &dest)?;
746-
}
747-
748-
interp_ok(())
749-
}
750-
751699
/// Splits `op` (which must be a SIMD vector) into 128-bit chunks.
752700
///
753701
/// Returns a tuple where:

src/shims/x86/sse.rs

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -180,29 +180,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
180180

181181
this.write_immediate(*res, dest)?;
182182
}
183-
// Used to implement the _mm_cvtsi32_ss and _mm_cvtsi64_ss functions.
184-
// Converts `right` from i32/i64 to f32. Returns a SIMD vector with
185-
// the result in the first component and the remaining components
186-
// are copied from `left`.
187-
// https://www.felixcloutier.com/x86/cvtsi2ss
188-
"cvtsi2ss" | "cvtsi642ss" => {
189-
let [left, right] =
190-
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
191-
192-
let (left, left_len) = this.project_to_simd(left)?;
193-
let (dest, dest_len) = this.project_to_simd(dest)?;
194-
195-
assert_eq!(dest_len, left_len);
196-
197-
let right = this.read_immediate(right)?;
198-
let dest0 = this.project_index(&dest, 0)?;
199-
let res0 = this.int_to_int_or_float(&right, dest0.layout)?;
200-
this.write_immediate(*res0, &dest0)?;
201-
202-
for i in 1..dest_len {
203-
this.copy_op(&this.project_index(&left, i)?, &this.project_index(&dest, i)?)?;
204-
}
205-
}
206183
_ => return interp_ok(EmulateItemResult::NotSupported),
207184
}
208185
interp_ok(EmulateItemResult::NeedsReturn)

src/shims/x86/sse2.rs

Lines changed: 4 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -36,42 +36,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
3636
// Intrinsincs sufixed with "epiX" or "epuX" operate with X-bit signed or unsigned
3737
// vectors.
3838
match unprefixed_name {
39-
// Used to implement the _mm_madd_epi16 function.
40-
// Multiplies packed signed 16-bit integers in `left` and `right`, producing
41-
// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
42-
// intermediate 32-bit integers, and pack the results in `dest`.
43-
"pmadd.wd" => {
44-
let [left, right] =
45-
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
46-
47-
let (left, left_len) = this.project_to_simd(left)?;
48-
let (right, right_len) = this.project_to_simd(right)?;
49-
let (dest, dest_len) = this.project_to_simd(dest)?;
50-
51-
assert_eq!(left_len, right_len);
52-
assert_eq!(dest_len.strict_mul(2), left_len);
53-
54-
for i in 0..dest_len {
55-
let j1 = i.strict_mul(2);
56-
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_i16()?;
57-
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i16()?;
58-
59-
let j2 = j1.strict_add(1);
60-
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_i16()?;
61-
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i16()?;
62-
63-
let dest = this.project_index(&dest, i)?;
64-
65-
// Multiplications are i16*i16->i32, which will not overflow.
66-
let mul1 = i32::from(left1).strict_mul(right1.into());
67-
let mul2 = i32::from(left2).strict_mul(right2.into());
68-
// However, this addition can overflow in the most extreme case
69-
// (-0x8000)*(-0x8000)+(-0x8000)*(-0x8000) = 0x80000000
70-
let res = mul1.wrapping_add(mul2);
71-
72-
this.write_scalar(Scalar::from_i32(res), &dest)?;
73-
}
74-
}
7539
// Used to implement the _mm_sad_epu8 function.
7640
// Computes the absolute differences of packed unsigned 8-bit integers in `a`
7741
// and `b`, then horizontally sum each consecutive 8 differences to produce
@@ -320,10 +284,10 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
320284

321285
this.write_immediate(*res, dest)?;
322286
}
323-
// Used to implement the _mm_cvtsd_ss and _mm_cvtss_sd functions.
324-
// Converts the first f64/f32 from `right` to f32/f64 and copies
325-
// the remaining elements from `left`
326-
"cvtsd2ss" | "cvtss2sd" => {
287+
// Used to implement the _mm_cvtsd_ss function.
288+
// Converts the first f64 from `right` to f32 and copies the remaining
289+
// elements from `left`
290+
"cvtsd2ss" => {
327291
let [left, right] =
328292
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
329293

@@ -336,8 +300,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
336300
// Convert first element of `right`
337301
let right0 = this.read_immediate(&this.project_index(&right, 0)?)?;
338302
let dest0 = this.project_index(&dest, 0)?;
339-
// `float_to_float_or_int` here will convert from f64 to f32 (cvtsd2ss) or
340-
// from f32 to f64 (cvtss2sd).
341303
let res0 = this.float_to_float_or_int(&right0, dest0.layout)?;
342304
this.write_immediate(*res0, &dest0)?;
343305

0 commit comments

Comments
 (0)