Skip to content

Commit 11be405

Browse files
committed
Auto merge of #141309 - RalfJung:x86-simd-abi, r=<try>
x86 (32/64): go back to passing SIMD vectors by-ptr Fixes #139029 by partially reverting #135408 and going back to passing SIMD vectors by-ptr on x86. Sadly, by-val confuses the LLVM inliner so much that it's not worth it... r? `@tgross35` Cc `@nikic` try-job: `test-various*` try-job: x86_64-gnu-nopt try-job: dist-i586-gnu-i586-i686-musl try-job: x86_64-msvc-1
2 parents 444a627 + 436f812 commit 11be405

File tree

4 files changed

+19
-46
lines changed

4 files changed

+19
-46
lines changed

compiler/rustc_target/src/callconv/mod.rs

Lines changed: 5 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ use rustc_abi::{
88
};
99
use rustc_macros::HashStable_Generic;
1010

11-
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, RustcAbi, WasmCAbi};
11+
use crate::spec::{HasTargetSpec, HasWasmCAbiOpt, HasX86AbiOpt, WasmCAbi};
1212

1313
mod aarch64;
1414
mod amdgpu;
@@ -733,24 +733,6 @@ impl<'a, Ty> FnAbi<'a, Ty> {
733733
_ => {}
734734
};
735735

736-
// Decides whether we can pass the given SIMD argument via `PassMode::Direct`.
737-
// May only return `true` if the target will always pass those arguments the same way,
738-
// no matter what the user does with `-Ctarget-feature`! In other words, whatever
739-
// target features are required to pass a SIMD value in registers must be listed in
740-
// the `abi_required_features` for the current target and ABI.
741-
let can_pass_simd_directly = |arg: &ArgAbi<'_, Ty>| match &*spec.arch {
742-
// On x86, if we have SSE2 (which we have by default for x86_64), we can always pass up
743-
// to 128-bit-sized vectors.
744-
"x86" if spec.rustc_abi == Some(RustcAbi::X86Sse2) => arg.layout.size.bits() <= 128,
745-
"x86_64" if spec.rustc_abi != Some(RustcAbi::X86Softfloat) => {
746-
// FIXME once https://github.com/bytecodealliance/wasmtime/issues/10254 is fixed
747-
// accept vectors up to 128bit rather than vectors of exactly 128bit.
748-
arg.layout.size.bits() == 128
749-
}
750-
// So far, we haven't implemented this logic for any other target.
751-
_ => false,
752-
};
753-
754736
for (arg_idx, arg) in self
755737
.args
756738
.iter_mut()
@@ -850,9 +832,10 @@ impl<'a, Ty> FnAbi<'a, Ty> {
850832
// target feature sets. Some more information about this
851833
// issue can be found in #44367.
852834
//
853-
// Note that the intrinsic ABI is exempt here as those are not
854-
// real functions anyway, and the backend expects very specific types.
855-
if spec.simd_types_indirect && !can_pass_simd_directly(arg) {
835+
// We *could* do better in some cases, e.g. on x86_64 targets where SSE2 is
836+
// required. However, it turns out that that makes LLVM worse at optimizing this
837+
// code, so we pass things indirectly even there. See #139029 for more on that.
838+
if spec.simd_types_indirect {
856839
arg.make_indirect();
857840
}
858841
}

tests/codegen/abi-x86-sse.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,9 @@ trait Copy {}
2727
#[repr(simd)]
2828
pub struct Sse([f32; 4]);
2929

30-
// x86-64: <4 x float> @sse_id(<4 x float> {{[^,]*}})
31-
// x86-32: <4 x float> @sse_id(<4 x float> {{[^,]*}})
30+
// FIXME: due to #139029 we are passing them all indirectly.
31+
// x86-64: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
32+
// x86-32: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
3233
// x86-32-nosse: void @sse_id(ptr{{( [^,]*)?}} sret([16 x i8]){{( .*)?}}, ptr{{( [^,]*)?}})
3334
#[no_mangle]
3435
pub fn sse_id(x: Sse) -> Sse {

tests/codegen/simd-intrinsic/simd-intrinsic-transmute-array.rs

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
//
22
//@ compile-flags: -C no-prepopulate-passes
3-
// LLVM IR isn't very portable and the one tested here depends on the ABI
4-
// which is different between x86 (where we use SSE registers) and others.
5-
// `x86-64` and `x86-32-sse2` are identical, but compiletest does not support
6-
// taking the union of multiple `only` annotations.
7-
//@ revisions: x86-64 x86-32-sse2 other
8-
//@[x86-64] only-x86_64
9-
//@[x86-32-sse2] only-rustc_abi-x86-sse2
10-
//@[other] ignore-rustc_abi-x86-sse2
11-
//@[other] ignore-x86_64
123

134
#![crate_type = "lib"]
145
#![allow(non_camel_case_types)]
@@ -47,9 +38,7 @@ pub fn build_array_s(x: [f32; 4]) -> S<4> {
4738
#[no_mangle]
4839
pub fn build_array_transmute_s(x: [f32; 4]) -> S<4> {
4940
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
50-
// x86-32: ret <4 x float> %[[VAL:.+]]
51-
// x86-64: ret <4 x float> %[[VAL:.+]]
52-
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
41+
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
5342
unsafe { std::mem::transmute(x) }
5443
}
5544

@@ -64,8 +53,6 @@ pub fn build_array_t(x: [f32; 4]) -> T {
6453
#[no_mangle]
6554
pub fn build_array_transmute_t(x: [f32; 4]) -> T {
6655
// CHECK: %[[VAL:.+]] = load <4 x float>, ptr %x, align [[ARRAY_ALIGN]]
67-
// x86-32: ret <4 x float> %[[VAL:.+]]
68-
// x86-64: ret <4 x float> %[[VAL:.+]]
69-
// other: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
56+
// CHECK: store <4 x float> %[[VAL:.+]], ptr %_0, align [[VECTOR_ALIGN]]
7057
unsafe { std::mem::transmute(x) }
7158
}

tests/codegen/simd/packed-simd.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,16 +30,18 @@ fn load<T, const N: usize>(v: PackedSimd<T, N>) -> FullSimd<T, N> {
3030
}
3131
}
3232

33-
// CHECK-LABEL: define <3 x float> @square_packed_full(ptr{{[a-z_ ]*}} align 4 {{[^,]*}})
33+
// CHECK-LABEL: square_packed_full
34+
// CHECK-SAME: ptr{{[a-z_ ]*}} sret([[RET_TYPE:[^)]+]]) [[RET_ALIGN:align (8|16)]]{{[^%]*}} [[RET_VREG:%[_0-9]*]]
35+
// CHECK-SAME: ptr{{[a-z_ ]*}} align 4
3436
#[no_mangle]
3537
pub fn square_packed_full(x: PackedSimd<f32, 3>) -> FullSimd<f32, 3> {
36-
// The unoptimized version of this is not very interesting to check
37-
// since `load` does not get inlined.
38-
// opt3-NEXT: start:
39-
// opt3-NEXT: load <3 x float>
38+
// CHECK-NEXT: start
39+
// noopt: alloca [[RET_TYPE]], [[RET_ALIGN]]
40+
// CHECK: load <3 x float>
4041
let x = load(x);
41-
// opt3-NEXT: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
42-
// opt3-NEXT: ret <3 x float> [[VREG:%[a-z0-9_]+]]
42+
// CHECK: [[VREG:%[a-z0-9_]+]] = fmul <3 x float>
43+
// CHECK-NEXT: store <3 x float> [[VREG]], ptr [[RET_VREG]], [[RET_ALIGN]]
44+
// CHECK-NEXT: ret void
4345
unsafe { intrinsics::simd_mul(x, x) }
4446
}
4547

0 commit comments

Comments
 (0)