Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Copy byval argument to alloca if alignment is insufficient #122212

Merged
merged 2 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 55 additions & 49 deletions compiler/rustc_codegen_llvm/src/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,57 +203,63 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
val: &'ll Value,
dst: PlaceRef<'tcx, &'ll Value>,
) {
if self.is_ignore() {
return;
}
if self.is_sized_indirect() {
OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
} else if self.is_unsized_indirect() {
bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
} else if let PassMode::Cast { cast, pad_i32: _ } = &self.mode {
// FIXME(eddyb): Figure out when the simpler Store is safe, clang
// uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
let can_store_through_cast_ptr = false;
if can_store_through_cast_ptr {
bx.store(val, dst.llval, self.layout.align.abi);
} else {
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.

// We instead thus allocate some scratch space...
let scratch_size = cast.size(bx);
let scratch_align = cast.align(bx);
let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
bx.lifetime_start(llscratch, scratch_size);

// ... where we first store the value...
bx.store(val, llscratch, scratch_align);

// ... and then memcpy it to the intended destination.
bx.memcpy(
dst.llval,
self.layout.align.abi,
llscratch,
scratch_align,
bx.const_usize(self.layout.size.bytes()),
MemFlags::empty(),
);
match &self.mode {
PassMode::Ignore => {}
// Sized indirect arguments
PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
let align = attrs.pointee_align.unwrap_or(self.layout.align.abi);
OperandValue::Ref(val, None, align).store(bx, dst);
Comment on lines +210 to +211
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only functional change in this file is replacing

OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)

with

let align = attrs.pointee_align.unwrap_or(self.layout.align.abi);
OperandValue::Ref(val, None, align).store(bx, dst);

since the argument alignment is not always equal to the Rust ABI alignment.

}
// Unsized indirect qrguments
PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
}
PassMode::Cast { cast, pad_i32: _ } => {
// FIXME(eddyb): Figure out when the simpler Store is safe, clang
// uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
let can_store_through_cast_ptr = false;
if can_store_through_cast_ptr {
bx.store(val, dst.llval, self.layout.align.abi);
} else {
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.

// We instead thus allocate some scratch space...
let scratch_size = cast.size(bx);
let scratch_align = cast.align(bx);
let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
bx.lifetime_start(llscratch, scratch_size);

// ... where we first store the value...
bx.store(val, llscratch, scratch_align);

// ... and then memcpy it to the intended destination.
bx.memcpy(
dst.llval,
self.layout.align.abi,
llscratch,
scratch_align,
bx.const_usize(self.layout.size.bytes()),
MemFlags::empty(),
);

bx.lifetime_end(llscratch, scratch_size);
bx.lifetime_end(llscratch, scratch_size);
}
}
_ => {
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
}
} else {
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
}
}

Expand Down
62 changes: 39 additions & 23 deletions compiler/rustc_codegen_ssa/src/mir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,29 +367,45 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
}
}

if arg.is_sized_indirect() {
// Don't copy an indirect argument to an alloca, the caller
// already put it in a temporary alloca and gave it up.
// FIXME: lifetimes
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
} else if arg.is_unsized_indirect() {
// As the storage for the indirect argument lives during
// the whole function call, we just copy the fat pointer.
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
let llextra = bx.get_param(llarg_idx);
llarg_idx += 1;
let indirect_operand = OperandValue::Pair(llarg, llextra);

let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
indirect_operand.store(bx, tmp);
LocalRef::UnsizedPlace(tmp)
} else {
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
match arg.mode {
// Sized indirect arguments
PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
// Don't copy an indirect argument to an alloca, the caller already put it
// in a temporary alloca and gave it up.
// FIXME: lifetimes
if let Some(pointee_align) = attrs.pointee_align
&& pointee_align < arg.layout.align.abi
{
// ...unless the argument is underaligned, then we need to copy it to
// a higher-aligned alloca.
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
} else {
Comment on lines +376 to +384
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only functional change in this file.

let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
}
}
// Unsized indirect qrguments
PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
// As the storage for the indirect argument lives during
// the whole function call, we just copy the fat pointer.
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
let llextra = bx.get_param(llarg_idx);
llarg_idx += 1;
let indirect_operand = OperandValue::Pair(llarg, llextra);

let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
indirect_operand.store(bx, tmp);
LocalRef::UnsizedPlace(tmp)
}
_ => {
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
}
}
})
.collect::<Vec<_>>();
Expand Down
4 changes: 1 addition & 3 deletions compiler/rustc_target/src/abi/call/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -633,10 +633,8 @@ impl<'a, Ty> ArgAbi<'a, Ty> {
/// If the resulting alignment differs from the type's alignment,
/// the argument will be copied to an alloca with sufficient alignment,
/// either in the caller (if the type's alignment is lower than the byval alignment)
/// or in the callee (if the type's alignment is higher than the byval alignment),
/// or in the callee (if the type's alignment is higher than the byval alignment),
/// to ensure that Rust code never sees an underaligned pointer.
///
/// † This is currently broken, see <https://github.com/rust-lang/rust/pull/122212>.
pub fn make_indirect_byval(&mut self, byval_align: Option<Align>) {
assert!(!self.layout.is_unsized(), "used byval ABI for unsized layout");
self.make_indirect();
Expand Down
126 changes: 126 additions & 0 deletions tests/codegen/align-byval-alignment-mismatch.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// ignore-tidy-linelength
//@ revisions:i686-linux x86_64-linux

//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
//@[i686-linux] needs-llvm-components: x86
//@[x86_64-linux] compile-flags: --target x86_64-unknown-linux-gnu
//@[x86_64-linux] needs-llvm-components: x86

// Tests that we correctly copy arguments into allocas when the alignment of the byval argument
// is different from the alignment of the Rust type.

// For the following test cases:
// All of the `*_decreases_alignment` functions should codegen to a direct call, since the
// alignment is already sufficient.
// All off the `*_increases_alignment` functions should copy the argument to an alloca
// on i686-unknown-linux-gnu, since the alignment needs to be increased, and should codegen
// to a direct call on x86_64-unknown-linux-gnu, where byval alignment matches Rust alignment.

#![feature(no_core, lang_items)]
#![crate_type = "lib"]
#![no_std]
#![no_core]
#![allow(non_camel_case_types)]

#[lang = "sized"]
trait Sized {}
#[lang = "freeze"]
trait Freeze {}
#[lang = "copy"]
trait Copy {}

// This type has align 1 in Rust, but as a byval argument on i686-linux, it will have align 4.
#[repr(C)]
#[repr(packed)]
struct Align1 {
x: u128,
y: u128,
z: u128,
}

// This type has align 16 in Rust, but as a byval argument on i686-linux, it will have align 4.
#[repr(C)]
#[repr(align(16))]
struct Align16 {
x: u128,
y: u128,
z: u128,
}

extern "C" {
fn extern_c_align1(x: Align1);
fn extern_c_align16(x: Align16);
}

// CHECK-LABEL: @rust_to_c_increases_alignment
#[no_mangle]
pub unsafe fn rust_to_c_increases_alignment(x: Align1) {
// i686-linux: start:
// i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align1, align 4
// i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 4 {{.*}}[[ALLOCA]], ptr {{.*}}align 1 {{.*}}%x
// i686-linux-NEXT: call void @extern_c_align1({{.+}} [[ALLOCA]])

// x86_64-linux: start:
// x86_64-linux-NEXT: call void @extern_c_align1
extern_c_align1(x);
}

// CHECK-LABEL: @rust_to_c_decreases_alignment
#[no_mangle]
pub unsafe fn rust_to_c_decreases_alignment(x: Align16) {
// CHECK: start:
// CHECK-NEXT: call void @extern_c_align16
extern_c_align16(x);
}

extern "Rust" {
fn extern_rust_align1(x: Align1);
fn extern_rust_align16(x: Align16);
}

// CHECK-LABEL: @c_to_rust_decreases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_decreases_alignment(x: Align1) {
// CHECK: start:
// CHECK-NEXT: call void @extern_rust_align1
extern_rust_align1(x);
}

// CHECK-LABEL: @c_to_rust_increases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_increases_alignment(x: Align16) {
// i686-linux: start:
// i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
// i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
// i686-linux-NEXT: call void @extern_rust_align16({{.+}} [[ALLOCA]])

// x86_64-linux: start:
// x86_64-linux-NEXT: call void @extern_rust_align16
extern_rust_align16(x);
}

extern "Rust" {
fn extern_rust_ref_align1(x: &Align1);
fn extern_rust_ref_align16(x: &Align16);
}

// CHECK-LABEL: @c_to_rust_ref_decreases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_ref_decreases_alignment(x: Align1) {
// CHECK: start:
// CHECK-NEXT: call void @extern_rust_ref_align1
extern_rust_ref_align1(&x);
}

// CHECK-LABEL: @c_to_rust_ref_increases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_ref_increases_alignment(x: Align16) {
// i686-linux: start:
// i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
// i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
// i686-linux-NEXT: call void @extern_rust_ref_align16({{.+}} [[ALLOCA]])

// x86_64-linux: start:
// x86_64-linux-NEXT: call void @extern_rust_ref_align16
extern_rust_ref_align16(&x);
}
Loading