Let codegen decide when to mem::swap with immediates
Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea.

So introduce a new `typed_swap` intrinsic with a fallback body, but replace that implementation for immediates and scalar pairs.
scottmcm committed Mar 16, 2024
1 parent fd27e87 · commit 725417f
Showing 10 changed files with 191 additions and 27 deletions.
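In outline, the new path looks like this (a condensed sketch stitched together from the hunks below; bodies abbreviated, not the verbatim sources):

    // library/core/src/mem/mod.rs: the public entry point now defers to the intrinsic.
    pub const fn swap<T>(x: &mut T, y: &mut T) {
        // SAFETY: `&mut` guarantees these are typed readable and writable
        // as well as non-overlapping.
        unsafe { ptr::typed_swap(x, y) }
    }

    // library/core/src/ptr/mod.rs: the intrinsic's fallback body, used whenever
    // the backend does not substitute a direct lowering.
    pub(crate) const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
        // SAFETY: the caller provided single non-overlapping items behind pointers.
        unsafe { swap_nonoverlapping(x, y, 1) };
    }

The codegen hunk below then intercepts the intrinsic for types that load as immediates or scalar pairs and swaps them through registers instead of memory.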
16 changes: 16 additions & 0 deletions compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
@@ -76,6 +76,22 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
let name = bx.tcx().item_name(def_id);
let name_str = name.as_str();

// If we're swapping something that's *not* an `OperandValue::Ref`,
// then we can do it directly and avoid the alloca.
// Otherwise, we'll let the fallback MIR body take care of it.
if let sym::typed_swap = name {
let pointee_ty = fn_args.type_at(0);
let pointee_layout = bx.layout_of(pointee_ty);
if !bx.is_backend_ref(pointee_layout) {
let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
let temp = bx.load_operand(x_place);
bx.typed_place_copy(x_place, y_place);
temp.val.store(bx, y_place);
return Ok(());
}
}

let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);

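For intuition, the interception above is the moral equivalent of the following hand-written Rust for an immediate type (an illustrative analogue of the load/copy/store sequence, not actual compiler output):

    fn swap_immediate_analogue(x: &mut i32, y: &mut i32) {
        let temp = *x; // load_operand(x_place): the old value stays in a register
        *x = *y;       // typed_place_copy(x_place, y_place): copy y over x
        *y = temp;     // temp.val.store(bx, y_place): write the saved value into y
    }

No stack temporary is involved, which is exactly what skipping the alloca-based fallback buys.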
21 changes: 21 additions & 0 deletions compiler/rustc_codegen_ssa/src/traits/builder.rs
@@ -1,5 +1,6 @@
use super::abi::AbiBuilderMethods;
use super::asm::AsmBuilderMethods;
use super::consts::ConstMethods;
use super::coverageinfo::CoverageInfoBuilderMethods;
use super::debuginfo::DebugInfoBuilderMethods;
use super::intrinsic::IntrinsicCallMethods;
@@ -267,6 +268,26 @@ pub trait BuilderMethods<'a, 'tcx>:
flags: MemFlags,
);

/// *Typed* copy for non-overlapping places.
///
/// Has a default implementation in terms of `memcpy`, but specific backends
/// can override to do something smarter if possible.
///
/// (For example, typed load-stores with alias metadata.)
fn typed_place_copy(
&mut self,
dst: PlaceRef<'tcx, Self::Value>,
src: PlaceRef<'tcx, Self::Value>,
) {
debug_assert!(src.llextra.is_none());
debug_assert!(dst.llextra.is_none());
debug_assert_eq!(dst.layout.size, src.layout.size);
if !dst.layout.is_zst() {
let bytes = self.const_usize(dst.layout.size.bytes());
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
}
}

fn select(
&mut self,
cond: Self::Value,
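As a mental model for the `typed_place_copy` default above: it is an untyped copy of `layout.size` bytes, skipped for ZSTs. A plain-Rust analogue under the same non-overlapping assumption (illustrative only, not the trait code):

    unsafe fn typed_place_copy_analogue<T>(dst: *mut T, src: *const T) {
        if core::mem::size_of::<T>() != 0 {
            // SAFETY: the caller guarantees valid, aligned, non-overlapping places.
            unsafe { core::ptr::copy_nonoverlapping(src, dst, 1) };
        }
    }

Backends that can do better, e.g. typed load/store pairs carrying alias metadata, override it.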
14 changes: 14 additions & 0 deletions compiler/rustc_codegen_ssa/src/traits/type_.rs
@@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
immediate: bool,
) -> Self::Type;

/// A type that produces an [`OperandValue::Ref`] when loaded.
///
/// AKA one that's not a ZST, not `is_backend_immediate`, and
/// not `is_backend_scalar_pair`. For such a type, a
/// [`load_operand`] doesn't actually `load` anything.
///
/// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
/// [`load_operand`]: super::BuilderMethods::load_operand
fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
!(layout.is_zst()
|| self.is_backend_immediate(layout)
|| self.is_backend_scalar_pair(layout))
}

/// A type that can be used in a [`super::BuilderMethods::load`] +
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
/// such as a MIR `*_0 = *_1`.
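Restated outside the trait, with the three layout queries stubbed out as booleans (a hypothetical free function, for illustration only):

    fn is_backend_ref(is_zst: bool, is_immediate: bool, is_scalar_pair: bool) -> bool {
        // `OperandValue::Ref` is the residual case: not a ZST, not a single
        // immediate, and not a scalar pair means the value stays behind a pointer.
        !(is_zst || is_immediate || is_scalar_pair)
    }

For example, on x86_64: `()` (ZST), `i32` (immediate), and `&str` (scalar pair) are not backend refs, while something like `[u8; 64]` is.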
2 changes: 2 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
@@ -500,6 +500,8 @@ pub fn check_intrinsic_type(
(1, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(0)], Ty::new_unit(tcx))
}

sym::typed_swap => (1, 1, vec![Ty::new_mut_ptr(tcx, param(0)); 2], Ty::new_unit(tcx)),

sym::discriminant_value => {
let assoc_items = tcx.associated_item_def_ids(
tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
@@ -1830,6 +1830,7 @@ symbols! {
type_macros,
type_name,
type_privacy_lints,
typed_swap,
u128,
u128_legacy_const_max,
u128_legacy_const_min,
34 changes: 11 additions & 23 deletions library/core/src/mem/mod.rs
@@ -726,31 +726,19 @@ pub unsafe fn uninitialized<T>() -> T {
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
#[rustc_diagnostic_item = "mem_swap"]
pub const fn swap<T>(x: &mut T, y: &mut T) {
-    // NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
-    // reinterpretation of values as (chunkable) byte arrays, and the loop in the
-    // block optimization in `swap_slice` is hard to rewrite back
-    // into the (unoptimized) direct swapping implementation, so we disable it.
-    #[cfg(not(any(target_arch = "spirv")))]
-    {
-        // For types that are larger multiples of their alignment, the simple way
-        // tends to copy the whole thing to stack rather than doing it one part
-        // at a time, so instead treat them as one-element slices and piggy-back
-        // the slice optimizations that will split up the swaps.
-        if const { size_of::<T>() / align_of::<T>() > 2 } {
-            // SAFETY: exclusive references always point to one non-overlapping
-            // element and are non-null and properly aligned.
-            return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
+    cfg_if! {
+        // NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
+        // reinterpretation of values as (chunkable) byte arrays, and the loop in the
+        // block optimization in `typed_swap_many` is hard to rewrite back
+        // into the (unoptimized) direct swapping implementation, so we disable it.
+        if #[cfg(any(target_arch = "spirv"))] {
+            swap_simple(x, y)
+        } else {
+            // SAFETY: `&mut` guarantees these are typed readable and writable
+            // as well as non-overlapping.
+            unsafe { ptr::typed_swap(x, y) }
+        }
+    }
-
-    // If a scalar consists of just a small number of alignment units, let
-    // the codegen just swap those pieces directly, as it's likely just a
-    // few instructions and anything else is probably overcomplicated.
-    //
-    // Most importantly, this covers primitives and simd types that tend to
-    // have size=align where doing anything else can be a pessimization.
-    // (This will also be used for ZSTs, though any solution works for them.)
-    swap_simple(x, y);
}

/// Same as [`swap`] semantically, but always uses the simple implementation.
17 changes: 17 additions & 0 deletions library/core/src/ptr/mod.rs
@@ -954,6 +954,23 @@ pub const unsafe fn swap<T>(x: *mut T, y: *mut T) {
}
}

/// Non-overlapping *typed* swap of a single value.
///
/// The codegen backends will replace this with a better implementation when
/// `T` is a simple type that can be an immediate.
///
/// # Safety
///
/// `x` and `y` are readable and writable as `T`, and non-overlapping.
#[rustc_nounwind]
#[inline]
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
pub(crate) const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
// SAFETY: The caller provided single non-overlapping items behind
// pointers, so swapping them with `count: 1` is fine.
unsafe { swap_nonoverlapping(x, y, 1) };
}

/// Swaps `count * size_of::<T>()` bytes between the two regions of memory
/// beginning at `x` and `y`. The two regions must *not* overlap.
///
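A caller-side sketch of the `typed_swap` contract above (illustrative only; the intrinsic is `pub(crate)`, so code outside `core` reaches it through `mem::swap`):

    let mut a = [1u8; 3];
    let mut b = [4u8; 3];
    // SAFETY: two distinct locals are readable and writable as `[u8; 3]`
    // and cannot overlap.
    unsafe { typed_swap(&mut a, &mut b) };
    assert_eq!(a, [4u8; 3]);
    assert_eq!(b, [1u8; 3]);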
50 changes: 50 additions & 0 deletions tests/assembly/x86_64-typed-swap.rs
@@ -0,0 +1,50 @@
//@ only-x86_64
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -O

use std::arch::x86_64::__m128;
use std::mem::swap;

// CHECK-LABEL: swap_i32:
#[no_mangle]
pub fn swap_i32(x: &mut i32, y: &mut i32) {
// CHECK: movl (%[[ARG1:.+]]), %[[T1:.+]]
// CHECK: movl (%[[ARG2:.+]]), %[[T2:.+]]
// CHECK: movl %[[T2]], (%[[ARG1]])
// CHECK: movl %[[T1]], (%[[ARG2]])
// CHECK: retq
swap(x, y)
}

// CHECK-LABEL: swap_pair:
#[no_mangle]
pub fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
// CHECK: movq (%[[ARG1]]), %[[T1:.+]]
// CHECK: movq (%[[ARG2]]), %[[T2:.+]]
// CHECK: movq %[[T2]], (%[[ARG1]])
// CHECK: movq %[[T1]], (%[[ARG2]])
// CHECK: retq
swap(x, y)
}

// CHECK-LABEL: swap_str:
#[no_mangle]
pub fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
// CHECK: movups (%[[ARG1]]), %[[T1:xmm.]]
// CHECK: movups (%[[ARG2]]), %[[T2:xmm.]]
// CHECK: movups %[[T2]], (%[[ARG1]])
// CHECK: movups %[[T1]], (%[[ARG2]])
// CHECK: retq
swap(x, y)
}

// CHECK-LABEL: swap_simd:
#[no_mangle]
pub fn swap_simd(x: &mut __m128, y: &mut __m128) {
// CHECK: movaps (%[[ARG1]]), %[[T1:xmm.]]
// CHECK: movaps (%[[ARG2]]), %[[T2:xmm.]]
// CHECK: movaps %[[T2]], (%[[ARG1]])
// CHECK: movaps %[[T1]], (%[[ARG2]])
// CHECK: retq
swap(x, y)
}
58 changes: 58 additions & 0 deletions tests/codegen/intrinsics/typed_swap.rs
@@ -0,0 +1,58 @@
//@ compile-flags: -O -C no-prepopulate-passes
//@ only-64bit (so I don't need to worry about usize)

#![crate_type = "lib"]

use std::mem::swap;

// CHECK-LABEL: @swap_unit(
#[no_mangle]
pub fn swap_unit(x: &mut (), y: &mut ()) {
// CHECK: start
// CHECK-NEXT: ret void
swap(x, y)
}

// CHECK-LABEL: @swap_i32(
#[no_mangle]
pub fn swap_i32(x: &mut i32, y: &mut i32) {
// CHECK-NOT: alloca

// CHECK: %[[TEMP:.+]] = load i32, ptr %x, align 4
// CHECK-SAME: !noundef
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %x, ptr align 4 %y, i64 4, i1 false)
// CHECK: store i32 %[[TEMP]], ptr %y, align 4
// CHECK: ret void
swap(x, y)
}

// CHECK-LABEL: @swap_pair(
#[no_mangle]
pub fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
// CHECK-NOT: alloca

// CHECK: load i32
// CHECK-SAME: !noundef
// CHECK: load i32
// CHECK-SAME: !noundef
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %x, ptr align 4 %y, i64 8, i1 false)
// CHECK: store i32
// CHECK: store i32
swap(x, y)
}

// CHECK-LABEL: @swap_str(
#[no_mangle]
pub fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
// CHECK-NOT: alloca

// CHECK: load ptr
// CHECK-SAME: !nonnull
// CHECK-SAME: !noundef
// CHECK: load i64
// CHECK-SAME: !noundef
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %x, ptr align 8 %y, i64 16, i1 false)
// CHECK: store ptr
// CHECK: store i64
swap(x, y)
}
5 changes: 1 addition & 4 deletions tests/codegen/swap-small-types.rs
@@ -70,10 +70,7 @@ pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
// CHECK-NOT: alloca
// CHECK: load ptr
// CHECK: load i64
- // CHECK: load ptr
- // CHECK: load i64
- // CHECK: store ptr
- // CHECK: store i64
+ // CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 16, i1 false)
// CHECK: store ptr
// CHECK: store i64
swap(x, y)