Skip to content

Commit

Permalink
Auto merge of #95837 - scottmcm:ptr-offset-from-unsigned, r=oli-obk
Browse files Browse the repository at this point in the history
Add `sub_ptr` on pointers (the `usize` version of `offset_from`)

We have `add`/`sub` which are the `usize` versions of `offset`, this adds the `usize` equivalent of `offset_from`.  Like how `.add(d)` replaced a whole bunch of `.offset(d as isize)`, you can see from the changes here that it's fairly common that code actually knows the order between the pointers and *wants* a `usize`, not an `isize`.

As a bonus, this can do `sub nuw`+`udiv exact`, rather than `sub`+`sdiv exact`, which can be optimized slightly better because it doesn't have to worry about negatives.  That's why the slice iterators weren't using `offset_from`, though I haven't updated that code in this PR because slices are so perf-critical that I'll do it as its own change.

This is an intrinsic, like `offset_from`, so that it can eventually be allowed in CTFE.  It also allows checking the extra safety condition -- see the test confirming that CTFE catches it if you pass the pointers in the wrong order.
  • Loading branch information
bors committed May 12, 2022
2 parents 0cd939e + 003b954 commit 1d2ea98
Show file tree
Hide file tree
Showing 20 changed files with 307 additions and 28 deletions.
13 changes: 10 additions & 3 deletions compiler/rustc_codegen_cranelift/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,14 +713,21 @@ fn codegen_regular_intrinsic_call<'tcx>(
ret.write_cvalue(fx, val);
};

ptr_offset_from, (v ptr, v base) {
ptr_offset_from | ptr_offset_from_unsigned, (v ptr, v base) {
let ty = substs.type_at(0);
let isize_layout = fx.layout_of(fx.tcx.types.isize);

let pointee_size: u64 = fx.layout_of(ty).size.bytes();
let diff = fx.bcx.ins().isub(ptr, base);
let diff_bytes = fx.bcx.ins().isub(ptr, base);
// FIXME this can be an exact division.
let val = CValue::by_val(fx.bcx.ins().sdiv_imm(diff, pointee_size as i64), isize_layout);
let diff = if intrinsic == sym::ptr_offset_from_unsigned {
// Because diff_bytes ULE isize::MAX, this would be fine as signed,
// but unsigned is slightly easier to codegen, so might as well.
fx.bcx.ins().udiv_imm(diff_bytes, pointee_size as i64)
} else {
fx.bcx.ins().sdiv_imm(diff_bytes, pointee_size as i64)
};
let val = CValue::by_val(diff, isize_layout);
ret.write_cvalue(fx, val);
};

Expand Down
21 changes: 14 additions & 7 deletions compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,21 +555,28 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}
}

sym::ptr_offset_from => {
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
let ty = substs.type_at(0);
let pointee_size = bx.layout_of(ty).size;

// This is the same sequence that Clang emits for pointer subtraction.
// It can be neither `nsw` nor `nuw` because the input is treated as
// unsigned but then the output is treated as signed, so neither works.
let a = args[0].immediate();
let b = args[1].immediate();
let a = bx.ptrtoint(a, bx.type_isize());
let b = bx.ptrtoint(b, bx.type_isize());
let d = bx.sub(a, b);
let pointee_size = bx.const_usize(pointee_size.bytes());
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
bx.exactsdiv(d, pointee_size)
if name == sym::ptr_offset_from {
// This is the same sequence that Clang emits for pointer subtraction.
// It can be neither `nsw` nor `nuw` because the input is treated as
// unsigned but then the output is treated as signed, so neither works.
let d = bx.sub(a, b);
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
bx.exactsdiv(d, pointee_size)
} else {
// The `_unsigned` version knows the relative ordering of the pointers,
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
let d = bx.unchecked_usub(a, b);
bx.exactudiv(d, pointee_size)
}
}

_ => {
Expand Down
39 changes: 31 additions & 8 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, self);
self.write_pointer(offset_ptr, dest)?;
}
sym::ptr_offset_from => {
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
let a = self.read_pointer(&args[0])?;
let b = self.read_pointer(&args[1])?;

Expand All @@ -330,8 +330,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// Both are pointers. They must be into the same allocation.
if a_alloc_id != b_alloc_id {
throw_ub_format!(
"ptr_offset_from cannot compute offset of pointers into different \
allocations.",
"{} cannot compute offset of pointers into different allocations.",
intrinsic_name,
);
}
// And they must both be valid for zero-sized accesses ("in-bounds or one past the end").
Expand All @@ -348,16 +348,39 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
CheckInAllocMsg::OffsetFromTest,
)?;

if intrinsic_name == sym::ptr_offset_from_unsigned && a_offset < b_offset {
throw_ub_format!(
"{} cannot compute a negative offset, but {} < {}",
intrinsic_name,
a_offset.bytes(),
b_offset.bytes(),
);
}

// Compute offset.
let usize_layout = self.layout_of(self.tcx.types.usize)?;
let isize_layout = self.layout_of(self.tcx.types.isize)?;
let a_offset = ImmTy::from_uint(a_offset.bytes(), usize_layout);
let b_offset = ImmTy::from_uint(b_offset.bytes(), usize_layout);
let (val, _overflowed, _ty) =
let ret_layout = if intrinsic_name == sym::ptr_offset_from {
isize_layout
} else {
usize_layout
};

// The subtraction is always done in `isize` to enforce
// the "no more than `isize::MAX` apart" requirement.
let a_offset = ImmTy::from_uint(a_offset.bytes(), isize_layout);
let b_offset = ImmTy::from_uint(b_offset.bytes(), isize_layout);
let (val, overflowed, _ty) =
self.overflowing_binary_op(BinOp::Sub, &a_offset, &b_offset)?;
if overflowed {
throw_ub_format!("Pointers were too far apart for {}", intrinsic_name);
}

let pointee_layout = self.layout_of(substs.type_at(0))?;
let val = ImmTy::from_scalar(val, isize_layout);
let size = ImmTy::from_int(pointee_layout.size.bytes(), isize_layout);
// This re-interprets an isize at ret_layout, but we already checked
// that if ret_layout is usize, then the result must be non-negative.
let val = ImmTy::from_scalar(val, ret_layout);
let size = ImmTy::from_int(pointee_layout.size.bytes(), ret_layout);
self.exact_div(&val, &size, dest)?;
}
}
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,7 @@ symbols! {
ptr_null,
ptr_null_mut,
ptr_offset_from,
ptr_offset_from_unsigned,
pub_macro_rules,
pub_restricted,
pure,
Expand Down
3 changes: 3 additions & 0 deletions compiler/rustc_typeck/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,9 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
sym::ptr_offset_from => {
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.isize)
}
sym::ptr_offset_from_unsigned => {
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.usize)
}
sym::unchecked_div | sym::unchecked_rem | sym::exact_div => {
(1, vec![param(0), param(0)], param(0))
}
Expand Down
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@
#![feature(pattern)]
#![feature(ptr_internals)]
#![feature(ptr_metadata)]
#![feature(ptr_sub_ptr)]
#![feature(receiver_trait)]
#![feature(set_ptr_value)]
#![feature(slice_group_by)]
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1056,7 +1056,7 @@ where
fn drop(&mut self) {
// `T` is not a zero-sized type, and these are pointers into a slice's elements.
unsafe {
let len = self.end.offset_from(self.start) as usize;
let len = self.end.sub_ptr(self.start);
ptr::copy_nonoverlapping(self.start, self.dest, len);
}
}
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/vec/drain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> {
// it from the original vec but also avoid creating a &mut to the front since that could
// invalidate raw pointers to it which some unsafe code might rely on.
let vec_ptr = vec.as_mut().as_mut_ptr();
let drop_offset = drop_ptr.offset_from(vec_ptr) as usize;
let drop_offset = drop_ptr.sub_ptr(vec_ptr);
let to_drop = ptr::slice_from_raw_parts_mut(vec_ptr.add(drop_offset), drop_len);
ptr::drop_in_place(to_drop);
}
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/vec/in_place_collect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ where
let sink =
self.try_fold::<_, _, Result<_, !>>(sink, write_in_place_with_drop(end)).unwrap();
// iteration succeeded, don't drop head
unsafe { ManuallyDrop::new(sink).dst.offset_from(dst_buf) as usize }
unsafe { ManuallyDrop::new(sink).dst.sub_ptr(dst_buf) }
}
}

Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/vec/in_place_drop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub(super) struct InPlaceDrop<T> {

impl<T> InPlaceDrop<T> {
fn len(&self) -> usize {
unsafe { self.dst.offset_from(self.inner) as usize }
unsafe { self.dst.sub_ptr(self.inner) }
}
}

Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/vec/into_iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
let exact = if mem::size_of::<T>() == 0 {
self.end.addr().wrapping_sub(self.ptr.addr())
} else {
unsafe { self.end.offset_from(self.ptr) as usize }
unsafe { self.end.sub_ptr(self.ptr) }
};
(exact, Some(exact))
}
Expand Down
13 changes: 13 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1903,6 +1903,11 @@ extern "rust-intrinsic" {
#[rustc_const_unstable(feature = "const_ptr_offset_from", issue = "92980")]
pub fn ptr_offset_from<T>(ptr: *const T, base: *const T) -> isize;

/// See documentation of `<*const T>::sub_ptr` for details.
#[rustc_const_unstable(feature = "const_ptr_offset_from", issue = "92980")]
#[cfg(not(bootstrap))]
pub fn ptr_offset_from_unsigned<T>(ptr: *const T, base: *const T) -> usize;

/// See documentation of `<*const T>::guaranteed_eq` for details.
///
/// Note that, unlike most intrinsics, this is safe to call;
Expand Down Expand Up @@ -2385,3 +2390,11 @@ where
{
called_in_const.call_once(arg)
}

/// Bootstrap polyfill
#[cfg(bootstrap)]
pub const unsafe fn ptr_offset_from_unsigned<T>(ptr: *const T, base: *const T) -> usize {
// SAFETY: we have stricter preconditions than `ptr_offset_from`, so can
// call it, and its output has to be positive, so we can just cast.
unsafe { ptr_offset_from(ptr, base) as _ }
}
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@
#![feature(const_option)]
#![feature(const_option_ext)]
#![feature(const_pin)]
#![feature(const_ptr_sub_ptr)]
#![feature(const_replace)]
#![feature(const_ptr_as_ref)]
#![feature(const_ptr_is_null)]
Expand Down
77 changes: 77 additions & 0 deletions library/core/src/ptr/const_ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -611,6 +611,83 @@ impl<T: ?Sized> *const T {
unsafe { intrinsics::ptr_offset_from(self, origin) }
}

/// Calculates the distance between two pointers, *where it's known that
/// `self` is equal to or greater than `origin`*. The returned value is in
/// units of T: the distance in bytes is divided by `mem::size_of::<T>()`.
///
/// This computes the same value that [`offset_from`](#method.offset_from)
/// would compute, but with the added precondition that that the offset is
/// guaranteed to be non-negative. This method is equivalent to
/// `usize::from(self.offset_from(origin)).unwrap_unchecked()`,
/// but it provides slightly more information to the optimizer, which can
/// sometimes allow it to optimize slightly better with some backends.
///
/// This method can be though of as recovering the `count` that was passed
/// to [`add`](#method.add) (or, with the parameters in the other order,
/// to [`sub`](#method.sub)). The following are all equivalent, assuming
/// that their safety preconditions are met:
/// ```rust
/// # #![feature(ptr_sub_ptr)]
/// # unsafe fn blah(ptr: *const i32, origin: *const i32, count: usize) -> bool {
/// ptr.sub_ptr(origin) == count
/// # &&
/// origin.add(count) == ptr
/// # &&
/// ptr.sub(count) == origin
/// # }
/// ```
///
/// # Safety
///
/// - The distance between the pointers must be non-negative (`self >= origin`)
///
/// - *All* the safety conditions of [`offset_from`](#method.offset_from)
/// apply to this method as well; see it for the full details.
///
/// Importantly, despite the return type of this method being able to represent
/// a larger offset, it's still *not permitted* to pass pointers which differ
/// by more than `isize::MAX` *bytes*. As such, the result of this method will
/// always be less than or equal to `isize::MAX as usize`.
///
/// # Panics
///
/// This function panics if `T` is a Zero-Sized Type ("ZST").
///
/// # Examples
///
/// ```
/// #![feature(ptr_sub_ptr)]
///
/// let a = [0; 5];
/// let ptr1: *const i32 = &a[1];
/// let ptr2: *const i32 = &a[3];
/// unsafe {
/// assert_eq!(ptr2.sub_ptr(ptr1), 2);
/// assert_eq!(ptr1.add(2), ptr2);
/// assert_eq!(ptr2.sub(2), ptr1);
/// assert_eq!(ptr2.sub_ptr(ptr2), 0);
/// }
///
/// // This would be incorrect, as the pointers are not correctly ordered:
/// // ptr1.offset_from(ptr2)
/// ```
#[unstable(feature = "ptr_sub_ptr", issue = "95892")]
#[rustc_const_unstable(feature = "const_ptr_sub_ptr", issue = "95892")]
#[inline]
pub const unsafe fn sub_ptr(self, origin: *const T) -> usize
where
T: Sized,
{
// SAFETY: The comparison has no side-effects, and the intrinsic
// does this check internally in the CTFE implementation.
unsafe { assert_unsafe_precondition!(self >= origin) };

let pointee_size = mem::size_of::<T>();
assert!(0 < pointee_size && pointee_size <= isize::MAX as usize);
// SAFETY: the caller must uphold the safety contract for `ptr_offset_from_unsigned`.
unsafe { intrinsics::ptr_offset_from_unsigned(self, origin) }
}

/// Returns whether two pointers are guaranteed to be equal.
///
/// At runtime this function behaves like `self == other`.
Expand Down
72 changes: 72 additions & 0 deletions library/core/src/ptr/mut_ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,78 @@ impl<T: ?Sized> *mut T {
unsafe { (self as *const T).offset_from(origin) }
}

/// Calculates the distance between two pointers, *where it's known that
/// `self` is equal to or greater than `origin`*. The returned value is in
/// units of T: the distance in bytes is divided by `mem::size_of::<T>()`.
///
/// This computes the same value that [`offset_from`](#method.offset_from)
/// would compute, but with the added precondition that that the offset is
/// guaranteed to be non-negative. This method is equivalent to
/// `usize::from(self.offset_from(origin)).unwrap_unchecked()`,
/// but it provides slightly more information to the optimizer, which can
/// sometimes allow it to optimize slightly better with some backends.
///
/// This method can be though of as recovering the `count` that was passed
/// to [`add`](#method.add) (or, with the parameters in the other order,
/// to [`sub`](#method.sub)). The following are all equivalent, assuming
/// that their safety preconditions are met:
/// ```rust
/// # #![feature(ptr_sub_ptr)]
/// # unsafe fn blah(ptr: *mut i32, origin: *mut i32, count: usize) -> bool {
/// ptr.sub_ptr(origin) == count
/// # &&
/// origin.add(count) == ptr
/// # &&
/// ptr.sub(count) == origin
/// # }
/// ```
///
/// # Safety
///
/// - The distance between the pointers must be non-negative (`self >= origin`)
///
/// - *All* the safety conditions of [`offset_from`](#method.offset_from)
/// apply to this method as well; see it for the full details.
///
/// Importantly, despite the return type of this method being able to represent
/// a larger offset, it's still *not permitted* to pass pointers which differ
/// by more than `isize::MAX` *bytes*. As such, the result of this method will
/// always be less than or equal to `isize::MAX as usize`.
///
/// # Panics
///
/// This function panics if `T` is a Zero-Sized Type ("ZST").
///
/// # Examples
///
/// ```
/// #![feature(ptr_sub_ptr)]
///
/// let mut a = [0; 5];
/// let p: *mut i32 = a.as_mut_ptr();
/// unsafe {
/// let ptr1: *mut i32 = p.add(1);
/// let ptr2: *mut i32 = p.add(3);
///
/// assert_eq!(ptr2.sub_ptr(ptr1), 2);
/// assert_eq!(ptr1.add(2), ptr2);
/// assert_eq!(ptr2.sub(2), ptr1);
/// assert_eq!(ptr2.sub_ptr(ptr2), 0);
/// }
///
/// // This would be incorrect, as the pointers are not correctly ordered:
/// // ptr1.offset_from(ptr2)
#[unstable(feature = "ptr_sub_ptr", issue = "95892")]
#[rustc_const_unstable(feature = "const_ptr_sub_ptr", issue = "95892")]
#[inline]
pub const unsafe fn sub_ptr(self, origin: *const T) -> usize
where
T: Sized,
{
// SAFETY: the caller must uphold the safety contract for `sub_ptr`.
unsafe { (self as *const T).sub_ptr(origin) }
}

/// Calculates the offset from a pointer (convenience for `.offset(count as isize)`).
///
/// `count` is in units of T; e.g., a `count` of 3 represents a pointer
Expand Down
Loading

0 comments on commit 1d2ea98

Please sign in to comment.