Skip to content

Commit

Permalink
Rollup merge of #96165 - RalfJung:miri-provenance-cleanup, r=oli-obk
Browse files Browse the repository at this point in the history
Miri provenance cleanup

Reviewing #95826 by ``@carbotaniuman`` made me realize that we could clean things up a little here.

``@carbotaniuman`` please let me know if you're okay with landing this (it will create a lot of conflicts with your PR), or if you'd prefer incorporating the ideas from this PR into yours. I think we want to end up in a situation where the function you called `ptr_reify_alloc` returns just two things, a concrete tag and an offset. Getting an `AllocId` from a concrete tag should be infallible like now. However a concrete tag and `Tag` don't have to be the same type.

r? ``@oli-obk``
  • Loading branch information
Dylan-DPC authored Apr 19, 2022
2 parents f7d8f5b + 55f0977 commit 9d9d591
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 46 deletions.
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/const_eval/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ pub(crate) fn const_caller_location(
if intern_const_alloc_recursive(&mut ecx, InternKind::Constant, &loc_place).is_err() {
bug!("intern_const_alloc_recursive should not error in this case")
}
ConstValue::Scalar(Scalar::from_pointer(loc_place.ptr.into_pointer_or_addr().unwrap(), &tcx))
ConstValue::Scalar(Scalar::from_maybe_pointer(loc_place.ptr, &tcx))
}

/// Convert an evaluated constant to a type level constant
Expand Down
28 changes: 21 additions & 7 deletions compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ pub trait Machine<'mir, 'tcx>: Sized {
/// Pointers are "tagged" with provenance information; typically the `AllocId` they belong to.
type PointerTag: Provenance + Eq + Hash + 'static;

/// When getting the AllocId of a pointer, some extra data is also obtained from the tag
/// that is passed to memory access hooks so they can do things with it.
type TagExtra: Copy + 'static;

/// Machines can define extra (non-instance) things that represent values of function pointers.
/// For example, Miri uses this to return a function pointer from `dlsym`
/// that can later be called to execute the right thing.
Expand Down Expand Up @@ -122,6 +126,8 @@ pub trait Machine<'mir, 'tcx>: Sized {

/// Whether, when checking alignment, we should `force_int` and thus support
/// custom alignment logic based on whatever the integer address happens to be.
///
/// Requires PointerTag::OFFSET_IS_ADDR to be true.
fn force_int_for_alignment_check(ecx: &InterpCx<'mir, 'tcx, Self>) -> bool;

/// Whether to enforce the validity invariant
Expand Down Expand Up @@ -285,11 +291,14 @@ pub trait Machine<'mir, 'tcx>: Sized {
addr: u64,
) -> Pointer<Option<Self::PointerTag>>;

/// Convert a pointer with provenance into an allocation-offset pair.
/// Convert a pointer with provenance into an allocation-offset pair
/// and extra provenance info.
///
/// The returned `AllocId` must be the same as `ptr.provenance.get_alloc_id()`.
fn ptr_get_alloc(
ecx: &InterpCx<'mir, 'tcx, Self>,
ptr: Pointer<Self::PointerTag>,
) -> (AllocId, Size);
) -> (AllocId, Size, Self::TagExtra);

/// Called to initialize the "extra" state of an allocation and make the pointers
/// it contains (in relocations) tagged. The way we construct allocations is
Expand Down Expand Up @@ -321,7 +330,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
_tcx: TyCtxt<'tcx>,
_machine: &Self,
_alloc_extra: &Self::AllocExtra,
_tag: Self::PointerTag,
_tag: (AllocId, Self::TagExtra),
_range: AllocRange,
) -> InterpResult<'tcx> {
Ok(())
Expand All @@ -333,7 +342,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
_tcx: TyCtxt<'tcx>,
_machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
_tag: Self::PointerTag,
_tag: (AllocId, Self::TagExtra),
_range: AllocRange,
) -> InterpResult<'tcx> {
Ok(())
Expand All @@ -345,7 +354,7 @@ pub trait Machine<'mir, 'tcx>: Sized {
_tcx: TyCtxt<'tcx>,
_machine: &mut Self,
_alloc_extra: &mut Self::AllocExtra,
_tag: Self::PointerTag,
_tag: (AllocId, Self::TagExtra),
_range: AllocRange,
) -> InterpResult<'tcx> {
Ok(())
Expand Down Expand Up @@ -397,6 +406,8 @@ pub trait Machine<'mir, 'tcx>: Sized {
// (CTFE and ConstProp) use the same instance. Here, we share that code.
pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
type PointerTag = AllocId;
type TagExtra = ();

type ExtraFnVal = !;

type MemoryMap =
Expand Down Expand Up @@ -474,9 +485,12 @@ pub macro compile_time_machine(<$mir: lifetime, $tcx: lifetime>) {
}

#[inline(always)]
fn ptr_get_alloc(_ecx: &InterpCx<$mir, $tcx, Self>, ptr: Pointer<AllocId>) -> (AllocId, Size) {
fn ptr_get_alloc(
_ecx: &InterpCx<$mir, $tcx, Self>,
ptr: Pointer<AllocId>,
) -> (AllocId, Size, Self::TagExtra) {
// We know `offset` is relative to the allocation, so we can use `into_parts`.
let (alloc_id, offset) = ptr.into_parts();
(alloc_id, offset)
(alloc_id, offset, ())
}
}
73 changes: 36 additions & 37 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
&self,
ptr: Pointer<AllocId>,
) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
// We know `offset` is relative to the allocation, so we can use `into_parts`.
let (alloc_id, offset) = ptr.into_parts();
let alloc_id = ptr.provenance;
// We need to handle `extern static`.
match self.tcx.get_global_alloc(alloc_id) {
Some(GlobalAlloc::Static(def_id)) if self.tcx.is_thread_local_static(def_id) => {
Expand All @@ -171,7 +170,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
_ => {}
}
// And we need to get the tag.
Ok(M::tag_alloc_base_pointer(self, Pointer::new(alloc_id, offset)))
Ok(M::tag_alloc_base_pointer(self, ptr))
}

pub fn create_fn_alloc_ptr(
Expand Down Expand Up @@ -238,7 +237,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
new_align: Align,
kind: MemoryKind<M::MemoryKind>,
) -> InterpResult<'tcx, Pointer<M::PointerTag>> {
let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
if offset.bytes() != 0 {
throw_ub_format!(
"reallocating {:?} which does not point to the beginning of an object",
Expand All @@ -255,14 +254,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
};
// This will also call the access hooks.
self.mem_copy(
ptr.into(),
ptr,
Align::ONE,
new_ptr.into(),
Align::ONE,
old_size.min(new_size),
/*nonoverlapping*/ true,
)?;
self.deallocate_ptr(ptr.into(), old_size_and_align, kind)?;
self.deallocate_ptr(ptr, old_size_and_align, kind)?;

Ok(new_ptr)
}
Expand All @@ -274,7 +273,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
old_size_and_align: Option<(Size, Align)>,
kind: MemoryKind<M::MemoryKind>,
) -> InterpResult<'tcx> {
let (alloc_id, offset, ptr) = self.ptr_get_alloc_id(ptr)?;
let (alloc_id, offset, tag) = self.ptr_get_alloc_id(ptr)?;
trace!("deallocating: {}", alloc_id);

if offset.bytes() != 0 {
Expand Down Expand Up @@ -330,7 +329,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
*self.tcx,
&mut self.machine,
&mut alloc.extra,
ptr.provenance,
(alloc_id, tag),
alloc_range(Size::ZERO, size),
)?;

Expand All @@ -350,17 +349,17 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ptr: Pointer<Option<M::PointerTag>>,
size: Size,
align: Align,
) -> InterpResult<'tcx, Option<(AllocId, Size, Pointer<M::PointerTag>)>> {
) -> InterpResult<'tcx, Option<(AllocId, Size, M::TagExtra)>> {
let align = M::enforce_alignment(&self).then_some(align);
self.check_and_deref_ptr(
ptr,
size,
align,
CheckInAllocMsg::MemoryAccessTest,
|alloc_id, offset, ptr| {
|alloc_id, offset, tag| {
let (size, align) =
self.get_alloc_size_and_align(alloc_id, AllocCheck::Dereferenceable)?;
Ok((size, align, (alloc_id, offset, ptr)))
Ok((size, align, (alloc_id, offset, tag)))
},
)
}
Expand Down Expand Up @@ -401,11 +400,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
size: Size,
align: Option<Align>,
msg: CheckInAllocMsg,
alloc_size: impl FnOnce(
AllocId,
Size,
Pointer<M::PointerTag>,
) -> InterpResult<'tcx, (Size, Align, T)>,
alloc_size: impl FnOnce(AllocId, Size, M::TagExtra) -> InterpResult<'tcx, (Size, Align, T)>,
) -> InterpResult<'tcx, Option<T>> {
fn check_offset_align(offset: u64, align: Align) -> InterpResult<'static> {
if offset % align.bytes() == 0 {
Expand Down Expand Up @@ -433,8 +428,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
}
None
}
Ok((alloc_id, offset, ptr)) => {
let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, ptr)?;
Ok((alloc_id, offset, tag)) => {
let (alloc_size, alloc_align, ret_val) = alloc_size(alloc_id, offset, tag)?;
// Test bounds. This also ensures non-null.
// It is sufficient to check this for the end pointer. Also check for overflow!
if offset.checked_add(size, &self.tcx).map_or(true, |end| end > alloc_size) {
Expand All @@ -450,10 +445,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// we want the error to be about the bounds.
if let Some(align) = align {
if M::force_int_for_alignment_check(self) {
let addr = Scalar::from_pointer(ptr, &self.tcx)
.to_machine_usize(&self.tcx)
.expect("ptr-to-int cast for align check should never fail");
check_offset_align(addr, align)?;
// `force_int_for_alignment_check` can only be true if `OFFSET_IS_ADDR` is true.
check_offset_align(ptr.addr().bytes(), align)?;
} else {
// Check allocation alignment and offset alignment.
if alloc_align.bytes() < align.bytes() {
Expand Down Expand Up @@ -569,14 +562,14 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
size,
align,
CheckInAllocMsg::MemoryAccessTest,
|alloc_id, offset, ptr| {
|alloc_id, offset, tag| {
let alloc = self.get_alloc_raw(alloc_id)?;
Ok((alloc.size(), alloc.align, (alloc_id, offset, ptr, alloc)))
Ok((alloc.size(), alloc.align, (alloc_id, offset, tag, alloc)))
},
)?;
if let Some((alloc_id, offset, ptr, alloc)) = ptr_and_alloc {
if let Some((alloc_id, offset, tag, alloc)) = ptr_and_alloc {
let range = alloc_range(offset, size);
M::memory_read(*self.tcx, &self.machine, &alloc.extra, ptr.provenance, range)?;
M::memory_read(*self.tcx, &self.machine, &alloc.extra, (alloc_id, tag), range)?;
Ok(Some(AllocRef { alloc, range, tcx: *self.tcx, alloc_id }))
} else {
// Even in this branch we have to be sure that we actually access the allocation, in
Expand Down Expand Up @@ -631,13 +624,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
align: Align,
) -> InterpResult<'tcx, Option<AllocRefMut<'a, 'tcx, M::PointerTag, M::AllocExtra>>> {
let parts = self.get_ptr_access(ptr, size, align)?;
if let Some((alloc_id, offset, ptr)) = parts {
if let Some((alloc_id, offset, tag)) = parts {
let tcx = *self.tcx;
// FIXME: can we somehow avoid looking up the allocation twice here?
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
let range = alloc_range(offset, size);
M::memory_written(tcx, machine, &mut alloc.extra, ptr.provenance, range)?;
M::memory_written(tcx, machine, &mut alloc.extra, (alloc_id, tag), range)?;
Ok(Some(AllocRefMut { alloc, range, tcx, alloc_id }))
} else {
Ok(None)
Expand Down Expand Up @@ -732,7 +725,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
ptr: Pointer<Option<M::PointerTag>>,
) -> InterpResult<'tcx, FnVal<'tcx, M::ExtraFnVal>> {
trace!("get_fn({:?})", ptr);
let (alloc_id, offset, _ptr) = self.ptr_get_alloc_id(ptr)?;
let (alloc_id, offset, _tag) = self.ptr_get_alloc_id(ptr)?;
if offset.bytes() != 0 {
throw_ub!(InvalidFunctionPointer(Pointer::new(alloc_id, offset)))
}
Expand Down Expand Up @@ -1012,16 +1005,16 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// and once below to get the underlying `&[mut] Allocation`.

// Source alloc preparations and access hooks.
let Some((src_alloc_id, src_offset, src)) = src_parts else {
let Some((src_alloc_id, src_offset, src_tag)) = src_parts else {
// Zero-sized *source*, that means dst is also zero-sized and we have nothing to do.
return Ok(());
};
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
let src_range = alloc_range(src_offset, size);
M::memory_read(*tcx, &self.machine, &src_alloc.extra, src.provenance, src_range)?;
M::memory_read(*tcx, &self.machine, &src_alloc.extra, (src_alloc_id, src_tag), src_range)?;
// We need the `dest` ptr for the next operation, so we get it now.
// We already did the source checks and called the hooks so we are good to return early.
let Some((dest_alloc_id, dest_offset, dest)) = dest_parts else {
let Some((dest_alloc_id, dest_offset, dest_tag)) = dest_parts else {
// Zero-sized *destination*.
return Ok(());
};
Expand All @@ -1043,7 +1036,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
// Destination alloc preparations and access hooks.
let (dest_alloc, extra) = self.get_alloc_raw_mut(dest_alloc_id)?;
let dest_range = alloc_range(dest_offset, size * num_copies);
M::memory_written(*tcx, extra, &mut dest_alloc.extra, dest.provenance, dest_range)?;
M::memory_written(
*tcx,
extra,
&mut dest_alloc.extra,
(dest_alloc_id, dest_tag),
dest_range,
)?;
let dest_bytes = dest_alloc
.get_bytes_mut_ptr(&tcx, dest_range)
.map_err(|e| e.to_interp_error(dest_alloc_id))?
Expand Down Expand Up @@ -1164,11 +1163,11 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
pub fn ptr_try_get_alloc_id(
&self,
ptr: Pointer<Option<M::PointerTag>>,
) -> Result<(AllocId, Size, Pointer<M::PointerTag>), u64> {
) -> Result<(AllocId, Size, M::TagExtra), u64> {
match ptr.into_pointer_or_addr() {
Ok(ptr) => {
let (alloc_id, offset) = M::ptr_get_alloc(self, ptr);
Ok((alloc_id, offset, ptr))
let (alloc_id, offset, extra) = M::ptr_get_alloc(self, ptr);
Ok((alloc_id, offset, extra))
}
Err(addr) => Err(addr.bytes()),
}
Expand All @@ -1179,7 +1178,7 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
pub fn ptr_get_alloc_id(
&self,
ptr: Pointer<Option<M::PointerTag>>,
) -> InterpResult<'tcx, (AllocId, Size, Pointer<M::PointerTag>)> {
) -> InterpResult<'tcx, (AllocId, Size, M::TagExtra)> {
self.ptr_try_get_alloc_id(ptr).map_err(|offset| {
err_ub!(DanglingIntPointer(offset, CheckInAllocMsg::InboundsTest)).into()
})
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValidityVisitor<'rt, 'mir, '
if let Some(ref mut ref_tracking) = self.ref_tracking {
// Proceed recursively even for ZST, no reason to skip them!
// `!` is a ZST and we want to validate it.
if let Ok((alloc_id, _offset, _ptr)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
if let Ok((alloc_id, _offset, _tag)) = self.ecx.ptr_try_get_alloc_id(place.ptr) {
// Special handling for pointers to statics (irrespective of their type).
let alloc_kind = self.ecx.tcx.get_global_alloc(alloc_id);
if let Some(GlobalAlloc::Static(did)) = alloc_kind {
Expand Down
17 changes: 17 additions & 0 deletions compiler/rustc_middle/src/mir/interpret/pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@ pub struct Pointer<Tag = AllocId> {
}

static_assert_size!(Pointer, 16);
// `Option<Tag>` pointers are also passed around quite a bit
// (but not stored in permanent machine state).
static_assert_size!(Pointer<Option<AllocId>>, 16);

// We want the `Debug` output to be readable as it is used by `derive(Debug)` for
// all the Miri types.
Expand Down Expand Up @@ -198,12 +201,26 @@ impl<Tag> From<Pointer<Tag>> for Pointer<Option<Tag>> {
}

impl<Tag> Pointer<Option<Tag>> {
/// Convert this pointer that *might* have a tag into a pointer that *definitely* has a tag, or
/// an absolute address.
///
/// This is rarely what you want; call `ptr_try_get_alloc_id` instead.
pub fn into_pointer_or_addr(self) -> Result<Pointer<Tag>, Size> {
match self.provenance {
Some(tag) => Ok(Pointer::new(tag, self.offset)),
None => Err(self.offset),
}
}

/// Returns the absolute address the pointer points to.
/// Only works if Tag::OFFSET_IS_ADDR is true!
pub fn addr(self) -> Size
where
Tag: Provenance,
{
assert!(Tag::OFFSET_IS_ADDR);
self.offset
}
}

impl<Tag> Pointer<Option<Tag>> {
Expand Down

0 comments on commit 9d9d591

Please sign in to comment.