Skip to content

Commit

Permalink
Auto merge of rust-lang#129778 - RalfJung:interp-lossy-typed-copy, r=…
Browse files Browse the repository at this point in the history
…saethlin

interpret: make typed copies lossy wrt provenance and padding

A "typed copy" in Rust can be a lossy process: when copying at type `usize` (or any other non-pointer type), if the original memory had any provenance, that provenance is lost. When copying at pointer type, if the original memory had partial provenance (i.e., not the same provenance for all bytes), that provenance is lost. When copying any type with padding, the contents of padding are lost.

This PR equips our validity-checking pass with the ability to reset provenance and padding according to those rules. Can be reviewed commit-by-commit. The first three commits are just preparation without any functional change.

Fixes rust-lang/miri#845
Fixes rust-lang/miri#2182
  • Loading branch information
bors committed Sep 10, 2024
2 parents 712463d + 0a70924 commit 304b7f8
Show file tree
Hide file tree
Showing 49 changed files with 1,246 additions and 310 deletions.
4 changes: 2 additions & 2 deletions compiler/rustc_const_eval/src/const_eval/eval_queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ fn eval_body_using_ecx<'tcx, R: InterpretationResult<'tcx>>(
let intern_result = intern_const_alloc_recursive(ecx, intern_kind, &ret);

// Since evaluation had no errors, validate the resulting constant.
const_validate_mplace(&ecx, &ret, cid)?;
const_validate_mplace(ecx, &ret, cid)?;

// Only report this after validation, as validaiton produces much better diagnostics.
// FIXME: ensure validation always reports this and stop making interning care about it.
Expand Down Expand Up @@ -391,7 +391,7 @@ fn eval_in_interpreter<'tcx, R: InterpretationResult<'tcx>>(

#[inline(always)]
fn const_validate_mplace<'tcx>(
ecx: &InterpCx<'tcx, CompileTimeMachine<'tcx>>,
ecx: &mut InterpCx<'tcx, CompileTimeMachine<'tcx>>,
mplace: &MPlaceTy<'tcx>,
cid: GlobalId<'tcx>,
) -> Result<(), ErrorHandled> {
Expand Down
27 changes: 22 additions & 5 deletions compiler/rustc_const_eval/src/const_eval/machine.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
use std::borrow::Borrow;
use std::borrow::{Borrow, Cow};
use std::fmt;
use std::hash::Hash;
use std::ops::ControlFlow;

use rustc_ast::Mutability;
use rustc_data_structures::fx::{FxIndexMap, IndexEntry};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap, IndexEntry};
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_hir::{self as hir, LangItem, CRATE_HIR_ID};
use rustc_middle::mir::AssertMessage;
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout};
use rustc_middle::ty::{self, TyCtxt};
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_middle::{bug, mir};
use rustc_span::symbol::{sym, Symbol};
use rustc_span::Span;
Expand All @@ -24,8 +24,8 @@ use crate::fluent_generated as fluent;
use crate::interpret::{
self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom, throw_unsup,
throw_unsup_format, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame,
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar,
StackPopCleanup,
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic,
RangeSet, Scalar, StackPopCleanup,
};

/// When hitting this many interpreted terminators we emit a deny by default lint
Expand Down Expand Up @@ -65,6 +65,9 @@ pub struct CompileTimeMachine<'tcx> {
/// storing the result in the given `AllocId`.
/// Used to prevent reads from a static's base allocation, as that may allow for self-initialization loops.
pub(crate) static_root_ids: Option<(AllocId, LocalDefId)>,

/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
}

#[derive(Copy, Clone)]
Expand Down Expand Up @@ -99,6 +102,7 @@ impl<'tcx> CompileTimeMachine<'tcx> {
can_access_mut_global,
check_alignment,
static_root_ids: None,
union_data_ranges: FxHashMap::default(),
}
}
}
Expand Down Expand Up @@ -766,6 +770,19 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> {
}
Ok(())
}

fn cached_union_data_range<'e>(
ecx: &'e mut InterpCx<'tcx, Self>,
ty: Ty<'tcx>,
compute_range: impl FnOnce() -> RangeSet,
) -> Cow<'e, RangeSet> {
if ecx.tcx.sess.opts.unstable_opts.extra_const_ub_checks {
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
} else {
// Don't bother caching, we're only doing one validation at the end anyway.
Cow::Owned(compute_range())
}
}
}

// Please do not add any code below the above `Machine` trait impl. I (oli-obk) plan more cleanups
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_const_eval/src/interpret/discriminant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use rustc_target::abi::{self, TagEncoding, VariantIdx, Variants};
use tracing::{instrument, trace};

use super::{
err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Readable, Scalar, Writeable,
err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Projectable, Scalar, Writeable,
};

impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
Expand Down Expand Up @@ -60,7 +60,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
#[instrument(skip(self), level = "trace")]
pub fn read_discriminant(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, VariantIdx> {
let ty = op.layout().ty;
trace!("read_discriminant_value {:#?}", op.layout());
Expand Down
12 changes: 11 additions & 1 deletion compiler/rustc_const_eval/src/interpret/machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use rustc_apfloat::{Float, FloatConvert};
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_middle::ty::Ty;
use rustc_middle::{mir, ty};
use rustc_span::def_id::DefId;
use rustc_span::Span;
Expand All @@ -19,7 +20,7 @@ use rustc_target::spec::abi::Abi as CallAbi;
use super::{
throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation,
ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, RangeSet, CTFE_ALLOC_SALT,
};

/// Data returned by [`Machine::after_stack_pop`], and consumed by
Expand Down Expand Up @@ -578,6 +579,15 @@ pub trait Machine<'tcx>: Sized {
ecx: &InterpCx<'tcx, Self>,
instance: Option<ty::Instance<'tcx>>,
) -> usize;

fn cached_union_data_range<'e>(
_ecx: &'e mut InterpCx<'tcx, Self>,
_ty: Ty<'tcx>,
compute_range: impl FnOnce() -> RangeSet,
) -> Cow<'e, RangeSet> {
// Default to no caching.
Cow::Owned(compute_range())
}
}

/// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines
Expand Down
49 changes: 36 additions & 13 deletions compiler/rustc_const_eval/src/interpret/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::cell::Cell;
use std::collections::VecDeque;
use std::{fmt, ptr};
use std::{fmt, mem, ptr};

use rustc_ast::Mutability;
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
Expand Down Expand Up @@ -118,7 +117,7 @@ pub struct Memory<'tcx, M: Machine<'tcx>> {
/// This stores whether we are currently doing reads purely for the purpose of validation.
/// Those reads do not trigger the machine's hooks for memory reads.
/// Needless to say, this must only be set with great care!
validation_in_progress: Cell<bool>,
validation_in_progress: bool,
}

/// A reference to some allocation that was already bounds-checked for the given region
Expand All @@ -145,7 +144,7 @@ impl<'tcx, M: Machine<'tcx>> Memory<'tcx, M> {
alloc_map: M::MemoryMap::default(),
extra_fn_ptr_map: FxIndexMap::default(),
dead_alloc_map: FxIndexMap::default(),
validation_in_progress: Cell::new(false),
validation_in_progress: false,
}
}

Expand Down Expand Up @@ -682,15 +681,15 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
// We want to call the hook on *all* accesses that involve an AllocId, including zero-sized
// accesses. That means we cannot rely on the closure above or the `Some` branch below. We
// do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked.
if !self.memory.validation_in_progress.get() {
if !self.memory.validation_in_progress {
if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr, size_i64) {
M::before_alloc_read(self, alloc_id)?;
}
}

if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc {
let range = alloc_range(offset, size);
if !self.memory.validation_in_progress.get() {
if !self.memory.validation_in_progress {
M::before_memory_read(
self.tcx,
&self.machine,
Expand Down Expand Up @@ -766,11 +765,14 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let parts = self.get_ptr_access(ptr, size)?;
if let Some((alloc_id, offset, prov)) = parts {
let tcx = self.tcx;
let validation_in_progress = self.memory.validation_in_progress;
// FIXME: can we somehow avoid looking up the allocation twice here?
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
let range = alloc_range(offset, size);
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
if !validation_in_progress {
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
}
Ok(Some(AllocRefMut { alloc, range, tcx: *tcx, alloc_id }))
} else {
Ok(None)
Expand Down Expand Up @@ -1014,16 +1016,16 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
///
/// We do this so Miri's allocation access tracking does not show the validation
/// reads as spurious accesses.
pub fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
pub fn run_for_validation<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
// This deliberately uses `==` on `bool` to follow the pattern
// `assert!(val.replace(new) == old)`.
assert!(
self.memory.validation_in_progress.replace(true) == false,
mem::replace(&mut self.memory.validation_in_progress, true) == false,
"`validation_in_progress` was already set"
);
let res = f();
let res = f(self);
assert!(
self.memory.validation_in_progress.replace(false) == true,
mem::replace(&mut self.memory.validation_in_progress, false) == true,
"`validation_in_progress` was unset by someone else"
);
res
Expand Down Expand Up @@ -1115,6 +1117,10 @@ impl<'a, 'tcx, M: Machine<'tcx>> std::fmt::Debug for DumpAllocs<'a, 'tcx, M> {
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
{
pub fn as_ref<'b>(&'b self) -> AllocRef<'b, 'tcx, Prov, Extra, Bytes> {
AllocRef { alloc: self.alloc, range: self.range, tcx: self.tcx, alloc_id: self.alloc_id }
}

/// `range` is relative to this allocation reference, not the base of the allocation.
pub fn write_scalar(&mut self, range: AllocRange, val: Scalar<Prov>) -> InterpResult<'tcx> {
let range = self.range.subrange(range);
Expand All @@ -1130,13 +1136,30 @@ impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
self.write_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size), val)
}

/// Mark the given sub-range (relative to this allocation reference) as uninitialized.
pub fn write_uninit(&mut self, range: AllocRange) -> InterpResult<'tcx> {
let range = self.range.subrange(range);
Ok(self
.alloc
.write_uninit(&self.tcx, range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}

/// Mark the entire referenced range as uninitialized
pub fn write_uninit(&mut self) -> InterpResult<'tcx> {
pub fn write_uninit_full(&mut self) -> InterpResult<'tcx> {
Ok(self
.alloc
.write_uninit(&self.tcx, self.range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}

/// Remove all provenance in the reference range.
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
Ok(self
.alloc
.clear_provenance(&self.tcx, self.range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}
}

impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> {
Expand Down Expand Up @@ -1278,7 +1301,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
};
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
let src_range = alloc_range(src_offset, size);
assert!(!self.memory.validation_in_progress.get(), "we can't be copying during validation");
assert!(!self.memory.validation_in_progress, "we can't be copying during validation");
M::before_memory_read(
tcx,
&self.machine,
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_const_eval/src/interpret/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ pub(crate) use self::intrinsics::eval_nullary_intrinsic;
pub use self::machine::{compile_time_machine, AllocMap, Machine, MayLeak, ReturnAction};
pub use self::memory::{AllocKind, AllocRef, AllocRefMut, FnVal, Memory, MemoryKind};
use self::operand::Operand;
pub use self::operand::{ImmTy, Immediate, OpTy, Readable};
pub use self::operand::{ImmTy, Immediate, OpTy};
pub use self::place::{MPlaceTy, MemPlaceMeta, PlaceTy, Writeable};
use self::place::{MemPlace, Place};
pub use self::projection::{OffsetMode, Projectable};
pub use self::stack::{Frame, FrameInfo, LocalState, StackPopCleanup, StackPopInfo};
pub(crate) use self::util::create_static_alloc;
pub use self::validity::{CtfeValidationMode, RefTracking};
pub use self::validity::{CtfeValidationMode, RangeSet, RefTracking};
pub use self::visitor::ValueVisitor;
Loading

0 comments on commit 304b7f8

Please sign in to comment.