From 8d57764dc90731e48560a8df9d098e21066bb00d Mon Sep 17 00:00:00 2001 From: David Anekstein Date: Tue, 29 Aug 2023 13:00:27 -0400 Subject: [PATCH 1/3] Support intermediate casts in calls. In lighttpd there are intermediate casts where the final cast is an argument to memcpy. Prior to this, only one cast was exempted, the final one. Now, all casts are skipped/allowed as long as the final cast serves as input to a function like `memcpy` or `free` or `realloc`. --- c2rust-analyze/src/c_void_casts.rs | 75 +++++++++++++++++----- c2rust-analyze/src/dataflow/type_check.rs | 32 ++++++--- c2rust-analyze/tests/filecheck/algo_md5.rs | 66 +++++++++---------- 3 files changed, 115 insertions(+), 58 deletions(-) diff --git a/c2rust-analyze/src/c_void_casts.rs b/c2rust-analyze/src/c_void_casts.rs index 7b9795c57..14c601c86 100644 --- a/c2rust-analyze/src/c_void_casts.rs +++ b/c2rust-analyze/src/c_void_casts.rs @@ -2,6 +2,7 @@ use std::borrow::Borrow; use std::collections::{HashMap, HashSet}; use rustc_middle::mir::{BasicBlock, LocalKind}; +use rustc_middle::ty::Ty; use rustc_middle::{ mir::{ BasicBlockData, Body, LocalDecls, Location, Place, Rvalue, Statement, StatementKind, @@ -53,6 +54,16 @@ impl CVoidCastDirection { } } +pub fn is_c_void_ptr(tcx: TyCtxt, ty: Ty) -> bool { + if let Some(TyKind::Adt(adt, _)) = ty.builtin_deref(true).map(|ty_mut| ty_mut.ty.kind()) { + let def_path = tcx.def_path(adt.did()).data[0].to_string(); + let item_name = tcx.item_name(adt.did()); + def_path == "ffi" && item_name.as_str() == "c_void" + } else { + false + } +} + /// The [`Place`] of a [`*c_void`]. /// /// It is checked to be a [`*c_void`] upon construction. @@ -77,14 +88,8 @@ impl<'tcx> CVoidPtr<'tcx> { local_decls: &LocalDecls<'tcx>, tcx: TyCtxt<'tcx>, ) -> Option { - let deref_ty = place.ty(local_decls, tcx).ty.builtin_deref(true)?; - - if let TyKind::Adt(adt, _) = deref_ty.ty.kind() { - if tcx.def_path(adt.did()).data[0].to_string() == "ffi" - && tcx.item_name(adt.did()).as_str() == "c_void" - { - return Some(Self { place }); - } + if is_c_void_ptr(tcx, place.ty(local_decls, tcx).ty) { + return Some(Self { place }); } None @@ -251,6 +256,27 @@ pub struct CVoidCasts<'tcx> { to: CVoidCastsUniDirectional<'tcx>, } +pub fn rv_place<'tcx>(rv: &Rvalue<'tcx>) -> Option> { + use Rvalue::*; + match rv { + Use(op) => op.place(), + Repeat(op, _) => op.place(), + Ref(_, _, p) => Some(*p), + // ThreadLocalRef + AddressOf(_, p) => Some(*p), + Len(p) => Some(*p), + Cast(_, op, _) => op.place(), + // BinaryOp + // CheckedBinaryOp + // NullaryOp + UnaryOp(_, op) => op.place(), + Discriminant(p) => Some(*p), + // Aggregate + ShallowInitBox(op, _) => op.place(), + _ => None, + } +} + impl<'tcx> CVoidCasts<'tcx> { pub fn direction(&self, direction: CVoidCastDirection) -> &CVoidCastsUniDirectional<'tcx> { use CVoidCastDirection::*; @@ -407,6 +433,8 @@ impl<'tcx> CVoidCasts<'tcx> { /// * `calloc` /// * `realloc` /// * `free` + /// * `memcpy` + /// * `memset` /// /// and insert their casts to and from [`*c_void`]. /// @@ -511,14 +539,36 @@ impl<'tcx> CVoidCasts<'tcx> { let mut inserted_places = HashSet::new(); let mut modifying_statements = Vec::new(); - for (current_block, current_block_data) in body.basic_blocks().iter_enumerated() { + let mut current_place = c_void_ptr.place; + + for (current_block, current_block_data) in body.basic_blocks().iter_enumerated().rev() { + for (index, stmt) in current_block_data.statements.iter().enumerate().rev() { + if let Some((_, rhs)) = + get_assign_sides(stmt).filter(|(lhs, _)| *lhs == current_place) + { + // ancestors of a *libc::c_void are given special treatment; + // casts between these ancestors are exempt from scrutiny + if let Some(place) = rv_place(rhs) { + if get_cast_place(rhs).is_some() { + let location = Location { + statement_index: index, + block: current_block, + }; + self.insert_cast(direction, location); + } + + current_place = place; + } + } + } + modifying_statements.extend(Self::find_modifying_assignments( current_block, current_block_data, &c_void_ptr, )); - if let Some((statement_index, cast)) = + if let Some((_, cast)) = Self::find_last_cast(¤t_block_data.statements, c_void_ptr) { assert!( @@ -530,11 +580,6 @@ impl<'tcx> CVoidCasts<'tcx> { body.local_kind(c_void_ptr.place.local) == LocalKind::Temp, "Unsupported cast into non-temporary local" ); - let location = Location { - statement_index, - block: current_block, - }; - self.insert_cast(direction, location); self.insert_call(direction, terminator_location(current_block, bb_data), cast); } } diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index 3b7ae89b0..b35646029 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -1,5 +1,5 @@ use super::DataflowConstraints; -use crate::c_void_casts::CVoidCastDirection; +use crate::c_void_casts::{is_c_void_ptr, CVoidCastDirection}; use crate::context::{AnalysisCtxt, LTy, PermissionSet, PointerId}; use crate::panic_detail; use crate::util::{ @@ -143,9 +143,17 @@ impl<'tcx> TypeChecker<'tcx, '_> { match is_transmutable_ptr_cast(from_ty, to_ty) { Some(true) => { // TODO add other dataflow constraints - }, - Some(false) => ::log::error!("TODO: unsupported ptr-to-ptr cast between pointee types not yet supported as safely transmutable: `{from_ty:?} as {to_ty:?}`"), - None => {}, // not a ptr cast (no dataflow constraints needed); let rustc typeck this + } + Some(false) => { + if is_c_void_ptr(self.acx.tcx(), to_lty.ty) { + // allow casts to c_void + self.do_assign_pointer_ids(to_lty.label, from_lty.label); + } else { + ::log::error!("TODO: unsupported ptr-to-ptr cast between pointee types not yet supported as safely transmutable: `{from_ty:?} as {to_ty:?}`"); + } + } + + None => {} // not a ptr cast (no dataflow constraints needed); let rustc typeck this }; } } @@ -521,22 +529,28 @@ impl<'tcx> TypeChecker<'tcx, '_> { let src_ptr = args[1] .place() .expect("Casts to/from null pointer are not yet supported"); - let src_ptr = self.acx.c_void_casts.get_adjusted_place_or_default_to( + let src_ptr_casted_from = self.acx.c_void_casts.get_adjusted_place_or_default_to( loc, CVoidCastDirection::To, src_ptr, ); + self.visit_place(out_ptr, Mutability::Mut); let dest_ptr_lty = self.acx.type_of(out_ptr); assert!(args.len() == 3); - self.visit_place(src_ptr, Mutability::Not); - let src_ptr_lty = self.acx.type_of(src_ptr); + self.visit_place(src_ptr_casted_from, Mutability::Not); + let src_ptr_casted_lty = self.acx.type_of(src_ptr_casted_from); // input needs READ permission let perms = PermissionSet::READ; - self.constraints.add_all_perms(src_ptr_lty.label, perms); + self.constraints + .add_all_perms(src_ptr_casted_lty.label, perms); - // Perform a pseudo-assignment for *dest = *src + // Perform a pseudo-assignment for *dest = *src. + // We use `src_ptr` instead of `src_ptr_casted_from` because the type that was + // casted to the libc::c_void_ptr that `memcpy` takes likely differs from the + // type that's pointed to by `dest_ptr` + let src_ptr_lty = self.acx.type_of(src_ptr); self.do_equivalence_nested(dest_ptr_lty.args[0], src_ptr_lty.args[0]); } Callee::Memset => { diff --git a/c2rust-analyze/tests/filecheck/algo_md5.rs b/c2rust-analyze/tests/filecheck/algo_md5.rs index d04a2fd53..61736d0be 100644 --- a/c2rust-analyze/tests/filecheck/algo_md5.rs +++ b/c2rust-analyze/tests/filecheck/algo_md5.rs @@ -1,7 +1,6 @@ #![feature(rustc_private)] extern crate libc; - extern "C" { fn memcpy(_: *mut libc::c_void, _: *const libc::c_void, _: libc::c_ulong) -> *mut libc::c_void; fn memset(_: *mut libc::c_void, _: libc::c_int, _: libc::c_ulong) -> *mut libc::c_void; @@ -81,8 +80,6 @@ static mut PADDING: [libc::c_uchar; 64] = [ 0 as libc::c_int as libc::c_uchar, 0 as libc::c_int as libc::c_uchar, ]; - -// CHECK-LABEL: MD5_Init #[no_mangle] pub unsafe extern "C" fn MD5_Init(mut context: *mut MD5_CTX) { (*context).count[1 as libc::c_int as usize] = 0 as libc::c_int as uint32_t; @@ -95,13 +92,13 @@ pub unsafe extern "C" fn MD5_Init(mut context: *mut MD5_CTX) { #[no_mangle] pub unsafe extern "C" fn MD5_Update( mut context: *mut MD5_CTX, - mut input: *const libc::c_void, + mut _input: *const libc::c_void, mut inputLen: libc::c_uint, ) { let mut i: libc::c_uint = 0; let mut ndx: libc::c_uint = 0; let mut partLen: libc::c_uint = 0; - // let mut input: *const libc::c_uchar = _input as *const libc::c_uchar; + let mut input: *const libc::c_uchar = _input as *const libc::c_uchar; ndx = (*context).count[0 as libc::c_int as usize] >> 3 as libc::c_int & 0x3f as libc::c_int as libc::c_uint; (*context).count[0 as libc::c_int as usize] = @@ -119,7 +116,7 @@ pub unsafe extern "C" fn MD5_Update( memcpy( &mut *((*context).buffer).as_mut_ptr().offset(ndx as isize) as *mut libc::c_uchar as *mut libc::c_void, - input /* as *mut libc::c_uchar */ as *const libc::c_void, + input as *mut libc::c_uchar as *const libc::c_void, partLen as libc::c_ulong, ); li_MD5Transform( @@ -128,31 +125,31 @@ pub unsafe extern "C" fn MD5_Update( ); i = partLen; while i.wrapping_add(63 as libc::c_int as libc::c_uint) < inputLen { - // li_MD5Transform(((*context).state).as_mut_ptr(), &*input.offset(i as isize)); + li_MD5Transform(((*context).state).as_mut_ptr(), &*input.offset(i as isize)); i = i.wrapping_add(64 as libc::c_int as libc::c_uint); } ndx = 0 as libc::c_int as libc::c_uint; } else { i = 0 as libc::c_int as libc::c_uint; } - // memcpy( - // &mut *((*context).buffer).as_mut_ptr().offset(ndx as isize) as *mut libc::c_uchar - // as *mut libc::c_void, - // &*input.offset(i as isize) as *const libc::c_uchar as *mut libc::c_uchar - // as *const libc::c_void, - // inputLen.wrapping_sub(i) as libc::c_ulong, - // ); + memcpy( + &mut *((*context).buffer).as_mut_ptr().offset(ndx as isize) as *mut libc::c_uchar + as *mut libc::c_void, + &*input.offset(i as isize) as *const libc::c_uchar as *mut libc::c_uchar + as *const libc::c_void, + inputLen.wrapping_sub(i) as libc::c_ulong, + ); } #[no_mangle] pub unsafe extern "C" fn MD5_Final(mut digest: *mut libc::c_uchar, mut context: *mut MD5_CTX) { let mut bits: [libc::c_uchar; 8] = [0; 8]; let mut ndx: libc::c_uint = 0; let mut padLen: libc::c_uint = 0; - // Encode( - // bits.as_mut_ptr(), - // ((*context).count).as_mut_ptr(), - // 8 as libc::c_int as libc::c_uint, - // ); + Encode( + bits.as_mut_ptr(), + ((*context).count).as_mut_ptr(), + 8 as libc::c_int as libc::c_uint, + ); ndx = (*context).count[0 as libc::c_int as usize] >> 3 as libc::c_int & 0x3f as libc::c_int as libc::c_uint; padLen = if ndx < 56 as libc::c_int as libc::c_uint { @@ -160,20 +157,19 @@ pub unsafe extern "C" fn MD5_Final(mut digest: *mut libc::c_uchar, mut context: } else { (120 as libc::c_int as libc::c_uint).wrapping_sub(ndx) }; - // MD5_Update(context, PADDING.as_mut_ptr(),// as *const libc::c_void, - // padLen); - // MD5_Update( - // context, - // bits.as_mut_ptr(), // as *const libc::c_void, - // 8 as libc::c_int as libc::c_uint, - // ); - // Encode( - // digest, - // ((*context).state).as_mut_ptr(), - // 16 as libc::c_int as libc::c_uint, - // ); + MD5_Update(context, PADDING.as_mut_ptr() as *const libc::c_void, padLen); + MD5_Update( + context, + bits.as_mut_ptr() as *const libc::c_void, + 8 as libc::c_int as libc::c_uint, + ); + Encode( + digest, + ((*context).state).as_mut_ptr(), + 16 as libc::c_int as libc::c_uint, + ); memset( - context /* as *mut libc::c_uchar */ as *mut libc::c_void, + context as *mut libc::c_uchar as *mut libc::c_void, 0 as libc::c_int, ::std::mem::size_of::() as libc::c_ulong, ); @@ -184,7 +180,7 @@ unsafe extern "C" fn li_MD5Transform(mut state: *mut uint32_t, mut block: *const let mut c: uint32_t = *state.offset(2 as libc::c_int as isize); let mut d: uint32_t = *state.offset(3 as libc::c_int as isize); let mut x: [uint32_t; 16] = [0; 16]; - // Decode(x.as_mut_ptr(), block, 64 as libc::c_int as libc::c_uint); + Decode(x.as_mut_ptr(), block, 64 as libc::c_int as libc::c_uint); a = (a as libc::c_uint).wrapping_add( (b & c | !b & d) .wrapping_add(x[0 as libc::c_int as usize]) @@ -642,7 +638,7 @@ unsafe extern "C" fn li_MD5Transform(mut state: *mut uint32_t, mut block: *const let ref mut fresh3 = *state.offset(3 as libc::c_int as isize); *fresh3 = (*fresh3 as libc::c_uint).wrapping_add(d) as uint32_t as uint32_t; memset( - x.as_mut_ptr() /* as *mut libc::c_uchar */ as *mut libc::c_void, + x.as_mut_ptr() as *mut libc::c_uchar as *mut libc::c_void, 0 as libc::c_int, ::std::mem::size_of::<[uint32_t; 16]>() as libc::c_ulong, ); @@ -672,6 +668,8 @@ unsafe extern "C" fn Encode( j = j.wrapping_add(4 as libc::c_int as libc::c_uint); } } + +// CHECK-LABEL: Decode unsafe extern "C" fn Decode( mut output: *mut uint32_t, mut input: *const libc::c_uchar, From a1fbda0dd6fb12d61ea61d9abde0d2594b302368 Mon Sep 17 00:00:00 2001 From: David Anekstein Date: Mon, 18 Sep 2023 19:00:59 -0400 Subject: [PATCH 2/3] get source place of Use or Copy instead of the Place of any arbitrary Rvalue --- c2rust-analyze/src/c_void_casts.rs | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/c2rust-analyze/src/c_void_casts.rs b/c2rust-analyze/src/c_void_casts.rs index 14c601c86..a27c5aac7 100644 --- a/c2rust-analyze/src/c_void_casts.rs +++ b/c2rust-analyze/src/c_void_casts.rs @@ -256,23 +256,12 @@ pub struct CVoidCasts<'tcx> { to: CVoidCastsUniDirectional<'tcx>, } -pub fn rv_place<'tcx>(rv: &Rvalue<'tcx>) -> Option> { +/// Gets the [`Place`] associated with [`Rvalue::Use`] or [`Rvalue::Cast`], and returns `None` otherwise. +pub fn source_place<'tcx>(rv: &Rvalue<'tcx>) -> Option> { use Rvalue::*; match rv { Use(op) => op.place(), - Repeat(op, _) => op.place(), - Ref(_, _, p) => Some(*p), - // ThreadLocalRef - AddressOf(_, p) => Some(*p), - Len(p) => Some(*p), Cast(_, op, _) => op.place(), - // BinaryOp - // CheckedBinaryOp - // NullaryOp - UnaryOp(_, op) => op.place(), - Discriminant(p) => Some(*p), - // Aggregate - ShallowInitBox(op, _) => op.place(), _ => None, } } @@ -548,7 +537,7 @@ impl<'tcx> CVoidCasts<'tcx> { { // ancestors of a *libc::c_void are given special treatment; // casts between these ancestors are exempt from scrutiny - if let Some(place) = rv_place(rhs) { + if let Some(place) = source_place(rhs) { if get_cast_place(rhs).is_some() { let location = Location { statement_index: index, From 8eddd725546f1e804debf4d8bf0a2a428bf695d8 Mon Sep 17 00:00:00 2001 From: David Anekstein Date: Mon, 18 Sep 2023 19:03:54 -0400 Subject: [PATCH 3/3] docs --- c2rust-analyze/src/c_void_casts.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/c2rust-analyze/src/c_void_casts.rs b/c2rust-analyze/src/c_void_casts.rs index a27c5aac7..0646ab716 100644 --- a/c2rust-analyze/src/c_void_casts.rs +++ b/c2rust-analyze/src/c_void_casts.rs @@ -256,7 +256,8 @@ pub struct CVoidCasts<'tcx> { to: CVoidCastsUniDirectional<'tcx>, } -/// Gets the [`Place`] associated with [`Rvalue::Use`] or [`Rvalue::Cast`], and returns `None` otherwise. +/// Gets the [`Place`] associated with [`Rvalue::Use`] or [`Rvalue::Cast`] +/// and returns `None` otherwise. pub fn source_place<'tcx>(rv: &Rvalue<'tcx>) -> Option> { use Rvalue::*; match rv {