Description
I'm trying to build a small Linux container runtime as part of another project. I'd like to do the moral equivalent of the following (extracted out and untested):
fn spawn_in_container(cmd: std::process::Command) -> anyhow::Result<u32> {
let mut args = clone3::Clone3::default();
args.exit_signal(libc::SIGCHLD as _)
.flag_newuser()
.flag_newns()
.flag_newpid();
match unsafe { args.call().context("clone3")? } {
0 => unsafe { self.child_after_fork(cmd) },
pid => return Ok(pid),
}
}
/// Runs in the child after the clone: performs container setup and then
/// replaces the process image with `cmd`. Never returns; aborts if the exec
/// fails.
///
/// # Safety
///
/// Must only be called in the child between `fork`/`clone` and `exec`. In
/// that window the child must not allocate or do anything else that is not
/// permitted there.
unsafe fn child_after_fork(mut cmd: std::process::Command) -> ! {
    // ... various container setup

    // If successful, this never returns. `exec` takes `&mut self`, hence the
    // `mut` binding above.
    // NOTE: `Command::exec` itself can allocate (it clones environment
    // entries in `capture_env`), which is the deadlock this report is about.
    let _err = cmd.exec();

    // exec failed; there is nothing safe to unwind into in the child.
    std::process::abort();
}
`do_exec` in `process_unix.rs` makes a big deal about the (un)safety of this operation, so I thought that it would be safe to use `Command::exec`. Unfortunately, I just caught a deadlock:
#0 0x000072ca4efb0c0b in __lll_lock_wait_private () from target:/usr/lib/libc.so.6
#1 0x000072ca4efc5138 in malloc () from target:/usr/lib/libc.so.6
#2 0x00006458e3b79d7f in alloc::alloc::alloc () at library/alloc/src/alloc.rs:100
#3 alloc::alloc::Global::alloc_impl () at library/alloc/src/alloc.rs:183
#4 alloc::alloc::{impl#1}::allocate () at library/alloc/src/alloc.rs:243
#5 alloc::raw_vec::RawVec::try_allocate_in<u8, alloc::alloc::Global> () at library/alloc/src/raw_vec.rs:230
#6 alloc::raw_vec::RawVec::with_capacity_in<u8, alloc::alloc::Global> () at library/alloc/src/raw_vec.rs:158
#7 alloc::vec::Vec::with_capacity_in<u8, alloc::alloc::Global> () at library/alloc/src/vec/mod.rs:699
#8 alloc::slice::hack::{impl#1}::to_vec<u8, alloc::alloc::Global> () at library/alloc/src/slice.rs:162
#9 alloc::slice::hack::to_vec<u8, alloc::alloc::Global> () at library/alloc/src/slice.rs:111
#10 alloc::slice::{impl#0}::to_vec_in<u8, alloc::alloc::Global> () at library/alloc/src/slice.rs:478
#11 alloc::vec::{impl#11}::clone<u8, alloc::alloc::Global> () at library/alloc/src/vec/mod.rs:2843
#12 std::sys::os_str::bytes::{impl#4}::clone () at library/std/src/sys/os_str/bytes.rs:73
#13 std::ffi::os_str::{impl#10}::clone () at library/std/src/ffi/os_str.rs:641
#14 std::sys_common::process::CommandEnv::capture () at library/std/src/sys_common/process.rs:45
#15 std::sys_common::process::CommandEnv::capture_if_changed () at library/std/src/sys_common/process.rs:58
#16 std::sys::pal::unix::process::process_common::Command::capture_env () at library/std/src/sys/pal/unix/process/process_common.rs:363
#17 0x00006458e3b71913 in std::sys::pal::unix::process::process_common::Command::exec () at library/std/src/sys/pal/unix/process/process_unix.rs:237
#18 std::os::unix::process::{impl#0}::exec () at library/std/src/os/unix/process.rs:227
Something in `capture_env` is allocating, which violates the rules around what you're allowed to do between `fork` or `clone` and `exec`.
As far as I can tell, this isn't documented one way or the other. So maybe this is a documentation bug, or I missed the documentation. Still, the amount of surface area that has the potential to allocate seems very small here - maybe the allocation could be avoided? That would let me and others use the stdlib `Command` for this use-case, which would be pretty nice.
Meta
`rustc --version --verbose`:
rustc 1.81.0 (eeb90cda1 2024-09-04)
binary: rustc
commit-hash: eeb90cda1969383f56a2637cbd3037bdf598841c
commit-date: 2024-09-04
host: x86_64-unknown-linux-gnu
release: 1.81.0
LLVM version: 18.1.7