Skip to content

std: stop using TLS in signal handler #140628

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 85 additions & 58 deletions library/std/src/sys/pal/unix/stack_overflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,35 @@ impl Drop for Handler {
}
}

#[cfg(any(
target_os = "linux",
target_os = "freebsd",
target_os = "hurd",
target_os = "macos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "illumos",
#[cfg(all(
not(miri),
any(
target_os = "linux",
target_os = "freebsd",
target_os = "hurd",
target_os = "macos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "illumos",
),
))]
mod thread_info;

// miri doesn't model signals, and this code has some synchronization properties
// that we don't want to expose to user code.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// that we don't want to expose to user code.
// that we don't want to expose to user code. Also, Miri fundamentally cannot even have a stack overflow.

#[cfg(all(
not(miri),
any(
target_os = "linux",
target_os = "freebsd",
target_os = "hurd",
target_os = "macos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "illumos",
)
))]
mod imp {
use libc::{
Expand All @@ -46,22 +66,13 @@ mod imp {
use libc::{mmap64, mprotect, munmap};

use super::Handler;
use crate::cell::Cell;
use super::thread_info::{delete_current_info, set_current_info, with_current_info};
use crate::ops::Range;
use crate::sync::OnceLock;
use crate::sync::atomic::{Atomic, AtomicBool, AtomicPtr, AtomicUsize, Ordering};
use crate::sys::pal::unix::os;
use crate::{io, mem, ptr, thread};

// We use a TLS variable to store the address of the guard page. While TLS
// variables are not guaranteed to be signal-safe, this works out in practice
// since we make sure to write to the variable before the signal stack is
// installed, thereby ensuring that the variable is always allocated when
// the signal handler is called.
thread_local! {
// FIXME: use `Range` once that implements `Copy`.
static GUARD: Cell<(usize, usize)> = const { Cell::new((0, 0)) };
}
use crate::thread::with_current_name;
use crate::{io, mem, panic, ptr};

// Signal handler for the SIGSEGV and SIGBUS handlers. We've got guard pages
// (unmapped pages) at the end of every thread's stack, so if a thread ends
Expand Down Expand Up @@ -93,29 +104,35 @@ mod imp {
info: *mut libc::siginfo_t,
_data: *mut libc::c_void,
) {
let (start, end) = GUARD.get();
// SAFETY: this pointer is provided by the system and will always point to a valid `siginfo_t`.
let addr = unsafe { (*info).si_addr().addr() };
let fault_addr = unsafe { (*info).si_addr().addr() };

// `with_current_info` expects that the process aborts after it is
// called. If the signal was not caused by a memory access, this might
// not be true. We detect this by noticing that the `si_addr` field is
// zero if the signal is synthetic.
if fault_addr != 0 {
with_current_info(|thread_info| {
// If the faulting address is within the guard page, then we print a
// message saying so and abort.
if let Some(thread_info) = thread_info
&& thread_info.guard_page_range.contains(&fault_addr)
{
let name = thread_info.thread_name.as_deref().unwrap_or("<unknown>");
rtprintpanic!("\nthread '{name}' has overflowed its stack\n");
rtabort!("stack overflow");
}
})
}

// If the faulting address is within the guard page, then we print a
// message saying so and abort.
if start <= addr && addr < end {
thread::with_current_name(|name| {
let name = name.unwrap_or("<unknown>");
rtprintpanic!("\nthread '{name}' has overflowed its stack\n");
});
// Unregister ourselves by reverting back to the default behavior.
// SAFETY: assuming all platforms define struct sigaction as "zero-initializable"
let mut action: sigaction = unsafe { mem::zeroed() };
action.sa_sigaction = SIG_DFL;
// SAFETY: pray this is a well-behaved POSIX implementation of fn sigaction
unsafe { sigaction(signum, &action, ptr::null_mut()) };

rtabort!("stack overflow");
} else {
// Unregister ourselves by reverting back to the default behavior.
// SAFETY: assuming all platforms define struct sigaction as "zero-initializable"
let mut action: sigaction = unsafe { mem::zeroed() };
action.sa_sigaction = SIG_DFL;
// SAFETY: pray this is a well-behaved POSIX implementation of fn sigaction
unsafe { sigaction(signum, &action, ptr::null_mut()) };

// See comment above for why this function returns.
}
// See comment above for why this function returns.
}

static PAGE_SIZE: Atomic<usize> = AtomicUsize::new(0);
Expand All @@ -128,9 +145,7 @@ mod imp {
pub unsafe fn init() {
PAGE_SIZE.store(os::page_size(), Ordering::Relaxed);

// Always write to GUARD to ensure the TLS variable is allocated.
let guard = unsafe { install_main_guard().unwrap_or(0..0) };
GUARD.set((guard.start, guard.end));
let mut guard_page_range = unsafe { install_main_guard() };

// SAFETY: assuming all platforms define struct sigaction as "zero-initializable"
let mut action: sigaction = unsafe { mem::zeroed() };
Expand All @@ -145,7 +160,13 @@ mod imp {
let handler = unsafe { make_handler(true) };
MAIN_ALTSTACK.store(handler.data, Ordering::Relaxed);
mem::forget(handler);

if let Some(guard_page_range) = guard_page_range.take() {
let thread_name = with_current_name(|name| name.map(Box::from));
set_current_info(guard_page_range, thread_name);
}
}

action.sa_flags = SA_SIGINFO | SA_ONSTACK;
action.sa_sigaction = signal_handler as sighandler_t;
// SAFETY: only overriding signals if the default is set
Expand Down Expand Up @@ -214,9 +235,10 @@ mod imp {
}

if !main_thread {
// Always write to GUARD to ensure the TLS variable is allocated.
let guard = unsafe { current_guard() }.unwrap_or(0..0);
GUARD.set((guard.start, guard.end));
if let Some(guard_page_range) = unsafe { current_guard() } {
let thread_name = with_current_name(|name| name.map(Box::from));
set_current_info(guard_page_range, thread_name);
}
}

// SAFETY: assuming stack_t is zero-initializable
Expand Down Expand Up @@ -261,6 +283,8 @@ mod imp {
// a mapping that started one page earlier, so walk back a page and unmap from there.
unsafe { munmap(data.sub(page_size), sigstack_size + page_size) };
}

delete_current_info();
}

/// Modern kernels on modern hardware can have dynamic signal stack sizes.
Expand Down Expand Up @@ -590,17 +614,20 @@ mod imp {
// usually have fewer qualms about forwards compatibility, since the runtime
// is shipped with the OS):
// <https://github.com/apple/swift/blob/swift-5.10-RELEASE/stdlib/public/runtime/CrashHandlerMacOS.cpp>
#[cfg(not(any(
target_os = "linux",
target_os = "freebsd",
target_os = "hurd",
target_os = "macos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "illumos",
target_os = "cygwin",
)))]
#[cfg(any(
miri,
not(any(
target_os = "linux",
target_os = "freebsd",
target_os = "hurd",
target_os = "macos",
target_os = "netbsd",
target_os = "openbsd",
target_os = "solaris",
target_os = "illumos",
target_os = "cygwin",
))
))]
mod imp {
pub unsafe fn init() {}

Expand Down
129 changes: 129 additions & 0 deletions library/std/src/sys/pal/unix/stack_overflow/thread_info.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
//! TLS, but async-signal-safe.
//!
//! Unfortunately, because thread local storage isn't async-signal-safe, we
//! cannot soundly use it in our stack overflow handler. While this works
//! without problems on most platforms, it can lead to undefined behaviour
//! on others (such as GNU/Linux). Luckily, the POSIX specification documents
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we expect that all cfg(unix) platforms satisfy POSIX? Is that a safe assumption to make?

It seems a little odd to me that errno is somehow special (vs arbitrary thread-locals)...

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems a little odd to me that errno is somehow special (vs arbitrary thread-locals)...

The C runtime can rely on the fact that it's always loaded and use e.g. the initial-exec TLS model – which is async-signal-safe – for errno (that's what glibc does). Or they embed errno into the thread control block, which is accessible through the thread pointer (that's what musl does).

I guess we expect that all cfg(unix) platforms satisfy POSIX? Is that a safe assumption to make?

The expectation must only hold for platforms that enable the stack overflow handler.

Copy link
Member

@workingjubilee workingjubilee May 5, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not a safe assumption to make on various niche cases, however if they implement any threading, have signals, yet do not implement POSIX correctly-enough for this, they will have a lot of problems that we will immediately notice. In particular, this is a very mild demand: "implement errno in a POSIX-conforming way". If you have threads and signals AND want to run C code, you pretty much have to do this. Your only choice with being non-conformant in that case is to deliberately pick a non-conforming implementation that still somehow doesn't get stomped by C code running concurrently.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In practice the only other implementation of errno I would expect to see is the global version that gets stomped by running concurrently and thus cannot support threading.

//! two thread-specific values that can be accessed in asynchronous signal
//! handlers: the value of `pthread_self()` and the address of `errno`. As
//! `pthread_t` is an opaque platform-specific type, we use the address of
//! `errno` here. As it is thread-specific and does not change over the
//! lifetime of a thread, we can use `&errno` as a key for a `BTreeMap`
//! that stores thread-specific data.
//!
//! Concurrent access to this map is synchronized by two locks – an outer
//! [`Mutex`] and an inner spin lock that also remembers the identity of
//! the lock owner:
//! * The spin lock is the primary means of synchronization: since it only
//! uses native atomics, it can be soundly used inside the signal handle
//! as opposed to [`Mutex`], which might not be async-signal-safe.
//! * The [`Mutex`] prevents busy-waiting in the setup logic, as all accesses
//! there are performed with the [`Mutex`] held, which makes the spin-lock
//! redundant in the common case.
//! * Finally, by using the `errno` address as the locked value of the spin
//! lock, we can detect cases where a SIGSEGV occurred while the thread
//! info is being modified.

use crate::collections::BTreeMap;
use crate::hint::spin_loop;
use crate::ops::Range;
use crate::sync::Mutex;
use crate::sync::atomic::{AtomicUsize, Ordering};
use crate::sys::os::errno_location;

pub struct ThreadInfo {
pub guard_page_range: Range<usize>,
pub thread_name: Option<Box<str>>,
}

static LOCK: Mutex<()> = Mutex::new(());
static SPIN_LOCK: AtomicUsize = AtomicUsize::new(0);
// This uses a `BTreeMap` instead of a hashmap since it supports constant
// initialization and automatically reduces the amount of memory used when
// items are removed.
static mut THREAD_INFO: BTreeMap<usize, ThreadInfo> = BTreeMap::new();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe s/static mut/ with a SyncUnsafeCell or something like that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't feel strongly about this, but I didn't think it any clearer...


struct UnlockOnDrop;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we make this be SpinGuard<'a, BTreeMap<usize, ThreadInfo>> or so?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would necessitate Deref and DerefMut implementations that aren't really adding any clarity.


impl Drop for UnlockOnDrop {
fn drop(&mut self) {
SPIN_LOCK.store(0, Ordering::Release);
}
}

/// Get the current thread's information, if available.
///
/// Calling this function might freeze other threads if they attempt to modify
/// their thread information. Thus, the caller should ensure that the process
/// is aborted shortly after this function is called.
///
/// This function is guaranteed to be async-signal-safe if `f` is too.
pub fn with_current_info<R>(f: impl FnOnce(Option<&ThreadInfo>) -> R) -> R {
let this = errno_location().addr();
let mut attempt = 0;
let _guard = loop {
// If we are just spinning endlessly, it's very likely that the thread
// modifying the thread info map has a lower priority than us and will
// not continue until we stop running. Just give up in that case.
if attempt == 10_000_000 {
rtprintpanic!("deadlock in SIGSEGV handler");
return f(None);
}

match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
Ok(_) => break UnlockOnDrop,
Err(owner) if owner == this => {
rtabort!("a thread received SIGSEGV while modifying its stack overflow information")
}
// Spin until the lock can be acquired – there is nothing better to
// do. This is unfortunately a priority hole, but a stack overflow
// is a fatal error anyway.
Err(_) => {
spin_loop();
attempt += 1;
}
}
};

// SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
let thread_info = unsafe { &*(&raw const THREAD_INFO) };
f(thread_info.get(&this))
}

fn spin_lock_in_setup(this: usize) -> UnlockOnDrop {
loop {
match SPIN_LOCK.compare_exchange(0, this, Ordering::Acquire, Ordering::Relaxed) {
Ok(_) => return UnlockOnDrop,
Err(owner) if owner == this => {
unreachable!("the thread info setup logic isn't recursive")
}
// This function is always called with the outer lock held,
// meaning the only time locking can fail is if another thread has
// encountered a stack overflow. Since that will abort the process,
// we just stop the current thread until that time. We use `pause`
// instead of spinning to avoid priority inversion.
// SAFETY: this doesn't have any safety preconditions.
Err(_) => drop(unsafe { libc::pause() }),
}
}
}

pub fn set_current_info(guard_page_range: Range<usize>, thread_name: Option<Box<str>>) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: why not take the ThreadInfo struct as the argument?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought the struct creation to be unnecessary clutter at the call site.

let this = errno_location().addr();
let _lock_guard = LOCK.lock();
let _spin_guard = spin_lock_in_setup(this);

// SAFETY: we own the spin lock, so `THREAD_INFO` cannot be aliased.
let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
thread_info.insert(this, ThreadInfo { guard_page_range, thread_name });
}

pub fn delete_current_info() {
let this = errno_location().addr();
let _lock_guard = LOCK.lock();
let _spin_guard = spin_lock_in_setup(this);

// SAFETY: we own the spin lock, so `THREAD_INFO` cannot not be aliased.
let thread_info = unsafe { &mut *(&raw mut THREAD_INFO) };
thread_info.remove(&this);
}
Loading