Skip to content

Commit

Permalink
flambda-backend: Fix segfault when signal handlers invoked in non-OCa…
Browse files Browse the repository at this point in the history
…ml threads on runtime5 (#2154)

* Cherry-pick 59029b9 (part of ocaml/ocaml PR11307)

* Add a test, which would segfault before this PR
  • Loading branch information
mshinwell authored Dec 13, 2023
1 parent 0b9333c commit b3993f4
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 21 deletions.
1 change: 1 addition & 0 deletions runtime/caml/domain.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ void caml_handle_gc_interrupt(void);
void caml_handle_incoming_interrupts(void);

CAMLextern void caml_interrupt_self(void);
void caml_interrupt_all_for_signal(void);
void caml_reset_young_limit(caml_domain_state *);

CAMLextern void caml_reset_domain_lock(void);
Expand Down
40 changes: 34 additions & 6 deletions runtime/domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ typedef cpuset_t cpu_set_t;

/* control of STW interrupts */
struct interruptor {
atomic_uintnat* interrupt_word;
/* The outermost atomic is for synchronization with
caml_interrupt_all_for_signal. The innermost atomic is also for
cross-domain communication.*/
_Atomic(atomic_uintnat *) interrupt_word;
caml_plat_mutex lock;
caml_plat_cond cond;

Expand Down Expand Up @@ -205,7 +208,7 @@ static caml_plat_mutex all_domains_lock = CAML_PLAT_MUTEX_INITIALIZER;
static caml_plat_cond all_domains_cond =
CAML_PLAT_COND_INITIALIZER(&all_domains_lock);
static atomic_uintnat /* dom_internal* */ stw_leader = 0;
static struct dom_internal all_domains[Max_domains];
static dom_internal all_domains[Max_domains];

CAMLexport atomic_uintnat caml_num_domains_running;

Expand Down Expand Up @@ -294,7 +297,8 @@ CAMLexport caml_domain_state* caml_get_domain_state(void)

Caml_inline void interrupt_domain(struct interruptor* s)
{
atomic_store_release(s->interrupt_word, (uintnat)(-1));
atomic_uintnat * interrupt_word = atomic_load_relaxed(&s->interrupt_word);
atomic_store_release(interrupt_word, (uintnat)(-1));
}

int caml_incoming_interrupts_queued(void)
Expand Down Expand Up @@ -585,8 +589,14 @@ static void domain_create(uintnat initial_minor_heap_wsize) {

caml_state = domain_state;

domain_state->young_limit = 0;
/* Synchronized with [caml_interrupt_all_for_signal], so that the
initializing write of young_limit happens before any
interrupt. */
atomic_store_explicit(&s->interrupt_word, &domain_state->young_limit,
memory_order_release);

s->unique_id = fresh_domain_unique_id();
s->interrupt_word = &domain_state->young_limit;
s->running = 1;
atomic_fetch_add(&caml_num_domains_running, 1);

Expand Down Expand Up @@ -873,7 +883,7 @@ void caml_init_domains(uintnat minor_heap_wsz) {

dom->id = i;

dom->interruptor.interrupt_word = 0;
dom->interruptor.interrupt_word = NULL;
caml_plat_mutex_init(&dom->interruptor.lock);
caml_plat_cond_init(&dom->interruptor.cond,
&dom->interruptor.lock);
Expand Down Expand Up @@ -1529,10 +1539,28 @@ int caml_try_run_on_all_domains_async(
leader_setup, 0, 0);
}

void caml_interrupt_self(void) {
void caml_interrupt_self(void)
{
interrupt_domain(&domain_self->interruptor);
}

/* async-signal-safe */
void caml_interrupt_all_for_signal(void)
{
for (dom_internal *d = all_domains; d < &all_domains[Max_domains]; d++) {
/* [all_domains] is an array of values. So we can access
[interrupt_word] directly without synchronisation other than
with other people who access the same [interrupt_word].*/
atomic_uintnat * interrupt_word =
atomic_load_explicit(&d->interruptor.interrupt_word,
memory_order_acquire);
/* Early exit: if the current domain was never initialized, then
neither have been any of the remaining ones. */
if (interrupt_word == NULL) return;
interrupt_domain(&d->interruptor);
}
}

void caml_reset_young_limit(caml_domain_state * dom_st)
{
CAMLassert ((uintnat)dom_st->young_ptr > (uintnat)dom_st->young_trigger);
Expand Down
9 changes: 6 additions & 3 deletions runtime/minor_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -844,9 +844,12 @@ void caml_alloc_small_dispatch (caml_domain_state * dom_st,
"minor GC");
else {
caml_handle_gc_interrupt();
/* In the case of long-running C code that regularly polls with
[caml_process_pending_actions], still force a query of all
callbacks at every minor collection or major slice. */
/* We might be here due to a recently-recorded signal, so we
need to remember that we must run signal handlers. In
addition, in the case of long-running C code that regularly
polls with caml_process_pending_actions, we want to force a
query of all callbacks at every minor collection or major
slice (similarly to OCaml behaviour). */
dom_st->action_pending = 1;
}

Expand Down
38 changes: 26 additions & 12 deletions runtime/signals.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,37 @@ CAMLexport value caml_process_pending_signals_exn(void)
}

/* Record the delivery of a signal, and arrange for it to be processed
as soon as possible:
- via Caml_state->action_pending, processed in
caml_process_pending_actions.
- by playing with the allocation limit, processed in
caml_alloc_small_dispatch.
*/
as soon as possible, by playing with the allocation limit,
processed in caml_alloc_small_dispatch. */
CAMLexport void caml_record_signal(int signal_number)
{
unsigned int i;
if (signal_number <= 0 || signal_number >= NSIG) return;
i = signal_number - 1;
atomic_fetch_or(&caml_pending_signals[i / BITS_PER_WORD],
(uintnat)1 << (i % BITS_PER_WORD));
// FIXME: the TLS variable is not thread-safe
caml_interrupt_self();
/* We interrupt all domains when a signal arrives. Signals (SIGINT,
SIGALRM...) arrive infrequently-enough that this is affordable.
This is a strategy that makes as little assumptions as possible
about signal-safety, threads, and domains.
* In mixed C/OCaml applications there is no guarantee that the
POSIX signal handler runs in an OCaml thread, so Caml_state might
be unavailable.
* While C11 mandates that atomic thread-local variables are
async-signal-safe for reading, gcc does not conform and can
allocate in corner cases involving dynamic linking. It is also
unclear whether the OSX implementation conforms, but this might
be a theoretical concern only.
* The thread executing a POSIX signal handler is not necessarily
the most ready to execute the corresponding OCaml signal handler.
Examples:
- Ctrl-C in the toplevel when domain 0 is stuck inside [Domain.join].
- a thread that has just spawned, before the appropriate mask is set.
*/
caml_interrupt_all_for_signal();
}

/* Management of blocking sections. */
Expand Down Expand Up @@ -303,7 +319,7 @@ void caml_request_minor_gc (void)
[Caml_state->action_pending] is 1, or there is a function currently
running which is executing all actions.
This is used to ensure [Caml_state->young_limit] is always set
This is used to ensure that [Caml_state->young_limit] is always set
appropriately.
In case there are two different callbacks (say, a signal and a
Expand All @@ -316,9 +332,7 @@ void caml_request_minor_gc (void)
calling them first.
*/

CAMLno_tsan /* When called from [caml_record_signal], these memory
accesses may not be synchronized. Otherwise we assume
that we have unique access to dom_st. */
/* We assume that we have unique access to dom_st. */
void caml_set_action_pending(caml_domain_state * dom_st)
{
dom_st->action_pending = 1;
Expand Down
18 changes: 18 additions & 0 deletions testsuite/tests/lib-threads/signal_handler_run_in_c_thread.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
(* TEST
modules = "signal_handler_run_in_c_thread_stubs.c"
* native
** hassysthreads
*)

(* This doesn't actually need systhreads, but the requirement should
ensure the C pthreads-using code will build. *)

external c_stub : unit -> unit = "test_signal_handler_run_in_c_thread"

let () =
Sys.set_signal Sys.sigusr1 (Signal_handle (fun _ -> exit 0));
c_stub ();
while true do
(* Ensure pending actions are run, by forcing allocation *)
ignore (Sys.opaque_identity (Random.int 42, Random.int 42))
done
22 changes: 22 additions & 0 deletions testsuite/tests/lib-threads/signal_handler_run_in_c_thread_stubs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include <pthread.h>
#include <signal.h>
#include <caml/mlvalues.h>

static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

static void* in_thread(void* unused)
{
(void) pthread_cond_signal(&cond);
/* Signal to be received in this thread by the OCaml signal handler */
while (1);
}

value test_signal_handler_run_in_c_thread(value unit)
{
pthread_t thread;
pthread_create(&thread, NULL, &in_thread, NULL);
pthread_cond_wait(&cond, &mutex);
pthread_kill(thread, SIGUSR1);
return Val_unit;
}

0 comments on commit b3993f4

Please sign in to comment.