@@ -448,6 +448,7 @@ static int signal_caught_cond = -1;
448
448
449
449
int jl_thread_suspend_and_get_state (int tid , int timeout , bt_context_t * ctx )
450
450
{
451
+ int err ;
451
452
pthread_mutex_lock (& in_signal_lock );
452
453
jl_ptls_t ptls2 = jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
453
454
jl_task_t * ct2 = ptls2 ? jl_atomic_load_relaxed (& ptls2 -> current_task ) : NULL ;
@@ -456,22 +457,51 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
456
457
pthread_mutex_unlock (& in_signal_lock );
457
458
return 0 ;
458
459
}
459
- sig_atomic_t request = 0 ;
460
- if (!jl_atomic_cmpswap (& ptls2 -> signal_request , & request , 1 )) {
460
+ if (jl_atomic_load (& ptls2 -> signal_request ) != 0 ) {
461
461
// something is wrong, or there is already a usr2 in flight elsewhere
462
+ // try to wait for it to finish or wait for timeout
463
+ struct pollfd event = {signal_caught_cond , POLLIN , 0 };
464
+ do {
465
+ err = poll (& event , 1 , timeout * 1000 );
466
+ } while (err == -1 && errno == EINTR );
467
+ if (err == -1 || (event .revents & POLLIN ) == 0 ) {
468
+ // not ready after timeout: cancel this request
469
+ pthread_mutex_unlock (& in_signal_lock );
470
+ return 0 ;
471
+ }
472
+ }
473
+ // check for any stale signal_caught_cond events
474
+ struct pollfd event = {signal_caught_cond , POLLIN , 0 };
475
+ do {
476
+ err = poll (& event , 1 , 0 );
477
+ } while (err == -1 && errno == EINTR );
478
+ if (err == -1 ) {
462
479
pthread_mutex_unlock (& in_signal_lock );
463
480
return 0 ;
464
481
}
482
+ if ((event .revents & POLLIN ) != 0 ) {
483
+ // consume it before continuing
484
+ eventfd_t got ;
485
+ do {
486
+ err = read (signal_caught_cond , & got , sizeof (eventfd_t ));
487
+ } while (err == -1 && errno == EINTR );
488
+ if (err != sizeof (eventfd_t )) abort ();
489
+ assert (got == 1 ); (void ) got ;
490
+ }
491
+ sig_atomic_t request = jl_atomic_exchange (& ptls2 -> signal_request , 1 );
492
+ assert (request == 0 || request == -1 );
465
493
request = 1 ;
466
- int err = pthread_kill (ptls2 -> system_id , SIGUSR2 );
467
- // wait for thread to acknowledge or timeout
468
- struct pollfd event = {signal_caught_cond , POLLIN , 0 };
494
+ err = pthread_kill (ptls2 -> system_id , SIGUSR2 );
469
495
if (err == 0 ) {
496
+ // wait for thread to acknowledge or timeout
497
+ struct pollfd event = {signal_caught_cond , POLLIN , 0 };
470
498
do {
471
499
err = poll (& event , 1 , timeout * 1000 );
472
500
} while (err == -1 && errno == EINTR );
501
+ if (err != 1 || (event .revents & POLLIN ) == 0 )
502
+ err = -1 ;
473
503
}
474
- if (( event . revents & POLLIN ) == 0 ) {
504
+ if (err == -1 ) {
475
505
// not ready after timeout: try to cancel this request
476
506
if (jl_atomic_cmpswap (& ptls2 -> signal_request , & request , 0 )) {
477
507
pthread_mutex_unlock (& in_signal_lock );
@@ -487,7 +517,7 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
487
517
// Now the other thread is waiting on exit_signal_cond (verify that here by
488
518
// checking it is 0, and add an acquire barrier for good measure)
489
519
request = jl_atomic_load_acquire (& ptls2 -> signal_request );
490
- assert (request == 0 ); (void ) request ;
520
+ assert (request == 0 || request == -1 ); (void ) request ;
491
521
jl_atomic_store_release (& ptls2 -> signal_request , 4 ); // prepare to resume normally, but later code may change this
492
522
* ctx = * signal_context ;
493
523
return 1 ;
@@ -546,6 +576,7 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
546
576
}
547
577
548
578
// request:
579
+ // -1: processing
549
580
// 0: nothing [not from here]
550
581
// 1: get state & wait for request
551
582
// 2: throw sigint if `!defer_signal && io_wait` or if force throw threshold
@@ -561,21 +592,35 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
561
592
if (ptls == NULL )
562
593
return ;
563
594
int errno_save = errno ;
595
+ sig_atomic_t request = jl_atomic_load (& ptls -> signal_request );
596
+ if (request == 0 )
597
+ return ;
598
+ if (!jl_atomic_cmpswap (& ptls -> signal_request , & request , -1 ))
599
+ return ;
564
600
// acknowledge that we saw the signal_request
565
- sig_atomic_t request = jl_atomic_exchange (& ptls -> signal_request , 0 );
601
+ int err ;
602
+ eventfd_t got = 1 ;
603
+ err = write (signal_caught_cond , & got , sizeof (eventfd_t ));
604
+ if (err != sizeof (eventfd_t )) abort ();
605
+ int processing = -1 ;
606
+ jl_atomic_cmpswap (& ptls -> signal_request , & processing , 0 );
566
607
if (request == 1 ) {
567
608
signal_context = jl_to_bt_context (ctx );
568
- int err ;
569
- eventfd_t got = 1 ;
570
- err = write (signal_caught_cond , & got , sizeof (eventfd_t ));
571
- if (err != sizeof (eventfd_t )) abort ();
572
609
do {
573
610
err = read (exit_signal_cond , & got , sizeof (eventfd_t ));
574
611
} while (err == -1 && errno == EINTR );
575
612
if (err != sizeof (eventfd_t )) abort ();
576
613
assert (got == 1 );
577
- request = jl_atomic_exchange (& ptls -> signal_request , 0 );
614
+ request = jl_atomic_exchange (& ptls -> signal_request , -1 );
615
+ if (request != 0 ) {
616
+ int err ;
617
+ eventfd_t got = 1 ;
618
+ err = write (signal_caught_cond , & got , sizeof (eventfd_t ));
619
+ if (err != sizeof (eventfd_t )) abort ();
620
+ }
578
621
assert (request == 2 || request == 3 || request == 4 );
622
+ int processing = -1 ;
623
+ jl_atomic_cmpswap (& ptls -> signal_request , & processing , 0 );
579
624
}
580
625
if (request == 2 ) {
581
626
int force = jl_check_force_sigint ();
0 commit comments