@@ -724,6 +724,85 @@ void jl_unlock_stackwalk(int lockret)
724
724
jl_unlock_profile_mach (1 , lockret );
725
725
}
726
726
727
+ // Samples one thread for the profiler: suspends thread `tid`, appends its backtrace and a metadata trailer to `profile_bt_data_prof`, then resumes it. Assumes the caller is holding `jl_lock_profile_mach`.
728
+ void jl_profile_thread_mach (int tid )
729
+ {
730
+ // if there is no space left in the profile buffer, stop the profile timer and return early (the thread is never suspended)
731
+ if (jl_profile_is_buffer_full ()) {
732
+ jl_profile_stop_timer ();
733
+ return ;
734
+ }
735
+ if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL )
736
+ _dyld_dlopen_atfork_prepare ();
737
+ if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL )
738
+ _dyld_atfork_prepare (); // briefly acquire the dlsym lock
739
+ host_thread_state_t state ;
740
+ int valid_thread = jl_thread_suspend_and_get_state2 (tid , & state );
741
+ unw_context_t * uc = (unw_context_t * )& state ; // the captured thread state is handed to the unwinder below as its starting context
742
+ if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL )
743
+ _dyld_atfork_parent (); // quickly release the dlsym lock
744
+ if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL )
745
+ _dyld_dlopen_atfork_parent ();
746
+ if (!valid_thread )
747
+ return ; // NOTE(review): presumably the suspend failed, so there is nothing to sample or resume -- confirm against jl_thread_suspend_and_get_state2
748
+ if (profile_running ) { // only record a sample while profiling is active; the thread is resumed below either way
749
+ #ifdef LLVMLIBUNWIND
750
+ /*
751
+ * Unfortunately compact unwind info is incorrectly generated for quite a number of
752
+ * libraries by quite a large number of compilers. We can fall back to DWARF unwind info
753
+ * in some cases, but in quite a number of cases (especially libraries not compiled in debug
754
+ * mode, only the compact unwind info may be available). Even more unfortunately, there is no
755
+ * way to detect such bogus compact unwind info (other than noticing the resulting segfault).
756
+ * What we do here is ugly, but necessary until the compact unwind info situation improves.
757
+ * We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
758
+ * Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
759
+ * entry will always be correct, and the number of cases in which this is an issue is rather small.
760
+ * Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
761
+ * and during stack unwinding we only ever read memory, but never write it.
762
+ */
763
+
764
+ forceDwarf = 0 ;
765
+ unw_getcontext (& profiler_uc ); // will resume from this point if the next lines segfault at any point
766
+
767
+ if (forceDwarf == 0 ) {
768
+ // Save the backtrace (first attempt)
769
+ profile_bt_size_cur += rec_backtrace_ctx ((jl_bt_element_t * )profile_bt_data_prof + profile_bt_size_cur , profile_bt_size_max - profile_bt_size_cur - 1 , uc , NULL );
770
+ }
771
+ else if (forceDwarf == 1 ) {
772
+ profile_bt_size_cur += rec_backtrace_ctx_dwarf ((jl_bt_element_t * )profile_bt_data_prof + profile_bt_size_cur , profile_bt_size_max - profile_bt_size_cur - 1 , uc , NULL ); // retry of the same backtrace using the DWARF unwinder
773
+ }
774
+ else if (forceDwarf == -1 ) {
775
+ jl_safe_printf ("WARNING: profiler attempt to access an invalid memory location\n" );
776
+ }
777
+
778
+ forceDwarf = -2 ; // NOTE(review): -2 presumably tells the segfault handler that no profiler unwind is in progress -- confirm where forceDwarf is read
779
+ #else
780
+ profile_bt_size_cur += rec_backtrace_ctx ((jl_bt_element_t * )profile_bt_data_prof + profile_bt_size_cur , profile_bt_size_max - profile_bt_size_cur - 1 , uc , NULL );
781
+ #endif
782
+ jl_ptls_t ptls = jl_atomic_load_relaxed (& jl_all_tls_states )[tid ];
783
+
784
+ // store threadid but add 1 as 0 is reserved to indicate end of block
785
+ profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr = ptls -> tid + 1 ;
786
+
787
+ // store task id (never null)
788
+ profile_bt_data_prof [profile_bt_size_cur ++ ].jlvalue = (jl_value_t * )jl_atomic_load_relaxed (& ptls -> current_task );
789
+
790
+ // store cpu cycle clock
791
+ profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr = cycleclock ();
792
+
793
+ // store whether thread is sleeping but add 1 as 0 is reserved to indicate end of block
794
+ profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr = jl_atomic_load_relaxed (& ptls -> sleep_check_state ) + 1 ;
795
+
796
+ // Mark the end of this block with two 0's
797
+ profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr = 0 ;
798
+ profile_bt_data_prof [profile_bt_size_cur ++ ].uintptr = 0 ;
799
+ }
800
+ // We're done! Resume the thread.
801
+ jl_thread_resume (tid );
802
+ }
803
+
804
+ void jl_profile_task_unix (void );
805
+
727
806
void * mach_profile_listener (void * arg )
728
807
{
729
808
(void )arg ;
@@ -741,88 +820,21 @@ void *mach_profile_listener(void *arg)
741
820
// sample each thread, round-robin style in reverse order
742
821
// (so that thread zero gets notified last)
743
822
int keymgr_locked = jl_lock_profile_mach (0 );
744
-
745
823
int nthreads = jl_atomic_load_acquire (& jl_n_threads );
746
- int * randperm = profile_get_randperm (nthreads );
747
- for (int idx = nthreads ; idx -- > 0 ; ) {
748
- // Stop the threads in the random or reverse round-robin order.
749
- int i = randperm [idx ];
750
- // if there is no space left, break early
751
- if (jl_profile_is_buffer_full ()) {
752
- jl_profile_stop_timer ();
753
- break ;
754
- }
755
-
756
- if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL )
757
- _dyld_dlopen_atfork_prepare ();
758
- if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL )
759
- _dyld_atfork_prepare (); // briefly acquire the dlsym lock
760
- host_thread_state_t state ;
761
- int valid_thread = jl_thread_suspend_and_get_state2 (i , & state );
762
- unw_context_t * uc = (unw_context_t * )& state ;
763
- if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL )
764
- _dyld_atfork_parent (); // quickly release the dlsym lock
765
- if (_dyld_dlopen_atfork_prepare != NULL && _dyld_dlopen_atfork_parent != NULL )
766
- _dyld_dlopen_atfork_parent ();
767
- if (!valid_thread )
768
- continue ;
769
- if (running ) {
770
- #ifdef LLVMLIBUNWIND
771
- /*
772
- * Unfortunately compact unwind info is incorrectly generated for quite a number of
773
- * libraries by quite a large number of compilers. We can fall back to DWARF unwind info
774
- * in some cases, but in quite a number of cases (especially libraries not compiled in debug
775
- * mode, only the compact unwind info may be available). Even more unfortunately, there is no
776
- * way to detect such bogus compact unwind info (other than noticing the resulting segfault).
777
- * What we do here is ugly, but necessary until the compact unwind info situation improves.
778
- * We try to use the compact unwind info and if that results in a segfault, we retry with DWARF info.
779
- * Note that in a small number of cases this may result in bogus stack traces, but at least the topmost
780
- * entry will always be correct, and the number of cases in which this is an issue is rather small.
781
- * Other than that, this implementation is not incorrect as the other thread is paused while we are profiling
782
- * and during stack unwinding we only ever read memory, but never write it.
783
- */
784
-
785
- forceDwarf = 0 ;
786
- unw_getcontext (& profiler_uc ); // will resume from this point if the next lines segfault at any point
787
-
788
- if (forceDwarf == 0 ) {
789
- // Save the backtrace
790
- bt_size_cur += rec_backtrace_ctx ((jl_bt_element_t * )bt_data_prof + bt_size_cur , bt_size_max - bt_size_cur - 1 , uc , NULL );
791
- }
792
- else if (forceDwarf == 1 ) {
793
- bt_size_cur += rec_backtrace_ctx_dwarf ((jl_bt_element_t * )bt_data_prof + bt_size_cur , bt_size_max - bt_size_cur - 1 , uc , NULL );
794
- }
795
- else if (forceDwarf == -1 ) {
796
- jl_safe_printf ("WARNING: profiler attempt to access an invalid memory location\n" );
797
- }
798
-
799
- forceDwarf = -2 ;
800
- #else
801
- bt_size_cur += rec_backtrace_ctx ((jl_bt_element_t * )bt_data_prof + bt_size_cur , bt_size_max - bt_size_cur - 1 , uc , NULL );
802
- #endif
803
- jl_ptls_t ptls = jl_atomic_load_relaxed (& jl_all_tls_states )[i ];
804
-
805
- // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
806
- bt_data_prof [bt_size_cur ++ ].uintptr = ptls -> tid + 1 ;
807
-
808
- // META_OFFSET_TASKID store task id (never null)
809
- bt_data_prof [bt_size_cur ++ ].jlvalue = (jl_value_t * )jl_atomic_load_relaxed (& ptls -> current_task );
810
-
811
- // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
812
- bt_data_prof [bt_size_cur ++ ].uintptr = cycleclock ();
813
-
814
- // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
815
- bt_data_prof [bt_size_cur ++ ].uintptr = jl_atomic_load_relaxed (& ptls -> sleep_check_state ) + 1 ;
816
-
817
- // Mark the end of this block with two 0's
818
- bt_data_prof [bt_size_cur ++ ].uintptr = 0 ;
819
- bt_data_prof [bt_size_cur ++ ].uintptr = 0 ;
824
+ if (profile_all_tasks ) {
825
+ // Don't take the stackwalk lock here since it's already taken in `jl_rec_backtrace`
826
+ jl_profile_task_unix ();
827
+ }
828
+ else {
829
+ int * randperm = profile_get_randperm (nthreads );
830
+ for (int idx = nthreads ; idx -- > 0 ; ) {
831
+ // Stop the threads in random order.
832
+ int i = randperm [idx ];
833
+ jl_profile_thread_mach (i );
820
834
}
821
- // We're done! Resume the thread.
822
- jl_thread_resume (i );
823
835
}
824
836
jl_unlock_profile_mach (0 , keymgr_locked );
825
- if (running ) {
837
+ if (profile_running ) {
826
838
jl_check_profile_autostop ();
827
839
// Reset the alarm
828
840
kern_return_t ret = clock_alarm (clk , TIME_RELATIVE , timerprof , profile_port );
@@ -831,7 +843,8 @@ void *mach_profile_listener(void *arg)
831
843
}
832
844
}
833
845
834
- JL_DLLEXPORT int jl_profile_start_timer (void )
846
+
847
+ JL_DLLEXPORT int jl_profile_start_timer (uint8_t all_tasks )
835
848
{
836
849
kern_return_t ret ;
837
850
if (!profile_started ) {
@@ -860,7 +873,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
860
873
timerprof .tv_sec = nsecprof /GIGA ;
861
874
timerprof .tv_nsec = nsecprof %GIGA ;
862
875
863
- running = 1 ;
876
+ profile_running = 1 ;
877
+ profile_all_tasks = all_tasks ;
864
878
// ensure the alarm is running
865
879
ret = clock_alarm (clk , TIME_RELATIVE , timerprof , profile_port );
866
880
HANDLE_MACH_ERROR ("clock_alarm" , ret );
@@ -870,5 +884,6 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
870
884
871
885
JL_DLLEXPORT void jl_profile_stop_timer (void ) // Stops profiling by clearing the profiler state flags; nothing else is done here.
872
886
{
873
- running = 0 ;
887
+ profile_running = 0 ; // mach_profile_listener only re-arms its clock alarm while this flag is set
888
+ profile_all_tasks = 0 ; // also reset the all-tasks mode chosen in jl_profile_start_timer
874
889
}
0 commit comments