From ec182a4e40e936d14f1301f22cd90816ff3c9414 Mon Sep 17 00:00:00 2001 From: d-netto Date: Fri, 9 Feb 2024 01:32:01 -0300 Subject: [PATCH] better printing of types on page profiler --- src/Makefile | 3 +- src/gc-page-profiler.c | 24 ++++++++--- src/gc-page-profiler.h | 93 +++++++++++++++++++++++++++++++++++++++--- src/gc.c | 37 +++++++++++------ src/julia_internal.h | 2 +- 5 files changed, 134 insertions(+), 25 deletions(-) diff --git a/src/Makefile b/src/Makefile index 323aaea61c47d9..7308f037424760 100644 --- a/src/Makefile +++ b/src/Makefile @@ -317,9 +317,10 @@ $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,de $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/llvm-codegen-shared.h diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c index 5af1c3d014770d..2e876e4b7b4d6c 100644 --- a/src/gc-page-profiler.c +++ b/src/gc-page-profiler.c @@ -20,6 +20,8 @@ gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT gc_page_profiler_serializer_t serializer; if (__unlikely(page_profile_enabled)) { arraylist_new(&serializer.typestrs, GC_PAGE_SZ); + serializer.buffers = (char *)malloc_s(GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY); + serializer.cursor = 0; } else { serializer.typestrs.len = 0; @@ -34,6 +36,8 @@ void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer, serializer->typestrs.len = 0; serializer->data = (char *)pg->data; serializer->osize = pg->osize; + serializer->cursor = 0; + serializer->capacity = GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY; } } @@ -41,6 +45,7 @@ void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NO { if (__unlikely(page_profile_enabled)) { arraylist_free(&serializer->typestrs); + free(serializer->buffers); } } @@ -71,8 +76,9 @@ void gc_page_profile_write_preamble(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT { if (__unlikely(page_profile_enabled)) { - char str[GC_TYPE_STR_MAXLEN]; - snprintf(str, GC_TYPE_STR_MAXLEN, + const size_t large_enough_str_size = 4096; + char str[large_enough_str_size]; + snprintf(str, large_enough_str_size, "{\"address\": \"%p\",\"object_size\": %d,\"objects\": [", serializer->data, serializer->osize); ios_write(page_profile_stream, str, strlen(str)); @@ -102,22 +108,27 @@ void gc_page_profile_write_comma(gc_page_profiler_serializer_t *serializer) JL_N void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT { + size_t large_enough_str_size = 4096; if (__unlikely(page_profile_enabled)) { // write to file uv_mutex_lock(&page_profile_lock); gc_page_profile_write_comma(serializer); gc_page_profile_write_preamble(serializer); - char str[GC_TYPE_STR_MAXLEN]; + char *str = (char *)malloc_s(large_enough_str_size); for (size_t i = 0; i < serializer->typestrs.len; i++) { const char *name = (const char *)serializer->typestrs.items[i]; if (name == GC_SERIALIZER_EMPTY) { - snprintf(str, GC_TYPE_STR_MAXLEN, "\"empty\","); + snprintf(str, large_enough_str_size, "\"empty\","); } else if (name == GC_SERIALIZER_GARBAGE) { - snprintf(str, GC_TYPE_STR_MAXLEN, "\"garbage\","); + snprintf(str, large_enough_str_size, "\"garbage\","); } else { - snprintf(str, GC_TYPE_STR_MAXLEN, "\"%s\",", name); + while ((strlen(name) + 1) > large_enough_str_size) { + large_enough_str_size *= 2; + str = (char *)realloc_s(str, large_enough_str_size); + } + snprintf(str, large_enough_str_size, "\"%s\",", name); } // remove trailing comma for last element if (i == serializer->typestrs.len - 1) { @@ -125,6 +136,7 @@ void gc_page_profile_write_to_file(gc_page_profiler_serializer_t *serializer) } ios_write(page_profile_stream, str, strlen(str)); } + free(str); gc_page_profile_write_epilogue(serializer); page_profile_pages_written++; uv_mutex_unlock(&page_profile_lock); diff --git a/src/gc-page-profiler.h b/src/gc-page-profiler.h index b103e23905ba52..28989f8f8e206f 100644 --- a/src/gc-page-profiler.h +++ b/src/gc-page-profiler.h @@ -9,22 +9,29 @@ extern "C" { #endif -#define GC_TYPE_STR_MAXLEN (512) +#define GC_PAGE_PROFILER_SERIALIZER_INIT_CAPACITY (4096) typedef struct { arraylist_t typestrs; char *data; int osize; + char *buffers; + size_t cursor; + size_t capacity; } gc_page_profiler_serializer_t; // mutex for page profile extern uv_mutex_t page_profile_lock; +// whether page profiling is enabled +extern int page_profile_enabled; // Serializer functions gc_page_profiler_serializer_t gc_page_serializer_create(void) JL_NOTSAFEPOINT; -void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer, jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT; +void gc_page_serializer_init(gc_page_profiler_serializer_t *serializer, + jl_gc_pagemeta_t *pg) JL_NOTSAFEPOINT; void gc_page_serializer_destroy(gc_page_profiler_serializer_t *serializer) JL_NOTSAFEPOINT; -void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer, const char *str) JL_NOTSAFEPOINT; +void gc_page_serializer_write(gc_page_profiler_serializer_t *serializer, + const char *str) JL_NOTSAFEPOINT; // Page profile functions #define GC_SERIALIZER_EMPTY ((const char *)0x1) #define GC_SERIALIZER_GARBAGE ((const char *)0x2) @@ -42,13 +49,89 @@ STATIC_INLINE void gc_page_profile_write_garbage(gc_page_profiler_serializer_t * gc_page_serializer_write(serializer, GC_SERIALIZER_GARBAGE); } } +STATIC_INLINE char *gc_page_profile_request_buffer(gc_page_profiler_serializer_t *serializer, size_t size) JL_NOTSAFEPOINT +{ + while (serializer->cursor + size >= serializer->capacity) { + serializer->capacity *= 2; + serializer->buffers = (char *)realloc_s(serializer->buffers, serializer->capacity); + } + char *p = &serializer->buffers[serializer->cursor]; + memset(p, 0, size); + serializer->cursor += size; + return p; +} STATIC_INLINE void gc_page_profile_write_live_obj(gc_page_profiler_serializer_t *serializer, jl_taggedvalue_t *v, int enabled) JL_NOTSAFEPOINT { if (__unlikely(enabled)) { - const char *name = jl_typeof_str(jl_valueof(v)); - gc_page_serializer_write(serializer, name); + jl_value_t *a = jl_valueof(v); + jl_value_t *t = jl_typeof(a); + ios_t str_; + int ios_need_close = 0; + char *type_name = NULL; + char *type_name_in_serializer = NULL; + if (t == (jl_value_t *)jl_get_buff_tag()) { + type_name = "Buffer"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_string(a)) { + type_name = "String"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_symbol(a)) { + type_name = jl_symbol_name((jl_sym_t *)a); + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_simplevector(a)) { + type_name = "SimpleVector"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_module(a)) { + type_name = jl_symbol_name_(((jl_module_t *)a)->name); + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_task(a)) { + type_name = "Task"; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, strlen(type_name) + 1); + strcpy(type_name_in_serializer, type_name); + } + else if (jl_is_datatype(a)) { + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM *str = (JL_STREAM *)&str_; + jl_static_show(str, a); + type_name = str_.buf; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, str_.size + 1); + memcpy(type_name_in_serializer, type_name, str_.size); + } + else { + ios_need_close = 1; + ios_mem(&str_, 0); + JL_STREAM *str = (JL_STREAM *)&str_; + jl_static_show(str, t); + type_name = str_.buf; + type_name_in_serializer = + gc_page_profile_request_buffer(serializer, str_.size + 1); + memcpy(type_name_in_serializer, type_name, str_.size); + } + gc_page_serializer_write(serializer, type_name_in_serializer); + if (ios_need_close) { + ios_close(&str_); + } + jl_may_leak(type_name_in_serializer); } } void gc_enable_page_profile(void) JL_NOTSAFEPOINT; diff --git a/src/gc.c b/src/gc.c index 06a41393953b9a..b7d5f555d40904 100644 --- a/src/gc.c +++ b/src/gc.c @@ -199,7 +199,7 @@ jl_ptls_t* gc_all_tls_states; gc_heapstatus_t gc_heap_stats = {0}; int next_sweep_full = 0; const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00 -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT { return jl_buff_tag; } @@ -1659,18 +1659,32 @@ int gc_sweep_prescan(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_sc void gc_sweep_wake_all(jl_ptls_t ptls, jl_gc_padded_page_stack_t *new_gc_allocd_scratch) { int parallel_sweep_worthwhile = gc_sweep_prescan(ptls, new_gc_allocd_scratch); - jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch); - if (!parallel_sweep_worthwhile) { + if (parallel_sweep_worthwhile && !page_profile_enabled) { + jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch); + uv_mutex_lock(&gc_threads_lock); + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + assert(ptls2 != NULL); // should be a GC thread + jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1); + } + uv_cond_broadcast(&gc_threads_cond); + uv_mutex_unlock(&gc_threads_lock); return; } - uv_mutex_lock(&gc_threads_lock); - for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { - jl_ptls_t ptls2 = gc_all_tls_states[i]; - assert(ptls2 != NULL); // should be a GC thread - jl_atomic_fetch_add(&ptls2->gc_sweeps_requested, 1); + if (page_profile_enabled) { + // we need to ensure that no threads are running sweeping when + // collecting a page profile. + // wait for all to leave in order to ensure that a straggler doesn't + // try to enter sweeping after we set `gc_allocd_scratch` below. + for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) { + jl_ptls_t ptls2 = gc_all_tls_states[i]; + assert(ptls2 != NULL); // should be a GC thread + while (jl_atomic_load_acquire(&ptls2->gc_sweeps_requested) != 0) { + jl_cpu_pause(); + } + } } - uv_cond_broadcast(&gc_threads_cond); - uv_mutex_unlock(&gc_threads_lock); + jl_atomic_store(&gc_allocd_scratch, new_gc_allocd_scratch); } // wait for all threads to finish sweeping @@ -1813,8 +1827,7 @@ static void gc_sweep_pool(void) } // the actual sweeping - jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) malloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t)); - memset(new_gc_allocd_scratch, 0, n_threads * sizeof(jl_gc_padded_page_stack_t)); + jl_gc_padded_page_stack_t *new_gc_allocd_scratch = (jl_gc_padded_page_stack_t *) calloc_s(n_threads * sizeof(jl_gc_padded_page_stack_t)); jl_ptls_t ptls = jl_current_task->ptls; gc_sweep_wake_all(ptls, new_gc_allocd_scratch); gc_sweep_pool_parallel(ptls); diff --git a/src/julia_internal.h b/src/julia_internal.h index 9840da6b174483..15090ee319a6e5 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -536,7 +536,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); // defined as uint64_t[3] so that we can get the right alignment of this and a "type tag" on it const extern uint64_t _jl_buff_tag[3]; #define jl_buff_tag ((uintptr_t)LLT_ALIGN((uintptr_t)&_jl_buff_tag[1],16)) -JL_DLLEXPORT uintptr_t jl_get_buff_tag(void); +JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT; typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)