From 2ba7c7f7b151ff56cf12bf3cab286981bb646c90 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 4 Aug 2023 10:34:23 +0100 Subject: [PATCH] Add some GC stats to Py_STATS (GH-107581) --- Include/internal/pycore_code.h | 2 ++ Include/pystats.h | 9 +++++++++ Modules/gcmodule.c | 18 +++++++++++++++++ Python/specialize.c | 18 ++++++++++++++++- Tools/scripts/summarize_stats.py | 34 ++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 1 deletion(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index ee1b85187cbab6..00099376635e9b 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -274,6 +274,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co); #define EVAL_CALL_STAT_INC(name) do { if (_py_stats) _py_stats->call_stats.eval_calls[name]++; } while (0) #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \ do { if (_py_stats && PyFunction_Check(callable)) _py_stats->call_stats.eval_calls[name]++; } while (0) +#define GC_STAT_ADD(gen, name, n) do { if (_py_stats) _py_stats->gc_stats[(gen)].name += (n); } while (0) // Export for '_opcode' shared extension PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); @@ -287,6 +288,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define OBJECT_STAT_INC_COND(name, cond) ((void)0) #define EVAL_CALL_STAT_INC(name) ((void)0) #define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0) +#define GC_STAT_ADD(gen, name, n) ((void)0) #endif // !Py_STATS // Utility functions for reading/writing 32/64-bit values in the inline caches. diff --git a/Include/pystats.h b/Include/pystats.h index 54c9b8d8b3538f..e24aef5fe8072b 100644 --- a/Include/pystats.h +++ b/Include/pystats.h @@ -74,12 +74,21 @@ typedef struct _object_stats { uint64_t optimization_traces_created; uint64_t optimization_traces_executed; uint64_t optimization_uops_executed; + /* Temporary value used during GC */ + uint64_t object_visits; } ObjectStats; +typedef struct _gc_stats { + uint64_t collections; + uint64_t object_visits; + uint64_t objects_collected; +} GCStats; + typedef struct _stats { OpcodeStats opcode_stats[256]; CallStats call_stats; ObjectStats object_stats; + GCStats *gc_stats; } PyStats; diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index 246c0a9e160aa9..35a35091bf4511 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -460,6 +460,7 @@ update_refs(PyGC_Head *containers) static int visit_decref(PyObject *op, void *parent) { + OBJECT_STAT_INC(object_visits); _PyObject_ASSERT(_PyObject_CAST(parent), !_PyObject_IsFreed(op)); if (_PyObject_IS_GC(op)) { @@ -498,6 +499,7 @@ subtract_refs(PyGC_Head *containers) static int visit_reachable(PyObject *op, PyGC_Head *reachable) { + OBJECT_STAT_INC(object_visits); if (!_PyObject_IS_GC(op)) { return 0; } @@ -725,6 +727,7 @@ clear_unreachable_mask(PyGC_Head *unreachable) static int visit_move(PyObject *op, PyGC_Head *tolist) { + OBJECT_STAT_INC(object_visits); if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (gc_is_collecting(gc)) { @@ -1195,6 +1198,12 @@ gc_collect_main(PyThreadState *tstate, int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail) { + GC_STAT_ADD(generation, collections, 1); +#ifdef Py_STATS + if (_py_stats) { + _py_stats->object_stats.object_visits = 0; + } +#endif int i; Py_ssize_t m = 0; /* # objects collected */ Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ @@ -1351,6 +1360,15 @@ gc_collect_main(PyThreadState *tstate, int generation, stats->collected += m; stats->uncollectable += n; + GC_STAT_ADD(generation, objects_collected, m); +#ifdef Py_STATS + if (_py_stats) { + GC_STAT_ADD(generation, object_visits, + _py_stats->object_stats.object_visits); + _py_stats->object_stats.object_visits = 0; + } +#endif + if (PyDTrace_GC_DONE_ENABLED()) { PyDTrace_GC_DONE(n + m); } diff --git a/Python/specialize.c b/Python/specialize.c index 1669ce17fc804e..de329ef1195cbf 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -18,7 +18,8 @@ */ #ifdef Py_STATS -PyStats _py_stats_struct = { 0 }; +GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 }; +PyStats _py_stats_struct = { .gc_stats = &_py_gc_stats[0] }; PyStats *_py_stats = NULL; #define ADD_STAT_TO_DICT(res, field) \ @@ -202,17 +203,32 @@ print_object_stats(FILE *out, ObjectStats *stats) fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->optimization_uops_executed); } +static void +print_gc_stats(FILE *out, GCStats *stats) +{ + for (int i = 0; i < NUM_GENERATIONS; i++) { + fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); + fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); + fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); + } +} + static void print_stats(FILE *out, PyStats *stats) { print_spec_stats(out, stats->opcode_stats); print_call_stats(out, &stats->call_stats); print_object_stats(out, &stats->object_stats); + print_gc_stats(out, stats->gc_stats); } void _Py_StatsClear(void) { + for (int i = 0; i < NUM_GENERATIONS; i++) { + _py_gc_stats[i] = (GCStats) { 0 }; + } _py_stats_struct = (PyStats) { 0 }; + _py_stats_struct.gc_stats = _py_gc_stats; } void diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 9c881897c2de1d..f798b2f772d08a 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -494,6 +494,22 @@ def calculate_object_stats(stats): rows.append((label, value, ratio)) return rows +def calculate_gc_stats(stats): + gc_stats = [] + for key, value in stats.items(): + if not key.startswith("GC"): + continue + n, _, rest = key[3:].partition("]") + name = rest.strip() + gen_n = int(n) + while len(gc_stats) <= gen_n: + gc_stats.append({}) + gc_stats[gen_n][name] = value + return [ + (i, gen["collections"], gen["objects collected"], gen["object visits"]) + for (i, gen) in enumerate(gc_stats) + ] + def emit_object_stats(stats): with Section("Object stats", summary="allocations, frees and dict materializatons"): rows = calculate_object_stats(stats) @@ -505,6 +521,22 @@ def emit_comparative_object_stats(base_stats, head_stats): head_rows = calculate_object_stats(head_stats) emit_table(("", "Base Count:", "Base Ratio:", "Head Count:", "Head Ratio:"), join_rows(base_rows, head_rows)) +def emit_gc_stats(stats): + with Section("GC stats", summary="GC collections and effectiveness"): + rows = calculate_gc_stats(stats) + emit_table(("Generation:", "Collections:", "Objects collected:", "Object visits:"), rows) + +def emit_comparative_gc_stats(base_stats, head_stats): + with Section("GC stats", summary="GC collections and effectiveness"): + base_rows = calculate_gc_stats(base_stats) + head_rows = calculate_gc_stats(head_stats) + emit_table( + ("Generation:", + "Base collections:", "Head collections:", + "Base objects collected:", "Head objects collected:", + "Base object visits:", "Head object visits:"), + join_rows(base_rows, head_rows)) + def get_total(opcode_stats): total = 0 for opcode_stat in opcode_stats: @@ -574,6 +606,7 @@ def output_single_stats(stats): emit_specialization_overview(opcode_stats, total) emit_call_stats(stats) emit_object_stats(stats) + emit_gc_stats(stats) with Section("Meta stats", summary="Meta statistics"): emit_table(("", "Count:"), [('Number of data files', stats['__nfiles__'])]) @@ -596,6 +629,7 @@ def output_comparative_stats(base_stats, head_stats): ) emit_comparative_call_stats(base_stats, head_stats) emit_comparative_object_stats(base_stats, head_stats) + emit_comparative_gc_stats(base_stats, head_stats) def output_stats(inputs, json_output=None): if len(inputs) == 1: