6
6
7
7
#include "dwarf-regs.h" /* for EM_HOST */
#include "syscalltbl.h"
#include "util/cgroup.h"
#include "util/hashmap.h"
#include "util/trace.h"
#include "util/util.h"
#include <bpf/bpf.h>
#include <inttypes.h> /* PRIu64 */
#include <linux/rbtree.h>
#include <linux/time64.h>
#include <tools/libc_compat.h> /* reallocarray */
15
17
18
20
19
21
20
22
static struct syscall_summary_bpf * skel ;
23
+ static struct rb_root cgroups = RB_ROOT ;
21
24
22
25
int trace_prepare_bpf_summary (enum trace_summary_mode mode )
23
26
{
@@ -29,9 +32,14 @@ int trace_prepare_bpf_summary(enum trace_summary_mode mode)
29
32
30
33
if (mode == SUMMARY__BY_THREAD )
31
34
skel -> rodata -> aggr_mode = SYSCALL_AGGR_THREAD ;
35
+ else if (mode == SUMMARY__BY_CGROUP )
36
+ skel -> rodata -> aggr_mode = SYSCALL_AGGR_CGROUP ;
32
37
else
33
38
skel -> rodata -> aggr_mode = SYSCALL_AGGR_CPU ;
34
39
40
+ if (cgroup_is_v2 ("perf_event" ) > 0 )
41
+ skel -> rodata -> use_cgroup_v2 = 1 ;
42
+
35
43
if (syscall_summary_bpf__load (skel ) < 0 ) {
36
44
fprintf (stderr , "failed to load syscall summary bpf skeleton\n" );
37
45
return -1 ;
@@ -42,6 +50,9 @@ int trace_prepare_bpf_summary(enum trace_summary_mode mode)
42
50
return -1 ;
43
51
}
44
52
53
+ if (mode == SUMMARY__BY_CGROUP )
54
+ read_all_cgroups (& cgroups );
55
+
45
56
return 0 ;
46
57
}
47
58
@@ -88,9 +99,13 @@ static double rel_stddev(struct syscall_stats *stat)
88
99
* per-cpu analysis so it's keyed by the syscall number to combine stats
89
100
* from different CPUs. And syscall_data always has a syscall_node so
90
101
* it can effectively work as flat hierarchy.
102
+ *
103
+ * For per-cgroup stats, it uses two-level data structure like thread
104
+ * syscall_data is keyed by CGROUP and has an array of node which
105
+ * represents each syscall for the cgroup.
91
106
*/
92
107
struct syscall_data {
93
- int key ; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU */
108
+ u64 key ; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */
94
109
int nr_events ;
95
110
int nr_nodes ;
96
111
u64 total_time ;
@@ -191,7 +206,7 @@ static int print_thread_stat(struct syscall_data *data, FILE *fp)
191
206
192
207
qsort (data -> nodes , data -> nr_nodes , sizeof (* data -> nodes ), nodecmp );
193
208
194
- printed += fprintf (fp , " thread (%d), " , data -> key );
209
+ printed += fprintf (fp , " thread (%d), " , ( int ) data -> key );
195
210
printed += fprintf (fp , "%d events\n\n" , data -> nr_events );
196
211
197
212
printed += fprintf (fp , " syscall calls errors total min avg max stddev\n" );
@@ -283,6 +298,75 @@ static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp)
283
298
return printed ;
284
299
}
285
300
301
+ static int update_cgroup_stats (struct hashmap * hash , struct syscall_key * map_key ,
302
+ struct syscall_stats * map_data )
303
+ {
304
+ struct syscall_data * data ;
305
+ struct syscall_node * nodes ;
306
+
307
+ if (!hashmap__find (hash , map_key -> cgroup , & data )) {
308
+ data = zalloc (sizeof (* data ));
309
+ if (data == NULL )
310
+ return - ENOMEM ;
311
+
312
+ data -> key = map_key -> cgroup ;
313
+ if (hashmap__add (hash , data -> key , data ) < 0 ) {
314
+ free (data );
315
+ return - ENOMEM ;
316
+ }
317
+ }
318
+
319
+ /* update thread total stats */
320
+ data -> nr_events += map_data -> count ;
321
+ data -> total_time += map_data -> total_time ;
322
+
323
+ nodes = reallocarray (data -> nodes , data -> nr_nodes + 1 , sizeof (* nodes ));
324
+ if (nodes == NULL )
325
+ return - ENOMEM ;
326
+
327
+ data -> nodes = nodes ;
328
+ nodes = & data -> nodes [data -> nr_nodes ++ ];
329
+ nodes -> syscall_nr = map_key -> nr ;
330
+
331
+ /* each thread has an entry for each syscall, just use the stat */
332
+ memcpy (& nodes -> stats , map_data , sizeof (* map_data ));
333
+ return 0 ;
334
+ }
335
+
336
+ static int print_cgroup_stat (struct syscall_data * data , FILE * fp )
337
+ {
338
+ int printed = 0 ;
339
+ struct cgroup * cgrp = __cgroup__find (& cgroups , data -> key );
340
+
341
+ qsort (data -> nodes , data -> nr_nodes , sizeof (* data -> nodes ), nodecmp );
342
+
343
+ if (cgrp )
344
+ printed += fprintf (fp , " cgroup %s," , cgrp -> name );
345
+ else
346
+ printed += fprintf (fp , " cgroup id:%lu," , (unsigned long )data -> key );
347
+
348
+ printed += fprintf (fp , " %d events\n\n" , data -> nr_events );
349
+
350
+ printed += fprintf (fp , " syscall calls errors total min avg max stddev\n" );
351
+ printed += fprintf (fp , " (msec) (msec) (msec) (msec) (%%)\n" );
352
+ printed += fprintf (fp , " --------------- -------- ------ -------- --------- --------- --------- ------\n" );
353
+
354
+ printed += print_common_stats (data , fp );
355
+ printed += fprintf (fp , "\n\n" );
356
+
357
+ return printed ;
358
+ }
359
+
360
/*
 * Print summaries for all collected cgroups.
 * Returns the total number of characters written.
 */
static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp)
{
	int idx;
	int total = 0;

	for (idx = 0; idx < nr_data; idx++)
		total += print_cgroup_stat(data[idx], fp);

	return total;
}
369
+
286
370
int trace_print_bpf_summary (FILE * fp )
287
371
{
288
372
struct bpf_map * map = skel -> maps .syscall_stats_map ;
@@ -305,10 +389,19 @@ int trace_print_bpf_summary(FILE *fp)
305
389
struct syscall_stats stat ;
306
390
307
391
if (!bpf_map__lookup_elem (map , & key , sizeof (key ), & stat , sizeof (stat ), 0 )) {
308
- if (skel -> rodata -> aggr_mode == SYSCALL_AGGR_THREAD )
392
+ switch (skel -> rodata -> aggr_mode ) {
393
+ case SYSCALL_AGGR_THREAD :
309
394
update_thread_stats (& schash , & key , & stat );
310
- else
395
+ break ;
396
+ case SYSCALL_AGGR_CPU :
311
397
update_total_stats (& schash , & key , & stat );
398
+ break ;
399
+ case SYSCALL_AGGR_CGROUP :
400
+ update_cgroup_stats (& schash , & key , & stat );
401
+ break ;
402
+ default :
403
+ break ;
404
+ }
312
405
}
313
406
314
407
prev_key = & key ;
@@ -325,10 +418,19 @@ int trace_print_bpf_summary(FILE *fp)
325
418
326
419
qsort (data , nr_data , sizeof (* data ), datacmp );
327
420
328
- if (skel -> rodata -> aggr_mode == SYSCALL_AGGR_THREAD )
421
+ switch (skel -> rodata -> aggr_mode ) {
422
+ case SYSCALL_AGGR_THREAD :
329
423
printed += print_thread_stats (data , nr_data , fp );
330
- else
424
+ break ;
425
+ case SYSCALL_AGGR_CPU :
331
426
printed += print_total_stats (data , nr_data , fp );
427
+ break ;
428
+ case SYSCALL_AGGR_CGROUP :
429
+ printed += print_cgroup_stats (data , nr_data , fp );
430
+ break ;
431
+ default :
432
+ break ;
433
+ }
332
434
333
435
for (i = 0 ; i < nr_data && data ; i ++ ) {
334
436
free (data [i ]-> nodes );
@@ -343,5 +445,14 @@ int trace_print_bpf_summary(FILE *fp)
343
445
344
446
void trace_cleanup_bpf_summary (void )
345
447
{
448
+ if (!RB_EMPTY_ROOT (& cgroups )) {
449
+ struct cgroup * cgrp , * tmp ;
450
+
451
+ rbtree_postorder_for_each_entry_safe (cgrp , tmp , & cgroups , node )
452
+ cgroup__put (cgrp );
453
+
454
+ cgroups = RB_ROOT ;
455
+ }
456
+
346
457
syscall_summary_bpf__destroy (skel );
347
458
}
0 commit comments