Skip to content

Commit

Permalink
perf ftrace latency: Introduce --bucket-range to ask for linear bucke…
Browse files Browse the repository at this point in the history
…ting

In addition to showing it exponentially, using log2() to figure out the
histogram index, allow for showing it linearly:

The preexisting mode, the default:

  # perf ftrace latency --use-nsec --use-bpf \
  			-T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -    2 ns |          0 |                                         |
       2 -    4 ns |          0 |                                         |
       4 -    8 ns |          0 |                                         |
       8 -   16 ns |          0 |                                         |
      16 -   32 ns |          0 |                                         |
      32 -   64 ns |          0 |                                         |
      64 -  128 ns |        238 | #                                       |
     128 -  256 ns |       1704 | ##########                              |
     256 -  512 ns |        672 | ###                                     |
     512 - 1024 ns |       4458 | ##########################              |
       1 -    2 us |        677 | ####                                    |
       2 -    4 us |          5 |                                         |
       4 -    8 us |          0 |                                         |
       8 -   16 us |          0 |                                         |
      16 -   32 us |          0 |                                         |
      32 -   64 us |          0 |                                         |
      64 -  128 us |          0 |                                         |
     128 -  256 us |          0 |                                         |
     256 -  512 us |          0 |                                         |
     512 - 1024 us |          0 |                                         |
       1 - ...  ms |          0 |                                         |
  #

The new histogram mode:

  # perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \
  			-T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -  151 ns |        265 | #                                       |
     151 -  301 ns |       1797 | ###########                             |
     301 -  451 ns |        258 | #                                       |
     451 -  601 ns |        289 | #                                       |
     601 -  751 ns |       2049 | #############                           |
     751 -  901 ns |        967 | ######                                  |
     901 - 1051 ns |        513 | ###                                     |
    1.05 - 1.20 us |        114 |                                         |
    1.20 - 1.35 us |        559 | ###                                     |
    1.35 - 1.50 us |        189 | #                                       |
    1.50 - 1.65 us |        137 |                                         |
    1.65 - 1.80 us |         32 |                                         |
    1.80 - 1.95 us |          2 |                                         |
    1.95 - 2.10 us |          0 |                                         |
    2.10 - 2.25 us |          1 |                                         |
    2.25 - 2.40 us |          1 |                                         |
    2.40 - 2.55 us |          0 |                                         |
    2.55 - 2.70 us |          0 |                                         |
    2.70 - 2.85 us |          0 |                                         |
    2.85 - 3.00 us |          1 |                                         |
    3.00 - ...  us |          4 |                                         |
  #

Co-developed-by: Gabriele Monaco <gmonaco@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20241112181214.1171244-3-acme@kernel.org
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
acmel committed Dec 10, 2024
1 parent 12115c6 commit e8536dd
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 13 deletions.
3 changes: 3 additions & 0 deletions tools/perf/Documentation/perf-ftrace.txt
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency'
--use-nsec::
Use nano-second instead of micro-second as a base unit of the histogram.

--bucket-range=::
Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.


OPTIONS for 'perf ftrace profile'
---------------------------------
Expand Down
66 changes: 53 additions & 13 deletions tools/perf/builtin-ftrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
if (ftrace->use_nsec)
num *= 1000;

i = log2(num);
if (i < 0)
if (!ftrace->bucket_range) {
i = log2(num);
if (i < 0)
i = 0;
} else {
// Less than 1 unit (us or ns), or, in the future,
// than the min latency desired.
i = 0;
if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
i = num / ftrace->bucket_range + 1;
}
if (i >= NUM_BUCKET)
i = NUM_BUCKET - 1;

Expand Down Expand Up @@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
" DURATION ", "COUNT", bar_total, "GRAPH");

bar_len = buckets[0] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",

printf(" %4d - %4d %s | %10d | %.*s%*s |\n",
0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");

for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
int start, stop;
const char *unit = use_nsec ? "ns" : "us";

if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = use_nsec ? "us" : "ms";
if (!ftrace->bucket_range) {
start = (1 << (i - 1));
stop = 1 << i;

if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = use_nsec ? "us" : "ms";
}
} else {
start = (i - 1) * ftrace->bucket_range + 1;
stop = i * ftrace->bucket_range + 1;

if (start >= 1000) {
double dstart = start / 1000.0,
dstop = stop / 1000.0;
printf(" %4.2f - %-4.2f", dstart, dstop);
unit = use_nsec ? "us" : "ms";
goto print_bucket_info;
}
}

printf(" %4d - %4d", start, stop);
print_bucket_info:
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
start, stop, unit, buckets[i], bar_len, bar,
printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
bar_total - bar_len, "");
}

bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
if (!ftrace->bucket_range) {
printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
} else {
int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;

if (upper_outlier >= 1000) {
double dstart = upper_outlier / 1000.0;

printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
} else {
printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
}
}
printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
bar_len, bar, bar_total - bar_len, "");

}
Expand Down Expand Up @@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv)
#endif
OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
"Use nano-second histogram"),
OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
"Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
OPT_PARENT(common_options),
};
const struct option profile_options[] = {
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/util/bpf_ftrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
return -1;
}

skel->rodata->bucket_range = ftrace->bucket_range;

/* don't need to set cpu filter for system-wide mode */
if (ftrace->target.cpu_list) {
ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
Expand Down
14 changes: 14 additions & 0 deletions tools/perf/util/bpf_skel/func_latency.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ int enabled = 0;
const volatile int has_cpu = 0;
const volatile int has_task = 0;
const volatile int use_nsec = 0;
const volatile unsigned int bucket_range;

SEC("kprobe/func")
int BPF_PROG(func_begin)
Expand Down Expand Up @@ -100,12 +101,25 @@ int BPF_PROG(func_end)
if (delta < 0)
return 0;

if (bucket_range != 0) {
delta /= cmp_base;
// Less than 1 unit (us or ns), or, in the future,
// than the min latency desired.
key = 0;
if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
key = delta / bucket_range + 1;
if (key >= NUM_BUCKET)
key = NUM_BUCKET - 1;
}
goto do_lookup;
}
// calculate index using delta
for (key = 0; key < (NUM_BUCKET - 1); key++) {
if (delta < (cmp_base << key))
break;
}

do_lookup:
hist = bpf_map_lookup_elem(&latency, &key);
if (!hist)
return 0;
Expand Down
1 change: 1 addition & 0 deletions tools/perf/util/ftrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ struct perf_ftrace {
unsigned long percpu_buffer_size;
bool inherit;
bool use_nsec;
unsigned int bucket_range;
int graph_depth;
int func_stack_trace;
int func_irq_info;
Expand Down

0 comments on commit e8536dd

Please sign in to comment.