Add script to generate toplev plots from benchmark

siboehm · Aug 8, 2021 · 52c15c2 · 52c15c2
1 parent 486c59f
commit 52c15c2
Show file tree

Hide file tree

Showing 5 changed files with 71 additions and 54 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 # lleaves 🍃
 ![CI](https://github.com/siboehm/lleaves/workflows/CI/badge.svg)
 [![Documentation Status](https://readthedocs.org/projects/lleaves/badge/?version=latest)](https://lleaves.readthedocs.io/en/latest/?badge=latest)
+[![Downloads](https://pepy.tech/badge/lleaves)](https://pepy.tech/project/lleaves)
 
 A LLVM-based compiler for LightGBM decision trees.
 
@@ -38,7 +39,7 @@ mostly numerical features.
 |batchsize   | 1  | 10| 100 |
 |---|---:|---:|---:|
 |LightGBM   | 52.31μs   | 84.46μs   | 441.15μs |
-|ONNX   | 11.00μs | 36.74μs | 190.87μs  |
+|ONNX Runtime | 11.00μs | 36.74μs | 190.87μs  |
 |Treelite   | 28.03μs   | 40.81μs   | 94.14μs  |
 |``lleaves``   | 9.61μs | 14.06μs | 31.88μs  |
 
@@ -47,7 +48,7 @@ mix of categorical and numerical features.
 |batchsize   | 10,000  | 100,000  | 678,000 |
 |---|---:|---:|---:|
 |LightGBM   | 95.14ms | 992.472ms   | 7034.65ms  |
-|ONNX   | 38.83ms  | 381.40ms  | 2849.42ms  |
+|ONNX Runtime | 38.83ms  | 381.40ms  | 2849.42ms  |
 |Treelite   | 38.15ms | 414.15ms  | 2854.10ms  |
 |``lleaves``  | 5.90ms  | 56.96ms | 388.88ms |
 

diff --git a/benchmarks/c_bench/README.md b/benchmarks/c_bench/README.md
@@ -9,4 +9,11 @@ mkdir build && cd build
 export LLEAVES_BENCHMARK_MODEL="mtpl2"
 cmake .. && make
 ./benchmark
+```
+
+The `plot_toplev.sh` script uses [toplev](https://github.com/andikleen/pmu-tools) to
+generate plots of CPU bottlenecks for the benchmark.
+Make sure `toplev.py` and `tl-barplot.py` are on your PATH, then run:
+```bash
+./plot_toplev.sh <some prefix>
 ```
diff --git a/benchmarks/c_bench/benchmark.sh b/benchmarks/c_bench/benchmark.sh
diff --git a/benchmarks/c_bench/c_bench.cpp b/benchmarks/c_bench/c_bench.cpp
@@ -1,39 +1,58 @@
+#include "c_bench.h"
+#include "cnpy.h"
+#include <algorithm>
+#include <array>
 #include <cstdlib>
+#include <ctime>
 #include <iostream>
-#include <algorithm>
-#include "cnpy.h"
-#include "c_bench.h"
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <vector>
 
 #define N_REPEAT 20
 
-
 int main(int argc, char **argv) {
-    (void) argc;
-    (void) argv;
-
-    char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL");
-    std::cout << "Running model " << model_name << "\n";
-
-    std::ostringstream model_stream;
-    model_stream << "../../data/" << model_name << ".npy";
-    std::string model_file = model_stream.str();
-    cnpy::NpyArray arr = cnpy::npy_load(model_file);
-
-    std::cout << "Batchsize: " << arr.shape[0] << "\n";
-
-    auto *loaded_data = arr.data<double>();
-    ulong n_preds = arr.shape[0] / (ulong) 6;
-    auto *out = (double *) (malloc(n_preds * sizeof(double)));
-
-    std::array<double, N_REPEAT> timings{};
-    clock_t start, end;
-    for (size_t i = 0; i < N_REPEAT; ++i) {
-        start = clock();
-        forest_root(loaded_data, out, (int) 0, (int) n_preds);
-        end = clock();
-
-        timings[i] = (double) (end - start) / CLOCKS_PER_SEC;
-    }
-
-    std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end()) << "\n";
+  (void)argc;
+  (void)argv;
+
+  // The benchmarked model is selected via LLEAVES_BENCHMARK_MODEL; getenv()
+  // returns nullptr when it is unset, and streaming a null char* is undefined.
+  const char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL");
+  if (model_name == nullptr) {
+    std::cerr << "LLEAVES_BENCHMARK_MODEL is not set\n";
+    return EXIT_FAILURE;
+  }
+  std::cout << "Running model " << model_name << "\n";
+
+  // Input rows are loaded from ../../data/<model>.npy relative to the CWD.
+  std::ostringstream model_stream;
+  model_stream << "../../data/" << model_name << ".npy";
+  std::string model_file = model_stream.str();
+  cnpy::NpyArray arr = cnpy::npy_load(model_file);
+
+  std::cout << "Batchsize: " << arr.shape[0] << "\n";
+
+  auto *loaded_data = arr.data<double>();
+  // NOTE(review): the divisor 6 is hard-coded; its meaning is not visible here
+  // -- confirm against how the .npy files are generated.
+  const std::size_t n_preds = arr.shape[0] / 6;
+  // Owning buffer instead of an unchecked, never-freed malloc().
+  std::vector<double> out(n_preds);
+
+  // Time N_REPEAT runs with clock(); report their sum and the fastest run.
+  std::array<double, N_REPEAT> timings{};
+  std::cout << "starting...\n";
+  for (std::size_t i = 0; i < N_REPEAT; ++i) {
+    const std::clock_t start = std::clock();
+    forest_root(loaded_data, out.data(), 0, static_cast<int>(n_preds));
+    const std::clock_t end = std::clock();
+
+    timings[i] = static_cast<double>(end - start) / CLOCKS_PER_SEC;
+  }
+  std::cout << "...ending, took "
+            << std::accumulate(timings.begin(), timings.end(), 0.0) << "\n";
+
+  std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end())
+            << "\n";
 }
diff --git a/benchmarks/c_bench/plot_toplev.sh b/benchmarks/c_bench/plot_toplev.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+#
+# Profile the C benchmark with toplev (pmu-tools) for the NYC_taxi and mtpl2
+# models, writing CSV/TXT reports and PNG bar plots into this script's directory.
+# Usage: ./plot_toplev.sh <some prefix>
+
+set -euo pipefail
+
+# Fail with a usage message instead of a bare "unbound variable" error.
+prefix="${1:?usage: $0 <some prefix>}"
+
+# All paths below are relative to this script's directory.
+cd "$(dirname "${BASH_SOURCE[0]}")"
+mkdir -p build
+
+for model in "NYC_taxi" "mtpl2"; do
+  export LLEAVES_BENCHMARK_MODEL=${model}
+  pushd build || exit 1
+  # Separate statements: `set -e` ignores a failure on the left side of `&&`.
+  cmake ..
+  make
+
+  # high level overview plot
+  run_id="${model}_1v_${prefix}"
+  toplev.py -l1 -v -I 100 -x, -o "../${run_id}.csv" --no-desc --core C0 taskset -c 0 ./benchmark
+  tl-barplot.py --cpu C0 -o "../${run_id}.png" "../${run_id}.csv"
+
+  # detailed level 2 metrics
+  run_id="${model}_2v_${prefix}"
+  toplev.py -l2 -v -D 500 -o "../${run_id}.txt" --no-desc --core C0 taskset -c 0 ./benchmark
+
+  # detailed level 3 plot
+  run_id="${model}_3_${prefix}"
+  toplev.py -l3 -I 100 -x, -o "../${run_id}.csv" --no-desc --core C0 taskset -c 0 ./benchmark
+  tl-barplot.py --cpu C0 -o "../${run_id}.png" "../${run_id}.csv"
+
+  popd || exit 1
+done