From 52c15c29980c70250aacf0ad12c59232d27633cf Mon Sep 17 00:00:00 2001
From: Simon Boehm <simon@siboehm.com>
Date: Sun, 8 Aug 2021 09:04:54 +0200
Subject: [PATCH] Add script to generate toplev plots from benchmark

---
 README.md                         |  5 ++-
 benchmarks/c_bench/README.md      |  7 ++++
 benchmarks/c_bench/benchmark.sh   | 20 ---------
 benchmarks/c_bench/c_bench.cpp    | 67 ++++++++++++++++---------------
 benchmarks/c_bench/plot_toplev.sh | 26 ++++++++++++
 5 files changed, 71 insertions(+), 54 deletions(-)
 delete mode 100755 benchmarks/c_bench/benchmark.sh
 create mode 100755 benchmarks/c_bench/plot_toplev.sh
diff --git a/README.md b/README.md
index 539bc3f..dac46e8 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # lleaves 🍃
 ![CI](https://github.com/siboehm/lleaves/workflows/CI/badge.svg)
 [![Documentation Status](https://readthedocs.org/projects/lleaves/badge/?version=latest)](https://lleaves.readthedocs.io/en/latest/?badge=latest)
+[![Downloads](https://pepy.tech/badge/lleaves)](https://pepy.tech/project/lleaves)
 
 A LLVM-based compiler for LightGBM decision trees.
 
@@ -38,7 +39,7 @@ mostly numerical features.
 |batchsize   | 1  | 10| 100 |
 |---|---:|---:|---:|
 |LightGBM   | 52.31μs   | 84.46μs   | 441.15μs |
-|ONNX   | 11.00μs | 36.74μs | 190.87μs  |
+|ONNX Runtime   | 11.00μs | 36.74μs | 190.87μs  |
 |Treelite   | 28.03μs   | 40.81μs   | 94.14μs  |
 |``lleaves``   | 9.61μs | 14.06μs | 31.88μs  |
 
@@ -47,7 +48,7 @@ mix of categorical and numerical features.
 |batchsize   | 10,000  | 100,000  | 678,000 |
 |---|---:|---:|---:|
 |LightGBM   | 95.14ms | 992.472ms   | 7034.65ms  |
-|ONNX   | 38.83ms  | 381.40ms  | 2849.42ms  |
+|ONNX Runtime   | 38.83ms  | 381.40ms  | 2849.42ms  |
 |Treelite   | 38.15ms | 414.15ms  | 2854.10ms  |
 |``lleaves``  | 5.90ms  | 56.96ms | 388.88ms |
 
diff --git a/benchmarks/c_bench/README.md b/benchmarks/c_bench/README.md
index 377eb7a..0aafa9f 100644
--- a/benchmarks/c_bench/README.md
+++ b/benchmarks/c_bench/README.md
@@ -9,4 +9,11 @@ mkdir build && cd build
 export LLEAVES_BENCHMARK_MODEL="mtpl2"
 cmake .. && make
 ./benchmark
+```
+
+The `plot_toplev.sh` script uses [toplev](https://github.com/andikleen/pmu-tools) to generate
+plots of CPU bottlenecks.
+Make sure `toplev.py` and `tl-barplot.py` are on your PATH and run:
+```bash
+./plot_toplev.sh <some prefix>
 ```
\ No newline at end of file
diff --git a/benchmarks/c_bench/benchmark.sh b/benchmarks/c_bench/benchmark.sh
deleted file mode 100755
index 1c50e6c..0000000
--- a/benchmarks/c_bench/benchmark.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env bash
-
-model_file='"../../tests/models/single_tree/model.txt"'
-
-# clean up cache
-if [[ -f "llvm.o" ]]; then
-  rm "llvm.o"
-fi
-
-python -c "import lleaves; lleaves.Model(model_file=${model_file}).compile(\"llvm.o\")"
-
-g++ c_bench.cpp -c -o c_bench.o
-
-# fails with:
-# /usr/bin/ld: llvm.o: warning: relocation in read-only section `.text'
-# /usr/bin/ld: c_bench.o: in function `main':
-# c_bench.cpp:(.text+0x19c): undefined reference to `forest_root(double*, double*, int, int)'
-# /usr/bin/ld: warning: creating DT_TEXTREL in a PIE
-# collect2: error: ld returned 1 exit status
-g++ c_bench.o llvm.o -o c_bench -L/usr/local/lib/libcnpy.so -lcnpy -lz --std=c++11 -lstdc++
diff --git a/benchmarks/c_bench/c_bench.cpp b/benchmarks/c_bench/c_bench.cpp
index 34e76b6..9e9fcaf 100644
--- a/benchmarks/c_bench/c_bench.cpp
+++ b/benchmarks/c_bench/c_bench.cpp
@@ -1,39 +1,64 @@
+#include "c_bench.h"
+#include "cnpy.h"
+#include <algorithm>
+#include <array>
 #include <cstdlib>
+#include <ctime>
 #include <iostream>
-#include <algorithm>
-#include "cnpy.h"
-#include "c_bench.h"
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <vector>
 
 #define N_REPEAT 20
 
-
+// Benchmark driver for the compiled `forest_root` entry point.
+//
+// Loads "../../data/<model>.npy", where <model> is read from the
+// LLEAVES_BENCHMARK_MODEL environment variable, calls `forest_root`
+// N_REPEAT times and prints the summed and the minimum processor time
+// (as measured by `clock()`) in seconds.
+// Returns EXIT_FAILURE when the environment variable is not set.
 int main(int argc, char **argv) {
-    (void) argc;
-    (void) argv;
-
-    char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL");
-    std::cout << "Running model " << model_name << "\n";
-
-    std::ostringstream model_stream;
-    model_stream << "../../data/" << model_name << ".npy";
-    std::string model_file = model_stream.str();
-    cnpy::NpyArray arr = cnpy::npy_load(model_file);
-
-    std::cout << "Batchsize: " << arr.shape[0] << "\n";
-
-    auto *loaded_data = arr.data<double>();
-    ulong n_preds = arr.shape[0] / (ulong) 6;
-    auto *out = (double *) (malloc(n_preds * sizeof(double)));
-
-    std::array<double, N_REPEAT> timings{};
-    clock_t start, end;
-    for (size_t i = 0; i < N_REPEAT; ++i) {
-        start = clock();
-        forest_root(loaded_data, out, (int) 0, (int) n_preds);
-        end = clock();
-
-        timings[i] = (double) (end - start) / CLOCKS_PER_SEC;
-    }
-
-    std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end()) << "\n";
+  (void)argc;
+  (void)argv;
+
+  // Streaming a null `char *` into std::cout is undefined behaviour, so
+  // fail early when the model name is not configured.
+  const char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL");
+  if (model_name == nullptr) {
+    std::cerr << "LLEAVES_BENCHMARK_MODEL is not set\n";
+    return EXIT_FAILURE;
+  }
+  std::cout << "Running model " << model_name << "\n";
+
+  std::ostringstream model_stream;
+  model_stream << "../../data/" << model_name << ".npy";
+  std::string model_file = model_stream.str();
+  cnpy::NpyArray arr = cnpy::npy_load(model_file);
+
+  std::cout << "Batchsize: " << arr.shape[0] << "\n";
+
+  auto *loaded_data = arr.data<double>();
+  // NOTE(review): the divisor 6 is kept from the previous version; its
+  // meaning is not visible in this file -- confirm against the data layout.
+  ulong n_preds = arr.shape[0] / (ulong)6;
+  // A vector owns the output buffer so it is released on every exit path.
+  std::vector<double> out(n_preds);
+
+  std::array<double, N_REPEAT> timings{};
+  std::cout << "starting...\n";
+  for (size_t i = 0; i < N_REPEAT; ++i) {
+    const clock_t start = clock();
+    forest_root(loaded_data, out.data(), (int)0, (int)n_preds);
+    const clock_t end = clock();
+
+    timings[i] = (double)(end - start) / CLOCKS_PER_SEC;
+  }
+  std::cout << "...ending, took "
+            << std::accumulate(timings.begin(), timings.end(), 0.0) << "\n";
+
+  std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end())
+            << "\n";
+  return EXIT_SUCCESS;
 }
diff --git a/benchmarks/c_bench/plot_toplev.sh b/benchmarks/c_bench/plot_toplev.sh
new file mode 100755
index 0000000..03ca45b
--- /dev/null
+++ b/benchmarks/c_bench/plot_toplev.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+#
+# Profile the C benchmark with toplev (pmu-tools) and plot the results.
+#
+# Usage: ./plot_toplev.sh <prefix>
+#
+# For each model the benchmark is rebuilt in ./build and profiled three times
+# pinned to CPU 0. Results are written to the directory the script is run
+# from as <model>_<run>_<prefix>.csv/.png/.txt.
+# Requires toplev.py and tl-barplot.py on PATH.
+
+set -euo pipefail
+
+if [[ $# -lt 1 ]]; then
+  echo "Usage: $0 <prefix>" >&2
+  exit 1
+fi
+prefix=$1
+
+# The build directory may not exist yet on a fresh checkout.
+mkdir -p build
+
+for model in "NYC_taxi" "mtpl2"; do
+  export LLEAVES_BENCHMARK_MODEL=${model}
+  pushd build || exit 1
+  # `set -e` ignores failures on the left-hand side of `&&`, so each step is
+  # its own statement: a failed build or profiling run aborts the script.
+  cmake ..
+  make
+
+  # high level overview plot
+  run_id="${model}_1v_${prefix}"
+  toplev.py -l1 -v -I 100 -x, -o "../${run_id}.csv" --no-desc --core C0 taskset -c 0 ./benchmark
+  tl-barplot.py --cpu C0 -o "../${run_id}.png" "../${run_id}.csv"
+
+  # detailed level 2 metrics
+  run_id="${model}_2v_${prefix}"
+  toplev.py -l2 -v -D 500 -o "../${run_id}.txt" --no-desc --core C0 taskset -c 0 ./benchmark
+
+  # detailed level 3 plot
+  run_id="${model}_3_${prefix}"
+  toplev.py -l3 -I 100 -x, -o "../${run_id}.csv" --no-desc --core C0 taskset -c 0 ./benchmark
+  tl-barplot.py --cpu C0 -o "../${run_id}.png" "../${run_id}.csv"
+
+  popd || exit 1
+done
\ No newline at end of file