From 52c15c29980c70250aacf0ad12c59232d27633cf Mon Sep 17 00:00:00 2001 From: Simon Boehm Date: Sun, 8 Aug 2021 09:04:54 +0200 Subject: [PATCH] Add script to generate toplev plots from benchmark --- README.md | 5 ++- benchmarks/c_bench/README.md | 7 ++++ benchmarks/c_bench/benchmark.sh | 20 --------- benchmarks/c_bench/c_bench.cpp | 67 ++++++++++++++++--------------- benchmarks/c_bench/plot_toplev.sh | 26 ++++++++++++ 5 files changed, 71 insertions(+), 54 deletions(-) delete mode 100755 benchmarks/c_bench/benchmark.sh create mode 100755 benchmarks/c_bench/plot_toplev.sh diff --git a/README.md b/README.md index 539bc3f..dac46e8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # lleaves 🍃 ![CI](https://github.com/siboehm/lleaves/workflows/CI/badge.svg) [![Documentation Status](https://readthedocs.org/projects/lleaves/badge/?version=latest)](https://lleaves.readthedocs.io/en/latest/?badge=latest) +[![Downloads](https://pepy.tech/badge/lleaves)](https://pepy.tech/project/lleaves) A LLVM-based compiler for LightGBM decision trees. @@ -38,7 +39,7 @@ mostly numerical features. |batchsize | 1 | 10| 100 | |---|---:|---:|---:| |LightGBM | 52.31μs | 84.46μs | 441.15μs | -|ONNX | 11.00μs | 36.74μs | 190.87μs | +|ONNX Runtime| 11.00μs | 36.74μs | 190.87μs | |Treelite | 28.03μs | 40.81μs | 94.14μs | |``lleaves`` | 9.61μs | 14.06μs | 31.88μs | @@ -47,7 +48,7 @@ mix of categorical and numerical features. |batchsize | 10,000 | 100,000 | 678,000 | |---|---:|---:|---:| |LightGBM | 95.14ms | 992.472ms | 7034.65ms | -|ONNX | 38.83ms | 381.40ms | 2849.42ms | +|ONNX Runtime | 38.83ms | 381.40ms | 2849.42ms | |Treelite | 38.15ms | 414.15ms | 2854.10ms | |``lleaves`` | 5.90ms | 56.96ms | 388.88ms | diff --git a/benchmarks/c_bench/README.md b/benchmarks/c_bench/README.md index 377eb7a..0aafa9f 100644 --- a/benchmarks/c_bench/README.md +++ b/benchmarks/c_bench/README.md @@ -9,4 +9,11 @@ mkdir build && cd build export LLEAVES_BENCHMARK_MODEL="mtpl2" cmake .. 
&& make ./benchmark +``` + +There is a script to use [toplev](https://github.com/andikleen/pmu-tools) to generate +some plots about CPU bottlenecks. +Make sure `toplev.py` is on your PATH and run: +```bash +./plot_toplev.sh ``` \ No newline at end of file diff --git a/benchmarks/c_bench/benchmark.sh b/benchmarks/c_bench/benchmark.sh deleted file mode 100755 index 1c50e6c..0000000 --- a/benchmarks/c_bench/benchmark.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -model_file='"../../tests/models/single_tree/model.txt"' - -# clean up cache -if [[ -f "llvm.o" ]]; then - rm "llvm.o" -fi - -python -c "import lleaves; lleaves.Model(model_file=${model_file}).compile(\"llvm.o\")" - -g++ c_bench.cpp -c -o c_bench.o - -# fails with: -# /usr/bin/ld: llvm.o: warning: relocation in read-only section `.text' -# /usr/bin/ld: c_bench.o: in function `main': -# c_bench.cpp:(.text+0x19c): undefined reference to `forest_root(double*, double*, int, int)' -# /usr/bin/ld: warning: creating DT_TEXTREL in a PIE -# collect2: error: ld returned 1 exit status -g++ c_bench.o llvm.o -o c_bench -L/usr/local/lib/libcnpy.so -lcnpy -lz --std=c++11 -lstdc++ diff --git a/benchmarks/c_bench/c_bench.cpp b/benchmarks/c_bench/c_bench.cpp index 34e76b6..9e9fcaf 100644 --- a/benchmarks/c_bench/c_bench.cpp +++ b/benchmarks/c_bench/c_bench.cpp @@ -1,39 +1,42 @@ +#include "c_bench.h" +#include "cnpy.h" +#include #include #include -#include -#include "cnpy.h" -#include "c_bench.h" #define N_REPEAT 20 - int main(int argc, char **argv) { - (void) argc; - (void) argv; - - char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL"); - std::cout << "Running model " << model_name << "\n"; - - std::ostringstream model_stream; - model_stream << "../../data/" << model_name << ".npy"; - std::string model_file = model_stream.str(); - cnpy::NpyArray arr = cnpy::npy_load(model_file); - - std::cout << "Batchsize: " << arr.shape[0] << "\n"; - - auto *loaded_data = arr.data(); - ulong n_preds = arr.shape[0] / (ulong) 6; 
- auto *out = (double *) (malloc(n_preds * sizeof(double))); - - std::array timings{}; - clock_t start, end; - for (size_t i = 0; i < N_REPEAT; ++i) { - start = clock(); - forest_root(loaded_data, out, (int) 0, (int) n_preds); - end = clock(); - - timings[i] = (double) (end - start) / CLOCKS_PER_SEC; - } - - std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end()) << "\n"; + (void)argc; + (void)argv; + + char *model_name = std::getenv("LLEAVES_BENCHMARK_MODEL"); + std::cout << "Running model " << model_name << "\n"; + + std::ostringstream model_stream; + model_stream << "../../data/" << model_name << ".npy"; + std::string model_file = model_stream.str(); + cnpy::NpyArray arr = cnpy::npy_load(model_file); + + std::cout << "Batchsize: " << arr.shape[0] << "\n"; + + auto *loaded_data = arr.data(); + ulong n_preds = arr.shape[0] / (ulong)6; + auto *out = (double *)(malloc(n_preds * sizeof(double))); + + std::array timings{}; + clock_t start, end; + std::cout << "starting...\n"; + for (size_t i = 0; i < N_REPEAT; ++i) { + start = clock(); + forest_root(loaded_data, out, (int)0, (int)n_preds); + end = clock(); + + timings[i] = (double)(end - start) / CLOCKS_PER_SEC; + } + std::cout << "...ending, took " + << std::accumulate(timings.begin(), timings.end(), 0.0) << "\n"; + + std::cout << "Runtime: " << *std::min_element(timings.begin(), timings.end()) + << "\n"; } diff --git a/benchmarks/c_bench/plot_toplev.sh b/benchmarks/c_bench/plot_toplev.sh new file mode 100755 index 0000000..03ca45b --- /dev/null +++ b/benchmarks/c_bench/plot_toplev.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash + +set -euo pipefail + +prefix=$1 +for model in "NYC_taxi" "mtpl2"; do + export LLEAVES_BENCHMARK_MODEL=${model} + pushd build || exit 1 + cmake .. 
&& make + + # high level overview plot + run_id="${model}_1v_${prefix}" + toplev.py -l1 -v -I 100 -x, -o "../${run_id}.csv" --no-desc --core C0 taskset -c 0 ./benchmark &&\ + tl-barplot.py --cpu C0 -o "../${run_id}.png" "../${run_id}.csv" + + # detailed level 2 metrics + run_id="${model}_2v_${prefix}" + toplev.py -l2 -v -D 500 -o "../${run_id}.txt" --no-desc --core C0 taskset -c 0 ./benchmark + + # detailed level 3 plot + run_id="${model}_3_${prefix}" + toplev.py -l3 -I 100 -x, -o "../${run_id}.csv" --no-desc --core C0 taskset -c 0 ./benchmark &&\ + tl-barplot.py --cpu C0 -o "../${run_id}.png" "../${run_id}.csv" + + popd || exit 1 +done; \ No newline at end of file