Skip to content

Update results and scripts for GlareDB #363

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions glaredb/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# The binary
/glaredb

# Any of the hits files
/data

# Result data; copy it manually to the right spot if you want to keep it.
results.csv
results.json
50 changes: 36 additions & 14 deletions glaredb/benchmark.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,42 @@
#!/bin/bash
#!/usr/bin/env bash

# Install
set -e

sudo apt-get install -y unzip
curl https://glaredb.com/install.sh | sh
repo_root=$(git rev-parse --show-toplevel)
script_dir=$(dirname "$0")

wget https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits_compatible/athena/hits.parquet
if [[ "$(basename "$repo_root")" == "glaredb" ]]; then
# Inside glaredb repo, build from source.
cargo build --release --bin glaredb
cp "${repo_root}/target/release/glaredb" "${script_dir}/glaredb"
else
# Not in glaredb repo, use prebuilt binary.
export GLAREDB_INSTALL_DIR="${script_dir}"
export GLAREDB_VERSION="v25.5.2"
curl -fsSL https://glaredb.com/install.sh | sh
fi

cat queries.sql | while read -r query
do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches
# Get the data.
mkdir -p "${script_dir}/data"
pushd "${script_dir}/data"

for i in $(seq 1 3); do
./glaredb --timing --query "${query}"
done;
done 2>&1 | tee log.txt
mode="${1:-single}" # Default to 'single' if no arg given.
case "${mode}" in
single)
wget --continue https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits_compatible/athena/hits.parquet
;;
partitioned)
seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet'
;;
*)
echo "Invalid argument to 'benchmark.sh', expected 'single' or 'partitioned'"
exit 1
;;
esac
popd

cat log.txt | grep -oP 'Time: \d+\.\d+s|Error' | sed -r -e 's/Time: ([0-9]+\.[0-9]+)s/\1/; s/Error/null/' | awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
# Ensure working directory is the script dir. The view that gets created uses a
# relative path.
pushd "${script_dir}"

./run.sh "${mode}"
3 changes: 3 additions & 0 deletions glaredb/create_single.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Temporary view over the single-file hits dataset.
-- The REPLACE clause swaps the stored EventDate column for an explicit
-- DATE cast so downstream queries see it with DATE type.
-- NOTE(review): the parquet path is relative — the caller must run glaredb
-- from this directory for the view to resolve (benchmark.sh pushd's here).
CREATE TEMP VIEW hits AS
SELECT * REPLACE (EventDate::DATE AS EventDate)
FROM read_parquet('./data/hits.parquet');
86 changes: 43 additions & 43 deletions glaredb/queries.sql

Large diffs are not rendered by default.

92 changes: 46 additions & 46 deletions glaredb/results/c6a.4xlarge.json
Original file line number Diff line number Diff line change
@@ -1,55 +1,55 @@
{
"system": "GlareDB",
"date": "2024-02-02",
"system": "GlareDB (Parquet, single)",
"date": "2025-05-06",
"machine": "c6a.4xlarge, 500gb gp2",
"cluster_size": 1,
"comment": "",
"tags": ["Rust", "serverless"],
"load_time": 0,
"data_size": 14779976446,
"result": [
[0.221,0.070,0.069],
[0.509,0.425,0.424],
[0.569,0.462,0.466],
[0.787,0.483,0.470],
[1.339,1.220,1.200],
[1.800,1.644,1.649],
[0.490,0.418,0.410],
[0.509,0.423,0.423],
[2.009,1.880,1.920],
[3.383,3.134,3.147],
[0.950,0.695,0.702],
[0.891,0.743,0.721],
[1.787,1.646,1.654],
[4.032,3.007,2.974],
[1.911,1.746,1.762],
[1.538,1.321,1.360],
[3.746,3.335,3.327],
[3.611,3.064,2.993],
[7.835,6.293,6.414],
[0.882,0.469,0.470],
[9.903,1.943,1.947],
[11.653,2.182,2.172],
[22.503,4.410,4.416],
[56.481,11.754,11.769],
[3.039,0.925,0.917],
[1.132,0.854,0.855],
[2.939,0.991,0.973],
[9.958,2.688,2.695],
[9.431,5.639,5.614],
[1.027,0.872,0.814],
[2.611,1.508,1.497],
[6.177,1.887,1.960],
[9.675,9.095,8.891],
[12.268,7.139,7.063],
[12.675,7.661,7.671],
[2.418,2.250,2.210],
[9.998,2.095,2.066],
[9.273,2.782,2.722],
[10.015,2.085,2.079],
[18.876,3.284,3.317],
[2.963,0.939,0.917],
[2.165,0.973,0.936],
[1.380,0.901,0.864]
]
[0.044,0.038,0.037],
[0.147,0.131,0.134],
[0.231,0.207,0.204],
[0.311,0.160,0.152],
[1.064,1.010,1.005],
[1.072,1.008,1.038],
[0.127,0.111,0.111],
[0.159,0.139,0.146],
[1.583,1.512,1.480],
[1.919,1.839,1.827],
[0.598,0.535,0.528],
[0.680,0.614,0.616],
[1.105,1.013,0.995],
[2.921,2.089,2.053],
[1.228,1.109,1.106],
[1.382,1.349,1.330],
[2.997,2.447,2.431],
[2.707,2.086,2.092],
[5.820,4.520,4.625],
[0.246,0.193,0.193],
[9.619,1.751,1.761],
[11.123,1.493,1.476],
[21.896,3.584,3.538],
[55.927,14.498,14.510],
[2.526,0.884,0.873],
[1.022,0.974,0.964],
[2.544,1.081,1.077],
[9.457,1.559,1.503],
[11.226,12.444,12.082],
[4.675,4.722,4.769],
[2.161,1.257,1.251],
[5.866,1.583,1.582],
[7.481,6.244,6.247],
[10.969,3.891,3.921],
[11.092,4.200,4.157],
[1.472,1.433,1.446],
[0.243,0.191,0.191],
[0.180,0.154,0.153],
[0.180,0.127,0.124],
[0.444,0.348,0.343],
[0.102,0.075,0.075],
[0.089,0.069,0.074],
[0.094,0.079,0.079]
]
}
55 changes: 0 additions & 55 deletions glaredb/results/c6a.metal.json

This file was deleted.

57 changes: 57 additions & 0 deletions glaredb/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env bash
#
# Run the benchmark queries against a local ./glaredb binary and record
# per-query timings.
#
# Usage:   ./run.sh single
# Inputs:  ./queries.sql (one query per line), ./glaredb binary,
#          create_single.sql (view definition, selected by $1).
# Outputs: results.json — one [t1,t2,t3] array per query;
#          results.csv  — query_num,iteration,duration rows.

set -eu
set -o pipefail

# Pick the view-creation script matching the requested data layout.
case "$1" in
    single)
        create_sql_file="create_single.sql"
        ;;
    *)
        echo "Invalid argument to 'run.sh', expected 'single'" >&2
        exit 1
        ;;
esac

readonly TRIES=3
QUERY_NUM=0

echo "[" > results.json
echo "query_num,iteration,duration" > results.csv

# Read queries straight from the file instead of 'cat | while' so the loop
# runs in the current shell (no pipeline subshell; 'exit 1' below aborts the
# whole script directly). 'IFS=' preserves whitespace inside each query, and
# '|| [ -n ... ]' keeps a final line that lacks a trailing newline.
while IFS= read -r query || [ -n "${query}" ]; do
    # Drop the page cache so the first of the three runs is cold-cache.
    sync
    echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null

    echo "${QUERY_NUM}: ${query}"

    # Comma-separate the per-query arrays (no comma before the first one).
    [ "${QUERY_NUM}" != 0 ] && echo "," >> results.json
    echo -n "  [" >> results.json

    for i in $(seq 1 "${TRIES}"); do
        output=$(./glaredb --init "${create_sql_file}" -c ".timer on" -c "${query}")
        # glaredb's timer prints "Execution duration: <seconds>"; normalize
        # to three decimal places.
        duration=$(awk -F': ' '/^Execution duration/ { printf "%.3f\n", $2 }' <<< "$output")

        echo "$output"

        # No duration line means the query errored out; abort the whole run
        # rather than recording a partial result set.
        if [ -z "${duration}" ]; then
            echo "Query failed" >&2
            exit 1
        fi

        # JSON results
        echo -n "${duration}" >> results.json
        [ "${i}" != "${TRIES}" ] && echo -n "," >> results.json

        # CSV results
        echo "${QUERY_NUM},${i},${duration}" >> results.csv
    done

    echo -n "]" >> results.json

    QUERY_NUM=$((QUERY_NUM + 1))
done < queries.sql

echo "" >> results.csv
echo "" >> results.json
echo "]" >> results.json