Skip to content

Commit

Permalink
new data G2 having id0 for CH mergetree primary key
Browse files Browse the repository at this point in the history
  • Loading branch information
jangorecki committed Mar 26, 2019
1 parent e082cd6 commit d456c8b
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 4 deletions.
2 changes: 1 addition & 1 deletion clickhouse-exec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ rm -f clickhouse/log/$1_$2_q*.csv
rm -f clickhouse/log/$1_$2.out clickhouse/log/$1_$2_q*.csv

# execute sql script on clickhouse
cat "clickhouse/$1-clickhouse.sql" | clickhouse-client -t -mn --max_memory_usage=109951162777600 --format=Pretty --output_format_pretty_max_rows 1 2> clickhouse/log/$1_$2.out
cat "clickhouse/$1-clickhouse.sql" | clickhouse-client -t -mn --max_memory_usage=$CH_MEM --format=Pretty --output_format_pretty_max_rows 1 2> clickhouse/log/$1_$2.out

# parse timings from clickhouse/log/[task]_[data_name].out and clickhouse/log/[task]_[data_name]_q[i]_r[j].csv
Rscript clickhouse/clickhouse-parse-log.R "$1" "$2"
Expand Down
13 changes: 13 additions & 0 deletions clickhouse/setup-clickhouse.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,16 @@ Rscript -e 'all_data=data.table::fread("data.csv")[task=="groupby", data]; setNa
#sudo EDITOR=vim visudo
#user ALL=NOPASSWD: /usr/sbin/service clickhouse-server start
#user ALL=NOPASSWD: /usr/sbin/service clickhouse-server stop

# prepare primary key for mergetree table engine
# Each awk call below copies a G1 csv to its G2 counterpart with one extra
# leading column: the header row (NR == 1) gets "id0", and every following row
# gets NR-1, i.e. a 1-based row number. OFS=',' is the separator print puts
# between the new value and the untouched original line ($0).
# NOTE(review): the 1e6 file gets a G2 csv here, but no CREATE TABLE for it is
# visible below - confirm whether that is intended.
awk -F',' -v OFS=',' 'NR == 1 {print "id0", $0; next} {print (NR-1), $0}' data/G1_1e6_1e2_0_0.csv > data/G2_1e6_1e2_0_0.csv

# same transformation for the four 1e9 datasets
awk -F',' -v OFS=',' 'NR == 1 {print "id0", $0; next} {print (NR-1), $0}' data/G1_1e9_1e2_0_0.csv > data/G2_1e9_1e2_0_0.csv
awk -F',' -v OFS=',' 'NR == 1 {print "id0", $0; next} {print (NR-1), $0}' data/G1_1e9_1e1_0_0.csv > data/G2_1e9_1e1_0_0.csv
awk -F',' -v OFS=',' 'NR == 1 {print "id0", $0; next} {print (NR-1), $0}' data/G1_1e9_2e0_0_0.csv > data/G2_1e9_2e0_0_0.csv
awk -F',' -v OFS=',' 'NR == 1 {print "id0", $0; next} {print (NR-1), $0}' data/G1_1e9_1e2_0_1.csv > data/G2_1e9_1e2_0_1.csv
# Create one MergeTree table per 1e9 G2 dataset, ordered by the new id0 column.
# All four tables share the same column list; IF NOT EXISTS means a table that
# already exists is not recreated.
clickhouse-client --query="CREATE TABLE IF NOT EXISTS G2_1e9_1e2_0_0 (id0 Int32, id1 String, id2 String, id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY (id0)"
clickhouse-client --query="CREATE TABLE IF NOT EXISTS G2_1e9_1e1_0_0 (id0 Int32, id1 String, id2 String, id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY (id0)"
clickhouse-client --query="CREATE TABLE IF NOT EXISTS G2_1e9_2e0_0_0 (id0 Int32, id1 String, id2 String, id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY (id0)"
clickhouse-client --query="CREATE TABLE IF NOT EXISTS G2_1e9_1e2_0_1 (id0 Int32, id1 String, id2 String, id3 String, id4 Int32, id5 Int32, id6 Int32, v1 Int32, v2 Int32, v3 Float64) ENGINE = MergeTree() ORDER BY (id0)"
# same for 1e8 so we can compare memory table engine vs mergetree
# NOTE(review): no 1e8 commands follow in the visible part of this script -
# this looks like a pending TODO; confirm.
10 changes: 9 additions & 1 deletion data.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,18 @@ groupby,G1_1e8_1e2_0_0,FALSE
groupby,G1_1e8_1e1_0_0,FALSE
groupby,G1_1e8_2e0_0_0,FALSE
groupby,G1_1e8_1e2_0_1,FALSE
groupby,G1_1e9_1e2_0_0,TRUE
groupby,G2_1e8_1e2_0_0,FALSE
groupby,G2_1e8_1e1_0_0,FALSE
groupby,G2_1e8_2e0_0_0,FALSE
groupby,G2_1e8_1e2_0_1,FALSE
groupby,G1_1e9_1e2_0_0,FALSE
groupby,G1_1e9_1e1_0_0,FALSE
groupby,G1_1e9_2e0_0_0,FALSE
groupby,G1_1e9_1e2_0_1,FALSE
groupby,G2_1e9_1e2_0_0,FALSE
groupby,G2_1e9_1e1_0_0,FALSE
groupby,G2_1e9_2e0_0_0,FALSE
groupby,G2_1e9_1e2_0_1,FALSE
join,J1_1e6_NA_0_0,FALSE
join,J1_1e7_NA_0_0,TRUE
join,J1_1e8_NA_0_0,FALSE
Expand Down
4 changes: 2 additions & 2 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ export BATCH=$(date +%s)
if [[ -f ./stop ]]; then echo "# Benchmark run $BATCH aborted. 'stop' file exists, should be removed before calling 'run.sh'" && exit; fi;

# confirm clickhouse is not running
source ./ch.sh
ch_active && echo "# Benchmark run $BATCH aborted. clickhouse-server is running, shut it down before calling 'run.sh'" && exit;
#source ./ch.sh
#ch_active && echo "# Benchmark run $BATCH aborted. clickhouse-server is running, shut it down before calling 'run.sh'" && exit;

# set lock
if [[ -f ./run.lock ]]; then echo "# Benchmark run $BATCH aborted. 'run.lock' file exists, this should be checked before calling 'run.sh'. Ouput redirection mismatch might have happened if writing output to same file as currently running $(cat ./run.lock) benchmark run" && exit; else echo $BATCH > run.lock; fi;
Expand Down

0 comments on commit d456c8b

Please sign in to comment.