From e98d67cebe7545f6414f2381b9846270c453d17a Mon Sep 17 00:00:00 2001 From: jangorecki Date: Wed, 2 Jan 2019 14:51:17 +0530 Subject: [PATCH] generate report for technical measures --- run.sh | 3 +- tech.Rmd | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 tech.Rmd diff --git a/run.sh b/run.sh index 683deef7..885c91e2 100755 --- a/run.sh +++ b/run.sh @@ -35,7 +35,8 @@ Rscript ./launcher.R # publish report for all tasks rm -f rmarkdown.out rm -rf public -Rscript -e 'rmarkdown::render("index.Rmd", output_dir="public")' > ./rmarkdown.out 2>&1 && echo "# Benchmark report produced" +Rscript -e 'rmarkdown::render("index.Rmd", output_dir="public")' > ./rmarkdown-index.out 2>&1 && echo "# Benchmark report produced" +Rscript -e 'rmarkdown::render("tech.Rmd", output_dir="public")' > ./rmarkdown-tech.out 2>&1 && echo "# Benchmark tech report produced" # publish benchmark, only if token file exists rm -rf db-benchmark.gh-pages diff --git a/tech.Rmd b/tech.Rmd new file mode 100644 index 00000000..5498c0a0 --- /dev/null +++ b/tech.Rmd @@ -0,0 +1,88 @@ +--- +title: "Technical measures of db-benchmark" +output: + html_document: + self_contained: yes + toc: true +--- + +```{r init, echo=FALSE} +knitr::opts_chunk$set(echo=FALSE, cache=FALSE) +library(data.table) +library(lattice) +ft = function(x) factor(x, levels=unique(x)) +kk = knitr::kable +``` + +```{r loading} +# input paths are relative to this document (chunks are evaluated in the Rmd's directory by default), so the report renders from any checkout location +d = fread("time.csv")[!is.na(batch) & in_rows %in% c(1e7, 1e8, 1e9)] +l = fread("logs.csv")[nzchar(solution)] +``` + +```{r cleaning} +# ftdata: derive factors from fixed character positions of the data name: in_rows = chars 4-6, k = chars 8-10, NA percentage = char 12, sorted flag = char 14 +# NOTE(review): presumably data names are shaped like "G1_1e7_1e2_0_0" - confirm against the data generator +ftdata = function(x) { + k=ft(substr(x, 8, 10)) + in_rows=ft(substr(x, 4, 6)) + tsorted = function(x) { + ans = rep("unsorted", length(x)) + ans[as.logical(x)] = "sorted" + ans + } + nasorted=ft(sprintf("%s%% NAs, %s", substr(x, 12, 12), tsorted(as.integer(substr(x, 14, 14))))) + list(k=k, in_rows=in_rows, nasorted=nasorted) +} +l = l[, c(list(nodename=ft(nodename), 
ibatch=as.integer(ft(as.character(batch))), solution=ft(solution), + action=ft(action), stderr=stderr, version=ft(version), git=ft(git), task=ft(task), data=ft(data), timestamp=timestamp, batch=batch), + ftdata(data))] +l[, max_batch:=max(batch, na.rm=TRUE), c("nodename","solution","task","data")] +l[, is_max_batch:=FALSE][batch==max_batch, is_max_batch:=TRUE][, max_batch:=NULL] + +d = d[, c(list(nodename=ft(nodename), ibatch=as.integer(ft(as.character(batch))), solution=ft(solution), + question=ft(question), run=run, version=ft(version), git=ft(git), task=ft(task), data=ft(data), + timestamp=timestamp, batch=batch, time_sec=time_sec), + ftdata(data))] +d[, max_batch:=max(batch, na.rm=TRUE), c("nodename","solution","task","data")] +d[, is_max_batch:=FALSE][batch==max_batch, is_max_batch:=TRUE][, max_batch:=NULL] + +ld = d[l[action=="start"], on=c("nodename","batch","solution","task","data","in_rows","k","nasorted"), nomatch=NA] +``` + +## Incomplete timings of last run + +```{r completed} +ll = ld[i.is_max_batch==TRUE, .(completed=sum(!is.na(time_sec))), c("nodename","batch","solution","task","data","in_rows","k","nasorted")] +stopifnot(length(unique(ll$nodename))==1L) +``` + +### groupby + +```{r completed_groupby} +kk(ll[completed nul +```