Commit 6eec311
multiple report status file
jangorecki committed Jan 2, 2019
1 parent e98d67c commit 6eec311
Showing 3 changed files with 19 additions and 47 deletions.
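This commit replaces the single, empty `report-success` marker file with a shared status file to which each report appends one line per successfully rendered task. The helper script `report.R` that both Rmd files now source is not part of this diff; below is a minimal, hypothetical sketch of the accessor it presumably provides, assuming the `report-done` file name that run.sh manipulates further down:

```r
# Hypothetical sketch -- report.R is not shown in this commit.
# Both index.Rmd and tech.Rmd source it and call get_report_status_file(),
# and run.sh deletes ./report-done before rendering, so presumably:
get_report_status_file = function(path = getwd()) {
  file.path(path, "report-done")
}
```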
21 changes: 4 additions & 17 deletions index.Rmd
@@ -18,23 +18,10 @@ Because we have been asked many times to do so, the first task and initial motiv
```{r load_deps, include=FALSE}
# rm -rf public && Rscript -e 'rmarkdown::render("index.Rmd", output_dir="public")' # has to be output_dir='public' as there is hardcode in benchplot for that path
knitr::opts_chunk$set(echo=FALSE, cache=FALSE)
-library(data.table)
+source("report.R")
+report_status_file = get_report_status_file()
source("helpers.R")
source("benchplot.R") # also creates 'code' for groupby
-fs = function(x) factor(x, levels=unique(x))
-kk = knitr::kable
-report_status_file = "report-success"
-if (file.exists(report_status_file)) file.remove(report_status_file)
```

-```{r exceptions, include=FALSE, eval=FALSE}
-# CURRENTLY NOT USED
-#exceptions = rbindlist(list(
-#  data.table(solution = "pandas", version="0.23.4", task = "groupby", in_rows = 1e9, data=c("G1_1e9_1e2_0_0"), comment = "lack of memory to read csv")#,
-#data.table(solution = "pandas", version="0.23.4", task = "join", in_rows = 1e9, data=c("X1e9_2c-Y1e9_2c"), comment = "lack of memory"),
-#data.table(solution = "dplyr", version="0.7.99.9000", task = "join", in_rows = 1e9, data=c("X1e9_2c-Y1e9_2c"), comment = "Cannot allocate memory"),
-#data.table(solution = "pydatatable", version="0.6.0", task = "join", in_rows = c(1e7,1e8,1e9), data=c("X1e7_2c-Y1e7_2c","X1e8_2c-Y1e8_2c","X1e9_2c-Y1e9_2c"), comment = "not yet implemented")
-#))
-```

```{r load_data, include=FALSE}
@@ -75,7 +62,7 @@ by_data = function(dt, .in_rows, .task) {
stop("no other task defined for decompose_dataname")
}
}
-wide = dt[run==1L, dcast(.SD, fs(data)+fs(question) ~ fs(solution), value.var="time_sec")]
+wide = dt[run==1L, dcast(.SD, ft(data)+ft(question) ~ ft(solution), value.var="time_sec")]
#d = rollup(wide, by=c("data","question"), j=lapply(.SD, sum), id=TRUE) # including sub totals
d = groupingsets(wide, by=c("data","question"), j=lapply(.SD, sum), id=TRUE, sets=list(c("data","question"), character(0)))
setorderv(d, "data", na.last = TRUE)
@@ -192,7 +179,7 @@ hours_took = paste0(hours_took, recent_lg[, .(sec_diff = timestamp[action=="fini
Benchmark run took around `r hours_took` hours.

```{r set_success_state, include=FALSE}
writeLines("", report_status_file)
cat("groupby\n", file=report_status_file, append=TRUE)
```

Report was generated on: `r format(Sys.time(), usetz=TRUE)`.
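The switch from `writeLines()` to `cat(..., append=TRUE)` means the status file now accumulates one line per rendered report instead of being overwritten with an empty marker. An illustration of the intended end state after both reports render, with the `report-done` file name assumed from run.sh below:

```r
# Illustration only: what report-done should contain after a full run.
cat("groupby\n", file = "report-done", append = TRUE)  # appended by index.Rmd
cat("tech\n",    file = "report-done", append = TRUE)  # appended by tech.Rmd
readLines("report-done")
# [1] "groupby" "tech"
```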
9 changes: 7 additions & 2 deletions run.sh
@@ -35,12 +35,17 @@ Rscript ./launcher.R
# publish report for all tasks
rm -f rmarkdown.out
rm -rf public
+rm -f report-done
Rscript -e 'rmarkdown::render("index.Rmd", output_dir="public")' > ./rmarkdown-index.out 2>&1 && echo "# Benchmark report produced"
Rscript -e 'rmarkdown::render("tech.Rmd", output_dir="public")' > ./rmarkdown-tech.out 2>&1 && echo "# Benchmark tech report produced"

-# publish benchmark, only if token file exists
+# publish benchmark, only if reports successfully generated (groupby, tech), token file exists
rm -rf db-benchmark.gh-pages
$DO_PUBLISH && [ -f ./report-success ] && [ -f ./token ] && ((./publish.sh && echo "# Benchmark results has been published") || echo "# Benchmark publish script failed")
$DO_PUBLISH \
&& [ -f ./report-done ] \
&& [ $(wc -l report-done | awk '{print $1}') -eq 2 ] \
&& [ -f ./token ] \
&& ((./publish.sh && echo "# Benchmark results has been published") || echo "# Benchmark publish script failed")
# remove run lock file
rm -f run.lock
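Because each report appends exactly one newline-terminated line, `wc -l` on report-done equals the number of reports that rendered successfully, and publishing is gated on that count being exactly 2 (groupby plus tech). A rough R restatement of the new shell condition, for illustration only:

```r
# Sketch of the run.sh publish gate, not part of the repository:
# publish only when both reports wrote their status line and a token exists.
ok_to_publish = file.exists("report-done") &&
  length(readLines("report-done")) == 2L &&
  file.exists("token")
```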
36 changes: 8 additions & 28 deletions tech.Rmd
@@ -8,10 +8,8 @@ output:

```{r init, echo=FALSE}
knitr::opts_chunk$set(echo=FALSE, cache=FALSE)
-library(data.table)
library(lattice)
-ft = function(x) factor(x, levels=unique(x))
-kk = knitr::kable
+source("report.R")
+report_status_file = get_report_status_file()
```

```{r loading}
@@ -20,30 +18,8 @@ l = fread("~/git/db-benchmark/logs.csv")[nzchar(solution)]
```

```{r cleaning}
-ftdata = function(x) {
-  k=ft(substr(x, 8, 10))
-  in_rows=ft(substr(x, 4, 6))
-  tsorted = function(x) {
-    ans = rep("unsorted", length(x))
-    ans[as.logical(x)] = "sorted"
-    ans
-  }
-  nasorted=ft(sprintf("%s%% NAs, %s", substr(x, 12, 12), tsorted(as.integer(substr(x, 14, 14)))))
-  list(k=k, in_rows=in_rows, nasorted=nasorted)
-}
-l = l[, c(list(nodename=ft(nodename), ibatch=as.integer(ft(as.character(batch))), solution=ft(solution),
-  action=ft(action), stderr=stderr, version=ft(version), git=ft(git), task=ft(task), data=ft(data), timestamp=timestamp, batch=batch),
-  ftdata(data))]
-l[, max_batch:=max(batch, na.rm=TRUE), c("nodename","solution","task","data")]
-l[, is_max_batch:=FALSE][batch==max_batch, is_max_batch:=TRUE][, max_batch:=NULL]
-d = d[, c(list(nodename=ft(nodename), ibatch=as.integer(ft(as.character(batch))), solution=ft(solution),
-  question=ft(question), run=run, version=ft(version), git=ft(git), task=ft(task), data=ft(data),
-  timestamp=timestamp, batch=batch, time_sec=time_sec),
-  ftdata(data))]
-d[, max_batch:=max(batch, na.rm=TRUE), c("nodename","solution","task","data")]
-d[, is_max_batch:=FALSE][batch==max_batch, is_max_batch:=TRUE][, max_batch:=NULL]
+l = clean_logs(l)
+d = clean_time(d)
ld = d[l[action=="start"], on=c("nodename","batch","solution","task","data","in_rows","k","nasorted"), nomatch=NA]
```
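The inline factor-recoding removed above is collapsed into `clean_logs()` and `clean_time()`, which are not shown in this diff but presumably live in report.R and wrap that same code. A sketch of `clean_logs` under that assumption, with `ft` and `ftdata` assumed to move into report.R alongside it:

```r
# Hypothetical: clean_logs presumably wraps the removed code above in report.R.
clean_logs = function(l) {
  l = l[, c(list(nodename=ft(nodename), ibatch=as.integer(ft(as.character(batch))),
                 solution=ft(solution), action=ft(action), stderr=stderr,
                 version=ft(version), git=ft(git), task=ft(task), data=ft(data),
                 timestamp=timestamp, batch=batch),
            ftdata(data))]
  # flag the most recent batch per node/solution/task/data combination
  l[, max_batch := max(batch, na.rm=TRUE), c("nodename","solution","task","data")]
  l[, is_max_batch := FALSE][batch == max_batch, is_max_batch := TRUE][, max_batch := NULL]
  l[]
}
```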

@@ -83,3 +59,7 @@ p = sapply(setNames(nm=as.character(unique(ll$solution))), simplify = FALSE, fun
)
sapply(seq_along(p), function(i) print(p[[i]], split=c(1, i, 1, length(p)), more=i!=length(p))) -> nul
```

+```{r set_success_state, include=FALSE}
+cat("tech\n", file=report_status_file, append=TRUE)
+```
