From ae956ba2e7700761828e2e9a24eed183e1a6b3fc Mon Sep 17 00:00:00 2001 From: Martin Maechler Date: Sat, 23 Sep 2023 21:06:00 +0200 Subject: [PATCH 01/88] run correctly even when is.atomic(NULL) becomes FALSE --- R/data.table.R | 2 +- R/frank.R | 2 +- R/fread.R | 2 +- R/print.data.table.R | 3 ++- R/setkey.R | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 473cf6e76..801482147 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -882,7 +882,7 @@ replace_dot_alias = function(e) { bynames = allbyvars = NULL # the rest now fall through } else bynames = names(byval) - if (is.atomic(byval)) { + if (is.atomic(byval) || is.null(byval)) { if (is.character(byval) && length(byval)<=ncol(x) && !(is.name(bysub) && bysub %chin% names_x) ) { stopf("'by' appears to evaluate to column names but isn't c() or key(). Use by=list(...) if you can. Otherwise, by=eval%s should work. This is for efficiency so data.table can detect which columns are needed.", deparse(bysub)) } else { diff --git a/R/frank.R b/R/frank.R index ba90a83b9..419f5ea41 100644 --- a/R/frank.R +++ b/R/frank.R @@ -12,7 +12,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a .Call(Csetlistelt, xx, 1L, x) xx } - if (is.atomic(x)) { + if (is.atomic(x) || is.null(x)) { if (!missing(cols) && !is.null(cols)) stopf("x is a single vector, non-NULL 'cols' doesn't make sense") cols = 1L diff --git a/R/fread.R b/R/fread.R index f8b025d9c..e0337c591 100644 --- a/R/fread.R +++ b/R/fread.R @@ -135,7 +135,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (!allNA(colClasses)) stopf("colClasses is type 'logical' which is ok if all NA but it has some TRUE or FALSE values in it which is not allowed. Please consider the drop= or select= argument instead. 
See ?fread.") colClasses = NULL } - if (!is.null(colClasses) && is.atomic(colClasses)) { + if (!is.null(colClasses) && is.atomic(colClasses)) { ## future R can use if (is.atomic(.)) if (!is.character(colClasses)) stopf("colClasses is not type list or character vector") if (!length(colClasses)) { colClasses=NULL; diff --git a/R/print.data.table.R b/R/print.data.table.R index 16950fd11..7271ac458 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -141,7 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), } format.data.table = function (x, ..., justify="none") { - if (is.atomic(x) && !is.null(x)) { + if (is.atomic(x) && !is.null(x)) { ## future R can use if (is.atomic(x)) + stopf("Internal structure doesn't seem to be a list. Possibly corrupt data.table.") } do.call("cbind", lapply(x, format_col, ..., justify=justify)) diff --git a/R/setkey.R b/R/setkey.R index 3bd3f782c..5f3027a2d 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -169,7 +169,7 @@ is.sorted = function(x, by=NULL) { ORDERING_TYPES = c('logical', 'integer', 'double', 'complex', 'character') forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.last=FALSE) { - if (is.atomic(x)) { # including forderv(NULL) which returns error consistent with base::order(NULL), + if (is.atomic(x) || is.null(x)) { # including forderv(NULL) which returns error consistent with base::order(NULL), if (!missing(by) && !is.null(by)) stopf("x is a single vector, non-NULL 'by' doesn't make sense") by = NULL } else { From db5ba4135ffb95dca6d43f115c34c5786147c3ba Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 29 Oct 2023 20:22:24 +0100 Subject: [PATCH 02/88] mention survey in README (#5711) * mention survey in README * Michael suggestions Co-authored-by: Michael Chirico --------- Co-authored-by: Michael Chirico --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 46bbfed1e..37cedad27 100644 --- a/README.md +++ b/README.md @@ 
-1,6 +1,12 @@ # data.table +The data.table 2023 community survey is now live! Click on https://tinyurl.com/datatable-survey to fill it out. The survey will remain open until **December 1st, 2023**. + +In addition to filling out the survey, it would be great if you could share it with others who might be interested in participating. + +--- + [![CRAN status](https://cranchecks.info/badges/flavor/release/data.table)](https://cran.r-project.org/web/checks/check_results_data.table.html) [![R-CMD-check](https://github.com/Rdatatable/data.table/workflows/R-CMD-check/badge.svg)](https://github.com/Rdatatable/data.table/actions) From 5068e452c9e92df031210a8d7561af57c73e7c6e Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 3 Nov 2023 01:59:33 +0100 Subject: [PATCH 03/88] add inst/cc to gitignore (#5689) --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 00d0d0e8b..74c9043c0 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ data.table_*.tar.gz data.table.Rcheck src/Makevars +# Package install +inst/cc + # Emacs IDE files .emacs.desktop .emacs.desktop.lock From 2addb00fbae3b47ff5eaf9c7f65059f5e5925ebd Mon Sep 17 00:00:00 2001 From: sluga Date: Fri, 3 Nov 2023 02:00:42 +0100 Subject: [PATCH 04/88] Mention the 2023 data.table community survey (#5705) --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 37cedad27..f89112486 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,12 @@ In addition to filling out the survey, it would be great if you could share it w `data.table` provides a high-performance version of [base R](https://www.r-project.org/about.html)'s `data.frame` with syntax and feature enhancements for ease of use, convenience and programming speed. +--- + +**NEW:** Take part in the [data.table 2023 community survey](https://tinyurl.com/datatable-survey/) and help shape the future of the project! The survey closes on **December 1st**. + +--- + ## Why `data.table`? 
* concise syntax: fast to type, fast to read From dd9ee72ced50bc8ef32424f2671f4cca0468b007 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 3 Nov 2023 02:05:04 +0100 Subject: [PATCH 05/88] Fix typo in doc of `data.table()` (#5577) --- man/data.table.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/data.table.Rd b/man/data.table.Rd index ecc79e2a5..a5da7ebc4 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -212,7 +212,7 @@ The way to read this out loud is: "Take \code{DT}, subset rows by \code{i}, \emp X[c>1, sum(a), by=c] # get rows where c>1 is TRUE, and on those rows, get sum(a) grouped by 'c' X[Y, .(a, b), on="c"] # get rows where Y$c == X$c, and select columns 'X$a' and 'X$b' for those rows X[Y, .(a, i.a), on="c"] # get rows where Y$c == X$c, and then select 'X$a' and 'Y$a' (=i.a) - X[Y, sum(a*i.a), on="c" by=.EACHI] # for *each* 'Y$c', get sum(a*i.a) on matching rows in 'X$c' + X[Y, sum(a*i.a), on="c", by=.EACHI] # for *each* 'Y$c', get sum(a*i.a) on matching rows in 'X$c' X[, plot(a, b), by=c] # j accepts any expression, generates plot for each group and returns no data # see ?assign to add/update/delete columns by reference using the same consistent interface From 8afea02b1a68848c321f02c18164a120da1bc355 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Nov 2023 22:40:34 -0700 Subject: [PATCH 06/88] Update repo link for lubridate (#5603) --- R/IDateTime.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/IDateTime.R b/R/IDateTime.R index 4e6adf55e..185952fe7 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -315,8 +315,8 @@ clip_msec = function(secs, action) { # Adapted from Hadley Wickham's routines cited below to ensure # integer results. 
# http://gist.github.com/10238 -# See also Hadley's more advanced and complex lubridate package: -# http://github.com/hadley/lubridate +# See also Hadley et al's more advanced and complex lubridate package: +# https://github.com/tidyverse/lubridate # lubridate routines do not return integer values. ################################################################### From c0528ef24c45f4333fbb95b8b5e5dc7954be6514 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Nov 2023 22:44:09 -0700 Subject: [PATCH 07/88] Use new badges endpoint (#5555) Closes #5553 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f89112486..7f6c3c103 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ In addition to filling out the survey, it would be great if you could share it w --- -[![CRAN status](https://cranchecks.info/badges/flavor/release/data.table)](https://cran.r-project.org/web/checks/check_results_data.table.html) +[![CRAN status](https://badges.cranchecks.info/flavor/release/data.table.svg)](https://cran.r-project.org/web/checks/check_results_data.table.html) [![R-CMD-check](https://github.com/Rdatatable/data.table/workflows/R-CMD-check/badge.svg)](https://github.com/Rdatatable/data.table/actions) [![AppVeyor build status](https://ci.appveyor.com/api/projects/status/kayjdh5qtgymhoxr/branch/master?svg=true)](https://ci.appveyor.com/project/Rdatatable/data-table) [![Codecov test coverage](https://codecov.io/github/Rdatatable/data.table/coverage.svg?branch=master)](https://codecov.io/github/Rdatatable/data.table?branch=master) From e6076b02f746dd05d921ac355291fb42623f6c02 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Nov 2023 23:01:12 -0700 Subject: [PATCH 08/88] Revert "Mention the 2023 data.table community survey (#5705)" (#5716) This reverts commit 2addb00fbae3b47ff5eaf9c7f65059f5e5925ebd. 
--- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index 7f6c3c103..fbe2de22a 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,6 @@ In addition to filling out the survey, it would be great if you could share it w `data.table` provides a high-performance version of [base R](https://www.r-project.org/about.html)'s `data.frame` with syntax and feature enhancements for ease of use, convenience and programming speed. ---- - -**NEW:** Take part in the [data.table 2023 community survey](https://tinyurl.com/datatable-survey/) and help shape the future of the project! The survey closes on **December 1st**. - ---- - ## Why `data.table`? * concise syntax: fast to type, fast to read From ac2b737876c393e635e75d79c6865bbd5af97ee6 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 5 Nov 2023 01:22:36 +0100 Subject: [PATCH 09/88] setup pkgup GH actions workflow (#5690) * setup pkgup for data.table --- .github/workflows/pkgup.yaml | 68 ++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 .github/workflows/pkgup.yaml diff --git a/.github/workflows/pkgup.yaml b/.github/workflows/pkgup.yaml new file mode 100644 index 000000000..d1064cc41 --- /dev/null +++ b/.github/workflows/pkgup.yaml @@ -0,0 +1,68 @@ +# permissions and concurrency settings for GitHub Pages +permissions: + contents: read + pages: write + id-token: write +concurrency: + group: "pages" + cancel-in-progress: true + +on: [push] +jobs: + build: + name: data.table + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r@v2 + - name: cache-r-dependencies + uses: actions/cache@v3 + with: + path: ${{ env.R_LIBS_USER }}/* + key: library-cache-${{ github.run_id }} + restore-keys: library-cache + - name: setup-os-dependencies + run: | + sudo apt-get install -y libcurl4-openssl-dev + - 
name: setup-r-dependencies + run: | + Rscript -e 'stopifnot(file.copy("DESCRIPTION", file.path(tdir<-tempdir(), "PACKAGES"))); db<-available.packages(paste0("file://", tdir)); deps<-setdiff(tools::package_dependencies(read.dcf("DESCRIPTION", fields="Package")[[1L]], db, which="most")[[1L]], installed.packages(priority="high")[,"Package"]); if (length(deps)) { ap<-available.packages()[,"Version"]; ap<-ap[names(ap) %in% deps]; if (!all(deps%in%names(ap))) stop("dependencies are not avaiable in repository: ",paste(setdiff(deps, names(ap)), collapse=", ")); ip<-installed.packages()[,"Version"]; ip<-ip[names(ip) %in% deps]; pkgs<-ap[deps]>ip[deps]; install.packages(names(pkgs[pkgs|is.na(pkgs)]), INSTALL_opts="--html") }' + - name: build + run: | + echo "Revision:" $GITHUB_SHA >> ./DESCRIPTION + R CMD build . + - name: check + run: | + R CMD check --as-cran --no-manual $(ls -1t data.table_*.tar.gz | head -n 1) + - name: manual + if: github.ref == 'refs/heads/master' + run: | + cp -R ${{ env.R_LIBS_USER }} library + R CMD INSTALL --library="library" $(ls -1t data.table_*.tar.gz | head -n 1) --html + mkdir -p doc/html + cp /usr/share/R/doc/html/{left.jpg,up.jpg,Rlogo.svg,R.css,index.html} doc/html + Rscript -e 'utils::make.packages.html("library", docdir="doc")' + sed -i "s|file://|../..|g" doc/html/packages.html + mkdir -p public + mv doc public/doc + cp -r --parents library/*/{html,help,doc,demo,DESCRIPTION,README,NEWS,README.md,NEWS.md} public 2>/dev/null || : + sed -i 's|"/doc/html/|"/data.table/doc/html/|g' public/library/data.table/doc/index.html 2>/dev/null || : + - name: repo + if: github.ref == 'refs/heads/master' + run: | + mkdir -p public/src/contrib + mv $(ls -1t data.table_*.tar.gz | head -n 1) public/src/contrib + Rscript -e 'tools::write_PACKAGES("public/src/contrib", fields="Revision")' + - name: upload + if: github.ref == 'refs/heads/master' + uses: actions/upload-pages-artifact@v1 + with: + path: "public" + - name: deploy + if: github.ref == 
'refs/heads/master' + id: deployment + uses: actions/deploy-pages@v1 From 94e8fbe448f9dcd83fe6f2751dbe36b05b404ee0 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 5 Nov 2023 01:57:57 +0100 Subject: [PATCH 10/88] update_dev_pkg uses GH for R repo (#5720) --- R/devel.R | 2 +- man/update_dev_pkg.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/devel.R b/R/devel.R index 8bd7a1466..df77eb0e0 100644 --- a/R/devel.R +++ b/R/devel.R @@ -17,7 +17,7 @@ dcf.repo = function(pkg, repo, field, type) { dcf[dcf[,"Package"]==pkg, field][[1L]] } -update_dev_pkg = function(object="data.table", repo="https://Rdatatable.gitlab.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, ...) { +update_dev_pkg = function(object="data.table", repo="https://Rdatatable.github.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, ...) { # this works for any package, not just data.table pkg = object # perform package upgrade when new Revision present diff --git a/man/update_dev_pkg.Rd b/man/update_dev_pkg.Rd index 3db5b9831..9914138c8 100644 --- a/man/update_dev_pkg.Rd +++ b/man/update_dev_pkg.Rd @@ -5,7 +5,7 @@ Downloads and installs latest development version only when a new commit is available which has also passed all tests. Defaults are set to update \code{data.table}, other packages can be used as well. Their repository has to include git commit information in PACKAGES file. 
} \usage{update_dev_pkg(object="data.table", - repo="https://Rdatatable.gitlab.io/data.table", + repo="https://Rdatatable.github.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, \dots) } \arguments{ From e66f5dcef6a6a0258787fe0ef968760abae137a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20M=C3=A4chler?= Date: Sun, 5 Nov 2023 02:01:03 +0100 Subject: [PATCH 11/88] run correctly even when is.atomic(NULL) becomes FALSE (#5691) --- R/data.table.R | 2 +- R/frank.R | 2 +- R/fread.R | 2 +- R/print.data.table.R | 3 ++- R/setkey.R | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 473cf6e76..801482147 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -882,7 +882,7 @@ replace_dot_alias = function(e) { bynames = allbyvars = NULL # the rest now fall through } else bynames = names(byval) - if (is.atomic(byval)) { + if (is.atomic(byval) || is.null(byval)) { if (is.character(byval) && length(byval)<=ncol(x) && !(is.name(bysub) && bysub %chin% names_x) ) { stopf("'by' appears to evaluate to column names but isn't c() or key(). Use by=list(...) if you can. Otherwise, by=eval%s should work. 
This is for efficiency so data.table can detect which columns are needed.", deparse(bysub)) } else { diff --git a/R/frank.R b/R/frank.R index ba90a83b9..419f5ea41 100644 --- a/R/frank.R +++ b/R/frank.R @@ -12,7 +12,7 @@ frankv = function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c("a .Call(Csetlistelt, xx, 1L, x) xx } - if (is.atomic(x)) { + if (is.atomic(x) || is.null(x)) { if (!missing(cols) && !is.null(cols)) stopf("x is a single vector, non-NULL 'cols' doesn't make sense") cols = 1L diff --git a/R/fread.R b/R/fread.R index f8b025d9c..e0337c591 100644 --- a/R/fread.R +++ b/R/fread.R @@ -135,7 +135,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (!allNA(colClasses)) stopf("colClasses is type 'logical' which is ok if all NA but it has some TRUE or FALSE values in it which is not allowed. Please consider the drop= or select= argument instead. See ?fread.") colClasses = NULL } - if (!is.null(colClasses) && is.atomic(colClasses)) { + if (!is.null(colClasses) && is.atomic(colClasses)) { ## future R can use if (is.atomic(.)) if (!is.character(colClasses)) stopf("colClasses is not type list or character vector") if (!length(colClasses)) { colClasses=NULL; diff --git a/R/print.data.table.R b/R/print.data.table.R index 16950fd11..7271ac458 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -141,7 +141,8 @@ print.data.table = function(x, topn=getOption("datatable.print.topn"), } format.data.table = function (x, ..., justify="none") { - if (is.atomic(x) && !is.null(x)) { + if (is.atomic(x) && !is.null(x)) { ## future R can use if (is.atomic(x)) + stopf("Internal structure doesn't seem to be a list. 
Possibly corrupt data.table.") } do.call("cbind", lapply(x, format_col, ..., justify=justify)) diff --git a/R/setkey.R b/R/setkey.R index 3bd3f782c..5f3027a2d 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -169,7 +169,7 @@ is.sorted = function(x, by=NULL) { ORDERING_TYPES = c('logical', 'integer', 'double', 'complex', 'character') forderv = function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.last=FALSE) { - if (is.atomic(x)) { # including forderv(NULL) which returns error consistent with base::order(NULL), + if (is.atomic(x) || is.null(x)) { # including forderv(NULL) which returns error consistent with base::order(NULL), if (!missing(by) && !is.null(by)) stopf("x is a single vector, non-NULL 'by' doesn't make sense") by = NULL } else { From af82d403e375605e8d5da7ef9016bfa539068462 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 6 Nov 2023 11:23:21 +0100 Subject: [PATCH 12/88] fix warnings on CRAN #5696 (#5712) * fix warnings on CRAN #5696 --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9117c0fcb..d7ad5a99a 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14473,8 +14473,8 @@ options(datatable.rbindlist.check=NULL) # this option is set to NULL at the top if (.Platform$OS.type == 'windows') local({ lc_collate <- Sys.getlocale(c('LC_COLLATE')) lc_ctype <- Sys.getlocale(c('LC_CTYPE')) - Sys.setlocale('LC_COLLATE', "Chinese (Simplified)_China.936") - Sys.setlocale('LC_CTYPE', "Chinese (Simplified)_China.936") + suppressWarnings(Sys.setlocale('LC_COLLATE', "Chinese (Simplified)_China.936")) ## fix CRAN warning #5696 + suppressWarnings(Sys.setlocale('LC_CTYPE', "Chinese (Simplified)_China.936")) on.exit({ Sys.setlocale('LC_COLLATE', lc_collate) Sys.setlocale('LC_CTYPE', lc_ctype) From 4c633907a651f25a4e02c86e6bebb6f5c8c37fa8 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 7 Nov 2023 07:16:31 +0100 Subject: [PATCH 13/88] run workflow 
only on pushes to master (#5728) --- .github/workflows/pkgup.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pkgup.yaml b/.github/workflows/pkgup.yaml index d1064cc41..67541f4e6 100644 --- a/.github/workflows/pkgup.yaml +++ b/.github/workflows/pkgup.yaml @@ -7,7 +7,11 @@ concurrency: group: "pages" cancel-in-progress: true -on: [push] +on: + push: + branches: + - 'master' + jobs: build: name: data.table From 6b9d559606767562f7f7dd4c7842a9e4a9fb597c Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Wed, 15 Nov 2023 17:54:37 +0100 Subject: [PATCH 14/88] add missing links in forder.c (#5741) --- src/forder.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/forder.c b/src/forder.c index f2846828a..8a62e1de7 100644 --- a/src/forder.c +++ b/src/forder.c @@ -10,9 +10,9 @@ http://stereopsis.com/radix.html Previous version of this file was promoted into base R, see ?base::sort. - Denmark useR! presentation - Stanford DSC presentation - JSM presentation + Denmark useR! presentation https://github.com/Rdatatable/data.table/wiki/talks/useR2015_Matt.pdf + Stanford DSC presentation https://github.com/Rdatatable/data.table/wiki/talks/DSC2016_ParallelSort.pdf + JSM presentation https://github.com/Rdatatable/data.table/wiki/talks/JSM2018_Matt.pdf Techniques used : skewed groups are split in parallel finds unique bytes to save 256 sweeping From ec9b1e45060e7ff2deeef5c3f1533abd9abe7176 Mon Sep 17 00:00:00 2001 From: Martin Maechler Date: Mon, 20 Nov 2023 14:21:20 +0100 Subject: [PATCH 15/88] update 2 URLs --- NEWS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 025a7651b..52333e9b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -691,7 +691,7 @@ 1. In v1.13.0 (July 2020) native parsing of datetime was added to `fread` by Michael Chirico which dramatically improved performance. Before then datetime was read as type character by default which was slow. 
Since v1.13.0, UTC-marked datetime (e.g. `2020-07-24T10:11:12.134Z` where the final `Z` is present) has been read automatically as POSIXct and quickly. We provided the migration option `datatable.old.fread.datetime.character` to revert to the previous slow character behavior. We also added the `tz=` argument to control unmarked datetime; i.e. where the `Z` (or equivalent UTC postfix) is missing in the data. The default `tz=""` reads unmarked datetime as character as before, slowly. We gave you the ability to set `tz="UTC"` to turn on the new behavior and read unmarked datetime as UTC, quickly. R sessions that are running in UTC by setting the TZ environment variable, as is good practice and common in production, have also been reading unmarked datetime as UTC since v1.13.0, much faster. Note 1 of v1.13.0 (below in this file) ended `In addition to convenience, fread is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when tz="UTC" is provided.`. - At `rstudio::global(2021)`, Neal Richardson, Director of Engineering at Ursa Labs, compared Arrow CSV performance to `data.table` CSV performance, [Bigger Data With Ease Using Apache Arrow](https://www.rstudio.com/resources/rstudioglobal-2021/bigger-data-with-ease-using-apache-arrow/). He opened by comparing to `data.table` as his main point. Arrow was presented as 3 times faster than `data.table`. He talked at length about this result. However, no reproducible code was provided and we were not contacted in advance in case we had any comments. He mentioned New York Taxi data in his talk which is a dataset known to us as containing unmarked datetime. [Rebuttal](https://twitter.com/MattDowle/status/1360073970498875394). 
+ At `rstudio::global(2021)`, Neal Richardson, Director of Engineering at Ursa Labs, compared Arrow CSV performance to `data.table` CSV performance, [Bigger Data With Ease Using Apache Arrow](https://posit.co/resources/videos/bigger-data-with-ease-using-apache-arrow/). He opened by comparing to `data.table` as his main point. Arrow was presented as 3 times faster than `data.table`. He talked at length about this result. However, no reproducible code was provided and we were not contacted in advance in case we had any comments. He mentioned New York Taxi data in his talk which is a dataset known to us as containing unmarked datetime. [Rebuttal](https://twitter.com/MattDowle/status/1360073970498875394). `tz=`'s default is now changed from `""` to `"UTC"`. If you have been using `tz=` explicitly then there should be no change. The change to read UTC-marked datetime as POSIXct rather than character already happened in v1.13.0. The change now is that unmarked datetimes are now read as UTC too by default without needing to set `tz="UTC"`. None of the 1,017 CRAN packages directly using `data.table` are affected. As before, the migration option `datatable.old.fread.datetime.character` can still be set to TRUE to revert to the old character behavior. This migration option is temporary and will be removed in the near future. @@ -2136,7 +2136,7 @@ When `j` is a symbol (as in the quanteda and xgboost examples above) it will con 2. Just to state explicitly: data.table does not now depend on or require OpenMP. If you don't have it (as on CRAN's Mac it appears but not in general on Mac) then data.table should build, run and pass all tests just fine. -3. There are now 5,910 raw tests as reported by `test.data.table()`. Tests cover 91% of the 4k lines of R and 89% of the 7k lines of C. These stats are now known thanks to Jim Hester's [Covr](https://CRAN.R-project.org/package=covr) package and [Codecov.io](https://about.codecov.io/). 
If anyone is looking for something to help with, creating tests to hit the missed lines shown by clicking the `R` and `src` folders at the bottom [here](https://codecov.io/github/Rdatatable/data.table?branch=master) would be very much appreciated. +3. There are now 5,910 raw tests as reported by `test.data.table()`. Tests cover 91% of the 4k lines of R and 89% of the 7k lines of C. These stats are now known thanks to Jim Hester's [Covr](https://CRAN.R-project.org/package=covr) package and [Codecov.io](https://about.codecov.io/). If anyone is looking for something to help with, creating tests to hit the missed lines shown by clicking the `R` and `src` folders at the bottom [here](https://app.codecov.io/github/Rdatatable/data.table?branch=master) would be very much appreciated. 4. The FAQ vignette has been revised given the changes in v1.9.8. In particular, the very first FAQ. From 514fd3442eee878a604cbfb9030c38cd6c22f184 Mon Sep 17 00:00:00 2001 From: Martin Maechler Date: Mon, 20 Nov 2023 14:40:56 +0100 Subject: [PATCH 16/88] fix "lost braces" NOTE (--as-cran): here the {.} are *extraneous* --- man/data.table.Rd | 43 +++++++++++++++++++++--------------------- man/fread.Rd | 16 ++++++++-------- man/froll.Rd | 36 +++++++++++++++++------------------ man/fsort.Rd | 6 +++--- man/fwrite.Rd | 34 ++++++++++++++++----------------- man/openmp-utils.Rd | 24 +++++++++++------------ man/setops.Rd | 16 ++++++---------- man/special-symbols.Rd | 12 ++++++------ 8 files changed, 92 insertions(+), 95 deletions(-) diff --git a/man/data.table.Rd b/man/data.table.Rd index a5da7ebc4..502595d7c 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -62,13 +62,13 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac If \code{i} is a \code{data.table}, the columns in \code{i} to be matched against \code{x} can be specified using one of these ways: \itemize{ - \item{\code{on} argument (see below). 
It allows for both \code{equi-} and the newly implemented \code{non-equi} joins.} + \item \code{on} argument (see below). It allows for both \code{equi-} and the newly implemented \code{non-equi} joins. - \item{If not, \code{x} \emph{must be keyed}. Key can be set using \code{\link{setkey}}. If \code{i} is also keyed, then first \emph{key} column of \code{i} is matched against first \emph{key} column of \code{x}, second against second, etc.. + \item If not, \code{x} \emph{must be keyed}. Key can be set using \code{\link{setkey}}. If \code{i} is also keyed, then first \emph{key} column of \code{i} is matched against first \emph{key} column of \code{x}, second against second, etc.. If \code{i} is not keyed, then first column of \code{i} is matched against first \emph{key} column of \code{x}, second column of \code{i} against second \emph{key} column of \code{x}, etc\ldots - This is summarised in code as \code{min(length(key(x)), if (haskey(i)) length(key(i)) else ncol(i))}.} + This is summarised in code as \code{min(length(key(x)), if (haskey(i)) length(key(i)) else ncol(i))}. } Using \code{on=} is recommended (even during keyed joins) as it helps understand the code better and also allows for \emph{non-equi} joins. @@ -100,15 +100,15 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \item{by}{ Column names are seen as if they are variables (as in \code{j} when \code{with=TRUE}). The \code{data.table} is then grouped by the \code{by} and \code{j} is evaluated within each group. The order of the rows within each group is preserved, as is the order of the groups. 
\code{by} accepts: \itemize{ - \item{A single unquoted column name: e.g., \code{DT[, .(sa=sum(a)), by=x]}} + \item A single unquoted column name: e.g., \code{DT[, .(sa=sum(a)), by=x]} - \item{a \code{list()} of expressions of column names: e.g., \code{DT[, .(sa=sum(a)), by=.(x=x>0, y)]}} + \item a \code{list()} of expressions of column names: e.g., \code{DT[, .(sa=sum(a)), by=.(x=x>0, y)]} - \item{a single character string containing comma separated column names (where spaces are significant since column names may contain spaces even at the start or end): e.g., \code{DT[, sum(a), by="x,y,z"]}} + \item a single character string containing comma separated column names (where spaces are significant since column names may contain spaces even at the start or end): e.g., \code{DT[, sum(a), by="x,y,z"]} - \item{a character vector of column names: e.g., \code{DT[, sum(a), by=c("x", "y")]}} + \item a character vector of column names: e.g., \code{DT[, sum(a), by=c("x", "y")]} - \item{or of the form \code{startcol:endcol}: e.g., \code{DT[, sum(a), by=x:z]}} + \item or of the form \code{startcol:endcol}: e.g., \code{DT[, sum(a), by=x:z]} } \emph{Advanced:} When \code{i} is a \code{list} (or \code{data.frame} or \code{data.table}), \code{DT[i, j, by=.EACHI]} evaluates \code{j} for the groups in `DT` that each row in \code{i} joins to. That is, you can join (in \code{i}) and aggregate (in \code{j}) simultaneously. We call this \emph{grouping by each i}. See \href{https://stackoverflow.com/a/27004566/559784}{this StackOverflow answer} for a more detailed explanation until we \href{https://github.com/Rdatatable/data.table/issues/944}{roll out vignettes}. 
@@ -128,10 +128,10 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \item{roll}{ When \code{i} is a \code{data.table} and its row matches to all but the last \code{x} join column, and its value in the last \code{i} join column falls in a gap (including after the last observation in \code{x} for that group), then: \itemize{ - \item{\code{+Inf} (or \code{TRUE}) rolls the \emph{prevailing} value in \code{x} forward. It is also known as last observation carried forward (LOCF).} - \item{\code{-Inf} rolls backwards instead; i.e., next observation carried backward (NOCB).} - \item{finite positive or negative number limits how far values are carried forward or backward.} - \item{"nearest" rolls the nearest value instead.} + \item \code{+Inf} (or \code{TRUE}) rolls the \emph{prevailing} value in \code{x} forward. It is also known as last observation carried forward (LOCF). + \item \code{-Inf} rolls backwards instead; i.e., next observation carried backward (NOCB). + \item finite positive or negative number limits how far values are carried forward or backward. + \item "nearest" rolls the nearest value instead. } Rolling joins apply to the last join column, generally a date but can be any variable. It is particularly fast using a modified binary search. @@ -139,8 +139,8 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \item{rollends}{ A logical vector length 2 (a single logical is recycled) indicating whether values falling before the first value or after the last value for a group should be rolled as well. \itemize{ - \item{If \code{rollends[2]=TRUE}, it will roll the last value forward. \code{TRUE} by default for LOCF and \code{FALSE} for NOCB rolls.} - \item{If \code{rollends[1]=TRUE}, it will roll the first value backward. \code{TRUE} by default for NOCB and \code{FALSE} for LOCF rolls.} + \item If \code{rollends[2]=TRUE}, it will roll the last value forward. 
\code{TRUE} by default for LOCF and \code{FALSE} for NOCB rolls. + \item If \code{rollends[1]=TRUE}, it will roll the first value backward. \code{TRUE} by default for NOCB and \code{FALSE} for LOCF rolls. } When \code{roll} is a finite number, that limit is also applied when rolling the ends.} @@ -163,15 +163,16 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \item{on}{ Indicate which columns in \code{x} should be joined with which columns in \code{i} along with the type of binary operator to join with (see non-equi joins below on this). When specified, this overrides the keys set on \code{x} and \code{i}. When \code{.NATURAL} keyword provided then \emph{natural join} is made (join on common columns). There are multiple ways of specifying the \code{on} argument: \itemize{ - \item{As an unnamed character vector, e.g., \code{X[Y, on=c("a", "b")]}, used when columns \code{a} and \code{b} are common to both \code{X} and \code{Y}.} - \item{\emph{Foreign key joins}: As a \emph{named} character vector when the join columns have different names in \code{X} and \code{Y}. + \item As an unnamed character vector, e.g., \code{X[Y, on=c("a", "b")]}, used when columns \code{a} and \code{b} are common to both \code{X} and \code{Y}. + \item \emph{Foreign key joins}: As a \emph{named} character vector when the join columns have different names in \code{X} and \code{Y}. For example, \code{X[Y, on=c(x1="y1", x2="y2")]} joins \code{X} and \code{Y} by matching columns \code{x1} and \code{x2} in \code{X} with columns \code{y1} and \code{y2} in \code{Y}, respectively. From v1.9.8, you can also express foreign key joins using the binary operator \code{==}, e.g. \code{X[Y, on=c("x1==y1", "x2==y2")]}. 
- NB: shorthand like \code{X[Y, on=c("a", V2="b")]} is also possible if, e.g., column \code{"a"} is common between the two tables.} - \item{For convenience during interactive scenarios, it is also possible to use \code{.()} syntax as \code{X[Y, on=.(a, b)]}.} - \item{From v1.9.8, (non-equi) joins using binary operators \code{>=, >, <=, <} are also possible, e.g., \code{X[Y, on=c("x>=a", "y<=b")]}, or for interactive use as \code{X[Y, on=.(x>=a, y<=b)]}.} + NB: shorthand like \code{X[Y, on=c("a", V2="b")]} is also possible if, e.g., column \code{"a"} is common between the two tables. + + \item For convenience during interactive scenarios, it is also possible to use \code{.()} syntax as \code{X[Y, on=.(a, b)]}. + \item From v1.9.8, (non-equi) joins using binary operators \code{>=, >, <=, <} are also possible, e.g., \code{X[Y, on=c("x>=a", "y<=b")]}, or for interactive use as \code{X[Y, on=.(x>=a, y<=b)]}. } See examples as well as \href{../doc/datatable-secondary-indices-and-auto-indexing.html}{\code{vignette("datatable-secondary-indices-and-auto-indexing")}}. } @@ -182,8 +183,8 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac \code{data.table} builds on base \R functionality to reduce 2 types of time:\cr \enumerate{ - \item{programming time (easier to write, read, debug and maintain), and} - \item{compute time (fast and memory efficient).} + \item programming time (easier to write, read, debug and maintain), and + \item compute time (fast and memory efficient). 
} The general form of data.table syntax is:\cr diff --git a/man/fread.Rd b/man/fread.Rd index cc96062de..78c8a7628 100644 --- a/man/fread.Rd +++ b/man/fread.Rd @@ -88,15 +88,15 @@ On Windows, "French_France.1252" is tried which should be available as standard When \code{quote} is a single character, \itemize{ - \item{Spaces and other whitespace (other than \code{sep} and \code{\\n}) may appear in unquoted character fields, e.g., \code{\dots,2,Joe Bloggs,3.14,\dots}.} + \item Spaces and other whitespace (other than \code{sep} and \code{\\n}) may appear in unquoted character fields, e.g., \code{\dots,2,Joe Bloggs,3.14,\dots}. - \item{When \code{character} columns are \emph{quoted}, they must start and end with that quoting character immediately followed by \code{sep} or \code{\\n}, e.g., \code{\dots,2,"Joe Bloggs",3.14,\dots}. + \item When \code{character} columns are \emph{quoted}, they must start and end with that quoting character immediately followed by \code{sep} or \code{\\n}, e.g., \code{\dots,2,"Joe Bloggs",3.14,\dots}. In essence quoting character fields are \emph{required} only if \code{sep} or \code{\\n} appears in the string value. Quoting may be used to signify that numeric data should be read as text. Unescaped quotes may be present in a quoted field, e.g., \code{\dots,2,"Joe, "Bloggs"",3.14,\dots}, as well as escaped quotes, e.g., \code{\dots,2,"Joe \",Bloggs\"",3.14,\dots}. If an embedded quote is followed by the separator inside a quoted field, the embedded quotes up to that point in that field must be balanced; e.g. \code{\dots,2,"www.blah?x="one",y="two"",3.14,\dots}. - On those fields that do not satisfy these conditions, e.g., fields with unbalanced quotes, \code{fread} re-attempts that field as if it isn't quoted. 
This is quite useful in reading files that contains fields with unbalanced quotes as well, automatically.} + On those fields that do not satisfy these conditions, e.g., fields with unbalanced quotes, \code{fread} re-attempts that field as if it isn't quoted. This is quite useful in reading files that contains fields with unbalanced quotes as well, automatically. } To read fields \emph{as is} instead, use \code{quote = ""}. @@ -106,11 +106,11 @@ To read fields \emph{as is} instead, use \code{quote = ""}. Currently, the \code{yaml} setting is somewhat inflexible with respect to incorporating metadata to facilitate file reading. Information on column classes should be stored at the top level under the heading \code{schema} and subheading \code{fields}; those with both a \code{type} and a \code{name} sub-heading will be merged into \code{colClasses}. Other supported elements are as follows: \itemize{ - \item{ \code{sep} (or alias \code{delimiter}) } - \item{ \code{header} } - \item{ \code{quote} (or aliases \code{quoteChar}, \code{quote_char}) } - \item{ \code{dec} (or alias \code{decimal}) } - \item{ \code{na.strings} } + \item \code{sep} (or alias \code{delimiter}) + \item \code{header} + \item \code{quote} (or aliases \code{quoteChar}, \code{quote_char}) + \item \code{dec} (or alias \code{decimal}) + \item \code{na.strings} } \bold{File Download:} diff --git a/man/froll.Rd b/man/froll.Rd index 090b397a9..d6cb75067 100644 --- a/man/froll.Rd +++ b/man/froll.Rd @@ -64,9 +64,9 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) observation has its own corresponding rolling window width. Due to the logic of adaptive rolling functions, the following restrictions apply: \itemize{ - \item{ \code{align} only \code{"right"}. } - \item{ if list of vectors is passed to \code{x}, then all - vectors within it must have equal length. } + \item \code{align} only \code{"right"}. 
+ \item if list of vectors is passed to \code{x}, then all + vectors within it must have equal length. } When multiple columns or multiple windows width are provided, then they @@ -93,21 +93,21 @@ frollapply(x, n, FUN, \dots, fill=NA, align=c("right", "left", "center")) \code{zoo} might expect following differences in \code{data.table} implementation. \itemize{ - \item{ rolling function will always return result of the same length - as input. } - \item{ \code{fill} defaults to \code{NA}. } - \item{ \code{fill} accepts only constant values. It does not support - for \emph{na.locf} or other functions. } - \item{ \code{align} defaults to \code{"right"}. } - \item{ \code{na.rm} is respected, and other functions are not needed - when input contains \code{NA}. } - \item{ integers and logical are always coerced to double. } - \item{ when \code{adaptive=FALSE} (default), then \code{n} must be a - numeric vector. List is not accepted. } - \item{ when \code{adaptive=TRUE}, then \code{n} must be vector of - length equal to \code{nrow(x)}, or list of such vectors. } - \item{ \code{partial} window feature is not supported, although it can - be accomplished by using \code{adaptive=TRUE}, see examples. \code{NA} is always returned for incomplete windows. } + \item rolling function will always return result of the same length as input. + \item \code{fill} defaults to \code{NA}. + \item \code{fill} accepts only constant values. It does not support + for \emph{na.locf} or other functions. + \item \code{align} defaults to \code{"right"}. + \item \code{na.rm} is respected, and other functions are not needed + when input contains \code{NA}. + \item integers and logical are always coerced to double. + \item when \code{adaptive=FALSE} (default), then \code{n} must be a + numeric vector. List is not accepted. + \item when \code{adaptive=TRUE}, then \code{n} must be vector of + length equal to \code{nrow(x)}, or list of such vectors. 
+ \item \code{partial} window feature is not supported, although it can + be accomplished by using \code{adaptive=TRUE}, see + examples. \code{NA} is always returned for incomplete windows. } Be aware that rolling functions operates on the physical order of input. diff --git a/man/fsort.Rd b/man/fsort.Rd index 6c11022d2..0eba047a1 100644 --- a/man/fsort.Rd +++ b/man/fsort.Rd @@ -20,9 +20,9 @@ fsort(x, decreasing = FALSE, na.last = FALSE, internal=FALSE, verbose=FALSE, \do Process will raise error if \code{x} contains negative values. Unless \code{x} is already sorted \code{fsort} will redirect processing to slower single threaded \emph{order} followed by \emph{subset} in following cases: \itemize{ - \item{data type other than \emph{double} (\emph{numeric})} - \item{data having \code{NA}s} - \item{\code{decreasing==FALSE}} + \item data type other than \emph{double} (\emph{numeric}) + \item data having \code{NA}s + \item \code{decreasing==FALSE} } } \value{ diff --git a/man/fwrite.Rd b/man/fwrite.Rd index ba6eb4751..a4fcf788e 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -37,18 +37,18 @@ fwrite(x, file = "", append = FALSE, quote = "auto", \item{col.names}{Should the column names (header row) be written? The default is \code{TRUE} for new files and when overwriting existing files (\code{append=FALSE}). Otherwise, the default is \code{FALSE} to prevent column names appearing again mid-file when stacking a set of \code{data.table}s or appending rows to the end of a file.} \item{qmethod}{A character string specifying how to deal with embedded double quote characters when quoting strings. 
\itemize{ - \item{"escape" - the quote character (as well as the backslash character) is escaped in C style by a backslash, or} - \item{"double" (default, same as \code{write.csv}), in which case the double quote is doubled with another one.} + \item "escape" - the quote character (as well as the backslash character) is escaped in C style by a backslash, or + \item "double" (default, same as \code{write.csv}), in which case the double quote is doubled with another one. }} \item{logical01}{Should \code{logical} values be written as \code{1} and \code{0} rather than \code{"TRUE"} and \code{"FALSE"}?} \item{logicalAsInt}{Deprecated. Old name for `logical01`. Name change for consistency with `fread` for which `logicalAsInt` would not make sense.} \item{scipen}{ \code{integer} In terms of printing width, how much of a bias should there be towards printing whole numbers rather than scientific notation? See Details. } \item{dateTimeAs}{ How \code{Date}/\code{IDate}, \code{ITime} and \code{POSIXct} items are written. \itemize{ - \item{"ISO" (default) - \code{2016-09-12}, \code{18:12:16} and \code{2016-09-12T18:12:16.999999Z}. 0, 3 or 6 digits of fractional seconds are printed if and when present for convenience, regardless of any R options such as \code{digits.secs}. The idea being that if milli and microseconds are present then you most likely want to retain them. R's internal UTC representation is written faithfully to encourage ISO standards, stymie timezone ambiguity and for speed. An option to consider is to start R in the UTC timezone simply with \code{"$ TZ='UTC' R"} at the shell (NB: it must be one or more spaces between \code{TZ='UTC'} and \code{R}, anything else will be silently ignored; this TZ setting applies just to that R process) or \code{Sys.setenv(TZ='UTC')} at the R prompt and then continue as if UTC were local time.} - \item{"squash" - \code{20160912}, \code{181216} and \code{20160912181216999}. 
This option allows fast and simple extraction of \code{yyyy}, \code{mm}, \code{dd} and (most commonly to group by) \code{yyyymm} parts using integer div and mod operations. In R for example, one line helper functions could use \code{\%/\%10000}, \code{\%/\%100\%\%100}, \code{\%\%100} and \code{\%/\%100} respectively. POSIXct UTC is squashed to 17 digits (including 3 digits of milliseconds always, even if \code{000}) which may be read comfortably as \code{integer64} (automatically by \code{fread()}).} - \item{"epoch" - \code{17056}, \code{65536} and \code{1473703936.999999}. The underlying number of days or seconds since the relevant epoch (1970-01-01, 00:00:00 and 1970-01-01T00:00:00Z respectively), negative before that (see \code{?Date}). 0, 3 or 6 digits of fractional seconds are printed if and when present.} - \item{"write.csv" - this currently affects \code{POSIXct} only. It is written as \code{write.csv} does by using the \code{as.character} method which heeds \code{digits.secs} and converts from R's internal UTC representation back to local time (or the \code{"tzone"} attribute) as of that historical date. Accordingly this can be slow. All other column types (including \code{Date}, \code{IDate} and \code{ITime} which are independent of timezone) are written as the "ISO" option using fast C code which is already consistent with \code{write.csv}.} + \item "ISO" (default) - \code{2016-09-12}, \code{18:12:16} and \code{2016-09-12T18:12:16.999999Z}. 0, 3 or 6 digits of fractional seconds are printed if and when present for convenience, regardless of any R options such as \code{digits.secs}. The idea being that if milli and microseconds are present then you most likely want to retain them. R's internal UTC representation is written faithfully to encourage ISO standards, stymie timezone ambiguity and for speed. 
An option to consider is to start R in the UTC timezone simply with \code{"$ TZ='UTC' R"} at the shell (NB: it must be one or more spaces between \code{TZ='UTC'} and \code{R}, anything else will be silently ignored; this TZ setting applies just to that R process) or \code{Sys.setenv(TZ='UTC')} at the R prompt and then continue as if UTC were local time. + \item "squash" - \code{20160912}, \code{181216} and \code{20160912181216999}. This option allows fast and simple extraction of \code{yyyy}, \code{mm}, \code{dd} and (most commonly to group by) \code{yyyymm} parts using integer div and mod operations. In R for example, one line helper functions could use \code{\%/\%10000}, \code{\%/\%100\%\%100}, \code{\%\%100} and \code{\%/\%100} respectively. POSIXct UTC is squashed to 17 digits (including 3 digits of milliseconds always, even if \code{000}) which may be read comfortably as \code{integer64} (automatically by \code{fread()}). + \item "epoch" - \code{17056}, \code{65536} and \code{1473703936.999999}. The underlying number of days or seconds since the relevant epoch (1970-01-01, 00:00:00 and 1970-01-01T00:00:00Z respectively), negative before that (see \code{?Date}). 0, 3 or 6 digits of fractional seconds are printed if and when present. + \item "write.csv" - this currently affects \code{POSIXct} only. It is written as \code{write.csv} does by using the \code{as.character} method which heeds \code{digits.secs} and converts from R's internal UTC representation back to local time (or the \code{"tzone"} attribute) as of that historical date. Accordingly this can be slow. All other column types (including \code{Date}, \code{IDate} and \code{ITime} which are independent of timezone) are written as the "ISO" option using fast C code which is already consistent with \code{write.csv}. } The first three options are fast due to new specialized C code. 
The epoch to date-part conversion uses a fast approach by Howard Hinnant (see references) using a day-of-year starting on 1 March. You should not be able to notice any difference in write speed between those three options. The date range supported for \code{Date} and \code{IDate} is [0000-03-01, 9999-12-31]. Every one of these 3,652,365 dates have been tested and compared to base R including all 2,790 leap days in this range. \cr \cr This option applies to vectors of date/time in list column cells, too. \cr \cr @@ -73,17 +73,17 @@ To save space, \code{fwrite} prefers to write wide numeric values in scientific The following fields will be written to the header of the file and surrounded by \code{---} on top and bottom: \itemize{ - \item{ \code{source} - Contains the R version and \code{data.table} version used to write the file } - \item{ \code{creation_time_utc} - Current timestamp in UTC time just before the header is written } - \item{ \code{schema} with element \code{fields} giving \code{name}-\code{type} (\code{class}) pairs for the table; multi-class objects (e.g. \code{c('POSIXct', 'POSIXt')}) will have their first class written. } - \item{ \code{header} - same as \code{col.names} (which is \code{header} on input) } - \item{ \code{sep} } - \item{ \code{sep2} } - \item{ \code{eol} } - \item{ \code{na.strings} - same as \code{na} } - \item{ \code{dec} } - \item{ \code{qmethod} } - \item{ \code{logical01} } + \item \code{source} - Contains the R version and \code{data.table} version used to write the file + \item \code{creation_time_utc} - Current timestamp in UTC time just before the header is written + \item \code{schema} with element \code{fields} giving \code{name}-\code{type} (\code{class}) pairs for the table; multi-class objects (e.g. \code{c('POSIXct', 'POSIXt')}) will have their first class written. 
+ \item \code{header} - same as \code{col.names} (which is \code{header} on input) + \item \code{sep} + \item \code{sep2} + \item \code{eol} + \item \code{na.strings} - same as \code{na} + \item \code{dec} + \item \code{qmethod} + \item \code{logical01} } } diff --git a/man/openmp-utils.Rd b/man/openmp-utils.Rd index 71e469ed7..df942009c 100644 --- a/man/openmp-utils.Rd +++ b/man/openmp-utils.Rd @@ -37,18 +37,18 @@ Internally parallelized code is used in the following places: \itemize{ - \item{\file{between.c} - \code{\link{between}()}} - \item{\file{cj.c} - \code{\link{CJ}()}} - \item{\file{coalesce.c} - \code{\link{fcoalesce}()}} - \item{\file{fifelse.c} - \code{\link{fifelse}()}} - \item{\file{fread.c} - \code{\link{fread}()}} - \item{\file{forder.c}, \file{fsort.c}, and \file{reorder.c} - \code{\link{forder}()} and related} - \item{\file{froll.c}, \file{frolladaptive.c}, and \file{frollR.c} - \code{\link{froll}()} and family} - \item{\file{fwrite.c} - \code{\link{fwrite}()}} - \item{\file{gsumm.c} - GForce in various places, see \link{GForce}} - \item{\file{nafill.c} - \code{\link{nafill}()}} - \item{\file{subset.c} - Used in \code{\link[=data.table]{[.data.table}} subsetting} - \item{\file{types.c} - Internal testing usage} + \item\file{between.c} - \code{\link{between}()} + \item\file{cj.c} - \code{\link{CJ}()} + \item\file{coalesce.c} - \code{\link{fcoalesce}()} + \item\file{fifelse.c} - \code{\link{fifelse}()} + \item\file{fread.c} - \code{\link{fread}()} + \item\file{forder.c}, \file{fsort.c}, and \file{reorder.c} - \code{\link{forder}()} and related + \item\file{froll.c}, \file{frolladaptive.c}, and \file{frollR.c} - \code{\link{froll}()} and family + \item\file{fwrite.c} - \code{\link{fwrite}()} + \item\file{gsumm.c} - GForce in various places, see \link{GForce} + \item\file{nafill.c} - \code{\link{nafill}()} + \item\file{subset.c} - Used in \code{\link[=data.table]{[.data.table}} subsetting + \item\file{types.c} - Internal testing usage } } \examples{ 
diff --git a/man/setops.Rd b/man/setops.Rd index 395cdab33..dfa2572c7 100644 --- a/man/setops.Rd +++ b/man/setops.Rd @@ -23,16 +23,12 @@ fsetequal(x, y, all = TRUE) \arguments{ \item{x, y}{\code{data.table}s.} \item{all}{Logical. Default is \code{FALSE} and removes duplicate rows on the result. When \code{TRUE}, if there are \code{xn} copies of a particular row in \code{x} and \code{yn} copies of the same row in \code{y}, then: - \itemize{ - - \item{\code{fintersect} will return \code{min(xn, yn)} copies of that row.} - - \item{\code{fsetdiff} will return \code{max(0, xn-yn)} copies of that row.} - - \item{\code{funion} will return \code{xn+yn} copies of that row.} - - \item{\code{fsetequal} will return \code{FALSE} unless \code{xn == yn}.} - } + \itemize{ + \item\code{fintersect} will return \code{min(xn, yn)} copies of that row. + \item\code{fsetdiff} will return \code{max(0, xn-yn)} copies of that row. + \item\code{funion} will return \code{xn+yn} copies of that row. + \item\code{fsetequal} will return \code{FALSE} unless \code{xn == yn}. + } } } \details{ diff --git a/man/special-symbols.Rd b/man/special-symbols.Rd index c96cbef5c..9fb3cb45a 100644 --- a/man/special-symbols.Rd +++ b/man/special-symbols.Rd @@ -19,12 +19,12 @@ These symbols used in \code{j} are defined as follows. \itemize{ - \item{\code{.SD} is a \code{data.table} containing the \bold{S}ubset of \code{x}'s \bold{D}ata for each group, excluding any columns used in \code{by} (or \code{keyby}).} - \item{\code{.BY} is a \code{list} containing a length 1 vector for each item in \code{by}. This can be useful when \code{by} is not known in advance. The \code{by} variables are also available to \code{j} directly by name; useful for example for titles of graphs if \code{j} is a plot command, or to branch with \code{if()} depending on the value of a group variable.} - \item{\code{.N} is an integer, length 1, containing the number of rows in the group. 
This may be useful when the column names are not known in advance and for convenience generally. When grouping by \code{i}, \code{.N} is the number of rows in \code{x} matched to, for each row of \code{i}, regardless of whether \code{nomatch} is \code{NA} or \code{NULL}. It is renamed to \code{N} (no dot) in the result (otherwise a column called \code{".N"} could conflict with the \code{.N} variable, see FAQ 4.6 for more details and example), unless it is explicitly named; e.g., \code{DT[,list(total=.N),by=a]}.} - \item{\code{.I} is an integer vector equal to \code{seq_len(nrow(x))}. While grouping, it holds for each item in the group, its row location in \code{x}. This is useful to subset in \code{j}; e.g. \code{DT[, .I[which.max(somecol)], by=grp]}. If used in \code{by} it corresponds to applying a function rowwise. } - \item{\code{.GRP} is an integer, length 1, containing a simple group counter. 1 for the 1st group, 2 for the 2nd, etc.} - \item{\code{.NGRP} is an integer, length 1, containing the number of groups. } + \item \code{.SD} is a \code{data.table} containing the \bold{S}ubset of \code{x}'s \bold{D}ata for each group, excluding any columns used in \code{by} (or \code{keyby}). + \item \code{.BY} is a \code{list} containing a length 1 vector for each item in \code{by}. This can be useful when \code{by} is not known in advance. The \code{by} variables are also available to \code{j} directly by name; useful for example for titles of graphs if \code{j} is a plot command, or to branch with \code{if()} depending on the value of a group variable. + \item \code{.N} is an integer, length 1, containing the number of rows in the group. This may be useful when the column names are not known in advance and for convenience generally. When grouping by \code{i}, \code{.N} is the number of rows in \code{x} matched to, for each row of \code{i}, regardless of whether \code{nomatch} is \code{NA} or \code{NULL}. 
It is renamed to \code{N} (no dot) in the result (otherwise a column called \code{".N"} could conflict with the \code{.N} variable, see FAQ 4.6 for more details and example), unless it is explicitly named; e.g., \code{DT[,list(total=.N),by=a]}. + \item \code{.I} is an integer vector equal to \code{seq_len(nrow(x))}. While grouping, it holds for each item in the group, its row location in \code{x}. This is useful to subset in \code{j}; e.g. \code{DT[, .I[which.max(somecol)], by=grp]}. If used in \code{by} it corresponds to applying a function rowwise. + \item \code{.GRP} is an integer, length 1, containing a simple group counter. 1 for the 1st group, 2 for the 2nd, etc. + \item \code{.NGRP} is an integer, length 1, containing the number of groups. } \code{.EACHI} is defined as \code{NULL} but its value is not used. Its usage is \code{by=.EACHI} (or \code{keyby=.EACHI}) which invokes grouping-by-each-row-of-i; see \code{\link{data.table}}'s \code{by} argument for more details. From bd5641207727c0c68340c5e269dd00f0274f1ab5 Mon Sep 17 00:00:00 2001 From: jangorecki Date: Thu, 23 Nov 2023 10:09:39 +0100 Subject: [PATCH 17/88] fix r-devel cran check links --- man/assign.Rd | 2 +- man/fwrite.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/assign.Rd b/man/assign.Rd index bb87a5221..df255d395 100644 --- a/man/assign.Rd +++ b/man/assign.Rd @@ -66,7 +66,7 @@ All of the following result in a friendly error (by design) : DT[, {col1 := 1L; col2 := 2L}] # Use the functional form, `:=`(), instead (see above). } -For additional resources, please read \href{../doc/datatable-faq.html}{\code{vignette("datatable-faq")}}. Also have a look at StackOverflow's \href{https://stackoverflow.com/search?q=\%5Bdata.table\%5D+reference}{data.table tag}. +For additional resources, please read \href{../doc/datatable-faq.html}{\code{vignette("datatable-faq")}}. 
Also have a look at StackOverflow's \href{https://stackoverflow.com/questions/tagged/data.table/}{data.table tag}. \code{:=} in \code{j} can be combined with all types of \code{i} (such as binary search), and all types of \code{by}. This a one reason why \code{:=} has been implemented in \code{j}. Please see \href{../doc/datatable-reference-semantics}{\code{vignette("datatable-reference-semantics")}} and also \code{FAQ 2.16} for analogies to SQL. diff --git a/man/fwrite.Rd b/man/fwrite.Rd index a4fcf788e..42ae44a29 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -64,7 +64,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", \item{encoding}{ The encoding of the strings written to the CSV file. Default is \code{""}, which means writting raw bytes without considering the encoding. Other possible options are \code{"UTF-8"} and \code{"native"}. } } \details{ -\code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://h2o.ai/blog/fast-csv-writing-for-r/}. Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector. +\code{fwrite} began as a community contribution with \href{https://github.com/Rdatatable/data.table/pull/1613}{pull request #1613} by Otto Seiskari. This gave Matt Dowle the impetus to specialize the numeric formatting and to parallelize: \url{https://h2o.ai/blog/2016/fast-csv-writing-for-r/}. 
Final items were tracked in \href{https://github.com/Rdatatable/data.table/issues/1664}{issue #1664} such as automatic quoting, \code{bit64::integer64} support, decimal/scientific formatting exactly matching \code{write.csv} between 2.225074e-308 and 1.797693e+308 to 15 significant figures, \code{row.names}, dates (between 0000-03-01 and 9999-12-31), times and \code{sep2} for \code{list} columns where each cell can itself be a vector. To save space, \code{fwrite} prefers to write wide numeric values in scientific notation -- e.g. \code{10000000000} takes up much more space than \code{1e+10}. Most file readers (e.g. \code{\link{fread}}) understand scientific notation, so there's no fidelity loss. Like in base R, users can control this by specifying the \code{scipen} argument, which follows the same rules as \code{\link[base]{options}('scipen')}. \code{fwrite} will see how much space a value will take to write in scientific vs. decimal notation, and will only write in scientific notation if the latter is more than \code{scipen} characters wider. For \code{10000000000}, then, \code{1e+10} will be written whenever \code{scipen<6}. 
From 2ccfdc1fd4132de35321aa29382098de65b11f86 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Thu, 23 Nov 2023 18:50:58 +0100 Subject: [PATCH 18/88] updated mock file, closes #5754 (#5755) --- tests/knitr.Rout.mock | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/knitr.Rout.mock b/tests/knitr.Rout.mock index 1f17724c8..ea37b2c46 100644 --- a/tests/knitr.Rout.mock +++ b/tests/knitr.Rout.mock @@ -8,10 +8,11 @@ DT # yes ``` ``` -## x y -## 1: 1 4 -## 2: 2 5 -## 3: 3 6 +## x y +## +## 1: 1 4 +## 2: 2 5 +## 3: 3 6 ``` ```r @@ -20,10 +21,11 @@ print(DT[, z := 10:12]) # yes ``` ``` -## x y z -## 1: 1 4 10 -## 2: 2 5 11 -## 3: 3 6 12 +## x y z +## +## 1: 1 4 10 +## 2: 2 5 11 +## 3: 3 6 12 ``` ```r @@ -32,10 +34,11 @@ DT # yes ``` ``` -## x y z a -## 1: 1 4 10 1 -## 2: 2 5 11 1 -## 3: 3 6 12 1 +## x y z a +## +## 1: 1 4 10 1 +## 2: 2 5 11 1 +## 3: 3 6 12 1 ``` Some text. From f8f5976f0c3bebdebd1a94c3584698147128c6d6 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 25 Nov 2023 11:09:13 +0100 Subject: [PATCH 19/88] escape zlib dependent tests (#5759) --- DESCRIPTION | 1 - R/fwrite.R | 2 ++ inst/tests/tests.Rraw | 35 +++++++++++++++++++++-------------- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 74a4b6e1c..00f955c37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -4,7 +4,6 @@ Title: Extension of `data.frame` Depends: R (>= 3.1.0) Imports: methods Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown -SystemRequirements: zlib Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. 
License: MPL-2.0 | file LICENSE URL: https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table diff --git a/R/fwrite.R b/R/fwrite.R index c822b0567..54ef04ed0 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -115,3 +115,5 @@ fwrite = function(x, file="", append=FALSE, quote="auto", invisible() } +nozlib = function() identical(.Call(Cdt_zlib_version), "zlib header files were not found when data.table was compiled") + diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d7ad5a99a..bd3319f80 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -69,6 +69,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { which.first = data.table:::which.first which.last = data.table:::which.last `-.IDate` = data.table:::`-.IDate` + nozlib = data.table:::nozlib # Also, for functions that are masked by other packages, we need to map the data.table one. Or else, # the other package's function would be picked up. As above, we only need to do this because we desire @@ -9880,16 +9881,20 @@ test(1658.39, fwrite(matrix(1:3, nrow=3, ncol=1), quote = TRUE), output = '"V1"\ test(1658.40, fwrite(matrix(1:4, nrow=2, ncol=2, dimnames = list(c("ra","rb"),c("ca","cb"))), quote = TRUE), output = '"ca","cb"\n.*1,3\n2,4', message = "x being coerced from class: matrix to data.table") # fwrite compress -test(1658.41, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), output='a,b\n1,1\n2,2\n3,3') # compress ignored on console -DT = data.table(a=rep(1:2,each=100), b=rep(1:4,each=25)) -test(1658.421, fwrite(DT, file=f1<-tempfile(fileext=".gz"), verbose=TRUE), NULL, - output="args.nrow=200 args.ncol=2.*maxLineLen=5[12].*Writing 200 rows in 1 batches of 200 rows.*nth=1") # [12] for Windows where eolLen==2 -test(1658.422, fwrite(DT, file=f2<-tempfile()), NULL) -test(1658.423, file.info(f1)$size < file.info(f2)$size) # 74 < 804 (file.size() isn't available in R 3.1.0) -if (test_R.utils) test(1658.43, fread(f1), DT) # use 
fread to decompress gz (works cross-platform) -fwrite(DT, file=f3<-tempfile(), compress="gzip") # compress to filename not ending .gz -test(1658.44, file.info(f3)$size, file.info(f1)$size) -unlink(c(f1,f2,f3)) +if (nozlib()) { + test(1658.409, fwrite(data.table(a=1), file=tempfile(), compress="gzip"), error="header files were not found at the time data.table was compiled") +} else { + test(1658.41, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), output='a,b\n1,1\n2,2\n3,3') # compress ignored on console + DT = data.table(a=rep(1:2,each=100), b=rep(1:4,each=25)) + test(1658.421, fwrite(DT, file=f1<-tempfile(fileext=".gz"), verbose=TRUE), NULL, + output="args.nrow=200 args.ncol=2.*maxLineLen=5[12].*Writing 200 rows in 1 batches of 200 rows.*nth=1") # [12] for Windows where eolLen==2 + test(1658.422, fwrite(DT, file=f2<-tempfile()), NULL) + test(1658.423, file.info(f1)$size < file.info(f2)$size) # 74 < 804 (file.size() isn't available in R 3.1.0) + if (test_R.utils) test(1658.43, fread(f1), DT) # use fread to decompress gz (works cross-platform) + fwrite(DT, file=f3<-tempfile(), compress="gzip") # compress to filename not ending .gz + test(1658.441, file.info(f3)$size, file.info(f1)$size) + unlink(c(f1,f2,f3)) +} DT = data.table(a=1:3, b=list(1:4, c(3.14, 100e10), c("foo", "bar", "baz"))) test(1658.45, fwrite(DT), output=c("a,b","1,1|2|3|4","2,3.14|1e+12","3,foo|bar|baz")) DT[3,b:=as.raw(0:2)] @@ -9916,10 +9921,12 @@ test(1658.52, file.info(f1)$size, file.info(f2)$size) unlink(c(f1, f2)) # compression error -5 due to only 3 bytes (bom) in first block; #3599 -DT = data.table(l=letters, n=1:26) -test(1658.53, fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), NULL) -if (test_R.utils) test(1658.54, fread(f), setnames(DT,c("V1","V2"))) -unlink(f) +if (!nozlib()) { + DT = data.table(l=letters, n=1:26) + test(1658.53, fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), NULL) + if (test_R.utils) test(1658.54, fread(f), 
setnames(DT,c("V1","V2"))) + unlink(f) +} # complex column support for fwrite, part of #3690 DT = data.table(a=1:3, z=0:2 - (2:0)*1i) From b34ac7bfa1845713f2457fbfd6415573c592183c Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 25 Nov 2023 15:11:46 +0100 Subject: [PATCH 20/88] reviving GLCI * removed 3.4.4, 3.5.0 test jobs * updated urls to windows R binaries * cleanup old comments * remove docker builds * using new lighter images --- .ci/Dockerfile.in | 9 -- .ci/ci.R | 4 - .ci/publish.R | 2 +- .gitlab-ci.yml | 309 +++++++++++++++++++++------------------------- 4 files changed, 143 insertions(+), 181 deletions(-) delete mode 100644 .ci/Dockerfile.in diff --git a/.ci/Dockerfile.in b/.ci/Dockerfile.in deleted file mode 100644 index 559bb9a40..000000000 --- a/.ci/Dockerfile.in +++ /dev/null @@ -1,9 +0,0 @@ -FROM registry.gitlab.com/jangorecki/dockerfiles/SRC_IMAGE_NAME - -MAINTAINER Jan Gorecki j.gorecki@wit.edu.pl - -COPY bus/build/cran/ /cran/ - -RUN Rscript -e 'install.packages("data.table", repos=file.path("file:","cran"))' - -CMD ["R"] diff --git a/.ci/ci.R b/.ci/ci.R index a165de818..f3a428566 100644 --- a/.ci/ci.R +++ b/.ci/ci.R @@ -185,7 +185,3 @@ function(pkgs, dp } -## set repositories for CI tests -if (as.logical(Sys.getenv("GITLAB_CI","false")) && identical(Sys.getenv("CI_PROJECT_NAME"), "data.table")) { - options("repos" = if (.Platform$OS.type == "windows") file.path("file://",getwd(),"bus/mirror-packages/cran") else file.path("file:", normalizePath("bus/mirror-packages/cran", mustWork=FALSE))) -} diff --git a/.ci/publish.R b/.ci/publish.R index 526d9bd80..16521fc0e 100644 --- a/.ci/publish.R +++ b/.ci/publish.R @@ -148,7 +148,7 @@ lib.copy <- function(lib.from, repodir="bus/integration/cran"){ pkg.copy <- function(pkg.from, lib.to) { pkg<-basename(pkg.from); dir.create(file.path(lib.to, pkg), recursive=TRUE) - lib.dirs<-intersect(c("html","doc"), all.lib.dirs<-list.dirs(pkg.from, full.names=FALSE)) + lib.dirs<-intersect(c("help","html","doc"), 
all.lib.dirs<-list.dirs(pkg.from, full.names=FALSE)) ans1<-setNames(file.copy(file.path(pkg.from, lib.dirs), file.path(lib.to, pkg), recursive=TRUE), lib.dirs) lib.files<-setdiff(list.files(pkg.from), all.lib.dirs) ans2<-setNames(file.copy(file.path(pkg.from, lib.files), file.path(lib.to, pkg)), lib.files) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f0c403793..18f821b43 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -6,9 +6,9 @@ variables: TZ: "UTC" ## to avoid 'Failed to create bus connection' from timedatectl via Sys.timezone() on Docker with R 3.4. ## Setting TZ for all GLCI jobs to isolate them from timezone. We could have a new GLCI job to test under ## a non-UTC timezone, although, that's what we do routinely in dev. - R_REL_VERSION: "4.2" - R_DEVEL_VERSION: "4.3" - R_OLDREL_VERSION: "4.1" + R_REL_VERSION: "4.3" + R_DEVEL_VERSION: "4.4" + R_OLDREL_VERSION: "4.2" stages: - dependencies @@ -24,83 +24,87 @@ stages: paths: - bus -mirror-packages: ## mirror all recursive dependencies, source and win.binary of data.table suggests from DESCRIPTION +## mirror packages +# download all recursive dependencies once to be used across multiple test jobs +# sources and binaries for r-release, r-devel and r-oldrel +# cache between runs +mirror-packages: stage: dependencies tags: - linux - image: registry.gitlab.com/jangorecki/dockerfiles/r-base-dev + image: registry.gitlab.com/jangorecki/dockerfiles/r-base-minimal cache: paths: - - bus/$CI_BUILD_NAME/cran + - bus/$CI_JOB_NAME/cran script: - echo 'source(".ci/ci.R")' >> .Rprofile - - mkdir -p bus/$CI_BUILD_NAME/cran/src/contrib + - mkdir -p bus/$CI_JOB_NAME/cran/src/contrib - Rscript -e 'mirror.packages(dcf.dependencies("DESCRIPTION", "all"), repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-packages/cran")' - - rm bus/$CI_BUILD_NAME/cran/src/contrib/PACKAGES.rds ## fallback to PACKAGES dcf so available.packages:3.4.4 works - Rscript -e 'sapply(simplify=FALSE, 
setNames(nm=Sys.getenv(c("R_REL_VERSION","R_DEVEL_VERSION","R_OLDREL_VERSION"))), function(binary.ver) mirror.packages(type="win.binary", dcf.dependencies("DESCRIPTION", "all"), repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-packages/cran", binary.ver=binary.ver))' <<: *artifacts -# mirror-other-packages: ## mirror integration suggests from pkgs at the top of inst/tests/other.Rraw; off now #5274 -# stage: dependencies -# tags: -# - linux -# image: registry.gitlab.com/jangorecki/dockerfiles/r-base-dev -# cache: -# paths: -# - bus/$CI_BUILD_NAME/cran -# script: -# - echo 'source(".ci/ci.R")' >> .Rprofile -# - mkdir -p bus/$CI_BUILD_NAME/cran/src/contrib -# - Rscript -e 'eval(parse("inst/tests/other.Rraw", n=1L)); mirror.packages(pkgs, repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-other-packages/cran")' -# <<: *artifacts - -build: ## build data.table sources as tar.gz archive +## build +# sources as tar.gz archive +# build vignettes +build: stage: build tags: - linux - image: registry.gitlab.com/jangorecki/dockerfiles/r-builder + image: registry.gitlab.com/jangorecki/dockerfiles/r-base ## r-base-gcc after rstudio/markdown#108 needs: ["mirror-packages"] before_script: - Rscript -e 'install.packages(c("knitr","rmarkdown"), repos=file.path("file:",normalizePath("bus/mirror-packages/cran")), quiet=TRUE)' - rm -r bus - sed -i '/^[[:space:]]*$/d' ./DESCRIPTION ## make last line end abruptly; i.e. without a final \n - - echo "Revision:" $CI_BUILD_REF >> ./DESCRIPTION + - echo "Revision:" $CI_COMMIT_SHA >> ./DESCRIPTION script: - R CMD build . - - mkdir -p bus/$CI_BUILD_NAME/cran/src/contrib - - mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_BUILD_NAME/cran/src/contrib/. + - mkdir -p bus/$CI_JOB_NAME/cran/src/contrib + - mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME/cran/src/contrib/. 
- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/build/cran"), fields="Revision", addFiles=TRUE)' - - rm bus/$CI_BUILD_NAME/cran/src/contrib/PACKAGES.rds ## fallback to PACKAGES dcf so available.packages:3.4.4 works <<: *artifacts +## install deps aliases .test-install-deps: &install-deps - - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="most"), quiet=TRUE)' + - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="all"), repos=file.path("file:", normalizePath("bus/mirror-packages/cran", mustWork=FALSE)), quiet=TRUE)' +.test-install-deps-win: &install-deps-win + - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='all'), repos=file.path('file://',getwd(),'bus/mirror-packages/cran'), quiet=TRUE)" +## copy data.table tar.gz from bus R repo to current directory .test-cp-src: &cp-src - cp $(ls -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head -n 1) . .test-cp-src-win: &cp-src-win - cp.exe $(ls.exe -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head.exe -n 1) . 
+## move data.table tar.gz to bus .test-mv-src: &mv-src - - mkdir -p bus/$CI_BUILD_NAME && mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_BUILD_NAME + - mkdir -p bus/$CI_JOB_NAME && mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME .test-mv-src-win: &mv-src-win - - mkdir.exe -p bus/$CI_BUILD_NAME; mv.exe $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) bus/$CI_BUILD_NAME + - mkdir.exe -p bus/$CI_JOB_NAME; mv.exe $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) bus/$CI_JOB_NAME + +## move data.table binaries to bus R repo +.test-mv-bin-win: &mv-bin-win + - mkdir.exe -p cran/bin/windows/contrib/$R_VERSION; mv.exe $(ls.exe -1t data.table_*.zip | head.exe -n 1) cran/bin/windows/contrib/$R_VERSION +## remove data.table tar.gz .test-rm-src: &rm-src - rm $(ls -1t data.table_*.tar.gz | head -n 1) .test-rm-src-win: &rm-src-win - rm.exe $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) -.test-mv-bin-win: &mv-bin-win - - mkdir.exe -p cran/bin/windows/contrib/$R_VERSION; mv.exe $(ls.exe -1t data.table_*.zip | head.exe -n 1) cran/bin/windows/contrib/$R_VERSION - +## install R on windows .test-install-r-rel-win: &install-r-rel-win - - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/R-4.2.2-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait - # see #5198 for discussion about the https link used above; it will break each time R is released and the version number will need to be updated + - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/old/4.3.2/R-4.3.2-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-r-devel-win: &install-r-devel-win - curl.exe -s -o ../R-devel.exe https://cloud.r-project.org/bin/windows/base/R-devel-win.exe; Start-Process -FilePath ..\R-devel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait .test-install-r-oldrel-win: &install-r-oldrel-win - - curl.exe -s -o 
../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.1.3/R-4.1.3-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + - curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.2.3/R-4.2.3-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + +## install Rtools on windows +.test-install-rtools42-win: &install-rtools42-win + - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools42/files/rtools42-5355-5357.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait +.test-install-rtools43-win: &install-rtools43-win + - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools43/files/rtools43-5863-5818.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools43" -NoNewWindow -Wait .test-template: &test stage: test @@ -112,21 +116,6 @@ build: ## build data.table sources as tar.gz archive tags: - linux -.test-cran-lin-template: &test-cran-lin - <<: *test-lin - variables: - _R_CHECK_CRAN_INCOMING_: "TRUE" - _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" - before_script: - - *install-deps - - *cp-src - - rm -r bus - script: - - *mv-src - - cd bus/$CI_BUILD_NAME - - R CMD check --as-cran --no-manual $(ls -1t data.table_*.tar.gz | head -n 1) - - *rm-src - .test-win-template: &test-win <<: *test tags: @@ -138,20 +127,26 @@ build: ## build data.table sources as tar.gz archive # tags: # - macosx -test-rel-lin: ## most comprehensive tests, force all suggests, also integration tests, using gcc -O3 -flto -fno-common -Wunused-result +## most comprehensive tests +# force all suggests +# flags: gcc -O3 -flto -fno-common -Wunused-result +# tests for compilation warnings +# measure memory usage during tests +test-rel-lin: <<: *test-lin - image: registry.gitlab.com/jangorecki/dockerfiles/r-builder - needs: 
["mirror-packages","build"] # "mirror-other-packages" + image: registry.gitlab.com/jangorecki/dockerfiles/r-data.table + needs: ["mirror-packages","build"] + allow_failure: true ## temp workaround #5760 variables: _R_CHECK_CRAN_INCOMING_: "FALSE" _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" _R_CHECK_FORCE_SUGGESTS_: "TRUE" _R_CHECK_TESTS_NLINES_: "0" OPENBLAS_MAIN_FREE: "1" - TEST_DATA_TABLE_WITH_OTHER_PACKAGES: "FALSE" #5274 + TEST_DATA_TABLE_MEMTEST: "1" before_script: - - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="all"), quiet=TRUE)' ## does seem to be needed despite 'needs mirror-packages' - ## - Rscript -e 'eval(parse("inst/tests/other.Rraw", n=1L)); install.packages(pkgs, quiet=TRUE, repos=c(getOption("repos"), file.path("file:", normalizePath("bus/mirror-other-packages/cran", mustWork=FALSE))))' + - apt-get update -q && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## remove after #5749 + - *install-deps - *cp-src - rm -r bus - mkdir -p ~/.R @@ -159,16 +154,20 @@ test-rel-lin: ## most comprehensive tests, force all suggests, also integration - echo 'CXXFLAGS=-g -O3 -flto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars script: - *mv-src - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R CMD check $(ls -1t data.table_*.tar.gz | head -n 1) - *rm-src - (! 
grep "warning:" data.table.Rcheck/00install.out) -test-rel-vanilla-lin: ## minimal, no suggested deps, no vignettes or manuals, measure memory, using gcc -O0 -fno-openmp +## vanilla minimal +# no suggested deps +# no vignettes or manuals +# no openmp +# flags: gcc -O0 -fno-openmp +test-rel-vanilla-lin: <<: *test-lin - image: registry.gitlab.com/jangorecki/dockerfiles/r-base-dev - variables: - TEST_DATA_TABLE_MEMTEST: "1" + image: registry.gitlab.com/jangorecki/dockerfiles/r-base-gcc + allow_failure: true ## temp workaround #5484 before_script: - *cp-src - rm -r bus @@ -177,36 +176,46 @@ test-rel-vanilla-lin: ## minimal, no suggested deps, no vignettes or manuals, me - echo 'CXXFLAGS=-g -O0 -fno-openmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars script: - *mv-src - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R CMD check --no-manual --ignore-vignettes $(ls -1t data.table_*.tar.gz | head -n 1) - *rm-src -test-rel-cran-lin: ## R-release on Linux, extra NOTEs check and build pdf manual thus not from cran-lin template +## R-release on Linux +# strict checks for 0 NOTEs +# extra NOTEs check and build pdf manual thus not from cran-lin template +test-rel-cran-lin: <<: *test-lin - image: registry.gitlab.com/jangorecki/dockerfiles/r-builder + image: registry.gitlab.com/jangorecki/dockerfiles/r-base variables: _R_CHECK_CRAN_INCOMING_: "TRUE" ## stricter --as-cran checks should run in dev pipelines continuously (not sure what they are though) _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" ## Other than no URL checking (takes many minutes) or 'Days since last update 0' NOTEs needed, #3284 - _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## effective from R 4.1.0 + _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## bytes + _R_CHECK_PKG_SIZES_THRESHOLD_: "7" ## MB 'checking installed package size' NOTE before_script: + - apt-get update -q && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## temp workaround #5749 - 
*install-deps - *cp-src - rm -r bus - mkdir -p ~/.R - - echo 'CFLAGS=-g0 -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2'> ~/.R/Makevars ## -g0 because -g increases datatable.so size from 0.5MB to 1.5MB and breaches 'installed package size <= 5MB' note - - echo 'CXXFLAGS=-g0 -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars + - echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars + - echo 'CXXFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars script: - *mv-src - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - *rm-src - >- Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: OK")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: OK"), " but ", shQuote(l)) else q("no")' -test-dev-cran-lin: ## R-devel on Linux, --enable-strict-barrier --disable-long-double, check for new notes and compilation warnings, thus allow_failure +## R-devel on Linux +# TODO: --enable-strict-barrier --disable-long-double +# tests for compilation warnings +# tests for new notes +# thus allow_failure +test-dev-cran-lin: <<: *test-lin image: registry.gitlab.com/jangorecki/dockerfiles/r-devel - allow_failure: true + allow_failure: true ## to not be blocked by changes in r-devel variables: _R_CHECK_CRAN_INCOMING_: "TRUE" _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" @@ -218,84 +227,95 @@ test-dev-cran-lin: ## R-devel on Linux, --enable-strict-barrier --disable-long-d - rm -r bus script: - *mv-src - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R CMD check --as-cran --no-manual $(ls -1t data.table_*.tar.gz | head -n 1) - *rm-src - (! 
grep "warning:" data.table.Rcheck/00install.out) - - >- + - >- ## this likely need an update but check fails now on complex NA so CI is not reaching here anyway Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 3 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 3 NOTEs"), " (size of tarball, installed package size, top-level files) but ", shQuote(l)) else q("no")' -test-310-cran-lin: ## R-3.1.0 on Linux, stated dependency of R - <<: *test-cran-lin +## R 3.1.0 +# stated dependency on R +test-310-cran-lin: image: registry.gitlab.com/jangorecki/dockerfiles/r-3.1.0 + <<: *test-lin + before_script: + - *install-deps + - *cp-src + - rm -r bus + script: + - *mv-src + - cd bus/$CI_JOB_NAME + - R CMD check --no-manual $(ls -1t data.table_*.tar.gz | head -n 1) + - *rm-src -test-344-cran-lin: ## R-3.4.4 on Linux, last R non-altrep version - <<: *test-cran-lin - image: registry.gitlab.com/jangorecki/dockerfiles/r-3.4.4 - -test-350-cran-lin: ## R-3.5.0 on Linux, first R altrep version - <<: *test-cran-lin - image: registry.gitlab.com/jangorecki/dockerfiles/r-3.5.0 - -test-rel-win: ## R-release on Windows, test and build binaries +## R-release on Windows +# test and build binaries +test-rel-win: <<: *test-win variables: R_VERSION: "$R_REL_VERSION" before_script: - *install-r-rel-win - - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools42/files/rtools42-5355-5357.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait - - $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH" - - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most'), quiet=TRUE)" + - *install-rtools43-win + - $ENV:PATH = "C:\R\bin;C:\rtools43\usr\bin;$ENV:PATH" + - *install-deps-win - *cp-src-win - rm.exe -r bus script: - *mv-src-win - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R.exe CMD check 
--no-manual $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - *rm-src-win - *mv-bin-win -test-dev-win: ## R-devel on Windows; see #5294 for changes in Dec 2021 related to UCRT and Rtools42 +## R-devel on Windows +# test and build binaries +test-dev-win: <<: *test-win variables: R_VERSION: "$R_DEVEL_VERSION" + allow_failure: true ## temp workaround #5748 before_script: - *install-r-devel-win - - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools43/files/rtools43-5493-5475.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools43" -NoNewWindow -Wait + - *install-rtools43-win - $ENV:PATH = "C:\R\bin;C:\rtools43\usr\bin;$ENV:PATH" - - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most', exclude=c('knitr','rmarkdown')), quiet=TRUE)" ## exclude= for #5294 + - *install-deps-win - *cp-src-win - rm.exe -r bus script: - *mv-src-win - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R.exe CMD check --no-manual --ignore-vignettes $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - *rm-src-win - *mv-bin-win -test-old-win: ## R-oldrel on Windows +## R-oldrel on Windows +# test and build binaries +test-old-win: <<: *test-win variables: R_VERSION: "$R_OLDREL_VERSION" before_script: - *install-r-oldrel-win - - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools40-x86_64.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools40" -NoNewWindow -Wait - ## rtools42 doesn't support 32bit so oldrel-win (currently R 4.1) needs rtools40 - - $ENV:PATH = "C:\R\bin;C:\rtools40\usr\bin;$ENV:PATH" - - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='most', exclude=c('knitr','rmarkdown')), quiet=TRUE)" ## exclude= for #5294 + - 
*install-rtools42-win + - $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH" + - *install-deps-win - *cp-src-win - rm.exe -r bus script: - *mv-src-win - - cd bus/$CI_BUILD_NAME + - cd bus/$CI_JOB_NAME - R.exe CMD check --no-manual --ignore-vignettes $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - *rm-src-win - *mv-bin-win -#test-rel-mac: ## R-release on MacOS, no macosx runner yet +## R-release on MacOS +# no macosx runner set yet +#test-rel-mac: # <<: *test-mac # variables: # R_VERSION: "$R_REL_VERSION" @@ -305,7 +325,7 @@ test-old-win: ## R-oldrel on Windows # - rm -r bus # script: # - *mv-src -# - cd bus/$CI_BUILD_NAME +# - cd bus/$CI_JOB_NAME # - R CMD check $(ls -1t data.table_*.tar.gz | head -n 1) # - R CMD INSTALL --build $(ls -1t data.table_*.tar.gz | head -n 1) # - mkdir -p cran/bin/macosx/el-capitan/contrib/$R_VERSION @@ -313,23 +333,28 @@ test-old-win: ## R-oldrel on Windows # - *rm-src # - *mv-bin-mac -integration: ## merging all artifacts to produce single R repository, documentation and website +## integrate artifacts +# merging package tarballs and binaries into single R repository +# rendering documentation +# setting up CRAN-like structure +# generating pkgdown website +integration: stage: integration image: registry.gitlab.com/jangorecki/dockerfiles/r-pkgdown tags: - linux only: - master - - tags - needs: ["mirror-packages","build","test-rel-lin","test-rel-cran-lin","test-dev-cran-lin","test-rel-vanilla-lin","test-310-cran-lin","test-344-cran-lin","test-350-cran-lin","test-rel-win","test-dev-win","test-old-win"] + needs: ["mirror-packages","build","test-rel-lin","test-rel-cran-lin","test-dev-cran-lin","test-rel-vanilla-lin","test-310-cran-lin","test-rel-win","test-dev-win","test-old-win"] script: + - R --version - Rscript -e 'pkgdown::build_site(override=list(destination="./pkgdown"))' ## html manual, vignettes, repos, cran_web, cran_checks - echo 
'source(".ci/ci.R"); source(".ci/publish.R")' >> .Rprofile ## list of available test-* jobs dynamically based on bus/test-* directories - Rscript -e 'cat("\ntest.jobs <- c(\n"); cat(paste0(" \"",list.files("bus",pattern="^test-"),"\" = \"data.table\""), sep=",\n"); cat(")\n")' >> .Rprofile - Rscript -e 'sapply(names(test.jobs), check.test, pkg="data.table", simplify=FALSE)' - - mkdir -p bus/$CI_BUILD_NAME + - mkdir -p bus/$CI_JOB_NAME ## delete any existing non-dev version of data.table - rm -f bus/mirror-packages/cran/src/contrib/data.table_*.tar.gz - rm -f bus/mirror-packages/cran/bin/windows/contrib/$R_REL_VERSION/data.table_*.zip @@ -339,14 +364,14 @@ integration: ## merging all artifacts to produce single R repository, documentat #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_DEVEL_VERSION/data.table_*.tgz #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_OLDREL_VERSION/data.table_*.tgz ## merge mirror-packages and R devel packages - - mv bus/mirror-packages/cran bus/$CI_BUILD_NAME/ + - mv bus/mirror-packages/cran bus/$CI_JOB_NAME/ ## publish package sources - - mkdir -p bus/$CI_BUILD_NAME/cran/library bus/$CI_BUILD_NAME/cran/doc - - mv $(ls -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head -n 1) bus/$CI_BUILD_NAME/cran/src/contrib + - mkdir -p bus/$CI_JOB_NAME/cran/library bus/$CI_JOB_NAME/cran/doc + - mv $(ls -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME/cran/src/contrib - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="source"), type="source", fields="Revision", addFiles=TRUE)' ## publish binaries - Rscript -e 'move.bin("test-rel-win", Sys.getenv("R_REL_VERSION"), os.type="windows")' - - Rscript -e 'move.bin("test-dev-win", Sys.getenv("R_DEVEL_VERSION"), os.type="windows")' + - Rscript -e 'move.bin("test-dev-win", Sys.getenv("R_DEVEL_VERSION"), os.type="windows", silent=TRUE)' - Rscript -e 'move.bin("test-old-win", Sys.getenv("R_OLDREL_VERSION"), 
os.type="windows")' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_REL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_DEVEL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' @@ -385,67 +410,17 @@ integration: ## merging all artifacts to produce single R repository, documentat - mv pkgdown/* bus/integration/cran/ ## cleanup artifacts from other jobs - mkdir tmpbus - - mv bus/$CI_BUILD_NAME tmpbus + - mv bus/$CI_JOB_NAME tmpbus - rm -r bus - mv tmpbus bus <<: *artifacts -.docker-template: &docker - stage: deploy - tags: - - linux - image: docker - services: - - docker:dind - needs: - - job: build - - job: integration - artifacts: false - before_script: - - sed "s/SRC_IMAGE_NAME/$SRC_IMAGE_NAME/" < .ci/Dockerfile.in > Dockerfile - - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY - script: - - docker build --pull -t "$CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG" -f Dockerfile . 
- - docker run --rm "$CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG" Rscript -e 'cat(R.version.string, "\ndata.table revision", read.dcf(system.file("DESCRIPTION", package="data.table"), fields="Revision")[[1L]], "\n"); require(data.table); test.data.table()' - - docker push "$CI_REGISTRY_IMAGE/$IMAGE_NAME:$IMAGE_TAG" - -docker-r-release: ## data.table on R-release - only: - - master - variables: - SRC_IMAGE_NAME: "r-base-dev" - IMAGE_NAME: "r-release" - IMAGE_TAG: "latest" - <<: *docker - -docker-r-release-builder: ## data.table on R-release extended for Rmd vignettes build dependencies - only: - - master - variables: - SRC_IMAGE_NAME: "r-builder" - IMAGE_NAME: "r-release-builder" - IMAGE_TAG: "latest" - <<: *docker - -docker-r-devel: ## data.table on R-devel - only: - - master - variables: - SRC_IMAGE_NAME: "r-devel" - IMAGE_NAME: "r-devel" - IMAGE_TAG: "latest" - <<: *docker - -docker-tags: ## data.table on R-release fixed version images - only: - - tags - variables: - SRC_IMAGE_NAME: "r-base-dev" - IMAGE_NAME: "r-release" - IMAGE_TAG: $CI_COMMIT_TAG - <<: *docker - -pages: ## publish R repository, test jobs summaries, html documentation of all packages in repo, pkgdown +## publish +# R repository +# test jobs summaries +# html documentation of all packages in repo +# pkgdown website +pages: stage: deploy environment: production tags: @@ -458,7 +433,7 @@ pages: ## publish R repository, test jobs summaries, html documentation of all p - mkdir -p public - cp -r bus/integration/cran/* public - cat public/src/contrib/PACKAGES - artifacts: ## publish only when no failure + artifacts: expire_in: 2 weeks paths: - public From 46ee05bfa1cba9cb4ac31096352e6d056be07385 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Sat, 25 Nov 2023 15:12:32 +0100 Subject: [PATCH 21/88] update r-lib actions setup (#5632) --- .github/workflows/test-coverage.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index ba1f94fde..3e5919893 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 - uses: r-lib/actions/setup-pandoc@v1 From a63a89f844f5c3b817d5ac639b2f5b4d6d7c5c3a Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 25 Nov 2023 16:00:48 +0100 Subject: [PATCH 22/88] no nanotime anymore (#5761) --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 00f955c37..405b7a009 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Version: 1.14.9 Title: Extension of `data.frame` Depends: R (>= 3.1.0) Imports: methods -Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, nanotime, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown +Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. License: MPL-2.0 | file LICENSE URL: https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table From 37e5521f835f856ea3f4fee9e1f2caf463547c01 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 25 Nov 2023 16:13:37 +0100 Subject: [PATCH 23/88] Revert "update_dev_pkg uses GH for R repo (#5720)" (#5762) This reverts commit 94e8fbe448f9dcd83fe6f2751dbe36b05b404ee0. 
--- R/devel.R | 2 +- man/update_dev_pkg.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/devel.R b/R/devel.R index df77eb0e0..8bd7a1466 100644 --- a/R/devel.R +++ b/R/devel.R @@ -17,7 +17,7 @@ dcf.repo = function(pkg, repo, field, type) { dcf[dcf[,"Package"]==pkg, field][[1L]] } -update_dev_pkg = function(object="data.table", repo="https://Rdatatable.github.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, ...) { +update_dev_pkg = function(object="data.table", repo="https://Rdatatable.gitlab.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, ...) { # this works for any package, not just data.table pkg = object # perform package upgrade when new Revision present diff --git a/man/update_dev_pkg.Rd b/man/update_dev_pkg.Rd index 9914138c8..3db5b9831 100644 --- a/man/update_dev_pkg.Rd +++ b/man/update_dev_pkg.Rd @@ -5,7 +5,7 @@ Downloads and installs latest development version only when a new commit is available which has also passed all tests. Defaults are set to update \code{data.table}, other packages can be used as well. Their repository has to include git commit information in PACKAGES file. 
} \usage{update_dev_pkg(object="data.table", - repo="https://Rdatatable.github.io/data.table", + repo="https://Rdatatable.gitlab.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, \dots) } \arguments{ From 74a749be94406553e3d8daad8497014b0beabf9e Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 25 Nov 2023 16:17:32 +0100 Subject: [PATCH 24/88] not building docker images anymore (#5763) --- .ci/publish.R | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.ci/publish.R b/.ci/publish.R index 16521fc0e..ec35fe43f 100644 --- a/.ci/publish.R +++ b/.ci/publish.R @@ -102,15 +102,6 @@ package.index <- function(package, lib.loc, repodir="bus/integration/cran") { sprintf(" Windows binaries: %s ", format.bins(ver=c("r-devel","r-release","r-oldrel"), bin_ver=c(r_devel_ver, r_rel_ver, r_oldrel_ver), cran.home=cran.home, os.type="windows", pkg=pkg, version=version, repodir=repodir)), sprintf(" macOS binaries: %s ", format.bins(ver=c("r-release","r-oldrel"), bin_ver=c(r_rel_ver, r_oldrel_ver), cran.home=cran.home, os.type="macosx", pkg=pkg, version=version, repodir=repodir)) ) - if (pkg=="data.table") { ## docker images - registry = Sys.getenv("CI_REGISTRY", "registry.gitlab.com") - namespace = Sys.getenv("CI_PROJECT_NAMESPACE", "Rdatatable") - project = Sys.getenv("CI_PROJECT_NAME", "data.table") - images = c("r-release","r-devel","r-release-builder") - images.title = c("Base R release", "Base R development", "R release package builder") - tags = rep("latest", 3) - docker.dl = sprintf(" %s:
docker pull %s/%s/%s/%s:%s
", images.title, tolower(registry), tolower(namespace), tolower(project), tolower(images), tags) - } index.file = file.path(repodir, "web/packages", pkg, "index.html") if (!dir.exists(dirname(index.file))) dir.create(dirname(index.file), recursive=TRUE) writeLines(c( @@ -131,11 +122,6 @@ package.index <- function(package, lib.loc, repodir="bus/integration/cran") { sprintf("", pkg), tbl.dl, "
", - if (pkg=="data.table") - c("

Docker images:

", - sprintf("", pkg), - docker.dl, - "
"), "", "" ), index.file) From 50a3dc3744a8039b2215ad6a348bf2aea1e5b3d7 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 25 Nov 2023 17:12:48 +0100 Subject: [PATCH 25/88] disable memtest in CI (#5765) --- .gitlab-ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 18f821b43..95c02a46a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -131,7 +131,6 @@ build: # force all suggests # flags: gcc -O3 -flto -fno-common -Wunused-result # tests for compilation warnings -# measure memory usage during tests test-rel-lin: <<: *test-lin image: registry.gitlab.com/jangorecki/dockerfiles/r-data.table @@ -143,7 +142,6 @@ test-rel-lin: _R_CHECK_FORCE_SUGGESTS_: "TRUE" _R_CHECK_TESTS_NLINES_: "0" OPENBLAS_MAIN_FREE: "1" - TEST_DATA_TABLE_MEMTEST: "1" before_script: - apt-get update -q && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## remove after #5749 - *install-deps @@ -397,7 +395,7 @@ integration: - mv /tmp/opencran/doc bus/integration/cran/ ## library html manual, vignettes - Rscript -e 'lib.copy(lib.from="/tmp/opencran/library")' - ## web/checks/$pkg/$job 00install.out, 00check.log, *.Rout, memtest.csv, memtest.png + ## web/checks/$pkg/$job 00install.out, 00check.log, *.Rout, memtest.csv, memtest.png ## memtest not available for now #5764 - Rscript -e 'sapply(names(test.jobs), check.copy, simplify=FALSE)' ## web/packages/$pkg/$pkg.pdf - Rscript -e 'pdf.copy("data.table", "test-rel-lin")' From a6fe882cb2088209c201da09cb3c7a59e30c8745 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 27 Nov 2023 09:33:10 +0100 Subject: [PATCH 26/88] lto warning fix (#5766) * attempt to resolve lto compilation warning #5760 * do not allow failure anymore * another attempt for lto warning * try to fix another lto warning * move comment to related line, thx Michael --- .gitlab-ci.yml | 7 +++---- src/chmatch.c | 3 ++- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.gitlab-ci.yml 
b/.gitlab-ci.yml index 95c02a46a..80fa5d00a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -129,13 +129,12 @@ build: ## most comprehensive tests # force all suggests -# flags: gcc -O3 -flto -fno-common -Wunused-result +# flags: gcc -O3 -flto=auto -fno-common -Wunused-result # tests for compilation warnings test-rel-lin: <<: *test-lin image: registry.gitlab.com/jangorecki/dockerfiles/r-data.table needs: ["mirror-packages","build"] - allow_failure: true ## temp workaround #5760 variables: _R_CHECK_CRAN_INCOMING_: "FALSE" _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" @@ -148,8 +147,8 @@ test-rel-lin: - *cp-src - rm -r bus - mkdir -p ~/.R - - echo 'CFLAGS=-g -O3 -flto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - - echo 'CXXFLAGS=-g -O3 -flto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars + - echo 'CFLAGS=-g -O3 -flto=auto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars + - echo 'CXXFLAGS=-g -O3 -flto=auto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars script: - *mv-src - cd bus/$CI_JOB_NAME diff --git a/src/chmatch.c b/src/chmatch.c index a091e646f..b3ac5d818 100644 --- a/src/chmatch.c +++ b/src/chmatch.c @@ -95,7 +95,8 @@ static SEXP chmatchMain(SEXP x, SEXP table, int nomatch, bool chin, bool chmatch // For example: A,B,C,B,D,E,A,A => A(TL=1),B(2),C(3),D(4),E(5) => dupMap 1 2 3 5 6 | 8 7 4 // dupLink 7 8 | 6 (blank=0) int *counts = (int *)calloc(nuniq, sizeof(int)); - int *map = (int *)calloc(tablelen+nuniq, sizeof(int)); // +nuniq to store a 0 at the end of each group + unsigned int mapsize = tablelen+nuniq; // lto compilation warning #5760 // +nuniq to store a 0 at the end of each group + int *map = (int *)calloc(mapsize, sizeof(int)); if (!counts || !map) { // # nocov start for (int i=0; i 
Date: Mon, 27 Nov 2023 10:15:53 +0100 Subject: [PATCH 27/88] proper fix for #5753 to make zlib fully optional (#5770) --- R/fwrite.R | 2 +- inst/tests/tests.Rraw | 6 +++--- src/data.table.h | 1 + src/init.c | 1 + src/utils.c | 7 +++++++ 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/R/fwrite.R b/R/fwrite.R index 54ef04ed0..e1484b9e3 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -115,5 +115,5 @@ fwrite = function(x, file="", append=FALSE, quote="auto", invisible() } -nozlib = function() identical(.Call(Cdt_zlib_version), "zlib header files were not found when data.table was compiled") +haszlib = function() .Call(Cdt_has_zlib) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index bd3319f80..59ca6aabd 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -69,7 +69,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { which.first = data.table:::which.first which.last = data.table:::which.last `-.IDate` = data.table:::`-.IDate` - nozlib = data.table:::nozlib + haszlib = data.table:::haszlib # Also, for functions that are masked by other packages, we need to map the data.table one. Or else, # the other package's function would be picked up. 
As above, we only need to do this because we desire @@ -9881,7 +9881,7 @@ test(1658.39, fwrite(matrix(1:3, nrow=3, ncol=1), quote = TRUE), output = '"V1"\ test(1658.40, fwrite(matrix(1:4, nrow=2, ncol=2, dimnames = list(c("ra","rb"),c("ca","cb"))), quote = TRUE), output = '"ca","cb"\n.*1,3\n2,4', message = "x being coerced from class: matrix to data.table") # fwrite compress -if (nozlib()) { +if (!haszlib()) { test(1658.409, fwrite(data.table(a=1), file=tempfile(), compress="gzip"), error="header files were not found at the time data.table was compiled") } else { test(1658.41, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), output='a,b\n1,1\n2,2\n3,3') # compress ignored on console @@ -9921,7 +9921,7 @@ test(1658.52, file.info(f1)$size, file.info(f2)$size) unlink(c(f1, f2)) # compression error -5 due to only 3 bytes (bom) in first block; #3599 -if (!nozlib()) { +if (haszlib()) { DT = data.table(l=letters, n=1:26) test(1658.53, fwrite(DT, file=f<-tempfile(fileext=".gz"), bom=TRUE, col.names=FALSE), NULL) if (test_R.utils) test(1658.54, fread(f), setnames(DT,c("V1","V2"))) diff --git a/src/data.table.h b/src/data.table.h index c4458e899..4c9df894c 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -333,6 +333,7 @@ SEXP initLastUpdated(SEXP); SEXP allNAR(SEXP); SEXP test_dt_win_snprintf(void); SEXP dt_zlib_version(void); +SEXP dt_has_zlib(void); SEXP startsWithAny(SEXP, SEXP, SEXP); SEXP convertDate(SEXP, SEXP); SEXP fastmean(SEXP); diff --git a/src/init.c b/src/init.c index 2cffabd34..e374eb6e4 100644 --- a/src/init.c +++ b/src/init.c @@ -136,6 +136,7 @@ R_CallMethodDef callMethods[] = { {"CcoerceAs", (DL_FUNC) &coerceAs, -1}, {"Ctest_dt_win_snprintf", (DL_FUNC)&test_dt_win_snprintf, -1}, {"Cdt_zlib_version", (DL_FUNC)&dt_zlib_version, -1}, +{"Cdt_has_zlib", (DL_FUNC)&dt_has_zlib, -1}, {"Csubstitute_call_arg_namesR", (DL_FUNC) &substitute_call_arg_namesR, -1}, {"CstartsWithAny", (DL_FUNC)&startsWithAny, -1}, {"CconvertDate", (DL_FUNC)&convertDate, -1}, 
diff --git a/src/utils.c b/src/utils.c index fa10fd97c..3dfd8bcc6 100644 --- a/src/utils.c +++ b/src/utils.c @@ -379,6 +379,13 @@ SEXP dt_zlib_version(void) { #endif return ScalarString(mkChar(out)); } +SEXP dt_has_zlib(void) { +#ifndef NOZLIB + return ScalarLogical(1); +#else + return ScalarLogical(0); +#endif +} SEXP startsWithAny(const SEXP x, const SEXP y, SEXP start) { // for is_url in fread.R added in #5097 From ba2f26bef5708a281ad4b064a2c8f464c3ff05d5 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 27 Nov 2023 10:56:08 +0100 Subject: [PATCH 28/88] some extra dev-related ignore dir/file (#5771) --- .Rbuildignore | 1 + .gitignore | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index 1e99a9004..5f47bbacd 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -39,3 +39,4 @@ ^pkgdown$ ^lib$ ^library$ +^devwd$ diff --git a/.gitignore b/.gitignore index 74c9043c0..559df7b9d 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,8 @@ vignettes/plots/figures .Renviron lib library +devwd +dev.R *.csv *.csvy *.RDS From f032dd1ff3976fe9e136fb8bbb57a2f4ec6fed87 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 1 Dec 2023 20:25:53 +0100 Subject: [PATCH 29/88] fix broken Sean Lahman link (#5776) Closes #5767 Co-authored-by: Tyson Barrett --- vignettes/datatable-sd-usage.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd index f84fd6ea6..60d5c07c1 100644 --- a/vignettes/datatable-sd-usage.Rmd +++ b/vignettes/datatable-sd-usage.Rmd @@ -34,7 +34,7 @@ The simpler usage of `.SD` is for column subsetting (i.e., when `.SDcols` is spe ## Loading and Previewing Lahman Data -To give this a more real-world feel, rather than making up data, let's load some data sets about baseball from the [Lahman database](https://www.seanlahman.com/baseball-archive/statistics/). 
In typical R usage, we'd simply load these data sets from the `Lahman` R package; in this vignette, we've pre-downloaded them directly from the package's GitHub page instead. +To give this a more real-world feel, rather than making up data, let's load some data sets about baseball from the [Lahman database](https://github.com/cdalzell/Lahman). In typical R usage, we'd simply load these data sets from the `Lahman` R package; in this vignette, we've pre-downloaded them directly from the package's GitHub page instead. ```{r download_lahman} load('Teams.RData') @@ -46,7 +46,7 @@ setDT(Pitching) Pitching ``` -Readers up on baseball lingo should find the tables' contents familiar; `Teams` records some statistics for a given team in a given year, while `Pitching` records statistics for a given pitcher in a given year. Please do check out the [documentation](https://www.seanlahman.com/files/database/readme2017.txt) and explore the data yourself a bit before proceeding to familiarize yourself with their structure. +Readers up on baseball lingo should find the tables' contents familiar; `Teams` records some statistics for a given team in a given year, while `Pitching` records statistics for a given pitcher in a given year. Please do check out the [documentation](https://github.com/cdalzell/Lahman) and explore the data yourself a bit before proceeding to familiarize yourself with their structure. 
# `.SD` on Ungrouped Data From cbb0d075ce2c3cf3fc44359c7a940021cb37d0a0 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 2 Dec 2023 13:38:34 +0100 Subject: [PATCH 30/88] fix print format, closes #5778 (#5779) --- src/ijoin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ijoin.c b/src/ijoin.c index 96a9deae4..b4f0a4b08 100644 --- a/src/ijoin.c +++ b/src/ijoin.c @@ -322,7 +322,7 @@ SEXP overlaps(SEXP ux, SEXP imatches, SEXP multArg, SEXP typeArg, SEXP nomatchAr ++totlen; } break; - default: error(_("Internal error: unknown type in mult=ALL in overlaps: %d"), mult, type); // #nocov + default: error(_("Internal error: unknown type in mult=ALL in overlaps: %d"), type); // #nocov } } else totlen = rows; end1 = clock() - start; From 67fb763662b59f04bd6037e3b80f2c02f8e4c87c Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 2 Dec 2023 15:26:18 +0100 Subject: [PATCH 31/88] cherry picked #5769 to master (#5780) --- src/assign.c | 7 ++++--- src/fread.c | 2 +- src/fsort.c | 6 +++--- src/fwrite.c | 8 ++++---- src/gsumm.c | 6 +++--- src/init.c | 31 ++++++++++++++++--------------- src/reorder.c | 4 ++-- src/snprintf.c | 2 +- 8 files changed, 34 insertions(+), 32 deletions(-) diff --git a/src/assign.c b/src/assign.c index 61f38a554..fc960132d 100644 --- a/src/assign.c +++ b/src/assign.c @@ -470,7 +470,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // Can growVector at this point easily enough, but it shouldn't happen in first place so leave it as // strong error message for now. 
else if (TRUELENGTH(names) != oldtncol) - error(_("Internal error: selfrefnames is ok but tl names [%d] != tl [%d]"), TRUELENGTH(names), oldtncol); // # nocov + // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768, PRId64 didnt work + error(_("Internal error: selfrefnames is ok but tl names [%ld] != tl [%d]"), TRUELENGTH(names), oldtncol); // # nocov SETLENGTH(dt, oldncol+LENGTH(newcolnames)); SETLENGTH(names, oldncol+LENGTH(newcolnames)); for (int i=0; i=tt[i+1]) - error(_("Internal error: %d column numbers to delete not now in strictly increasing order. No-dups were checked earlier.")); // # nocov + error(_("Internal error: %d column numbers to delete not now in strictly increasing order. No-dups were checked earlier."), i); // # nocov } for (int i=tt[0], j=1, k=tt[0]+1; i0) for (int j=0; jCT_EMPTY) { args.header=true; - if (verbose) DTPRINT(_(" 'header' determined to be true due to column %d containing a string on row 1 and a lower type (%s) in the rest of the %d sample rows\n"), + if (verbose) DTPRINT(_(" 'header' determined to be true due to column %d containing a string on row 1 and a lower type (%s) in the rest of the %"PRId64" sample rows\n"), j+1, typeName[type[j]], sampleLines); break; } diff --git a/src/fsort.c b/src/fsort.c index 6dbb85d55..2618ec577 100644 --- a/src/fsort.c +++ b/src/fsort.c @@ -165,7 +165,7 @@ SEXP fsort(SEXP x, SEXP verboseArg) { int MSBNbits = maxBit > 15 ? 
16 : maxBit+1; // how many bits make up the MSB int shift = maxBit + 1 - MSBNbits; // the right shift to leave the MSB bits remaining size_t MSBsize = 1LL< 65,536) - if (verbose) Rprintf(_("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%d\n"), maxBit, MSBNbits, shift, MSBsize); + if (verbose) Rprintf(_("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n"), maxBit, MSBNbits, shift, MSBsize); uint64_t *counts = (uint64_t *)R_alloc(nBatch*MSBsize, sizeof(uint64_t)); memset(counts, 0, nBatch*MSBsize*sizeof(uint64_t)); @@ -242,11 +242,11 @@ SEXP fsort(SEXP x, SEXP verboseArg) { if (verbose) { Rprintf(_("Top 20 MSB counts: ")); for(int i=0; i0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--; if (verbose) { - Rprintf(_("%d by excluding 0 and 1 counts\n"), MSBsize); + Rprintf(_("%zu by excluding 0 and 1 counts\n"), MSBsize); } bool failed=false, alloc_fail=false, non_monotonic=false; // shared bools only ever assigned true; no need for atomic or critical assign diff --git a/src/fwrite.c b/src/fwrite.c index c5f977212..ef8b822e9 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -714,7 +714,7 @@ void fwriteMain(fwriteMainArgs args) } if (headerLen) { char *buff = malloc(headerLen); - if (!buff) STOP(_("Unable to allocate %d MiB for header: %s"), headerLen / 1024 / 1024, strerror(errno)); + if (!buff) STOP(_("Unable to allocate %zu MiB for header: %s"), headerLen / 1024 / 1024, strerror(errno)); char *ch = buff; if (args.bom) {*ch++=(char)0xEF; *ch++=(char)0xBB; *ch++=(char)0xBF; } // 3 appears above (search for "bom") memcpy(ch, args.yaml, yamlLen); @@ -753,7 +753,7 @@ void fwriteMain(fwriteMainArgs args) char *zbuff = malloc(zbuffSize); if (!zbuff) { free(buff); // # nocov - STOP(_("Unable to allocate %d MiB for zbuffer: %s"), zbuffSize / 1024 / 1024, strerror(errno)); // # nocov + STOP(_("Unable to allocate %zu MiB for zbuffer: %s"), zbuffSize / 1024 / 1024, strerror(errno)); // # nocov } size_t zbuffUsed = zbuffSize; ret1 = compressbuff(&stream, zbuff, &zbuffUsed, buff, 
(size_t)(ch-buff)); @@ -820,7 +820,7 @@ void fwriteMain(fwriteMainArgs args) char *buffPool = malloc(nth*(size_t)buffSize); if (!buffPool) { // # nocov start - STOP(_("Unable to allocate %d MB * %d thread buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."), + STOP(_("Unable to allocate %zu MB * %d thread buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."), (size_t)buffSize/(1024^2), nth, errno, strerror(errno)); // # nocov end } @@ -831,7 +831,7 @@ void fwriteMain(fwriteMainArgs args) if (!zbuffPool) { // # nocov start free(buffPool); - STOP(_("Unable to allocate %d MB * %d thread compressed buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."), + STOP(_("Unable to allocate %zu MB * %d thread compressed buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."), (size_t)zbuffSize/(1024^2), nth, errno, strerror(errno)); // # nocov end } diff --git a/src/gsumm.c b/src/gsumm.c index ed4169ff5..2047c61cd 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -86,8 +86,8 @@ SEXP gforce(SEXP env, SEXP jsub, SEXP o, SEXP f, SEXP l, SEXP irowsArg) { // TODO: enable stress-test mode in tests only (#3205) which can be turned off by default in release to decrease overhead on small data // if that is established to be biting (it may be fine). 
if (nBatch<1 || batchSize<1 || lastBatchSize<1) { - error(_("Internal error: nrow=%d ngrp=%d nbit=%d bitshift=%d highSize=%d nBatch=%d batchSize=%d lastBatchSize=%d\n"), // # nocov - nrow, ngrp, nb, bitshift, highSize, nBatch, batchSize, lastBatchSize); // # nocov + error(_("Internal error: nrow=%d ngrp=%d nbit=%d bitshift=%d highSize=%zu nBatch=%zu batchSize=%zu lastBatchSize=%zu\n"), // # nocov + nrow, ngrp, nb, bitshift, highSize, nBatch, batchSize, lastBatchSize); // # nocov } // initial population of g: #pragma omp parallel for num_threads(getDTthreads(ngrp, false)) @@ -1116,7 +1116,7 @@ SEXP gprod(SEXP x, SEXP narmArg) { //clock_t start = clock(); if (nrow != n) error(_("nrow [%d] != length(x) [%d] in %s"), nrow, n, "gprod"); long double *s = malloc(ngrp * sizeof(long double)); - if (!s) error(_("Unable to allocate %d * %d bytes for gprod"), ngrp, sizeof(long double)); + if (!s) error(_("Unable to allocate %d * %lu bytes for gprod"), ngrp, sizeof(long double)); for (int i=0; i8) error(_("Pointers are %d bytes, greater than 8. We have not tested on any architecture greater than 64bit yet."), sizeof(char *)); + if (sizeof(char *)>8) error(_("Pointers are %lu bytes, greater than 8. We have not tested on any architecture greater than 64bit yet."), sizeof(char *)); // One place we need the largest sizeof is the working memory malloc in reorder.c } @@ -177,23 +177,24 @@ void attribute_visible R_init_data_table(DllInfo *info) const char *msg = _("... failed. 
Please forward this message to maintainer('data.table')."); if ((int)NA_INTEGER != (int)INT_MIN) error(_("Checking NA_INTEGER [%d] == INT_MIN [%d] %s"), NA_INTEGER, INT_MIN, msg); if ((int)NA_INTEGER != (int)NA_LOGICAL) error(_("Checking NA_INTEGER [%d] == NA_LOGICAL [%d] %s"), NA_INTEGER, NA_LOGICAL, msg); - if (sizeof(int) != 4) error(_("Checking sizeof(%s) [%d] is %d %s"), "int", sizeof(int), 4, msg); - if (sizeof(double) != 8) error(_("Checking sizeof(%s) [%d] is %d %s"), "double", sizeof(double), 8, msg); // 8 on both 32bit and 64bit - // alignof not available in C99: if (alignof(double) != 8) error(_("Checking alignof(double) [%d] is 8 %s"), alignof(double), msg); // 8 on both 32bit and 64bit - if (sizeof(long long) != 8) error(_("Checking sizeof(%s) [%d] is %d %s"), "long long", sizeof(long long), 8, msg); - if (sizeof(char *) != 4 && sizeof(char *) != 8) error(_("Checking sizeof(pointer) [%d] is 4 or 8 %s"), sizeof(char *), msg); - if (sizeof(SEXP) != sizeof(char *)) error(_("Checking sizeof(SEXP) [%d] == sizeof(pointer) [%d] %s"), sizeof(SEXP), sizeof(char *), msg); - if (sizeof(uint64_t) != 8) error(_("Checking sizeof(%s) [%d] is %d %s"), "uint64_t", sizeof(uint64_t), 8, msg); - if (sizeof(int64_t) != 8) error(_("Checking sizeof(%s) [%d] is %d %s"), "int64_t", sizeof(int64_t), 8, msg); - if (sizeof(signed char) != 1) error(_("Checking sizeof(%s) [%d] is %d %s"), "signed char", sizeof(signed char), 1, msg); - if (sizeof(int8_t) != 1) error(_("Checking sizeof(%s) [%d] is %d %s"), "int8_t", sizeof(int8_t), 1, msg); - if (sizeof(uint8_t) != 1) error(_("Checking sizeof(%s) [%d] is %d %s"), "uint8_t", sizeof(uint8_t), 1, msg); - if (sizeof(int16_t) != 2) error(_("Checking sizeof(%s) [%d] is %d %s"), "int16_t", sizeof(int16_t), 2, msg); - if (sizeof(uint16_t) != 2) error(_("Checking sizeof(%s) [%d] is %d %s"), "uint16_t", sizeof(uint16_t), 2 ,msg); + if (sizeof(int) != 4) error(_("Checking sizeof(%s) [%lu] is %d %s"), "int", sizeof(int), 4, msg); + if 
(sizeof(double) != 8) error(_("Checking sizeof(%s) [%lu] is %d %s"), "double", sizeof(double), 8, msg); // 8 on both 32bit and 64bit + // alignof not available in C99: if (alignof(double) != 8) error(_("Checking alignof(double) [%lu] is 8 %s"), alignof(double), msg); // 8 on both 32bit and 64bit + if (sizeof(long long) != 8) error(_("Checking sizeof(%s) [%lu] is %d %s"), "long long", sizeof(long long), 8, msg); + if (sizeof(char *) != 4 && sizeof(char *) != 8) error(_("Checking sizeof(pointer) [%lu] is 4 or 8 %s"), sizeof(char *), msg); + if (sizeof(SEXP) != sizeof(char *)) error(_("Checking sizeof(SEXP) [%lu] == sizeof(pointer) [%lu] %s"), sizeof(SEXP), sizeof(char *), msg); + if (sizeof(uint64_t) != 8) error(_("Checking sizeof(%s) [%lu] is %d %s"), "uint64_t", sizeof(uint64_t), 8, msg); + if (sizeof(int64_t) != 8) error(_("Checking sizeof(%s) [%lu] is %d %s"), "int64_t", sizeof(int64_t), 8, msg); + if (sizeof(signed char) != 1) error(_("Checking sizeof(%s) [%lu] is %d %s"), "signed char", sizeof(signed char), 1, msg); + if (sizeof(int8_t) != 1) error(_("Checking sizeof(%s) [%lu] is %d %s"), "int8_t", sizeof(int8_t), 1, msg); + if (sizeof(uint8_t) != 1) error(_("Checking sizeof(%s) [%lu] is %d %s"), "uint8_t", sizeof(uint8_t), 1, msg); + if (sizeof(int16_t) != 2) error(_("Checking sizeof(%s) [%lu] is %d %s"), "int16_t", sizeof(int16_t), 2, msg); + if (sizeof(uint16_t) != 2) error(_("Checking sizeof(%s) [%lu] is %d %s"), "uint16_t", sizeof(uint16_t), 2 ,msg); SEXP tmp = PROTECT(allocVector(INTSXP,2)); if (LENGTH(tmp)!=2) error(_("Checking LENGTH(allocVector(INTSXP,2)) [%d] is 2 %s"), LENGTH(tmp), msg); - if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%d] is 0 %s"), TRUELENGTH(tmp), msg); + // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768 + if (TRUELENGTH(tmp)!=0) error(_("Checking TRUELENGTH(allocVector(INTSXP,2)) [%lld] is 0 %s"), (long long)TRUELENGTH(tmp), msg); 
UNPROTECT(1); // According to IEEE (http://en.wikipedia.org/wiki/IEEE_754-1985#Zero) we can rely on 0.0 being all 0 bits. diff --git a/src/reorder.c b/src/reorder.c index debdb0217..a36e27055 100644 --- a/src/reorder.c +++ b/src/reorder.c @@ -14,7 +14,7 @@ SEXP reorder(SEXP x, SEXP order) for (int i=0; i maxSize) @@ -24,7 +24,7 @@ SEXP reorder(SEXP x, SEXP order) copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768 } else { if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16 && SIZEOF(x)!=1) - error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)"), type2char(TYPEOF(x)), SIZEOF(x)); + error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%zu)"), type2char(TYPEOF(x)), SIZEOF(x)); if (ALTREP(x)) error(_("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug.")); // # nocov maxSize = SIZEOF(x); nrow = length(x); diff --git a/src/snprintf.c b/src/snprintf.c index 6b8098c6f..f322931fc 100644 --- a/src/snprintf.c +++ b/src/snprintf.c @@ -214,7 +214,7 @@ SEXP test_dt_win_snprintf(void) int res = dt_win_snprintf(buff, 10, "%4$d%2$d%3$d%5$d%1$d", 111, 222, 33, 44, 555); // fmt longer than n if (strlen(buff)!=9 || strcmp(buff, "442223355")) error(_("dt_win_snprintf test %d failed: %s"), 9, buff); - if (res!=13) /* should return what would have been written if not chopped */ error(_("dt_win_snprintf test %d failed: %s"), 10, res); + if (res!=13) /* should return what would have been written if not chopped */ error(_("dt_win_snprintf test %d failed: %d"), 10, res); dt_win_snprintf(buff, 39, "%l", 3); if (strlen(buff)!=38 || strcmp(buff, "0 %l does not end with recognized t")) error(_("dt_win_snprintf test %d failed: %s"), 11, buff); From a413d3c9f9d9b6e25d99e0c348772ccb81188f11 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: 
Sat, 2 Dec 2023 15:29:20 +0100 Subject: [PATCH 32/88] Fix format-security compiler warnings (#5774) (#5781) Co-authored-by: Michael Chirico --- src/assign.c | 2 +- src/forder.c | 2 +- src/fwrite.c | 4 ++-- src/rbindlist.c | 8 ++++---- src/subset.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/assign.c b/src/assign.c index fc960132d..3356e918b 100644 --- a/src/assign.c +++ b/src/assign.c @@ -527,7 +527,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) targetcol = VECTOR_ELT(dt,coln); } const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln))); - if (ret) warning(ret); + if (ret) warning("%s", ret); } *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now diff --git a/src/forder.c b/src/forder.c index 8a62e1de7..c9063782b 100644 --- a/src/forder.c +++ b/src/forder.c @@ -56,7 +56,7 @@ static int *anso = NULL; static bool notFirst=false; static char msg[1001]; -#define STOP(...) do {snprintf(msg, 1000, __VA_ARGS__); cleanup(); error(msg);} while(0) // http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html#Swallowing-the-Semicolon +#define STOP(...) 
do {snprintf(msg, 1000, __VA_ARGS__); cleanup(); error("%s", msg);} while(0) // http://gcc.gnu.org/onlinedocs/cpp/Swallowing-the-Semicolon.html#Swallowing-the-Semicolon // use STOP in this file (not error()) to ensure cleanup() is called first // snprintf to msg first in case nrow (just as an example) is provided in the message because cleanup() sets nrow to 0 #undef warning diff --git a/src/fwrite.c b/src/fwrite.c index ef8b822e9..322909749 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -736,7 +736,7 @@ void fwriteMain(fwriteMainArgs args) } if (f==-1) { *ch = '\0'; - DTPRINT(buff); + DTPRINT("%s", buff); free(buff); } else { int ret1=0, ret2=0; @@ -926,7 +926,7 @@ void fwriteMain(fwriteMainArgs args) errno=0; if (f==-1) { *ch='\0'; // standard C string end marker so DTPRINT knows where to stop - DTPRINT(myBuff); + DTPRINT("%s", myBuff); } else if ((args.is_gzip ? WRITE(f, myzBuff, (int)myzbuffUsed) : WRITE(f, myBuff, (int)(ch-myBuff))) == -1) { failed=true; // # nocov diff --git a/src/rbindlist.c b/src/rbindlist.c index 366902883..2ffff3af8 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -208,7 +208,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) const char *str = isString(s) ? CHAR(STRING_ELT(s,w2)) : ""; snprintf(buff, 1000, _("Column %d ['%s'] of item %d is missing in item %d. Use fill=TRUE to fill with NA (NULL for list columns), or use.names=FALSE to ignore column names.%s"), w2+1, str, i+1, missi+1, extra ); - if (usenames==TRUE) error(buff); + if (usenames==TRUE) error("%s", buff); i = LENGTH(l); // break from outer i loop break; // break from inner j loop } @@ -229,8 +229,8 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } const char *o = isNull(opt) ? 
"message" : CHAR(STRING_ELT(opt,0)); if (strcmp(o,"message")==0) { eval(PROTECT(lang2(install("message"),PROTECT(ScalarString(mkChar(buff))))), R_GlobalEnv); UNPROTECT(2); } - else if (strcmp(o,"warning")==0) warning(buff); - else if (strcmp(o,"error")==0) error(buff); + else if (strcmp(o,"warning")==0) warning("%s", buff); + else if (strcmp(o,"error")==0) error("%s", buff); else if (strcmp(o,"none")!=0) warning(_("options()$datatable.rbindlist.check=='%s' which is not 'message'|'warning'|'error'|'none'. See news item 5 in v1.12.2."), o); } } @@ -490,7 +490,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) } for (int k=0; k Date: Sat, 2 Dec 2023 19:17:26 +0100 Subject: [PATCH 33/88] follow up of #5780 to resolve -Wformat warning --- src/gsumm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gsumm.c b/src/gsumm.c index 2047c61cd..742e718f4 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -616,7 +616,7 @@ SEXP gmean(SEXP x, SEXP narmArg) } else { // narm==true and anyNA==true int *restrict nna_counts = calloc(ngrp, sizeof(int)); - if (!nna_counts) error(_("Unable to allocate %d * %d bytes for non-NA counts in gmean na.rm=TRUE"), ngrp, sizeof(int)); + if (!nna_counts) error(_("Unable to allocate %d * %lu bytes for non-NA counts in gmean na.rm=TRUE"), ngrp, sizeof(int)); #pragma omp parallel for num_threads(getDTthreads(highSize, false)) for (int h=0; h Date: Sat, 2 Dec 2023 21:15:57 +0100 Subject: [PATCH 34/88] vignette render with markdown rather than rmarkdown (#5773) * vignette render with markdown rather than rmarkdown * tune TOC --- DESCRIPTION | 2 +- vignettes/css/toc.css | 6 +++ vignettes/datatable-benchmarking.Rmd | 17 +++++++-- vignettes/datatable-faq.Rmd | 37 ++++++++++--------- vignettes/datatable-importing.Rmd | 4 +- vignettes/datatable-intro.Rmd | 4 +- vignettes/datatable-keys-fast-subset.Rmd | 4 +- vignettes/datatable-programming.Rmd | 4 +- vignettes/datatable-reference-semantics.Rmd | 4 +- 
vignettes/datatable-reshape.Rmd | 4 +- vignettes/datatable-sd-usage.Rmd | 11 ++++-- ...le-secondary-indices-and-auto-indexing.Rmd | 4 +- 12 files changed, 61 insertions(+), 40 deletions(-) create mode 100644 vignettes/css/toc.css diff --git a/DESCRIPTION b/DESCRIPTION index 405b7a009..6756db8ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Version: 1.14.9 Title: Extension of `data.frame` Depends: R (>= 3.1.0) Imports: methods -Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, zoo (>= 1.8-1), yaml, knitr, rmarkdown, markdown +Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, zoo (>= 1.8-1), yaml, knitr, markdown Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. License: MPL-2.0 | file LICENSE URL: https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table diff --git a/vignettes/css/toc.css b/vignettes/css/toc.css new file mode 100644 index 000000000..86adaba5b --- /dev/null +++ b/vignettes/css/toc.css @@ -0,0 +1,6 @@ +#TOC { + border: 1px solid #ccc; + border-radius: 5px; + padding-left: 1em; + background: #f6f6f6; +} diff --git a/vignettes/datatable-benchmarking.Rmd b/vignettes/datatable-benchmarking.Rmd index 7614a27d5..da580764b 100644 --- a/vignettes/datatable-benchmarking.Rmd +++ b/vignettes/datatable-benchmarking.Rmd @@ -2,15 +2,24 @@ title: "Benchmarking data.table" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette: - toc: true - number_sections: true + markdown::html_format: + options: + toc: true + number_sections: true + meta: + css: [default, css/toc.css] vignette: > %\VignetteIndexEntry{Benchmarking data.table} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- + + This document 
is meant to guide on measuring performance of `data.table`. Single place to document best practices and traps to avoid. # fread: clear caches diff --git a/vignettes/datatable-faq.Rmd b/vignettes/datatable-faq.Rmd index 4b0645e6b..f1deaba78 100644 --- a/vignettes/datatable-faq.Rmd +++ b/vignettes/datatable-faq.Rmd @@ -2,12 +2,15 @@ title: "Frequently Asked Questions about data.table" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette: - toc: true - number_sections: true + markdown::html_format: + options: + toc: true + number_sections: true + meta: + css: [default, css/toc.css] vignette: > %\VignetteIndexEntry{Frequently Asked Questions about data.table} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- @@ -94,13 +97,13 @@ As [highlighted above](#j-num), `j` in `[.data.table` is fundamentally different Furthermore, data.table _inherits_ from `data.frame`. It _is_ a `data.frame`, too. A data.table can be passed to any package that only accepts `data.frame` and that package can use `[.data.frame` syntax on the data.table. See [this answer](https://stackoverflow.com/a/10529888/403310) for how that is achieved. -We _have_ proposed enhancements to R wherever possible, too. One of these was accepted as a new feature in R 2.12.0 : +We _have_ proposed enhancements to R wherever possible, too. One of these was accepted as a new feature in R 2.12.0: > `unique()` and `match()` are now faster on character vectors where all elements are in the global CHARSXP cache and have unmarked encoding (ASCII). Thanks to Matt Dowle for suggesting improvements to the way the hash code is generated in unique.c. A second proposal was to use `memcpy` in duplicate.c, which is much faster than a for loop in C. This would improve the _way_ that R copies data internally (on some measures by 13 times). The thread on r-devel is [here](https://stat.ethz.ch/pipermail/r-devel/2010-April/057249.html). 
-A third more significant proposal that was accepted is that R now uses data.table's radix sort code as from R 3.3.0 : +A third more significant proposal that was accepted is that R now uses data.table's radix sort code as from R 3.3.0: > The radix sort algorithm and implementation from data.table (forder) replaces the previous radix (counting) sort and adds a new method for order(). Contributed by Matt Dowle and Arun Srinivasan, the new algorithm supports logical, integer (even with large values), real, and character vectors. It outperforms all other methods, but there are some caveats (see ?sort). @@ -236,7 +239,7 @@ Then you are using a version prior to 1.5.3. Prior to 1.5.3 `[.data.table` detec ## What are the scoping rules for `j` expressions? -Think of the subset as an environment where all the column names are variables. When a variable `foo` is used in the `j` of a query such as `X[Y, sum(foo)]`, `foo` is looked for in the following order : +Think of the subset as an environment where all the column names are variables. When a variable `foo` is used in the `j` of a query such as `X[Y, sum(foo)]`, `foo` is looked for in the following order: 1. The scope of `X`'s subset; _i.e._, `X`'s column names. 2. The scope of each row of `Y`; _i.e._, `Y`'s column names (_join inherited scope_) @@ -295,18 +298,18 @@ The `Z[Y]` part is not a single name so that is evaluated within the frame of `X ## Can you explain further why data.table is inspired by `A[B]` syntax in `base`? -Consider `A[B]` syntax using an example matrix `A` : +Consider `A[B]` syntax using an example matrix `A`: ```{r} A = matrix(1:12, nrow = 4) A ``` -To obtain cells `(1, 2) = 5` and `(3, 3) = 11` many users (we believe) may try this first : +To obtain cells `(1, 2) = 5` and `(3, 3) = 11` many users (we believe) may try this first: ```{r} A[c(1, 3), c(2, 3)] ``` -However, this returns the union of those rows and columns. To reference the cells, a 2-column matrix is required. 
`?Extract` says : +However, this returns the union of those rows and columns. To reference the cells, a 2-column matrix is required. `?Extract` says: > When indexing arrays by `[` a single argument `i` can be a matrix with as many columns as there are dimensions of `x`; the result is then a vector with elements corresponding to the sets of indices in each row of `i`. @@ -354,7 +357,7 @@ Furthermore, matrices, especially sparse matrices, are often stored in a 3-colum data.table _inherits_ from `data.frame`. It _is_ a `data.frame`, too. A data.table _can_ be passed to any package that _only_ accepts `data.frame`. When that package uses `[.data.frame` syntax on the data.table, it works. It works because `[.data.table` looks to see where it was called from. If it was called from such a package, `[.data.table` diverts to `[.data.frame`. ## I've heard that data.table syntax is analogous to SQL. -Yes : +Yes: - `i` $\Leftrightarrow$ where - `j` $\Leftrightarrow$ select @@ -367,7 +370,7 @@ Yes : - `mult = "first"|"last"` $\Leftrightarrow$ N/A because SQL is inherently unordered - `roll = TRUE` $\Leftrightarrow$ N/A because SQL is inherently unordered -The general form is : +The general form is: ```{r, eval = FALSE} DT[where, select|update, group by][order by][...] ... [...] @@ -447,7 +450,7 @@ Many thanks to the R core team for fixing the issue in Sep 2019. data.table v1.1 This comes up quite a lot but it's really earth-shatteringly simple. A function such as `merge` is _generic_ if it consists of a call to `UseMethod`. When you see people talking about whether or not functions are _generic_ functions they are merely typing the function without `()` afterwards, looking at the program code inside it and if they see a call to `UseMethod` then it is _generic_. What does `UseMethod` do? It literally slaps the function name together with the class of the first argument, separated by period (`.`) and then calls that function, passing along the same arguments. It's that simple. 
For example, `merge(X, Y)` contains a `UseMethod` call which means it then _dispatches_ (i.e. calls) `paste("merge", class(X), sep = ".")`. Functions with dots in their name may or may not be methods. The dot is irrelevant really, other than dot being the separator that `UseMethod` uses. Knowing this background should now highlight why, for example, it is obvious to R folk that `as.data.table.data.frame` is the `data.frame` method for the `as.data.table` generic function. Further, it may help to elucidate that, yes, you are correct, it is not obvious from its name alone that `ls.fit` is not the fit method of the `ls` generic function. You only know that by typing `ls` (not `ls()`) and observing it isn't a single call to `UseMethod`. -You might now ask: where is this documented in R? Answer: it's quite clear, but, you need to first know to look in `?UseMethod` and _that_ help file contains : +You might now ask: where is this documented in R? Answer: it's quite clear, but, you need to first know to look in `?UseMethod` and _that_ help file contains: > When a function calling `UseMethod('fun')` is applied to an object with class attribute `c('first', 'second')`, the system searches for a function called `fun.first` and, if it finds it, applies it to the object. If no such function is found a function called `fun.second` is tried. If no class name produces a suitable function, the function `fun.default` is used, if it exists, or an error results. @@ -481,7 +484,7 @@ copied in bulk (`memcpy` in C) rather than looping in C. ## What are primary and secondary indexes in data.table? Manual: [`?setkey`](https://www.rdocumentation.org/packages/data.table/functions/setkey) -S.O. 
: [What is the purpose of setting a key in data.table?](https://stackoverflow.com/questions/20039335/what-is-the-purpose-of-setting-a-key-in-data-table/20057411#20057411) +S.O.: [What is the purpose of setting a key in data.table?](https://stackoverflow.com/questions/20039335/what-is-the-purpose-of-setting-a-key-in-data-table/20057411#20057411) `setkey(DT, col1, col2)` orders the rows by column `col1` then within each group of `col1` it orders by `col2`. This is a _primary index_. The row order is changed _by reference_ in RAM. Subsequent joins and groups on those key columns then take advantage of the sort order for efficiency. (Imagine how difficult looking for a phone number in a printed telephone directory would be if it wasn't sorted by surname then forename. That's literally all `setkey` does. It sorts the rows by the columns you specify.) The index doesn't use any RAM. It simply changes the row order in RAM and marks the key columns. Analogous to a _clustered index_ in SQL. @@ -521,7 +524,7 @@ DT[ , { mySD = copy(.SD) Please upgrade to v1.8.1 or later. From this version, if `.N` is returned by `j` it is renamed to `N` to avoid any ambiguity in any subsequent grouping between the `.N` special variable and a column called `".N"`. -The old behaviour can be reproduced by forcing `.N` to be called `.N`, like this : +The old behaviour can be reproduced by forcing `.N` to be called `.N`, like this: ```{r} DT = data.table(a = c(1,1,2,2,2), b = c(1,2,2,2,1)) DT @@ -533,7 +536,7 @@ cat(try( If you are already running v1.8.1 or later then the error message is now more helpful than the "cannot change value of locked binding" error, as you can see above, since this vignette was produced using v1.8.1 or later. -The more natural syntax now works : +The more natural syntax now works: ```{r} if (packageVersion("data.table") >= "1.8.1") { DT[ , .N, by = list(a, b)][ , unique(N), by = a] @@ -555,7 +558,7 @@ Hopefully, this is self explanatory. 
The full message is: Coerced numeric RHS to integer to match the column's type; may have truncated precision. Either change the column to numeric first by creating a new numeric vector length 5 (nrows of entire table) yourself and assigning that (i.e. 'replace' column), or coerce RHS to integer yourself (e.g. 1L or as.integer) to make your intent clear (and for speed). Or, set the column type correctly up front when you create the table and stick to it, please. -To generate it, try : +To generate it, try: ```{r} DT = data.table(a = 1:5, b = 1:5) diff --git a/vignettes/datatable-importing.Rmd b/vignettes/datatable-importing.Rmd index 41a3d629a..c37cd6f75 100644 --- a/vignettes/datatable-importing.Rmd +++ b/vignettes/datatable-importing.Rmd @@ -2,10 +2,10 @@ title: "Importing data.table" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Importing data.table} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index 3a5eda34c..5bd36437a 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -2,10 +2,10 @@ title: "Introduction to data.table" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Introduction to data.table} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-keys-fast-subset.Rmd b/vignettes/datatable-keys-fast-subset.Rmd index 465052d94..3e9a4f23c 100644 --- a/vignettes/datatable-keys-fast-subset.Rmd +++ b/vignettes/datatable-keys-fast-subset.Rmd @@ -2,10 +2,10 @@ title: "Keys and fast binary search based subset" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Keys and fast binary search based subset} - 
%\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index bf481f06f..d63b1bccc 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -2,10 +2,10 @@ title: "Programming on data.table" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Programming on data.table} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-reference-semantics.Rmd b/vignettes/datatable-reference-semantics.Rmd index 33da89bb9..220a2a19a 100644 --- a/vignettes/datatable-reference-semantics.Rmd +++ b/vignettes/datatable-reference-semantics.Rmd @@ -2,10 +2,10 @@ title: "Reference semantics" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Reference semantics} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 3f94392fc..c26d5510d 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -2,10 +2,10 @@ title: "Efficient reshaping using data.tables" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Efficient reshaping using data.tables} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd index 60d5c07c1..8e7919f34 100644 --- a/vignettes/datatable-sd-usage.Rmd +++ b/vignettes/datatable-sd-usage.Rmd @@ -2,12 +2,15 @@ title: "Using .SD for Data Analysis" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette: - toc: true - number_sections: true + 
markdown::html_format: + options: + toc: true + number_sections: true + meta: + css: [default, css/toc.css] vignette: > %\VignetteIndexEntry{Using .SD for Data Analysis} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- diff --git a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd index ef506605c..374ccd66b 100644 --- a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd +++ b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd @@ -2,10 +2,10 @@ title: "Secondary indices and auto indexing" date: "`r Sys.Date()`" output: - rmarkdown::html_vignette + markdown::html_format vignette: > %\VignetteIndexEntry{Secondary indices and auto indexing} - %\VignetteEngine{knitr::rmarkdown} + %\VignetteEngine{knitr::knitr} \usepackage[utf8]{inputenc} --- From c3ad47db96186acdad307c2336b7bfbc515b1ed6 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 3 Dec 2023 10:15:07 +0100 Subject: [PATCH 35/88] followup vignettes updated after change to markdown vignette engine (#5784) * do not use options max.print in vignettes, closes #5783 * follow up of markdown vign enginge #5773 * amend feedback from Michael --- vignettes/css/bootstrap.css | 118 ------------------ vignettes/datatable-intro.Rmd | 63 ++-------- vignettes/datatable-programming.Rmd | 67 +++++----- vignettes/datatable-reference-semantics.Rmd | 48 +++---- vignettes/datatable-reshape.Rmd | 22 +--- vignettes/datatable-sd-usage.Rmd | 3 +- ...le-secondary-indices-and-auto-indexing.Rmd | 4 +- 7 files changed, 74 insertions(+), 251 deletions(-) delete mode 100644 vignettes/css/bootstrap.css diff --git a/vignettes/css/bootstrap.css b/vignettes/css/bootstrap.css deleted file mode 100644 index 1453f27bf..000000000 --- a/vignettes/css/bootstrap.css +++ /dev/null @@ -1,118 +0,0 @@ -code, -kbd, -pre, -samp { - font-family: Source Code Pro, Inconsolata, Monaco, Consolas, Menlo, Courier New, 
monospace; -} - -code { - padding: 0px 2px; - font-size: 90%; - color: #c7254e; - white-space: nowrap; - background-color: #f9f2f4; - border-radius: 3px; - border: 0px; -} - -pre { - display: block; - padding: 9.5px; - margin: 0 0 10px; - font-size: 14px; - line-height: 1.428571429; - color: #c7254e; - background-color: #f9f2f4 - word-break: break-all; - word-wrap: break-word; - border: 0px ; - border-radius: 3px; - /*background-color: #FDF6E3;*/ - /*background-color: #f5f5f5; */ - /*border: 1px solid #FDF6E3;*/ -} - -pre code { - padding: 0; - font-size: inherit; - color: inherit; - white-space: pre-wrap; - background-color: transparent; - border-radius: 0; -} - -.bs-callout { - margin:20px 0; - padding:20px; - border-left:3px solid #eee -} - -.bs-callout h4 { - margin-top:0; - margin-bottom:5px -} - -.bs-callout p:last-child { - margin-bottom:0 -} - -.bs-callout code { - background-color:#fff; - border-radius:3px -} - -.bs-callout pre code { - background-color:transparent; - border-radius:3px -} - -.bs-callout-danger { - background-color:#fdf7f7; - border-color:#d9534f -} - -.bs-callout-danger h4 { - color:#d9534f -} - -.bs-callout-warning { - background-color:#fcf8f2; - border-color:#f0ad4e -} - -.bs-callout-warning h4 { - color:#f0ad4e -} - -.bs-callout-info { - background-color:#f4f8fa; - border-color:#5bc0de -} - -.bs-callout-info h4 { - color:#5bc0de -} - -// KeyWordTok -.sourceCode .kw { color: #268BD2; } -// DataTypeTok -.sourceCode .dt { color: #268BD2; } - -// DecValTok (decimal value), BaseNTok, FloatTok -.sourceCode .dv, .sourceCode .bn, .sourceCode .fl { color: #D33682; } -// CharTok -.sourceCode .ch { color: #DC322F; } -// StringTok -.sourceCode .st { color: #2AA198; } -// CommentTok -.sourceCode .co { color: #93A1A1; } -// OtherTok -.sourceCode .ot { color: #A57800; } -// AlertTok -.sourceCode .al { color: #CB4B16; font-weight: bold; } -// FunctionTok -.sourceCode .fu { color: #268BD2; } -// RegionMarkerTok -.sourceCode .re { } -// ErrorTok 
-.sourceCode .er { color: #D30102; font-weight: bold; } diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index 5bd36437a..04fd79e50 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -86,7 +86,7 @@ class(DT$ID) You can also convert existing objects to a `data.table` using `setDT()` (for `data.frame`s and `list`s) and `as.data.table()` (for other structures); the difference is beyond the scope of this vignette, see `?setDT` and `?as.data.table` for more details. -#### Note that: {.bs-callout .bs-callout-info} +#### Note that: * Row numbers are printed with a `:` in order to visually separate the row number from the first column. @@ -111,7 +111,7 @@ DT[i, j, by] Users who have an SQL background might perhaps immediately relate to this syntax. -#### The way to read it (out loud) is: {.bs-callout .bs-callout-info} +#### The way to read it (out loud) is: Take `DT`, subset/reorder rows using `i`, then calculate `j`, grouped by `by`. @@ -126,8 +126,6 @@ ans <- flights[origin == "JFK" & month == 6L] head(ans) ``` -#### {.bs-callout .bs-callout-info} - * Within the frame of a `data.table`, columns can be referred to *as if they are variables*, much like in SQL or Stata. Therefore, we simply refer to `origin` and `month` as if they are variables. We do not need to add the prefix `flights$` each time. Nevertheless, using `flights$origin` and `flights$month` would work just fine. * The *row indices* that satisfy the condition `origin == "JFK" & month == 6L` are computed, and since there is nothing else left to do, all columns from `flights` at rows corresponding to those *row indices* are simply returned as a `data.table`. @@ -140,7 +138,6 @@ head(ans) ans <- flights[1:2] ans ``` -#### {.bs-callout .bs-callout-info} * In this case, there is no condition. The row indices are already provided in `i`. We therefore return a `data.table` with all columns from `flights` at rows for those *row indices*. 
@@ -153,7 +150,7 @@ ans <- flights[order(origin, -dest)] head(ans) ``` -#### `order()` is internally optimised {.bs-callout .bs-callout-info} +#### `order()` is internally optimised * We can use "-" on a `character` columns within the frame of a `data.table` to sort in decreasing order. @@ -170,8 +167,6 @@ ans <- flights[, arr_delay] head(ans) ``` -#### {.bs-callout .bs-callout-info} - * Since columns can be referred to as if they are variables within the frame of `data.table`s, we directly refer to the *variable* we want to subset. Since we want *all the rows*, we simply skip `i`. * It returns *all* the rows for the column `arr_delay`. @@ -183,15 +178,13 @@ ans <- flights[, list(arr_delay)] head(ans) ``` -#### {.bs-callout .bs-callout-info} - * We wrap the *variables* (column names) within `list()`, which ensures that a `data.table` is returned. In case of a single column name, not wrapping with `list()` returns a vector instead, as seen in the [previous example](#select-j-1d). * `data.table` also allows wrapping columns with `.()` instead of `list()`. It is an *alias* to `list()`; they both mean the same. Feel free to use whichever you prefer; we have noticed most users seem to prefer `.()` for conciseness, so we will continue to use `.()` hereafter. `data.table`s (and `data.frame`s) are internally `list`s as well, with the stipulation that each element has the same length and the `list` has a `class` attribute. Allowing `j` to return a `list` enables converting and returning `data.table` very efficiently. -#### Tip: {.bs-callout .bs-callout-warning #tip-1} +#### Tip: {#tip-1} As long as `j-expression` returns a `list`, each element of the list will be converted to a column in the resulting `data.table`. This makes `j` quite powerful, as we will see shortly. It is also very important to understand this for when you'd like to make more complicated queries!! 
@@ -205,8 +198,6 @@ head(ans) # ans <- flights[, list(arr_delay, dep_delay)] ``` -#### {.bs-callout .bs-callout-info} - * Wrap both columns within `.()`, or `list()`. That's it. #### -- Select both `arr_delay` and `dep_delay` columns *and* rename them to `delay_arr` and `delay_dep`. @@ -229,7 +220,7 @@ ans <- flights[, sum( (arr_delay + dep_delay) < 0 )] ans ``` -#### What's happening here? {.bs-callout .bs-callout-info} +#### What's happening here? * `data.table`'s `j` can handle more than just *selecting columns* - it can handle *expressions*, i.e., *computing on columns*. This shouldn't be surprising, as columns can be referred to as if they are variables. Then we should be able to *compute* by calling functions on those variables. And that's what precisely happens here. @@ -243,8 +234,6 @@ ans <- flights[origin == "JFK" & month == 6L, ans ``` -#### {.bs-callout .bs-callout-info} - * We first subset in `i` to find matching *row indices* where `origin` airport equals `"JFK"`, and `month` equals `6L`. We *do not* subset the _entire_ `data.table` corresponding to those rows _yet_. * Now, we look at `j` and find that it uses only *two columns*. And what we have to do is to compute their `mean()`. Therefore we subset just those columns corresponding to the matching rows, and compute their `mean()`. @@ -262,7 +251,7 @@ The function `length()` requires an input argument. We just needed to compute th This type of operation occurs quite frequently, especially while grouping (as we will see in the next section), to the point where `data.table` provides a *special symbol* `.N` for it. -#### Special symbol `.N`: {.bs-callout .bs-callout-info #special-N} +#### Special symbol `.N`: {#special-N} `.N` is a special built-in variable that holds the number of observations _in the current group_. It is particularly useful when combined with `by` as we'll see in the next section. In the absence of group by operations, it simply returns the number of rows in the subset. 
@@ -273,8 +262,6 @@ ans <- flights[origin == "JFK" & month == 6L, .N] ans ``` -#### {.bs-callout .bs-callout-info} - * Once again, we subset in `i` to get the *row indices* where `origin` airport equals *"JFK"*, and `month` equals *6*. * We see that `j` uses only `.N` and no other columns. Therefore the entire subset is not materialised. We simply return the number of rows in the subset (which is just the length of row indices). @@ -372,8 +359,6 @@ ans # ans <- flights[, .(.N), by = "origin"] ``` -#### {.bs-callout .bs-callout-info} - * We know `.N` [is a special variable](#special-N) that holds the number of rows in the current group. Grouping by `origin` obtains the number of rows, `.N`, for each group. * By doing `head(flights)` you can see that the origin airports occur in the order *"JFK"*, *"LGA"* and *"EWR"*. The original order of grouping variables is preserved in the result. _This is important to keep in mind!_ @@ -400,8 +385,6 @@ ans <- flights[carrier == "AA", .N, by = origin] ans ``` -#### {.bs-callout .bs-callout-info} - * We first obtain the row indices for the expression `carrier == "AA"` from `i`. * Using those *row indices*, we obtain the number of rows while grouped by `origin`. Once again no columns are actually materialised here, because the `j-expression` does not require any columns to be actually subsetted and is therefore fast and memory efficient. @@ -416,8 +399,6 @@ head(ans) # ans <- flights[carrier == "AA", .N, by = c("origin", "dest")] ``` -#### {.bs-callout .bs-callout-info} - * `by` accepts multiple columns. We just provide all the columns by which to group by. Note the use of `.()` again in `by` -- again, this is just shorthand for `list()`, and `list()` can be used here as well. Again, we'll stick with `.()` in this vignette. #### -- How can we get the average arrival and departure delay for each `orig,dest` pair for each month for carrier code `"AA"`? 
{#origin-dest-month} @@ -429,8 +410,6 @@ ans <- flights[carrier == "AA", ans ``` -#### {.bs-callout .bs-callout-info} - * Since we did not provide column names for the expressions in `j`, they were automatically generated as `V1` and `V2`. * Once again, note that the input order of grouping columns is preserved in the result. @@ -450,8 +429,6 @@ ans <- flights[carrier == "AA", ans ``` -#### {.bs-callout .bs-callout-info} - * All we did was to change `by` to `keyby`. This automatically orders the result by the grouping variables in increasing order. In fact, due to the internal implementation of `by` first requiring a sort before recovering the original table's order, `keyby` is typically faster than `by` because it doesn't require this second step. **Keys:** Actually `keyby` does a little more than *just ordering*. It also *sets a key* after ordering by setting an `attribute` called `sorted`. @@ -475,8 +452,6 @@ ans <- ans[order(origin, -dest)] head(ans) ``` -#### {.bs-callout .bs-callout-info} - * Recall that we can use `-` on a `character` column in `order()` within the frame of a `data.table`. This is possible to due `data.table`'s internal query optimisation. * Also recall that `order(...)` with the frame of a `data.table` is *automatically optimised* to use `data.table`'s internal fast radix order `forder()` for speed. @@ -488,8 +463,6 @@ ans <- flights[carrier == "AA", .N, by = .(origin, dest)][order(origin, -dest)] head(ans, 10) ``` -#### {.bs-callout .bs-callout-info} - * We can tack expressions one after another, *forming a chain* of operations, i.e., `DT[ ... ][ ... ][ ... ]`. * Or you can also chain them vertically: @@ -512,8 +485,6 @@ ans <- flights[, .N, .(dep_delay>0, arr_delay>0)] ans ``` -#### {.bs-callout .bs-callout-info} - * The last row corresponds to `dep_delay > 0 = TRUE` and `arr_delay > 0 = FALSE`. 
We can see that `r flights[!is.na(arr_delay) & !is.na(dep_delay), .N, .(dep_delay>0, arr_delay>0)][, N[4L]]` flights started late but arrived early (or on time). * Note that we did not provide any names to `by-expression`. Therefore, names have been automatically assigned in the result. As with `j`, you can name these expressions as you would elements of any `list`, e.g. `DT[, .N, .(dep_delayed = dep_delay>0, arr_delayed = arr_delay>0)]`. @@ -528,7 +499,7 @@ It is of course not practical to have to type `mean(myCol)` for every column one How can we do this efficiently, concisely? To get there, refresh on [this tip](#tip-1) - *"As long as the `j`-expression returns a `list`, each element of the `list` will be converted to a column in the resulting `data.table`"*. Suppose we can refer to the *data subset for each group* as a variable *while grouping*, then we can loop through all the columns of that variable using the already- or soon-to-be-familiar base function `lapply()`. No new names to learn specific to `data.table`. -#### Special symbol `.SD`: {.bs-callout .bs-callout-info #special-SD} +#### Special symbol `.SD`: {#special-SD} `data.table` provides a *special* symbol, called `.SD`. It stands for **S**ubset of **D**ata. It by itself is a `data.table` that holds the data for *the current group* defined using `by`. @@ -542,8 +513,6 @@ DT DT[, print(.SD), by = ID] ``` -#### {.bs-callout .bs-callout-info} - * `.SD` contains all the columns *except the grouping columns* by default. * It is also generated by preserving the original order - data corresponding to `ID = "b"`, then `ID = "a"`, and then `ID = "c"`. @@ -554,8 +523,6 @@ To compute on (multiple) columns, we can then simply use the base R function `la DT[, lapply(.SD, mean), by = ID] ``` -#### {.bs-callout .bs-callout-info} - * `.SD` holds the rows corresponding to columns `a`, `b` and `c` for that group. We compute the `mean()` on each of these columns using the already-familiar base function `lapply()`. 
* Each group returns a list of three elements containing the mean value which will become the columns of the resulting `data.table`. @@ -566,7 +533,7 @@ We are almost there. There is one little thing left to address. In our `flights` #### -- How can we specify just the columns we would like to compute the `mean()` on? -#### .SDcols {.bs-callout .bs-callout-info} +#### .SDcols Using the argument `.SDcols`. It accepts either column names or column indices. For example, `.SDcols = c("arr_delay", "dep_delay")` ensures that `.SD` contains only these two columns for each group. @@ -590,8 +557,6 @@ ans <- flights[, head(.SD, 2), by = month] head(ans) ``` -#### {.bs-callout .bs-callout-info} - * `.SD` is a `data.table` that holds all the rows for *that group*. We simply subset the first two rows as we have seen [here](#subset-rows-integer) already. * For each group, `head(.SD, 2)` returns the first two rows as a `data.table`, which is also a `list`, so we do not have to wrap it with `.()`. @@ -606,8 +571,6 @@ So that we have a consistent syntax and keep using already existing (and familia DT[, .(val = c(a,b)), by = ID] ``` -#### {.bs-callout .bs-callout-info} - * That's it. There is no special syntax required. All we need to know is the base function `c()` which concatenates vectors and [the tip from before](#tip-1). #### -- What if we would like to have all the values of column `a` and `b` concatenated, but returned as a list column? @@ -616,8 +579,6 @@ DT[, .(val = c(a,b)), by = ID] DT[, .(val = list(c(a,b))), by = ID] ``` -#### {.bs-callout .bs-callout-info} - * Here, we first concatenate the values with `c(a,b)` for each group, and wrap that with `list()`. So for each group, we return a list of all concatenated values. * Note those commas are for display only. A list column can contain any object in each cell, and in this example, each cell is itself a vector and some cells contain longer vectors than others. 
@@ -646,7 +607,7 @@ DT[i, j, by] We have seen so far that, -#### Using `i`: {.bs-callout .bs-callout-info} +#### Using `i`: * We can subset rows similar to a `data.frame`- except you don't have to use `DT$` repetitively since columns within the frame of a `data.table` are seen as if they are *variables*. @@ -654,7 +615,7 @@ We have seen so far that, We can do much more in `i` by keying a `data.table`, which allows blazing fast subsets and joins. We will see this in the *"Keys and fast binary search based subsets"* and *"Joins and rolling joins"* vignette. -#### Using `j`: {.bs-callout .bs-callout-info} +#### Using `j`: 1. Select columns the `data.table` way: `DT[, .(colA, colB)]`. @@ -666,7 +627,7 @@ We can do much more in `i` by keying a `data.table`, which allows blazing fast s 5. Combine with `i`: `DT[colA > value, sum(colB)]`. -#### Using `by`: {.bs-callout .bs-callout-info} +#### Using `by`: * Using `by`, we can group by columns by specifying a *list of columns* or a *character vector of column names* or even *expressions*. The flexibility of `j`, combined with `by` and `i` makes for a very powerful syntax. @@ -682,7 +643,7 @@ We can do much more in `i` by keying a `data.table`, which allows blazing fast s 3. `DT[col > val, head(.SD, 1), by = ...]` - combine `i` along with `j` and `by`. -#### And remember the tip: {.bs-callout .bs-callout-warning} +#### And remember the tip: As long as `j` returns a `list`, each element of the list will become a column in the resulting `data.table`. diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index d63b1bccc..fc3ad726d 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -25,8 +25,16 @@ knitr::opts_chunk$set( `data.table`, from its very first releases, enabled the usage of `subset` and `with` (or `within`) functions by defining the`[.data.table` method. 
`subset` and `with` are base R functions that are useful for reducing repetition in code, enhancing readability, and reducing number the total characters the user has to type. This functionality is possible in R because of a quite unique feature called *lazy evaluation*. This feature allows a function to catch its arguments, before they are evaluated, and to evaluate them in a different scope than the one in which they were called. Let's recap usage of the `subset` function. -```{r opt_max_print_10, include = FALSE} -options(max.print = 10L) # 2 rows +```{r df_print, echo=FALSE} +registerS3method("print", "data.frame", function(x, ...) { + base::print.data.frame(head(x, 2L), ...) + cat("...\n") + invisible(x) +}) +.opts = options( + datatable.print.topn=2L, + datatable.print.nrows=20L +) ``` ```{r subset} @@ -149,20 +157,18 @@ Now, to use substitution inside `[.data.table`, we don't need to call the `subst Let's use the `iris` data set as a demonstration. Just as an example, let's pretend we want to compute the `Sepal.Hypotenuse`, treating the sepal width and length as if they were legs of a right triangle. -```{r opt_max_print_8, include = FALSE} -options(max.print = 8L) # 2 rows -``` - ```{r hypotenuse_datatable} DT = as.data.table(iris) -DT[, outer(inner(var1) + inner(var2)), - env = list( - outer = "sqrt", - inner = "square", - var1 = "Sepal.Length", - var2 = "Sepal.Width" - )] +str( + DT[, outer(inner(var1) + inner(var2)), + env = list( + outer = "sqrt", + inner = "square", + var1 = "Sepal.Length", + var2 = "Sepal.Width" + )] +) # return as a data.table DT[, .(Species, var1, var2, out = outer(inner(var1) + inner(var2))), @@ -235,10 +241,6 @@ The example presented above illustrates a neat and powerful way to make your cod An obvious use case could be to mimic `.SD` functionality by injecting a `list` call into the `j` argument. 
-```{r opt_max_print_4, include = FALSE} -options(max.print = 4L) # 2 rows -``` - ```{r splice_sd} cols = c("Sepal.Length", "Sepal.Width") DT[, .SD, .SDcols = cols] @@ -316,10 +318,6 @@ It takes arbitrary number of variables on input, but now we cannot just *splice* First, we have to construct calls to the `square` function for each of the variables (see `inner_calls`). Then, we have to reduce the list of calls into a single call, having a nested sequence of `+` calls (see `add_calls`). Lastly, we have to substitute the constructed call into the surrounding expression (see `rms`). -```{r opt_max_print_12, include = FALSE} -options(max.print = 12L) # 2 rows -``` - ```{r complex} outer = "sqrt" inner = "square" @@ -344,15 +342,19 @@ rms = substitute2( ) print(rms) -DT[, j, env = list(j = rms)] +str( + DT[, j, env = list(j = rms)] +) # same, but skipping last substitute2 call and using add_calls directly -DT[, outer((add_calls) / len), - env = list( - outer = outer, - add_calls = add_calls, - len = length(vars) - )] +str( + DT[, outer((add_calls) / len), + env = list( + outer = outer, + add_calls = add_calls, + len = length(vars) + )] +) # return as data.table j = substitute2(j, list(j = as.list(setNames(nm = c(vars, "Species", "rms"))))) @@ -377,10 +379,6 @@ In `[.data.table`, it is also possible to use other mechanisms for variable subs ### `get` -```{r opt_max_print_4b, include = FALSE} -options(max.print = 4L) # 2 rows -``` - ```{r old_get} v1 = "Petal.Width" v2 = "Sepal.Width" @@ -418,3 +416,8 @@ DT[, eval(cl)] DT[, cl, env = list(cl = cl)] ``` + +```{r cleanup, echo=FALSE} +options(.opts) +registerS3method("print", "data.frame", base::print.data.frame) +``` \ No newline at end of file diff --git a/vignettes/datatable-reference-semantics.Rmd b/vignettes/datatable-reference-semantics.Rmd index 220a2a19a..c96ed090f 100644 --- a/vignettes/datatable-reference-semantics.Rmd +++ b/vignettes/datatable-reference-semantics.Rmd @@ -71,7 +71,7 @@ both (1) and (2) resulted 
in deep copy of the entire data.frame in versions of ` Great performance improvements were made in `R v3.1` as a result of which only a *shallow* copy is made for (1) and not *deep* copy. However, for (2) still, the entire column is *deep* copied even in `R v3.1+`. This means the more columns one subassigns to in the *same query*, the more *deep* copies R does. -#### *shallow* vs *deep* copy {.bs-callout .bs-callout-info} +#### *shallow* vs *deep* copy A *shallow* copy is just a copy of the vector of column pointers (corresponding to the columns in a *data.frame* or *data.table*). The actual data is not physically copied in memory. @@ -86,31 +86,27 @@ It can be used in `j` in two ways: (a) The `LHS := RHS` form - ```{r eval = FALSE} - DT[, c("colA", "colB", ...) := list(valA, valB, ...)] +```{r eval = FALSE} +DT[, c("colA", "colB", ...) := list(valA, valB, ...)] - # when you have only one column to assign to you - # can drop the quotes and list(), for convenience - DT[, colA := valA] - ``` +# when you have only one column to assign to you +# can drop the quotes and list(), for convenience +DT[, colA := valA] +``` (b) The functional form - ```{r eval = FALSE} - DT[, `:=`(colA = valA, # valA is assigned to colA - colB = valB, # valB is assigned to colB - ... - )] - ``` - -#### {.bs-callout .bs-callout-warning} +```{r eval = FALSE} +DT[, `:=`(colA = valA, # valA is assigned to colA + colB = valB, # valB is assigned to colB + ... +)] +``` Note that the code above explains how `:=` can be used. They are not working examples. We will start using them on `flights` *data.table* from the next section. # -#### {.bs-callout .bs-callout-info} - * In (a), `LHS` takes a character vector of column names and `RHS` a *list of values*. `RHS` just needs to be a `list`, irrespective of how its generated (e.g., using `lapply()`, `list()`, `mget()`, `mapply()` etc.). 
This form is usually easy to program with and is particularly useful when you don't know the columns to assign values to in advance. * On the other hand, (b) is handy if you would like to jot some comments down for later. @@ -140,7 +136,7 @@ head(flights) # flights[, c("speed", "delay") := list(distance/(air_time/60), arr_delay + dep_delay)] ``` -#### Note that {.bs-callout .bs-callout-info} +#### Note that * We did not have to assign the result back to `flights`. @@ -166,8 +162,6 @@ We see that there are totally `25` unique values in the data. Both *0* and *24* flights[hour == 24L, hour := 0L] ``` -#### {.bs-callout .bs-callout-info} - * We can use `i` along with `:=` in `j` the very same way as we have already seen in the *"Introduction to data.table"* vignette. * Column `hour` is replaced with `0` only on those *row indices* where the condition `hour == 24L` specified in `i` evaluates to `TRUE`. @@ -186,7 +180,7 @@ Let's look at all the `hours` to verify. flights[, sort(unique(hour))] ``` -#### Exercise: {.bs-callout .bs-callout-warning #update-by-reference-question} +#### Exercise: {#update-by-reference-question} What is the difference between `flights[hour == 24L, hour := 0L]` and `flights[hour == 24L][, hour := 0L]`? Hint: The latter needs an assignment (`<-`) if you would want to use the result later. @@ -204,7 +198,7 @@ head(flights) # flights[, `:=`(delay = NULL)] ``` -#### {.bs-callout .bs-callout-info #delete-convenience} +#### {#delete-convenience} * Assigning `NULL` to a column *deletes* that column. And it happens *instantly*. @@ -229,8 +223,6 @@ flights[, max_speed := max(speed), by = .(origin, dest)] head(flights) ``` -#### {.bs-callout .bs-callout-info} - * We add a new column `max_speed` using the `:=` operator by reference. * We provide the columns to group by the same way as shown in the *Introduction to data.table* vignette. For each group, `max(speed)` is computed, which returns a single value. 
That value is recycled to fit the length of the group. Once again, no copies are being made at all. `flights` *data.table* is modified *in-place*. @@ -249,7 +241,6 @@ out_cols = c("max_dep_delay", "max_arr_delay") flights[, c(out_cols) := lapply(.SD, max), by = month, .SDcols = in_cols] head(flights) ``` -#### {.bs-callout .bs-callout-info} * We use the `LHS := RHS` form. We store the input column names and the new columns to add in separate variables and provide them to `.SDcols` and for `LHS` (for better readability). @@ -283,7 +274,6 @@ ans = foo(flights) head(flights) head(ans) ``` -#### {.bs-callout .bs-callout-info} * Note that the new column `speed` has been added to `flights` *data.table*. This is because `:=` performs operations by reference. Since `DT` (the function argument) and `flights` refer to the same object in memory, modifying `DT` also reflects on `flights`. @@ -293,8 +283,6 @@ head(ans) In the previous section, we used `:=` for its side effect. But of course this may not be always desirable. Sometimes, we would like to pass a *data.table* object to a function, and might want to use the `:=` operator, but *wouldn't* want to update the original object. We can accomplish this using the function `copy()`. -#### {.bs-callout .bs-callout-info} - The `copy()` function *deep* copies the input object and therefore any subsequent update by reference operations performed on the copied object will not affect the original object. # @@ -321,8 +309,6 @@ There are two particular places where `copy()` function is essential: head(ans) ``` -#### {.bs-callout .bs-callout-info} - * Using `copy()` function did not update `flights` *data.table* by reference. It doesn't contain the column `speed`. * And `ans` contains the maximum speed corresponding to each month. 
@@ -354,7 +340,7 @@ However we could improve this functionality further by *shallow* copying instead ## Summary -#### The `:=` operator {.bs-callout .bs-callout-info} +#### The `:=` operator * It is used to *add/update/delete* columns by reference. diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index c26d5510d..0b5d7a57d 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -77,8 +77,6 @@ DT.m1 str(DT.m1) ``` -#### {.bs-callout .bs-callout-info} - * `measure.vars` specify the set of columns we would like to collapse (or combine) together. * We can also specify column *indices* instead of *names*. @@ -98,8 +96,6 @@ DT.m1 = melt(DT, measure.vars = c("dob_child1", "dob_child2", "dob_child3"), DT.m1 ``` -#### {.bs-callout .bs-callout-info} - * By default, when one of `id.vars` or `measure.vars` is missing, the rest of the columns are *automatically assigned* to the missing argument. * When neither `id.vars` nor `measure.vars` are specified, as mentioned under `?melt`, all *non*-`numeric`, `integer`, `logical` columns will be assigned to `id.vars`. @@ -118,8 +114,6 @@ That is, we'd like to collect all *child* observations corresponding to each `fa dcast(DT.m1, family_id + age_mother ~ child, value.var = "dob") ``` -#### {.bs-callout .bs-callout-info} - * `dcast` uses *formula* interface. The variables on the *LHS* of formula represents the *id* vars and *RHS* the *measure* vars. * `value.var` denotes the column to be filled in with while casting to wide format. @@ -165,7 +159,7 @@ DT.c1 str(DT.c1) ## gender column is character type now! ``` -#### Issues {.bs-callout .bs-callout-info} +#### Issues 1. What we wanted to do was to combine all the `dob` and `gender` type columns together respectively. Instead we are combining *everything* together, and then splitting them again. I think it's easy to see that it's quite roundabout (and inefficient). 
@@ -198,8 +192,6 @@ DT.m2 str(DT.m2) ## col type is preserved ``` -#### {.bs-callout .bs-callout-info} - * We can remove the `variable` column if necessary. * The functionality is implemented entirely in C, and is therefore both *fast* and *memory efficient* in addition to being *straightforward*. @@ -210,7 +202,7 @@ Usually in these problems, the columns we'd like to melt can be distinguished by ```{r} DT.m2 = melt(DT, measure = patterns("^dob", "^gender"), value.name = c("dob", "gender")) -print(DT.m2, class=TRUE) +DT.m2 ``` #### - Using `measure()` to specify `measure.vars` via separator or pattern @@ -260,7 +252,7 @@ is used to convert the `child` string values to integers: ```{r} DT.m3 = melt(DT, measure = measure(value.name, child=as.integer, sep="_child")) -print(DT.m3, class=TRUE) +DT.m3 ``` In the code above we used `sep="_child"` which results in melting only @@ -288,12 +280,12 @@ groups, two numeric output columns, and an anonymous type conversion function, ```{r} -print(melt(who, measure.vars = measure( +melt(who, measure.vars = measure( diagnosis, gender, ages, ymin=as.numeric, ymax=function(y)ifelse(y=="", Inf, as.numeric(y)), pattern="new_?(.*)_(.)(([0-9]{2})([0-9]{0,2}))" -)), class=TRUE) +)) ``` ### b) Enhanced `dcast` @@ -312,15 +304,13 @@ DT.c2 = dcast(DT.m2, family_id + age_mother ~ variable, value.var = c("dob", "ge DT.c2 ``` -#### {.bs-callout .bs-callout-info} - * Attributes are preserved in result wherever possible. * Everything is taken care of internally, and efficiently. In addition to being fast, it is also very memory efficient. # -#### Multiple functions to `fun.aggregate`: {.bs-callout .bs-callout-info} +#### Multiple functions to `fun.aggregate`: You can also provide *multiple functions* to `fun.aggregate` to `dcast` for *data.tables*. Check the examples in `?dcast` which illustrates this functionality. 
diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd index 8e7919f34..e7b08650e 100644 --- a/vignettes/datatable-sd-usage.Rmd +++ b/vignettes/datatable-sd-usage.Rmd @@ -202,7 +202,8 @@ Note that the `x[y]` syntax returns `nrow(y)` values (i.e., it's a right join), Often, we'd like to perform some operation on our data _at the group level_. When we specify `by =` (or `keyby = `), the mental model for what happens when `data.table` processes `j` is to think of your `data.table` as being split into many component sub-`data.table`s, each of which corresponds to a single value of your `by` variable(s): -![Grouping, Illustrated](plots/grouping_illustration.png 'A visual depiction of how grouping works. On the left is a grid. The first column is titled "ID COLUMN" with values the capital letters A through G, and the rest of the data is unlabelled, but is in a darker color and simply has "Data" written to indicate that's arbitrary. A right arrow shows how this data is split into groups. Each capital letter A through G has a grid on the right-hand side; the grid on the left has been subdivided to create that on the right.') +![Grouping, Illustrated](plots/grouping_illustration.png) + In the case of grouping, `.SD` is multiple in nature -- it refers to _each_ of these sub-`data.table`s, _one-at-a-time_ (slightly more accurately, the scope of `.SD` is a single sub-`data.table`). This allows us to concisely express an operation that we'd like to perform on _each sub-`data.table`_ before the re-assembled result is returned to us. 
diff --git a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd index 374ccd66b..6f2474c11 100644 --- a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd +++ b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd @@ -105,7 +105,7 @@ setkey(flights, origin) flights["JFK"] # or flights[.("JFK")] ``` -#### `setkey()` requires: {.bs-callout .bs-callout-info} +#### `setkey()` requires: a) computing the order vector for the column(s) provided, here, `origin`, and @@ -139,7 +139,7 @@ Since there can be multiple secondary indices, and creating an index is as simpl As we will see in the next section, the `on` argument provides several advantages: -#### `on` argument {.bs-callout .bs-callout-info} +#### `on` argument * enables subsetting by computing secondary indices on the fly. This eliminates having to do `setindex()` every time. From 5061828a9d1827409bdd24806622a5a5d6ca899f Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 3 Dec 2023 10:31:40 +0100 Subject: [PATCH 36/88] ignore base R warning on 32bit platforms, closes #5785 (#5786) --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 59ca6aabd..0063d9d8c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10404,7 +10404,7 @@ test(1702.2, isoweek(as.Date(test_cases)), test_values) test(1702.3, isoweek(as.POSIXct(test_cases)), test_values) # 1% sample of a 400-year cycle of dates for extra robustness -if (test_R.utils) test(1702.4, isoweek((DT<-fread(testDir('isoweek_test.csv.bz2')))$input_date), DT$expected_output) +if (test_R.utils) test(1702.4, isoweek((DT<-fread(testDir('isoweek_test.csv.bz2')))$input_date), DT$expected_output, ignore.warning="datetimes before") ## ignore.warning due to #5785 # fread, ensure no shell commands #1702 if (.Platform$OS.type=="unix") { From a01ca604842aeec7469e3b80e2411f13bb9211f2 Mon Sep 17 
00:00:00 2001 From: Michael Chirico Date: Mon, 4 Dec 2023 09:57:22 +0800 Subject: [PATCH 37/88] cast pointers to standard type for printf (#5787) --- src/assign.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/assign.c b/src/assign.c index 3356e918b..d433c2b54 100644 --- a/src/assign.c +++ b/src/assign.c @@ -1197,7 +1197,7 @@ static R_len_t *savedtl=NULL, nalloc=0, nsaved=0; void savetl_init(void) { if (nsaved || nalloc || saveds || savedtl) { - error(_("Internal error: savetl_init checks failed (%d %d %p %p). please report to data.table issue tracker."), nsaved, nalloc, saveds, savedtl); // # nocov + error(_("Internal error: savetl_init checks failed (%d %d %p %p). please report to data.table issue tracker."), nsaved, nalloc, (void *)saveds, (void *)savedtl); // # nocov } nsaved = 0; nalloc = 100; From ae809e882c68727e99b2a84fbab5af5ef0f3b093 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Mon, 4 Dec 2023 18:07:36 +0100 Subject: [PATCH 38/88] remove survey advertise (#5789) --- README.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/README.md b/README.md index fbe2de22a..8455602f1 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,6 @@ # data.table -The data.table 2023 community survey is now live! Click on https://tinyurl.com/datatable-survey to fill it out. The survey will remain open until **December 1st, 2023**. - -In addition to filling out the survey, it would be great if you could share it with others who might be interested in participating. 
- ---- - [![CRAN status](https://badges.cranchecks.info/flavor/release/data.table.svg)](https://cran.r-project.org/web/checks/check_results_data.table.html) [![R-CMD-check](https://github.com/Rdatatable/data.table/workflows/R-CMD-check/badge.svg)](https://github.com/Rdatatable/data.table/actions) From f6146cea66d772175bfa26a318edbc349639acd8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 5 Dec 2023 05:54:55 -0800 Subject: [PATCH 39/88] Create devcontainer.json --- .devcontainer/devcontainer.json | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..bbda2085f --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,3 @@ +{ + "image": "registry.gitlab.com/jangorecki/dockerfiles/r-devel-gcc" +} From b147969e9d5c30aefc4f2e90b4950dde48ec357c Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Tue, 5 Dec 2023 16:36:45 +0100 Subject: [PATCH 40/88] GLCI rework (#5793) --- .ci/README.md | 64 +++----- .ci/deploy.sh | 30 ---- .ci/publish.R | 82 +++++++--- .gitlab-ci.yml | 363 ++++++++++++++++++------------------------ R/devel.R | 6 +- _pkgdown.yml | 35 ++-- man/update_dev_pkg.Rd | 15 +- 7 files changed, 271 insertions(+), 324 deletions(-) delete mode 100644 .ci/deploy.sh diff --git a/.ci/README.md b/.ci/README.md index 3f303e34a..d684a598e 100644 --- a/.ci/README.md +++ b/.ci/README.md @@ -1,72 +1,50 @@ # data.table continuous integration and deployment -On each Pull Request opened in GitHub we run Travis CI and Appveyor to provide prompt feedback about the status of PR. Our main CI pipeline runs on GitLab CI. GitLab repository automatically mirrors our GitHub repository and runs pipeline on `master` branch. It tests more environments and different configurations. It publish variety of artifacts. 
+On each Pull Request opened in GitHub we run GitHub Actions test jobs to provide prompt feedback about the status of PR. Our main CI pipeline runs on GitLab CI nightly. GitLab repository automatically mirrors our GitHub repository and runs pipeline on `master` branch every night. It tests more environments and different configurations. It publish variety of artifacts. ## Environments ### [GitLab CI](./../.gitlab-ci.yml) Test jobs: -- `test-rel-lin` - `r-release` on Linux, most comprehensive test environment, `-O3 -flto -fno-common -Wunused-result`, extra check for no compilation warnings, includes testing [_with other packages_](./../inst/tests/other.Rraw) -- `test-rel-cran-lin` - `--as-cran` on Linux, `-g0`, extra check for final status of `R CMD check` where we allow one NOTE (_size of tarball_). -- `test-dev-cran-lin` - `r-devel` and `--as-cran` on Linux, `--with-recommended-packages --enable-strict-barrier --disable-long-double`, tests for compilation warnings in pkg install and new NOTEs/Warnings in pkg check, and because it is R-devel it is marked as allow_failure -- `test-rel-vanilla-lin` - `r-release` on Linux, no suggested deps, no OpenMP, `-O0`, tracks memory usage during tests -- `test-310-cran-lin` - R 3.1.0 on Linux -- `test-344-cran-lin` - R 3.4.4 on Linux -- `test-350-cran-lin` - R 3.5.0 on Linux, no `r-recommended` -- `test-rel-win` - `r-release` on Windows -- `test-dev-win` - `r-devel` on Windows -- `test-old-win` - `r-oldrel` on Windows -- `test-rel-osx` - MacOSX build not yet deployed, see [#3326](https://github.com/Rdatatable/data.table/issues/3326) for status +- `test-lin-rel` - `r-release` on Linux, most comprehensive test environment, force all suggests, `-O3 -flto=auto -fno-common -Wunused-result`, test for no compilation warnings. +- `test-lin-rel-vanilla` - `r-release` on Linux, no suggested deps, no zlib, no OpenMP, flags `-g -O0 -fno-openmp`, skip manual and vignettes. 
+- `test-lin-rel-cran` - `--as-cran` on Linux, strict test for final status of `R CMD check`. +- `test-lin-dev-gcc-strict-cran` - `--as-cran` on Linux, `r-devel` built with `-enable-strict-barrier --disable-long-double`, test for compilation warnings, test for new NOTEs/WARNINGs from `R CMD check`. +- `test-lin-dev-clang-cran` - same as `gcc-strict` job but R built with `clang` and no `--enable-strict-barrier --disable-long-double` flags. +- `test-lin-310-cran` - R 3.1.0 on Linux, stated R dependency version. +- `test-win-rel` - `r-release` on Windows. +- `test-win-dev` - `r-devel` on Windows. +- `test-win-old` - `r-oldrel` on Windows. +- `test-mac-rel` - macOS build not yet available, see [#3326](https://github.com/Rdatatable/data.table/issues/3326) for status + +Tests jobs are allowed to fail, summary and logs of test jobs are later published at _CRAN-like checks_ page, see artifacts below. Artifacts: - [homepage](https://rdatatable.gitlab.io/data.table) - made with [pkgdown](https://github.com/r-lib/pkgdown) - [html manual](https://rdatatable.gitlab.io/data.table/library/data.table/html/00Index.html) - [pdf manual](https://rdatatable.gitlab.io/data.table/web/packages/data.table/data.table.pdf) - [html vignettes](https://rdatatable.gitlab.io/data.table/library/data.table/doc/index.html) -- R packages repository for `data.table` and all _Suggests_ dependencies, url: `https://Rdatatable.gitlab.io/data.table` +- R packages repository for `data.table` and all _Suggests_ dependencies, url: `https://rdatatable.gitlab.io/data.table` - sources - Windows binaries for `r-release`, `r-devel` and `r-oldrel` - [CRAN-like homepage](https://rdatatable.gitlab.io/data.table/web/packages/data.table/index.html) -- [CRAN-like checks results](https://rdatatable.gitlab.io/data.table/web/checks/check_results_data.table.html) - note that all artifacts, including check results page, are being published only when all test jobs successfully pass, thus one will not see an _ERROR_ status 
there (unless error happened on a job marked as `allow_failure`). -- [docker images](https://gitlab.com/Rdatatable/data.table/container_registry) - copy/paste-able `docker pull` commands can be found at the bottom of our [CRAN-like homepage](https://rdatatable.gitlab.io/data.table/web/packages/data.table/index.html) +- [CRAN-like checks results](https://rdatatable.gitlab.io/data.table/web/checks/check_results_data.table.html) -### [Travis CI](./../.travis.yml) +### [GitHub Actions](./../.github/workflows) -Test jobs: -- `r-release` on Linux, includes code coverage check -- _(might be disabled)_ `r-release` on OSX - -Artifacts: -- R packages repository having `data.table` sources only, url: `https://Rdatatable.github.io/data.table` -- code coverage stats pushed to [codecov.io/gh/Rdatatable/data.table](https://codecov.io/gh/Rdatatable/data.table) +TODO document ### [Appveyor](./../.appveyor.yml) -Test jobs: -- Windows `r-release` -- _(might be disabled)_ Windows `r-devel` - -Artifacts: -- Windows `r-release` binaries accessed only via web UI +TODO document -## Tools +## CI tools ### [`ci.R`](./ci.R) -Base R implemented helper script, [originally proposed to R](https://svn.r-project.org/R/branches/tools4pkgs/src/library/tools/R/packages.R), that ease the process of extracting dependency information from description files, also to mirror packages and their recursive dependencies from CRAN to local CRAN-like directory. It is widely used in our [GitLab CI pipeline](./../.gitlab-ci.yml). +Base R implemented helper script, [originally proposed to base R](https://svn.r-project.org/R/branches/tools4pkgs/src/library/tools/R/packages.R), that ease the process of extracting dependency information from description files, and to mirror packages and their recursive dependencies from CRAN to local CRAN-like directory. It is used in [GitLab CI pipeline](./../.gitlab-ci.yml). ### [`publish.R`](./publish.R) -Base R implemented helper script to orchestrate generation of most artifacts. 
It is being used only in [_integration_ stage in GitLab CI pipeline](./../.gitlab-ci.yml). - -### [`Dockerfile.in`](./Dockerfile.in) - -Template file to produce `Dockerfile` for, as of now, three docker images. Docker images are being built and published in [_deploy_ stage in GitLab CI pipeline](./../.gitlab-ci.yml). -- `r-base-dev` using `r-release`: publish docker image of `data.table` on R-release -- `r-builder` using `r-release`: publish on R-release and OS dependencies for building Rmarkdown vignettes -- `r-devel`: publish docker image of `data.table` on R-devel built with `--with-recommended-packages --enable-strict-barrier --disable-long-double` - -### [`deploy.sh`](./deploy.sh) - -Script used on Travis CI to publish CRAN-like repository of `data.table` sources. It publishes to `gh-pages` branch in GitHub repository. It depends on a token, which is provided based on `secure` environment variable in [.travis.yml](./../.travis.yml). It has been generated by @jangorecki. +Base R implemented helper script to orchestrate generation of most artifacts and to arrange them nicely. It is being used only in [_integration_ stage in GitLab CI pipeline](./../.gitlab-ci.yml). diff --git a/.ci/deploy.sh b/.ci/deploy.sh deleted file mode 100644 index 6f01ef136..000000000 --- a/.ci/deploy.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -set -o errexit -o nounset -PKG_REPO=$PWD -PKG_TARBALL=$(ls -1t *.tar.gz | head -n 1) -cd .. 
- -addToDrat(){ - mkdir drat; cd drat - - ## Set up Repo parameters - git init - git config user.name "addToDrat" - git config user.email "addToDrat@travis.ci" - - ## Get drat repo - git remote add upstream "https://$GH_TOKEN@github.com/Rdatatable/data.table.git" 2>err.txt - git fetch upstream gh-pages 2>err.txt - git checkout gh-pages 2>err.txt - git reset --hard "88000defd316538c37af4c8dc842e73e7953f4e2" 2>err.txt - - Rscript -e "drat::insertPackage('$PKG_REPO/$PKG_TARBALL', \ - repodir = '.', \ - commit='Travis publish data.table: build $TRAVIS_COMMIT', \ - addFiles=TRUE, fields='Revision')" - git push --force upstream gh-pages 2>err.txt - -} - -addToDrat diff --git a/.ci/publish.R b/.ci/publish.R index ec35fe43f..923b89b5d 100644 --- a/.ci/publish.R +++ b/.ci/publish.R @@ -91,16 +91,16 @@ package.index <- function(package, lib.loc, repodir="bus/integration/cran") { ) vign = tools::getVignetteInfo(pkg, lib.loc=lib.loc) r_rel_ver = Sys.getenv("R_REL_VERSION") - r_devel_ver = Sys.getenv("R_DEVEL_VERSION") - r_oldrel_ver = Sys.getenv("R_OLDREL_VERSION") - stopifnot(nzchar(r_rel_ver), nzchar(r_devel_ver), nzchar(r_oldrel_ver)) + r_dev_ver = Sys.getenv("R_DEV_VERSION") + r_old_ver = Sys.getenv("R_OLD_VERSION") + stopifnot(nzchar(r_rel_ver), nzchar(r_dev_ver), nzchar(r_old_ver)) cran.home = "../../.." tbl.dl = c( sprintf(" Reference manual: %s.pdf, 00Index.html ", pkg, pkg, cran.home, pkg), if (nrow(vign)) sprintf("Vignettes:%s", paste(sprintf("%s
", cran.home, vign[,"PDF"], vign[,"Title"]), collapse="\n")), # location unline cran web/pkg/vignettes to not duplicate content, documentation is in ../../../library sprintf(" Package source: %s_%s.tar.gz ", cran.home,pkg, version, pkg, version), - sprintf(" Windows binaries: %s ", format.bins(ver=c("r-devel","r-release","r-oldrel"), bin_ver=c(r_devel_ver, r_rel_ver, r_oldrel_ver), cran.home=cran.home, os.type="windows", pkg=pkg, version=version, repodir=repodir)), - sprintf(" macOS binaries: %s ", format.bins(ver=c("r-release","r-oldrel"), bin_ver=c(r_rel_ver, r_oldrel_ver), cran.home=cran.home, os.type="macosx", pkg=pkg, version=version, repodir=repodir)) + sprintf(" Windows binaries: %s ", format.bins(ver=c("r-devel","r-release","r-oldrel"), bin_ver=c(r_dev_ver, r_rel_ver, r_old_ver), cran.home=cran.home, os.type="windows", pkg=pkg, version=version, repodir=repodir)), + sprintf(" macOS binaries: %s ", format.bins(ver=c("r-release","r-oldrel"), bin_ver=c(r_rel_ver, r_old_ver), cran.home=cran.home, os.type="macosx", pkg=pkg, version=version, repodir=repodir)) ) index.file = file.path(repodir, "web/packages", pkg, "index.html") if (!dir.exists(dirname(index.file))) dir.create(dirname(index.file), recursive=TRUE) @@ -155,24 +155,30 @@ plat <- function(x) if (grepl("^.*win", x)) "Windows" else if (grepl("^.*mac", x r.ver <- function(x) { tmp = strsplit(x, "-", fixed=TRUE)[[1L]] - if (length(tmp) < 2L) stop("test job names must be test-[r.version]-...") - v = tmp[2L] + if (length(tmp) < 3L) stop("test job names must be test-[lin|win|mac]-[r.version]-...") + v = tmp[3L] if (identical(v, "rel")) "r-release" else if (identical(v, "dev")) "r-devel" else if (identical(v, "old")) "r-oldrel" else { - if (grepl("\\D", v)) stop("second word in test job name must be rel/dev/old or numbers of R version") + if (grepl("\\D", v)) stop("third word in test job name must be rel/dev/old or numbers of R version") paste0("r-", paste(strsplit(v, "")[[1L]], collapse=".")) } } # this for 
now is constant but when we move to independent pipelines (commit, daily, weekly) those values can be different pkg.version <- function(job, pkg) { - dcf = read.dcf(file.path("bus", job, paste(pkg, "Rcheck", sep="."), pkg, "DESCRIPTION")) + Rcheck = file.path("bus", job, paste(pkg, "Rcheck", sep=".")) + if (!dir.exists(Rcheck)) + return(NA_character_) + dcf = read.dcf(file.path(Rcheck, "00_pkg_src", pkg, "DESCRIPTION")) dcf[,"Version"] } pkg.revision <- function(job, pkg) { - dcf = read.dcf(file.path("bus", job, paste(pkg, "Rcheck", sep="."), pkg, "DESCRIPTION")) + Rcheck = file.path("bus", job, paste(pkg, "Rcheck", sep=".")) + if (!dir.exists(Rcheck)) + return(NA_character_) + dcf = read.dcf(file.path(Rcheck, "00_pkg_src", pkg, "DESCRIPTION")) if ("Revision" %in% colnames(dcf)) { proj.url = Sys.getenv("CI_PROJECT_URL", "") if (!nzchar(proj.url)) { @@ -184,7 +190,10 @@ pkg.revision <- function(job, pkg) { } else "" } pkg.flags <- function(job, pkg) { - cc = file.path("bus", job, paste(pkg, "Rcheck", sep="."), pkg, "cc") ## data.table style cc file + Rcheck = file.path("bus", job, paste(pkg, "Rcheck", sep=".")) + if (!dir.exists(Rcheck)) + return(NA_character_) + cc = file.path(Rcheck, pkg, "cc") ## data.table style cc file if (file.exists(cc)) { d = readLines(cc) w.cflags = substr(d, 1, 7)=="CFLAGS=" @@ -254,6 +263,34 @@ check.flavors <- function(jobs, repodir="bus/integration/cran") { setNames(file.exists(file), file) } +log.copy <- function(job, repodir="bus/integration/cran") { + dir.create(job.checks<-file.path(repodir, "web", "checks", pkg<-"data.table", job), recursive=TRUE, showWarnings=FALSE) + to = file.path(job.checks, "log") + if (!file.exists(job_id_file <- file.path("bus", job, "id"))) + return(setNames(file.exists(to), "log")) + job_id = readLines(job_id_file, warn=FALSE)[1L] + from = sprintf("https://gitlab.com/Rdatatable/data.table/-/jobs/%s/raw", job_id) + download.file(from, to, method="wget", quiet=TRUE) + Sys.sleep(0.1) ## to not get ban from 
gitlab.com + setNames(file.exists(to), "log") +} + +ci.status <- function(job) { + if (!file.exists(status_file <- file.path("bus", job, "status"))) + return(NA_character_) + readLines(status_file, warn=FALSE)[1L] +} + +ci.log <- function(jobs, repodir="bus/integration/cran") { + pkg = "data.table" + ans = vector("character", length(jobs)) + logs = sapply(jobs, log.copy, repodir=repodir) + statuses = sapply(jobs, ci.status) + ans[!logs] = statuses[!logs] + ans[logs] = sprintf('%s', pkg[any(logs)], jobs[logs], statuses[logs]) + ans +} + check.index <- function(pkg, jobs, repodir="bus/integration/cran") { status = function(x) if (grepl("^.*ERROR", x)) "ERROR" else if (grepl("^.*WARNING", x)) "WARNING" else if (grepl("^.*NOTE", x)) "NOTE" else if (grepl("^.*OK", x)) "OK" else NA_character_ test.files = function(job, files, trim.name=FALSE, trim.exts=0L, pkg="data.table") { @@ -294,17 +331,18 @@ check.index <- function(pkg, jobs, repodir="bus/integration/cran") { } memouts }) - th = "FlavorVersionRevisionInstallStatusFlagsRout.failMemtest" + th = "FlavorVersionRevisionInstallStatusFlagsRout.failLogMemtest" tbl = sprintf( - "%s%s%sout%s%s%s%s", - sub("test-", "", jobs, fixed=TRUE), - sapply(jobs, pkg.version, pkg), - sapply(jobs, pkg.revision, pkg), - pkg, jobs, ## install - pkg, jobs, sapply(sapply(jobs, check.test, pkg="data.table"), status), ## check - sapply(jobs, pkg.flags, pkg), - mapply(test.files, jobs, routs, trim.exts=2L), # 1st fail, 2nd Rout, keep just: tests_x64/main - mapply(test.files, jobs, memouts, trim.name=TRUE) + "%s%s%sout%s%s%s%s%s", + sub("test-", "", jobs, fixed=TRUE), ## Flavor + sapply(jobs, pkg.version, pkg), ## Version + sapply(jobs, pkg.revision, pkg), ## Revision + pkg, jobs, ## Install + pkg, jobs, sapply(sapply(jobs, check.test, pkg="data.table"), status), ## Status + sapply(jobs, pkg.flags, pkg), ## Flags + mapply(test.files, jobs, routs, trim.exts=2L), ## Rout.fail: 1st fail, 2nd Rout, keep just: tests_x64/main + ci.log(jobs), ## CI job 
logs + mapply(test.files, jobs, memouts, trim.name=TRUE) ## Memtest // currently not used ) file = file.path(repodir, "web/checks", sprintf("check_results_%s.html", pkg)) writeLines(c( @@ -340,7 +378,7 @@ check.test <- function(job, pkg) { check[length(check)] } -move.bin <- function(job, bin.version, os.type, file="DESCRIPTION", silent=FALSE) { +move.bin <- function(job, bin.version, os.type, file="DESCRIPTION", silent=TRUE) { if (os.type=="unix") { stop("publish of linux binaries not supported") } else if (os.type=="windows") { diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 80fa5d00a..60cf09bb5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,3 +1,9 @@ +workflow: + rules: + - if: '$CI_PIPELINE_SOURCE=="schedule" && $CI_COMMIT_REF_NAME=="master"' ## nightly scheduled pipeline at 4:15 UTC + - if: '$CI_PIPELINE_SOURCE=="web"' ## manually started from web UI + - if: '$CI_PIPELINE_SOURCE=="push" && $CI_COMMIT_REF_NAME!="master"' ## branches pushed to GL directly, mirror is set for master branch only + variables: CRAN_MIRROR: "https://cloud.r-project.org" _R_CHECK_FORCE_SUGGESTS_: "false" @@ -6,9 +12,18 @@ variables: TZ: "UTC" ## to avoid 'Failed to create bus connection' from timedatectl via Sys.timezone() on Docker with R 3.4. ## Setting TZ for all GLCI jobs to isolate them from timezone. We could have a new GLCI job to test under ## a non-UTC timezone, although, that's what we do routinely in dev. 
- R_REL_VERSION: "4.3" - R_DEVEL_VERSION: "4.4" - R_OLDREL_VERSION: "4.2" + R_REL_VERSION: "4.3" + R_REL_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.3.2/R-4.3.2-win.exe" + RTOOLS_REL_BIN: "https://cloud.r-project.org/bin/windows/Rtools/rtools43/files/rtools43-5863-5818.exe" + RTOOLS43_HOME: "/c/rtools" + R_DEV_VERSION: "4.4" + R_DEV_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/R-devel-win.exe" + RTOOLS_DEV_BIN: "https://cloud.r-project.org/bin/windows/Rtools/rtools43/files/rtools43-5863-5818.exe" + RTOOLS44_HOME: "" ## in case R-devel will use new Rtools toolchain, now it uses 4.3 env var + R_OLD_VERSION: "4.2" + R_OLD_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.2.3/R-4.2.3-win.exe" + RTOOLS_OLD_BIN: "https://cloud.r-project.org/bin/windows/Rtools/rtools42/files/rtools42-5355-5357.exe" + RTOOLS42_HOME: "/c/rtools" stages: - dependencies @@ -22,7 +37,7 @@ stages: expire_in: 2 weeks when: always paths: - - bus + - bus/$CI_JOB_NAME ## mirror packages # download all recursive dependencies once to be used across multiple test jobs @@ -40,9 +55,13 @@ mirror-packages: - echo 'source(".ci/ci.R")' >> .Rprofile - mkdir -p bus/$CI_JOB_NAME/cran/src/contrib - Rscript -e 'mirror.packages(dcf.dependencies("DESCRIPTION", "all"), repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-packages/cran")' - - Rscript -e 'sapply(simplify=FALSE, setNames(nm=Sys.getenv(c("R_REL_VERSION","R_DEVEL_VERSION","R_OLDREL_VERSION"))), function(binary.ver) mirror.packages(type="win.binary", dcf.dependencies("DESCRIPTION", "all"), repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-packages/cran", binary.ver=binary.ver))' + - Rscript -e 'sapply(simplify=FALSE, setNames(nm=Sys.getenv(c("R_REL_VERSION","R_DEV_VERSION","R_OLD_VERSION"))), function(binary.ver) mirror.packages(type="win.binary", dcf.dependencies("DESCRIPTION", "all"), repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-packages/cran", binary.ver=binary.ver))' <<: *artifacts +## install deps 
aliases +.test-install-deps: &install-deps + - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="all"), repos=file.path("file:", normalizePath("bus/mirror-packages/cran", mustWork=FALSE)), quiet=FALSE)' + ## build # sources as tar.gz archive # build vignettes @@ -50,137 +69,78 @@ build: stage: build tags: - linux - image: registry.gitlab.com/jangorecki/dockerfiles/r-base ## r-base-gcc after rstudio/markdown#108 + image: registry.gitlab.com/jangorecki/dockerfiles/r-base-gcc needs: ["mirror-packages"] before_script: - - Rscript -e 'install.packages(c("knitr","rmarkdown"), repos=file.path("file:",normalizePath("bus/mirror-packages/cran")), quiet=TRUE)' + - *install-deps - rm -r bus + script: - sed -i '/^[[:space:]]*$/d' ./DESCRIPTION ## make last line end abruptly; i.e. without a final \n - echo "Revision:" $CI_COMMIT_SHA >> ./DESCRIPTION - script: - R CMD build . - - mkdir -p bus/$CI_JOB_NAME/cran/src/contrib - - mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME/cran/src/contrib/. - - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/build/cran"), fields="Revision", addFiles=TRUE)' + - mkdir -p bus/$CI_JOB_NAME/ + - mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME/ <<: *artifacts -## install deps aliases -.test-install-deps: &install-deps - - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="all"), repos=file.path("file:", normalizePath("bus/mirror-packages/cran", mustWork=FALSE)), quiet=TRUE)' -.test-install-deps-win: &install-deps-win - - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='all'), repos=file.path('file://',getwd(),'bus/mirror-packages/cran'), quiet=TRUE)" - -## copy data.table tar.gz from bus R repo to current directory -.test-cp-src: &cp-src - - cp $(ls -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head -n 1) . 
-.test-cp-src-win: &cp-src-win - - cp.exe $(ls.exe -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head.exe -n 1) . - -## move data.table tar.gz to bus -.test-mv-src: &mv-src - - mkdir -p bus/$CI_JOB_NAME && mv $(ls -1t data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME -.test-mv-src-win: &mv-src-win - - mkdir.exe -p bus/$CI_JOB_NAME; mv.exe $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) bus/$CI_JOB_NAME - -## move data.table binaries to bus R repo -.test-mv-bin-win: &mv-bin-win - - mkdir.exe -p cran/bin/windows/contrib/$R_VERSION; mv.exe $(ls.exe -1t data.table_*.zip | head.exe -n 1) cran/bin/windows/contrib/$R_VERSION - -## remove data.table tar.gz -.test-rm-src: &rm-src - - rm $(ls -1t data.table_*.tar.gz | head -n 1) -.test-rm-src-win: &rm-src-win - - rm.exe $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - -## install R on windows -.test-install-r-rel-win: &install-r-rel-win - - curl.exe -s -o ../R-rel.exe https://cloud.r-project.org/bin/windows/base/old/4.3.2/R-4.3.2-win.exe; Start-Process -FilePath ..\R-rel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait -.test-install-r-devel-win: &install-r-devel-win - - curl.exe -s -o ../R-devel.exe https://cloud.r-project.org/bin/windows/base/R-devel-win.exe; Start-Process -FilePath ..\R-devel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait -.test-install-r-oldrel-win: &install-r-oldrel-win - - curl.exe -s -o ../R-oldrel.exe https://cloud.r-project.org/bin/windows/base/old/4.2.3/R-4.2.3-win.exe; Start-Process -FilePath ..\R-oldrel.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait - -## install Rtools on windows -.test-install-rtools42-win: &install-rtools42-win - - curl.exe -s -o ../rtools.exe https://cloud.r-project.org/bin/windows/Rtools/rtools42/files/rtools42-5355-5357.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools42" -NoNewWindow -Wait -.test-install-rtools43-win: &install-rtools43-win - - curl.exe -s -o ../rtools.exe 
https://cloud.r-project.org/bin/windows/Rtools/rtools43/files/rtools43-5863-5818.exe; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools43" -NoNewWindow -Wait - .test-template: &test stage: test needs: ["mirror-packages","build"] + allow_failure: true <<: *artifacts .test-lin-template: &test-lin <<: *test tags: - linux - -.test-win-template: &test-win - <<: *test - tags: - - windows - - shared-windows - -#.test-mac-template: &test-mac -# <<: *test -# tags: -# - macosx + before_script: + - cp $(ls -1t bus/build/data.table_*.tar.gz | head -n 1) . + - mkdir -p ~/.R + after_script: + - mkdir -p bus/$CI_JOB_NAME + - echo $CI_JOB_ID > bus/$CI_JOB_NAME/id + - echo $CI_JOB_STATUS > bus/$CI_JOB_NAME/status + - echo $CI_JOB_IMAGE > bus/$CI_JOB_NAME/image + - '[ -d data.table.Rcheck ] && mv data.table.Rcheck bus/$CI_JOB_NAME/' ## most comprehensive tests # force all suggests # flags: gcc -O3 -flto=auto -fno-common -Wunused-result # tests for compilation warnings -test-rel-lin: +test-lin-rel: <<: *test-lin image: registry.gitlab.com/jangorecki/dockerfiles/r-data.table - needs: ["mirror-packages","build"] variables: _R_CHECK_CRAN_INCOMING_: "FALSE" _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" _R_CHECK_FORCE_SUGGESTS_: "TRUE" _R_CHECK_TESTS_NLINES_: "0" OPENBLAS_MAIN_FREE: "1" - before_script: + script: - apt-get update -q && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## remove after #5749 - *install-deps - - *cp-src - - rm -r bus - - mkdir -p ~/.R - - echo 'CFLAGS=-g -O3 -flto=auto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - - echo 'CXXFLAGS=-g -O3 -flto=auto -fno-common -Wunused-result -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - script: - - *mv-src - - cd bus/$CI_JOB_NAME + - echo 'CFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars + - echo 
'CXXFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - R CMD check $(ls -1t data.table_*.tar.gz | head -n 1) - - *rm-src - (! grep "warning:" data.table.Rcheck/00install.out) ## vanilla minimal +# no zlib # no suggested deps # no vignettes or manuals # no openmp # flags: gcc -O0 -fno-openmp -test-rel-vanilla-lin: +test-lin-rel-vanilla: <<: *test-lin image: registry.gitlab.com/jangorecki/dockerfiles/r-base-gcc - allow_failure: true ## temp workaround #5484 - before_script: - - *cp-src - - rm -r bus - - mkdir -p ~/.R + script: - echo 'CFLAGS=-g -O0 -fno-openmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - echo 'CXXFLAGS=-g -O0 -fno-openmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - script: - - *mv-src - - cd bus/$CI_JOB_NAME - R CMD check --no-manual --ignore-vignettes $(ls -1t data.table_*.tar.gz | head -n 1) - - *rm-src ## R-release on Linux # strict checks for 0 NOTEs # extra NOTEs check and build pdf manual thus not from cran-lin template -test-rel-cran-lin: +test-lin-rel-cran: <<: *test-lin image: registry.gitlab.com/jangorecki/dockerfiles/r-base variables: @@ -188,147 +148,139 @@ test-rel-cran-lin: _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" ## Other than no URL checking (takes many minutes) or 'Days since last update 0' NOTEs needed, #3284 _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## bytes _R_CHECK_PKG_SIZES_THRESHOLD_: "7" ## MB 'checking installed package size' NOTE - before_script: - - apt-get update -q && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## temp workaround #5749 + script: + - apt-get -qq update && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## temp workaround for curl dep #5749 - *install-deps - - *cp-src - - rm -r bus - - mkdir -p ~/.R - echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - echo 
'CXXFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - script: - - *mv-src - - cd bus/$CI_JOB_NAME - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - - *rm-src - >- Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: OK")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: OK"), " but ", shQuote(l)) else q("no")' -## R-devel on Linux -# TODO: --enable-strict-barrier --disable-long-double +## R-devel on Linux gcc strict +# R built with --enable-strict-barrier --disable-long-double # tests for compilation warnings # tests for new notes -# thus allow_failure -test-dev-cran-lin: +test-lin-dev-gcc-strict-cran: <<: *test-lin - image: registry.gitlab.com/jangorecki/dockerfiles/r-devel - allow_failure: true ## to not be blocked by changes in r-devel + image: registry.gitlab.com/jangorecki/dockerfiles/r-devel-gcc-strict variables: _R_CHECK_CRAN_INCOMING_: "TRUE" _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" _R_S3_METHOD_LOOKUP_BASEENV_AFTER_GLOBALENV_: "FALSE" ## detects S3 method lookup found on search path #4777 _R_S3_METHOD_LOOKUP_REPORT_SEARCH_PATH_USES_: "TRUE" - before_script: + script: + - echo 'CFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars + - echo 'CXXFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - *install-deps - - *cp-src - - rm -r bus + - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) + - (! 
grep "warning:" data.table.Rcheck/00install.out) + - >- + Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 3 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 3 NOTEs"), " (size of tarball, suggested but not available for checking: 'curl', installed package size) but ", shQuote(l)) else q("no")' + +## R-devel on Linux clang +# R compiled with clang +# tests for compilation warnings +# tests for new notes +test-lin-dev-clang-cran: + <<: *test-lin + image: registry.gitlab.com/jangorecki/dockerfiles/r-devel-clang + variables: + _R_CHECK_CRAN_INCOMING_: "TRUE" + _R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE" + _R_S3_METHOD_LOOKUP_BASEENV_AFTER_GLOBALENV_: "FALSE" + _R_S3_METHOD_LOOKUP_REPORT_SEARCH_PATH_USES_: "TRUE" script: - - *mv-src - - cd bus/$CI_JOB_NAME - - R CMD check --as-cran --no-manual $(ls -1t data.table_*.tar.gz | head -n 1) - - *rm-src + - echo 'CFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars + - echo 'CXXFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars + - *install-deps + - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - (! 
grep "warning:" data.table.Rcheck/00install.out) - - >- ## this likely need an update but check fails now on complex NA so CI is not reaching here anyway - Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 3 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 3 NOTEs"), " (size of tarball, installed package size, top-level files) but ", shQuote(l)) else q("no")' + - >- + Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 3 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 3 NOTEs"), " (size of tarball, suggested but not available for checking: 'curl', installed package size) but ", shQuote(l)) else q("no")' ## R 3.1.0 # stated dependency on R -test-310-cran-lin: +test-lin-310-cran: image: registry.gitlab.com/jangorecki/dockerfiles/r-3.1.0 <<: *test-lin - before_script: - - *install-deps - - *cp-src - - rm -r bus script: - - *mv-src - - cd bus/$CI_JOB_NAME + - *install-deps - R CMD check --no-manual $(ls -1t data.table_*.tar.gz | head -n 1) - - *rm-src + +.test-win-template: &test-win + <<: *test + tags: + - shared-windows + before_script: + - curl.exe -s -o ../R-win.exe $R_BIN; Start-Process -FilePath ..\R-win.exe -ArgumentList "/VERYSILENT /DIR=C:\R" -NoNewWindow -Wait + - curl.exe -s -o ../rtools.exe $RTOOLS_BIN; Start-Process -FilePath ..\rtools.exe -ArgumentList "/VERYSILENT /DIR=C:\rtools" -NoNewWindow -Wait + - $env:PATH = "C:\R\bin;C:\rtools\usr\bin;$env:PATH" + - Rscript.exe -e "source('.ci/ci.R'); install.packages(dcf.dependencies('DESCRIPTION', which='all'), repos=file.path('file://',getwd(),'bus/mirror-packages/cran'), quiet=TRUE)" + - cp.exe $(ls.exe -1t bus/build/data.table_*.tar.gz | head.exe -n 1) . 
+ script: + - R.exe CMD check --no-manual $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) + - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) + after_script: + - $env:PATH = "C:\R\bin;C:\rtools\usr\bin;$env:PATH" + - mkdir.exe -p bus/$CI_JOB_NAME + - Rscript.exe -e "cat(Sys.getenv('CI_JOB_ID'), file=file.path('bus', Sys.getenv('CI_JOB_NAME'), 'id'))" + - Rscript.exe -e "cat(Sys.getenv('CI_JOB_STATUS'), file=file.path('bus', Sys.getenv('CI_JOB_NAME'), 'status'))" + - Rscript.exe -e "cat(Sys.getenv('CI_JOB_IMAGE'), file=file.path('bus', Sys.getenv('CI_JOB_NAME'), 'image'))" + - Rscript.exe -e "if (dir.exists(from<-'data.table.Rcheck')) file.rename(from, file.path('bus', Sys.getenv('CI_JOB_NAME'), 'data.table.Rcheck'))" + - Rscript.exe -e "if (length(from<-tail(list.files('^data\\.table_.*\\.zip$'), 1L))) file.rename(from, file.path('bus', Sys.getenv('CI_JOB_NAME'), from))" ## R-release on Windows # test and build binaries -test-rel-win: +test-win-rel: <<: *test-win variables: R_VERSION: "$R_REL_VERSION" - before_script: - - *install-r-rel-win - - *install-rtools43-win - - $ENV:PATH = "C:\R\bin;C:\rtools43\usr\bin;$ENV:PATH" - - *install-deps-win - - *cp-src-win - - rm.exe -r bus - script: - - *mv-src-win - - cd bus/$CI_JOB_NAME - - R.exe CMD check --no-manual $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - - *rm-src-win - - *mv-bin-win + R_BIN: "$R_REL_WIN_BIN" + RTOOLS_BIN: "$RTOOLS_REL_BIN" ## R-devel on Windows # test and build binaries -test-dev-win: +test-win-dev: <<: *test-win variables: - R_VERSION: "$R_DEVEL_VERSION" - allow_failure: true ## temp workaround #5748 - before_script: - - *install-r-devel-win - - *install-rtools43-win - - $ENV:PATH = "C:\R\bin;C:\rtools43\usr\bin;$ENV:PATH" - - *install-deps-win - - *cp-src-win - - rm.exe -r bus - script: - - *mv-src-win - - cd bus/$CI_JOB_NAME - - R.exe CMD check --no-manual --ignore-vignettes 
$(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - - *rm-src-win - - *mv-bin-win + R_VERSION: "$R_DEV_VERSION" + R_BIN: "$R_DEV_WIN_BIN" + RTOOLS_BIN: "$RTOOLS_DEV_BIN" ## R-oldrel on Windows # test and build binaries -test-old-win: +test-win-old: <<: *test-win variables: - R_VERSION: "$R_OLDREL_VERSION" + R_VERSION: "$R_OLD_VERSION" + R_BIN: "$R_OLD_WIN_BIN" + RTOOLS_BIN: "$RTOOLS_OLD_BIN" + +.test-mac-template: &test-mac + <<: *test + tags: + - saas-macos-medium-m1 before_script: - - *install-r-oldrel-win - - *install-rtools42-win - - $ENV:PATH = "C:\R\bin;C:\rtools42\usr\bin;$ENV:PATH" - - *install-deps-win - - *cp-src-win - - rm.exe -r bus - script: - - *mv-src-win - - cd bus/$CI_JOB_NAME - - R.exe CMD check --no-manual --ignore-vignettes $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - - *rm-src-win - - *mv-bin-win + - cp $(ls -1t bus/build/data.table_*.tar.gz | head -n 1) . 
+ after_script: + - mkdir -p bus/$CI_JOB_NAME + - '[ -d data.table.Rcheck ] && mv data.table.Rcheck bus/$CI_JOB_NAME/' + #- '[ -f data.table_*.tgz ] && mv $(ls -1t data.table_*.tgz | head -n 1) bus/$CI_JOB_NAME/' + - echo $CI_JOB_ID > bus/$CI_JOB_NAME/id + - echo $CI_JOB_STATUS > bus/$CI_JOB_NAME/status + - echo $CI_JOB_IMAGE > bus/$CI_JOB_NAME/image ## R-release on MacOS # no macosx runner set yet -#test-rel-mac: -# <<: *test-mac -# variables: -# R_VERSION: "$R_REL_VERSION" -# before_script: -# - *install-deps -# - *cp-src -# - rm -r bus -# script: -# - *mv-src -# - cd bus/$CI_JOB_NAME -# - R CMD check $(ls -1t data.table_*.tar.gz | head -n 1) -# - R CMD INSTALL --build $(ls -1t data.table_*.tar.gz | head -n 1) -# - mkdir -p cran/bin/macosx/el-capitan/contrib/$R_VERSION -# - mv $(ls -1t data.table_*.tgz | head -n 1) cran/bin/macosx/el-capitan/contrib/$R_VERSION -# - *rm-src -# - *mv-bin-mac +.test-mac-rel: + <<: *test-mac + variables: + R_VERSION: "$R_REL_VERSION" + script: + - *install-deps + - R CMD check $(ls -1t data.table_*.tar.gz | head -n 1) + - R CMD INSTALL --build $(ls -1t data.table_*.tar.gz | head -n 1) ## integrate artifacts # merging package tarballs and binaries into single R repository @@ -342,10 +294,13 @@ integration: - linux only: - master - needs: ["mirror-packages","build","test-rel-lin","test-rel-cran-lin","test-dev-cran-lin","test-rel-vanilla-lin","test-310-cran-lin","test-rel-win","test-dev-win","test-old-win"] + needs: ["mirror-packages","build","test-lin-rel","test-lin-rel-cran","test-lin-dev-gcc-strict-cran","test-lin-dev-clang-cran","test-lin-rel-vanilla","test-lin-310-cran","test-win-rel","test-win-dev" ,"test-win-old"] script: - R --version + - *install-deps ## markdown pkg not present in r-pkgdown image + - rm -rf ./vignettes ## r-lib/pkgdown#2383 - Rscript -e 'pkgdown::build_site(override=list(destination="./pkgdown"))' + - sed -i 's!!!g' pkgdown/index.html ## html manual, vignettes, repos, cran_web, cran_checks - echo 
'source(".ci/ci.R"); source(".ci/publish.R")' >> .Rprofile ## list of available test-* jobs dynamically based on bus/test-* directories @@ -355,30 +310,33 @@ integration: ## delete any existing non-dev version of data.table - rm -f bus/mirror-packages/cran/src/contrib/data.table_*.tar.gz - rm -f bus/mirror-packages/cran/bin/windows/contrib/$R_REL_VERSION/data.table_*.zip - - rm -f bus/mirror-packages/cran/bin/windows/contrib/$R_DEVEL_VERSION/data.table_*.zip - - rm -f bus/mirror-packages/cran/bin/windows/contrib/$R_OLDREL_VERSION/data.table_*.zip + - rm -f bus/mirror-packages/cran/bin/windows/contrib/$R_DEV_VERSION/data.table_*.zip + - rm -f bus/mirror-packages/cran/bin/windows/contrib/$R_OLD_VERSION/data.table_*.zip #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_REL_VERSION/data.table_*.tgz - #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_DEVEL_VERSION/data.table_*.tgz - #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_OLDREL_VERSION/data.table_*.tgz + #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_DEV_VERSION/data.table_*.tgz + #- rm -f bus/mirror-packages/cran/bin/macosx/el-capitan/contrib/$R_OLD_VERSION/data.table_*.tgz ## merge mirror-packages and R devel packages - mv bus/mirror-packages/cran bus/$CI_JOB_NAME/ ## publish package sources - mkdir -p bus/$CI_JOB_NAME/cran/library bus/$CI_JOB_NAME/cran/doc - - mv $(ls -1t bus/build/cran/src/contrib/data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME/cran/src/contrib + - mv $(ls -1t bus/build/data.table_*.tar.gz | head -n 1) bus/$CI_JOB_NAME/cran/src/contrib - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="source"), type="source", fields="Revision", addFiles=TRUE)' ## publish binaries - - Rscript -e 'move.bin("test-rel-win", Sys.getenv("R_REL_VERSION"), os.type="windows")' - - Rscript -e 'move.bin("test-dev-win", Sys.getenv("R_DEVEL_VERSION"), os.type="windows", silent=TRUE)' - - Rscript -e 
'move.bin("test-old-win", Sys.getenv("R_OLDREL_VERSION"), os.type="windows")' + - mkdir -p bus/integration/cran/bin/windows/contrib/$R_REL_VERSION/ + - mkdir -p bus/integration/cran/bin/windows/contrib/$R_DEV_VERSION/ + - mkdir -p bus/integration/cran/bin/windows/contrib/$R_OLD_VERSION/ + - '[ -f bus/test-win-rel/data.table_*.zip ] && cp bus/test-win-rel/data.table_*.zip bus/integration/cran/bin/windows/contrib/$R_REL_VERSION/' + - '[ -f bus/test-win-dev/data.table_*.zip ] && cp bus/test-win-dev/data.table_*.zip bus/integration/cran/bin/windows/contrib/$R_DEV_VERSION/' + - '[ -f bus/test-win-old/data.table_*.zip ] && cp bus/test-win-old/data.table_*.zip bus/integration/cran/bin/windows/contrib/$R_OLD_VERSION/' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_REL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_DEVEL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_OLDREL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - #- Rscript -e 'move.bin("test-rel-mac", Sys.getenv("R_REL_VERSION"), os.type="macosx")' - #- Rscript -e 'move.bin("test-dev-mac", Sys.getenv("R_DEVEL_VERSION"), os.type="macosx")' - #- Rscript -e 'move.bin("test-old-mac", Sys.getenv("R_OLDREL_VERSION"), os.type="macosx")' + - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_DEV_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' + - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_OLD_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' + #- Rscript -e 'move.bin("test-mac-rel", Sys.getenv("R_REL_VERSION"), os.type="macosx")' + #- 
Rscript -e 'move.bin("test-mac-dev", Sys.getenv("R_DEV_VERSION"), os.type="macosx")' + #- Rscript -e 'move.bin("test-mac-old", Sys.getenv("R_OLD_VERSION"), os.type="macosx")' #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_REL_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' - #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_DEVEL_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' - #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_OLDREL_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' + #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_DEV_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' + #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_OLD_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' ## install all pkgs to render html and double check successful installation of all devel packages - mkdir -p /tmp/opencran/library /tmp/opencran/doc/html ## reset R_LIBS_USER to re-install all with html because pkgdown image has pre installed curl knitr - R_LIBS_USER="" Rscript -e 'install.packages("data.table", dependencies=TRUE, lib="/tmp/opencran/library", repos=file.path("file:",normalizePath("bus/integration/cran")), INSTALL_opts="--html", quiet=TRUE)' @@ -397,7 +355,7 @@ integration: ## web/checks/$pkg/$job 00install.out, 00check.log, *.Rout, memtest.csv, memtest.png ## memtest not available for now #5764 - Rscript -e 'sapply(names(test.jobs), check.copy, simplify=FALSE)' ## web/packages/$pkg/$pkg.pdf - - Rscript -e 'pdf.copy("data.table", "test-rel-lin")' + - Rscript -e 
'pdf.copy("data.table", "test-lin-rel")' ## web/checks/check_results_$pkg.html - Rscript -e 'check.index("data.table", names(test.jobs))' ## web/checks/check_flavors.html @@ -405,11 +363,6 @@ integration: ## pkgdown merge - Rscript -e 'common_files<-function(path1, path2) intersect(list.files(path1, all.files=TRUE, no..=TRUE), list.files(path2, all.files=TRUE, no..=TRUE)); msg = if (length(f<-common_files("pkgdown","bus/integration/cran"))) paste(c("Following artifacts will be overwritten by pkgdown artifacts:", paste0(" ", f)), collapse="\n") else "No overlapping files from pkgdown artifacts"; message(msg); q("no")' - mv pkgdown/* bus/integration/cran/ - ## cleanup artifacts from other jobs - - mkdir tmpbus - - mv bus/$CI_JOB_NAME tmpbus - - rm -r bus - - mv tmpbus bus <<: *artifacts ## publish diff --git a/R/devel.R b/R/devel.R index 8bd7a1466..3aed1017f 100644 --- a/R/devel.R +++ b/R/devel.R @@ -17,9 +17,8 @@ dcf.repo = function(pkg, repo, field, type) { dcf[dcf[,"Package"]==pkg, field][[1L]] } -update_dev_pkg = function(object="data.table", repo="https://Rdatatable.gitlab.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, ...) { +update_dev_pkg = function(pkg="data.table", repo="https://Rdatatable.gitlab.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, ...) { # this works for any package, not just data.table - pkg = object # perform package upgrade when new Revision present stopifnot(is.character(pkg), length(pkg)==1L, !is.na(pkg), is.character(repo), length(repo)==1L, !is.na(repo), @@ -28,7 +27,7 @@ update_dev_pkg = function(object="data.table", repo="https://Rdatatable.gitlab.i # get Revision field from remote repository PACKAGES file una = is.na(ups<-dcf.repo(pkg, repo, field, type)) if (una) - catf("No revision information found in DESCRIPTION file for %s package. Unsure '%s' is correct field in PACKAGES file in your package repository '%s'. 
Otherwise package will be re-installed every time, proceeding to installation.\n", + catf("No revision information found in DESCRIPTION file for %s package. Make sure that '%s' is correct field in PACKAGES file in your package repository '%s'. Otherwise package will be re-installed every time, proceeding to installation.\n", pkg, field, contrib.url(repo, type=type)) # see if Revision is different then currently installed Revision, note that installed package will have Revision info only when it was installed from remote devel repo upg = una || !identical(ups, dcf.lib(pkg, field, lib.loc=lib)) @@ -44,6 +43,7 @@ update_dev_pkg = function(object="data.table", repo="https://Rdatatable.gitlab.i unname(read.dcf(system.file("DESCRIPTION", package=pkg, lib.loc=lib, mustWork=TRUE), fields=field)[, field]), utils::packageVersion(pkg, lib.loc=lib))) }) + invisible(upg) } # non-exported utility when using devel version #3272: data.table:::.git() diff --git a/_pkgdown.yml b/_pkgdown.yml index 4b02b3949..117ec2957 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,8 +1,4 @@ -url: https://Rdatatable.gitlab.io/data.table - -template: - params: - ganalytics: UA-129166154-2 +url: https://rdatatable.gitlab.io/data.table development: version_tooltip: "Development version" @@ -11,6 +7,8 @@ home: links: - text: CRAN-like website href: web/packages/data.table/index.html + - text: CRAN-like checks + href: web/checks/check_results_data.table.html navbar: structure: @@ -22,30 +20,30 @@ navbar: href: index.html introduction: text: Introduction - href: articles/datatable-intro.html + href: library/data.table/doc/datatable-intro.html articles: text: Vignettes menu: - text: "Introduction to data.table" - href: articles/datatable-intro.html + href: library/data.table/doc/datatable-intro.html - text: "Reference semantics" - href: articles/datatable-reference-semantics.html + href: library/data.table/doc/datatable-reference-semantics.html - text: "Using .SD for Data Analysis" - href: 
articles/datatable-sd-usage.html + href: library/data.table/doc/datatable-sd-usage.html - text: "Keys and fast binary search based subset" - href: articles/datatable-keys-fast-subset.html + href: library/data.table/doc/datatable-keys-fast-subset.html - text: "Secondary indices and auto indexing" - href: articles/datatable-secondary-indices-and-auto-indexing.html + href: library/data.table/doc/datatable-secondary-indices-and-auto-indexing.html - text: "Efficient reshaping using data.table" - href: articles/datatable-reshape.html + href: library/data.table/doc/datatable-reshape.html - text: "Programming on data.table" - href: articles/datatable-programming.html + href: library/data.table/doc/datatable-programming.html - text: "Frequently asked questions" - href: articles/datatable-faq.html + href: library/data.table/doc/datatable-faq.html - text: "Importing data.table" - href: articles/datatable-importing.html + href: library/data.table/doc/datatable-importing.html - text: "Benchmarking data.table" - href: articles/datatable-benchmarking.html + href: library/data.table/doc/datatable-benchmarking.html news: text: News href: news/index.html @@ -64,3 +62,8 @@ navbar: github: icon: fab fa-github fa-lg href: https://github.com/Rdatatable/data.table + +templates: + includes: + in_header: | + diff --git a/man/update_dev_pkg.Rd b/man/update_dev_pkg.Rd index 3db5b9831..66fff0422 100644 --- a/man/update_dev_pkg.Rd +++ b/man/update_dev_pkg.Rd @@ -2,14 +2,14 @@ \alias{update_dev_pkg} \title{Perform update of development version of a package} \description{ - Downloads and installs latest development version only when a new commit is available which has also passed all tests. Defaults are set to update \code{data.table}, other packages can be used as well. Their repository has to include git commit information in PACKAGES file. + Downloads and installs latest development version, only when a new commit is available. 
Defaults are set to update \code{data.table}, other packages can be used as well. Repository of a package has to include git commit SHA information in PACKAGES file. } -\usage{update_dev_pkg(object="data.table", +\usage{update_dev_pkg(pkg="data.table", repo="https://Rdatatable.gitlab.io/data.table", field="Revision", type=getOption("pkgType"), lib=NULL, \dots) } \arguments{ - \item{object}{ character scalar, package name. } + \item{pkg}{ character scalar, package name. } \item{repo}{ character scalar, url of package devel repository. } \item{field}{ character scalar, metadata field to use in PACKAGES file and DESCRIPTION file, default \code{"Revision"}. } @@ -20,13 +20,18 @@ \item{\dots}{ passed to \code{\link[utils]{install.packages}}. } } \details{ - In case if a devel repository does not provide binaries user will need development tools installed for package compilation, like \emph{Rtools} on Windows, and eventually set \code{type="source"}. + In case if a devel repository does not provide binaries user will need development tools installed for package compilation, like \emph{Rtools} on Windows, or alternatively eventually set \code{type="source"}. +} +\section{data.table repositories}{ + By default the function uses our GitLab-hosted R repository at \code{https://Rdatatable.gitlab.io/data.table}. This repository is updated nightly. It runs multiple test jobs (on top of GitHub tests jobs run upstream) and publish the package (sources and binaries), even if GitLab test jobs are failing. Status of GitLab test jobs can be checked at \href{https://rdatatable.gitlab.io/data.table/web/checks/check_results_data.table.html}{Package Check Results}.\cr + We also publish bleeding edge version of the package on GitHub-hosted R repository at \code{https://Rdatatable.github.io/data.table} (just minor change in url from \emph{lab} to \emph{hub}). GitHub version should be considered less stable than GitLab one.
It publishes only package sources.\cr + There are also other repositories maintained by R community, for example \code{https://rdatatable.r-universe.dev}. Those can be used as well, but as they are unlikely to provide git commit SHA, the function will install the package even if latest version is already installed. } \note{ Package namespace is unloaded before attempting to install newer version. } \value{ - NULL. + Invisibly \code{TRUE} if package was updated, otherwise \code{FALSE}. } \examples{ \dontshow{ # using if(FALSE) because \dontrun could still be run by --run-dontrun; #5421 } From 155bb87826e38ea054000ae24a67d945d297ea93 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 6 Dec 2023 09:55:22 +0800 Subject: [PATCH 41/88] Fix two "unusual" format warnings (#5792) * Fix two "unusual" format warnings * Add comment, find more %d usages * Update src/assign.c --------- Co-authored-by: jangorecki --- src/assign.c | 2 +- src/fread.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/assign.c b/src/assign.c index d433c2b54..8ae56e3c6 100644 --- a/src/assign.c +++ b/src/assign.c @@ -471,7 +471,7 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values) // strong error message for now. 
else if (TRUELENGTH(names) != oldtncol) // Use (long long) to cast R_xlen_t to a fixed type to robustly avoid -Wformat compiler warnings, see #5768, PRId64 didnt work - error(_("Internal error: selfrefnames is ok but tl names [%ld] != tl [%d]"), TRUELENGTH(names), oldtncol); // # nocov + error(_("Internal error: selfrefnames is ok but tl names [%lld] != tl [%d]"), (long long)TRUELENGTH(names), oldtncol); // # nocov SETLENGTH(dt, oldncol+LENGTH(newcolnames)); SETLENGTH(names, oldncol+LENGTH(newcolnames)); for (int i=0; i Date: Tue, 5 Dec 2023 17:55:57 -0800 Subject: [PATCH 42/88] ignore new .devcontainer dir --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 5f47bbacd..08508569d 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,6 +12,7 @@ ^\.ci$ ^\.dev$ +^\.devcontainer$ ^\.graphics$ ^\.github$ From 3535631b06707e8922e54f7a3cf3896beaf19238 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 6 Dec 2023 10:48:36 +0800 Subject: [PATCH 43/88] Use %zu for sizeof() formats (#5791) --- src/assign.c | 2 +- src/gsumm.c | 6 +++--- src/init.c | 24 ++++++++++++------------ 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/assign.c b/src/assign.c index 8ae56e3c6..ce2c707df 100644 --- a/src/assign.c +++ b/src/assign.c @@ -809,7 +809,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con // # nocov start for (int k=0; k Date: Wed, 6 Dec 2023 11:20:40 +0800 Subject: [PATCH 44/88] Add a simple dockerfile for extending basic CI image to do dev stuff (first: git) (#5795) * Dockerfile for dev * add git --- .devcontainer/Dockerfile | 4 ++++ .devcontainer/devcontainer.json | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 .devcontainer/Dockerfile diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile new file mode 100644 index 000000000..0fb2392ae --- /dev/null +++ b/.devcontainer/Dockerfile @@ -0,0 +1,4 @@ +FROM 
registry.gitlab.com/jangorecki/dockerfiles/r-devel-gcc + +RUN apt-get -qq update \ + && apt-get install -y --no-install-recommends git diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbda2085f..a1447f19e 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,3 +1,3 @@ { - "image": "registry.gitlab.com/jangorecki/dockerfiles/r-devel-gcc" + "build": { "dockerfile": "Dockerfile" } } From d8f7a3008723be0746ae41911ff29a3211bb8997 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Wed, 6 Dec 2023 15:11:00 +0800 Subject: [PATCH 45/88] Fix tests for complex (#5796) * fix tests for complex * Use NA_complex_ --- inst/tests/tests.Rraw | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0063d9d8c..a5c0ce3b0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9935,7 +9935,7 @@ test(1658.56, fwrite(data.table(exp(1) - pi*1i)), output='2.718[0-9]*-3.141[0-9] ## formerly 1658.46 DT = data.table(a=1:3, b=list(1:4, c(3.14, 100e10), c(3i,4i,5i))) test(1658.57, fwrite(DT), output='0+3i|0+4i|0+5i') -DT[ , b := c(1i, -1-1i, NA)] +DT[ , b := c(1i, -1-1i, NA_complex_)] test(1658.58, fwrite(DT), output='a,b\n1,0\\+1i\n2,-1-1i\n3,$') # more coverage @@ -10964,7 +10964,7 @@ test(1743.217, sapply(fread("a,b,c,d,e,f\na,b,c,d,e,f", colClasses = list(factor test(1743.218, sapply(fread("a,b,c,d,e,f\na,b,c,d,e,f", colClasses = list(factor = c(1, 2, 4), factor = 3), select = c(5, 4, 2, 3)), class), y = c(e = "character", d = "factor", b = "factor", c = "factor")) test(1743.22, fread("a,b,c\n1999/01/01,2,f", colClasses=list(Date=1L), drop="a"), data.table(b=2L, c="f")) -test(1743.231, fread("a,b,c\n2,1,4i", colClasses=list(complex="c", integer=2L), drop="a"), data.table(b=1L, c="4i"), +test(1743.231, fread("a,b,c\n2,1,4j", colClasses=list(complex="c", integer=2L), drop="a"), data.table(b=1L, c="4j"), 
warning=paste0(base_messages$coerce_na, ".*left as type 'character'")) test(1743.232, fread("a,b,c\n2,1,3+4i", colClasses=list(complex="c", integer=2L), drop="a"), data.table(b=1L, c=3+4i)) test(1743.241, fread("a,b,c\n2,2,f", colClasses = list(character="c", integer="b"), drop="a"), data.table(b=2L, c="f")) @@ -15406,9 +15406,9 @@ test(2060.503, xx_addr, address(xx)) test(2060.504, xx, x) test(2060.505, address(setcoalesce(xx)), xx_addr) # complex support for fcoalesce -z1 = c(1i, NA, 1-1i, NA, 0+3i, NA) -z2 = c(NA, 4-2i, 0+0i, NA, NA, NA) -z3 = c(2, NA, 3+6i, 5-1i, NA, NA) +z1 = c(1i, NA_complex_, 1-1i, NA_complex_, 0+3i, NA_complex_) +z2 = c(NA_complex_, 4-2i, 0+0i, NA_complex_, NA_complex_, NA_complex_) +z3 = c(2, NA_complex_, 3+6i, 5-1i, NA_complex_, NA_complex_) na_idx = c(2L, 4L, 6L) test(2060.600, fcoalesce(z1, 0+0i), `[<-`(z1, na_idx, 0+0i)) test(2060.601, fcoalesce(z1, z2), `[<-`(z1, na_idx, c(4-2i, NA, NA))) @@ -15509,7 +15509,7 @@ z = c(1:3) + c(3:1)*1i test(2067.1, shift(z), c(NA, z[1:2])) test(2067.2, shift(z, type = 'lead'), c(z[2:3], NA)) test(2067.3, shift(z, fill = 1i), c(1i, z[1:2])) -test(2067.4, shift(list(z, 1:3)), list(c(NA, z[1:2]), c(NA, 1:2))) +test(2067.4, shift(list(z, 1:3)), list(c(NA_complex_, z[1:2]), c(NA, 1:2))) test(2067.5, shift(z, n=1, type = 'cyclic'), c(z[3], z[1:2])) test(2067.6, shift(z, n=-1, type = 'cyclic'), c(z[2:3], z[1])) test(2067.7, shift(list(z, 1L:3L), n=1, type = 'cyclic'), list(c(z[3], z[1:2]), c(3L, 1:2))) From 25064ee2846605c06fd1eedb8e05e77fcc1c58b1 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Wed, 6 Dec 2023 13:15:48 +0100 Subject: [PATCH 46/88] plausible.io website traffic statistics (#5799) --- .gitlab-ci.yml | 4 +++- _pkgdown.yml | 5 ----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 60cf09bb5..6844c8853 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -266,6 +266,7 @@ test-win-old: after_script: - mkdir -p bus/$CI_JOB_NAME - '[ -d 
data.table.Rcheck ] && mv data.table.Rcheck bus/$CI_JOB_NAME/' + ## no pattern matching in [, TODO when macos available #- '[ -f data.table_*.tgz ] && mv $(ls -1t data.table_*.tgz | head -n 1) bus/$CI_JOB_NAME/' - echo $CI_JOB_ID > bus/$CI_JOB_NAME/id - echo $CI_JOB_STATUS > bus/$CI_JOB_NAME/status @@ -300,7 +301,6 @@ integration: - *install-deps ## markdown pkg not present in r-pkgdown image - rm -rf ./vignettes ## r-lib/pkgdown#2383 - Rscript -e 'pkgdown::build_site(override=list(destination="./pkgdown"))' - - sed -i 's!!!g' pkgdown/index.html ## html manual, vignettes, repos, cran_web, cran_checks - echo 'source(".ci/ci.R"); source(".ci/publish.R")' >> .Rprofile ## list of available test-* jobs dynamically based on bus/test-* directories @@ -363,6 +363,8 @@ integration: ## pkgdown merge - Rscript -e 'common_files<-function(path1, path2) intersect(list.files(path1, all.files=TRUE, no..=TRUE), list.files(path2, all.files=TRUE, no..=TRUE)); msg = if (length(f<-common_files("pkgdown","bus/integration/cran"))) paste(c("Following artifacts will be overwritten by pkgdown artifacts:", paste0(" ", f)), collapse="\n") else "No overlapping files from pkgdown artifacts"; message(msg); q("no")' - mv pkgdown/* bus/integration/cran/ + ## add plausible.io stats + - find bus/integration/cran -type f -iname "*.html" | xargs sed -i 's!!!g' <<: *artifacts ## publish diff --git a/_pkgdown.yml b/_pkgdown.yml index 117ec2957..1b9478e38 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -62,8 +62,3 @@ navbar: github: icon: fab fa-github fa-lg href: https://github.com/Rdatatable/data.table - -templates: - includes: - in_header: | - From 5e8ca96f7a9505704289f008f8a940ed2eefe83e Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Wed, 6 Dec 2023 13:52:48 +0100 Subject: [PATCH 47/88] follow up of compilation warnings (#5800) --- src/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/init.c b/src/init.c index af439bc0f..7369141c9 100644 --- a/src/init.c +++ b/src/init.c 
@@ -160,7 +160,7 @@ static void setSizes(void) { __sizes[CPLXSXP] = sizeof(Rcomplex); __typeorder[CPLXSXP] = 4; __sizes[STRSXP] = sizeof(SEXP *); __typeorder[STRSXP] = 5; __sizes[VECSXP] = sizeof(SEXP *); __typeorder[VECSXP] = 6; // list column - if (sizeof(char *)>8) error(_("Pointers are %lu bytes, greater than 8. We have not tested on any architecture greater than 64bit yet."), sizeof(char *)); + if (sizeof(char *)>8) error(_("Pointers are %zu bytes, greater than 8. We have not tested on any architecture greater than 64bit yet."), sizeof(char *)); // One place we need the largest sizeof is the working memory malloc in reorder.c } From 63300ffb6373da6a1440a9da97420280e82ad32b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 6 Dec 2023 16:09:28 +0100 Subject: [PATCH 48/88] Remove `curl` from suggests (#5749) --- DESCRIPTION | 2 +- NEWS.md | 2 ++ R/fread.R | 16 ++++++---------- man/fread.Rd | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6756db8ae..6ba6d9226 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,7 +3,7 @@ Version: 1.14.9 Title: Extension of `data.frame` Depends: R (>= 3.1.0) Imports: methods -Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), curl, R.utils, xts, zoo (>= 1.8-1), yaml, knitr, markdown +Suggests: bit64 (>= 4.0.0), bit (>= 4.0.4), R.utils, xts, zoo (>= 1.8-1), yaml, knitr, markdown Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. 
License: MPL-2.0 | file LICENSE URL: https://r-datatable.com, https://Rdatatable.gitlab.io/data.table, https://github.com/Rdatatable/data.table diff --git a/NEWS.md b/NEWS.md index 52333e9b3..0aaa2e436 100644 --- a/NEWS.md +++ b/NEWS.md @@ -561,6 +561,8 @@ identical(DT1, DT2) # TRUE ``` +55. `fread(URL)` with `https:` and `ftps:` could timeout if proxy settings were not guessed right by `curl::curl_download`, [#1686](https://github.com/Rdatatable/data.table/issues/1686). `fread(URL)` now uses `download.file()` as default for downloading files from urls. Thanks to @cderv for the report and Benjamin Schwendinger for the fix. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. 
For example : diff --git a/R/fread.R b/R/fread.R index e0337c591..8e9a11b12 100644 --- a/R/fread.R +++ b/R/fread.R @@ -76,17 +76,13 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (w <- startsWithAny(file, c("https://", "ftps://", "http://", "ftp://", "file://"))) { # avoid grepl() for #2531 # nocov start tmpFile = tempfile(fileext = paste0(".",tools::file_ext(file)), tmpdir=tmpdir) # retain .gz extension in temp filename so it knows to be decompressed further below - if (w<=2L) { # https: or ftps: - if (!requireNamespace("curl", quietly = TRUE)) - stopf("URL requires https:// connection for which fread() requires 'curl' package which cannot be found. Please install 'curl' using 'install.packages('curl')'.") # nocov - - curl::curl_download(file, tmpFile, mode="wb", quiet = !showProgress) - } else { - method = if (w==5L) "internal" # force 'auto' when file: to ensure we don't use an invalid option (e.g. wget), #1668 - else getOption("download.file.method", default="auto") # http: or ftp: - download.file(file, tmpFile, method=method, mode="wb", quiet=!showProgress) - # In text mode on Windows-only, R doubles up \r to make \r\r\n line endings. mode="wb" avoids that. See ?connections:"CRLF" + if (w<=2L && base::getRversion()<"3.2.2") { # https: or ftps: can be read by default by download.file() since 3.2.2 + stopf("URL requires download.file functionalities from R >=3.2.2. You can still manually download the file and fread the downloaded file.") } + method = if (w==5L) "internal" # force 'auto' when file: to ensure we don't use an invalid option (e.g. wget), #1668 + else getOption("download.file.method", default="auto") # http: or ftp: + # In text mode on Windows-only, R doubles up \r to make \r\r\n line endings. mode="wb" avoids that. 
See ?connections:"CRLF" + download.file(file, tmpFile, method=method, mode="wb", quiet=!showProgress) file = tmpFile on.exit(unlink(tmpFile), add=TRUE) # nocov end diff --git a/man/fread.Rd b/man/fread.Rd index 78c8a7628..4456e11d1 100644 --- a/man/fread.Rd +++ b/man/fread.Rd @@ -115,7 +115,7 @@ Currently, the \code{yaml} setting is somewhat inflexible with respect to incorp \bold{File Download:} -When \code{input} begins with http://, https://, ftp://, ftps://, or file://, \code{fread} detects this and \emph{downloads} the target to a temporary file (at \code{tempfile()}) before proceeding to read the file as usual. Secure URLS (ftps:// and https://) are downloaded with \code{curl::curl_download}; ftp:// and http:// paths are downloaded with \code{download.file} and \code{method} set to \code{getOption("download.file.method")}, defaulting to \code{"auto"}; and file:// is downloaded with \code{download.file} with \code{method="internal"}. NB: this implies that for file://, even files found on the current machine will be "downloaded" (i.e., hard-copied) to a temporary file. See \code{\link{download.file}} for more details. +When \code{input} begins with http://, https://, ftp://, ftps://, or file://, \code{fread} detects this and \emph{downloads} the target to a temporary file (at \code{tempfile()}) before proceeding to read the file as usual. URLS (ftps:// and https:// as well as ftp:// and http://) paths are downloaded with \code{download.file} and \code{method} set to \code{getOption("download.file.method")}, defaulting to \code{"auto"}; and file:// is downloaded with \code{download.file} with \code{method="internal"}. NB: this implies that for file://, even files found on the current machine will be "downloaded" (i.e., hard-copied) to a temporary file. See \code{\link{download.file}} for more details. 
\bold{Shell commands:} From 4bf4ef328e12705192e068f299ddb611447f6e67 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Wed, 6 Dec 2023 17:45:06 +0100 Subject: [PATCH 49/88] improvements to CI (#5802) --- .ci/publish.R | 1 + .gitlab-ci.yml | 23 ++++++++++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/.ci/publish.R b/.ci/publish.R index 923b89b5d..0657790d2 100644 --- a/.ci/publish.R +++ b/.ci/publish.R @@ -379,6 +379,7 @@ check.test <- function(job, pkg) { } move.bin <- function(job, bin.version, os.type, file="DESCRIPTION", silent=TRUE) { + ## currently not used, if not used for macos in future then can be removed if (os.type=="unix") { stop("publish of linux binaries not supported") } else if (os.type=="windows") { diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6844c8853..a394f68e7 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -58,9 +58,9 @@ mirror-packages: - Rscript -e 'sapply(simplify=FALSE, setNames(nm=Sys.getenv(c("R_REL_VERSION","R_DEV_VERSION","R_OLD_VERSION"))), function(binary.ver) mirror.packages(type="win.binary", dcf.dependencies("DESCRIPTION", "all"), repos=Sys.getenv("CRAN_MIRROR"), repodir="bus/mirror-packages/cran", binary.ver=binary.ver))' <<: *artifacts -## install deps aliases +## install deps alias .test-install-deps: &install-deps - - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="all"), repos=file.path("file:", normalizePath("bus/mirror-packages/cran", mustWork=FALSE)), quiet=FALSE)' + - Rscript -e 'source(".ci/ci.R"); install.packages(dcf.dependencies("DESCRIPTION", which="all"), repos=file.path("file:", normalizePath("bus/mirror-packages/cran", mustWork=FALSE)), quiet=TRUE)' ## build # sources as tar.gz archive @@ -116,7 +116,6 @@ test-lin-rel: _R_CHECK_TESTS_NLINES_: "0" OPENBLAS_MAIN_FREE: "1" script: - - apt-get update -q && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## remove after #5749 - *install-deps - echo 'CFLAGS=-g -O3 -flto=auto 
-fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - echo 'CXXFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars @@ -149,7 +148,6 @@ test-lin-rel-cran: _R_CHECK_CRAN_INCOMING_TARBALL_THRESHOLD_: "7500000" ## bytes _R_CHECK_PKG_SIZES_THRESHOLD_: "7" ## MB 'checking installed package size' NOTE script: - - apt-get -qq update && apt-get install -y --no-install-recommends libcurl4-openssl-dev ## temp workaround for curl dep #5749 - *install-deps - echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - echo 'CXXFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars @@ -176,7 +174,7 @@ test-lin-dev-gcc-strict-cran: - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - (! grep "warning:" data.table.Rcheck/00install.out) - >- - Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 3 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 3 NOTEs"), " (size of tarball, suggested but not available for checking: 'curl', installed package size) but ", shQuote(l)) else q("no")' + Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 2 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, installed package size) but ", shQuote(l)) else q("no")' ## R-devel on Linux clang # R compiled with clang @@ -197,7 +195,7 @@ test-lin-dev-clang-cran: - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - (! 
grep "warning:" data.table.Rcheck/00install.out) - >- - Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 3 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 3 NOTEs"), " (size of tarball, suggested but not available for checking: 'curl', installed package size) but ", shQuote(l)) else q("no")' + Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 2 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, installed package size) but ", shQuote(l)) else q("no")' ## R 3.1.0 # stated dependency on R @@ -221,14 +219,15 @@ test-lin-310-cran: script: - R.exe CMD check --no-manual $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) - R.exe CMD INSTALL --build $(ls.exe -1t data.table_*.tar.gz | head.exe -n 1) + - -not (grep.exe "warning:" data.table.Rcheck\00install.out) after_script: - $env:PATH = "C:\R\bin;C:\rtools\usr\bin;$env:PATH" - mkdir.exe -p bus/$CI_JOB_NAME - Rscript.exe -e "cat(Sys.getenv('CI_JOB_ID'), file=file.path('bus', Sys.getenv('CI_JOB_NAME'), 'id'))" - Rscript.exe -e "cat(Sys.getenv('CI_JOB_STATUS'), file=file.path('bus', Sys.getenv('CI_JOB_NAME'), 'status'))" - Rscript.exe -e "cat(Sys.getenv('CI_JOB_IMAGE'), file=file.path('bus', Sys.getenv('CI_JOB_NAME'), 'image'))" - - Rscript.exe -e "if (dir.exists(from<-'data.table.Rcheck')) file.rename(from, file.path('bus', Sys.getenv('CI_JOB_NAME'), 'data.table.Rcheck'))" - - Rscript.exe -e "if (length(from<-tail(list.files('^data\\.table_.*\\.zip$'), 1L))) file.rename(from, file.path('bus', Sys.getenv('CI_JOB_NAME'), from))" + - Rscript.exe -e "to<-file.path('bus', Sys.getenv('CI_JOB_NAME'), 'data.table.Rcheck'); if (dir.exists(from<-'data.table.Rcheck')) invisible(file.rename(from, to)); dir.exists(to)" + - Rscript.exe -e "from<-tail(list.files(pattern='^data\\.table_.*\\.zip$'), 1L); to<-file.path('bus', Sys.getenv('CI_JOB_NAME'), 
from); if (length(from)) invisible(file.rename(from, to)); length(to)&&file.exists(to)" ## R-release on Windows # test and build binaries @@ -262,12 +261,12 @@ test-win-old: tags: - saas-macos-medium-m1 before_script: + - *install-deps - cp $(ls -1t bus/build/data.table_*.tar.gz | head -n 1) . after_script: - mkdir -p bus/$CI_JOB_NAME - '[ -d data.table.Rcheck ] && mv data.table.Rcheck bus/$CI_JOB_NAME/' - ## no pattern matching in [, TODO when macos available - #- '[ -f data.table_*.tgz ] && mv $(ls -1t data.table_*.tgz | head -n 1) bus/$CI_JOB_NAME/' + - '[ -f data.table_*.tgz ] && mv $(ls -1t data.table_*.tgz | head -n 1) bus/$CI_JOB_NAME/' - echo $CI_JOB_ID > bus/$CI_JOB_NAME/id - echo $CI_JOB_STATUS > bus/$CI_JOB_NAME/status - echo $CI_JOB_IMAGE > bus/$CI_JOB_NAME/image @@ -279,7 +278,6 @@ test-win-old: variables: R_VERSION: "$R_REL_VERSION" script: - - *install-deps - R CMD check $(ls -1t data.table_*.tar.gz | head -n 1) - R CMD INSTALL --build $(ls -1t data.table_*.tar.gz | head -n 1) @@ -326,8 +324,11 @@ integration: - mkdir -p bus/integration/cran/bin/windows/contrib/$R_DEV_VERSION/ - mkdir -p bus/integration/cran/bin/windows/contrib/$R_OLD_VERSION/ - '[ -f bus/test-win-rel/data.table_*.zip ] && cp bus/test-win-rel/data.table_*.zip bus/integration/cran/bin/windows/contrib/$R_REL_VERSION/' + - ls -1 "bus/integration/cran/bin/windows/contrib/$R_REL_VERSION"/data.table_*.zip || true - '[ -f bus/test-win-dev/data.table_*.zip ] && cp bus/test-win-dev/data.table_*.zip bus/integration/cran/bin/windows/contrib/$R_DEV_VERSION/' + - ls -1 "bus/integration/cran/bin/windows/contrib/$R_DEV_VERSION"/data.table_*.zip || true - '[ -f bus/test-win-old/data.table_*.zip ] && cp bus/test-win-old/data.table_*.zip bus/integration/cran/bin/windows/contrib/$R_OLD_VERSION/' + - ls -1 "bus/integration/cran/bin/windows/contrib/$R_OLD_VERSION"/data.table_*.zip || true - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", 
ver=Sys.getenv("R_REL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_DEV_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_OLD_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' From 3e590f84194b8ce975ad9ee84ef3330a74c0c63d Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Wed, 6 Dec 2023 18:56:42 +0100 Subject: [PATCH 50/88] workaround pkgdown bug by copy rather link to avoid dead links (#5804) --- .gitlab-ci.yml | 4 ++++ _pkgdown.yml | 22 +++++++++++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a394f68e7..099f39977 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -361,6 +361,10 @@ integration: - Rscript -e 'check.index("data.table", names(test.jobs))' ## web/checks/check_flavors.html - Rscript -e 'check.flavors(names(test.jobs))' + ## pkgdown vignettes workaround r-lib/pkgdown#2383 + - mkdir -p pkgdown/articles + - cp bus/integration/cran/library/data.table/doc/*.html pkgdown/articles/. 
+ - rm pkgdown/articles/index.html ## pkgdown merge - Rscript -e 'common_files<-function(path1, path2) intersect(list.files(path1, all.files=TRUE, no..=TRUE), list.files(path2, all.files=TRUE, no..=TRUE)); msg = if (length(f<-common_files("pkgdown","bus/integration/cran"))) paste(c("Following artifacts will be overwritten by pkgdown artifacts:", paste0(" ", f)), collapse="\n") else "No overlapping files from pkgdown artifacts"; message(msg); q("no")' - mv pkgdown/* bus/integration/cran/ diff --git a/_pkgdown.yml b/_pkgdown.yml index 1b9478e38..66488b928 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -20,30 +20,30 @@ navbar: href: index.html introduction: text: Introduction - href: library/data.table/doc/datatable-intro.html + href: articles/datatable-intro.html articles: text: Vignettes menu: - text: "Introduction to data.table" - href: library/data.table/doc/datatable-intro.html + href: articles/datatable-intro.html - text: "Reference semantics" - href: library/data.table/doc/datatable-reference-semantics.html + href: articles/datatable-reference-semantics.html - text: "Using .SD for Data Analysis" - href: library/data.table/doc/datatable-sd-usage.html + href: articles/datatable-sd-usage.html - text: "Keys and fast binary search based subset" - href: library/data.table/doc/datatable-keys-fast-subset.html + href: articles/datatable-keys-fast-subset.html - text: "Secondary indices and auto indexing" - href: library/data.table/doc/datatable-secondary-indices-and-auto-indexing.html + href: articles/datatable-secondary-indices-and-auto-indexing.html - text: "Efficient reshaping using data.table" - href: library/data.table/doc/datatable-reshape.html + href: articles/datatable-reshape.html - text: "Programming on data.table" - href: library/data.table/doc/datatable-programming.html + href: articles/datatable-programming.html - text: "Frequently asked questions" - href: library/data.table/doc/datatable-faq.html + href: articles/datatable-faq.html - text: "Importing 
data.table" - href: library/data.table/doc/datatable-importing.html + href: articles/datatable-importing.html - text: "Benchmarking data.table" - href: library/data.table/doc/datatable-benchmarking.html + href: articles/datatable-benchmarking.html news: text: News href: news/index.html From 9225c169042a6032c7bb21ffa4ee74b6219934db Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 15:42:52 +0100 Subject: [PATCH 51/88] follow version number conventions #5715 (#5803) --- .dev/CRAN_Release.cmd | 55 +++++++++++++++++++++++++++---------------- DESCRIPTION | 2 +- Makefile | 6 ++--- NEWS.md | 2 +- src/init.c | 2 +- 5 files changed, 41 insertions(+), 26 deletions(-) diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index 3442dcb38..6134d923e 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -195,15 +195,15 @@ R CMD build . export GITHUB_PAT="f1c.. github personal access token ..7ad" # avoids many too-many-requests in --as-cran's ping-all-URLs step (20 mins) inside the `checking CRAN incoming feasibility...` step. # Many thanks to Dirk for the tipoff that setting this env variable solves the problem, #4832. -R CMD check data.table_1.14.9.tar.gz --as-cran -R CMD INSTALL data.table_1.14.9.tar.gz --html +R CMD check data.table_1.14.99.tar.gz --as-cran +R CMD INSTALL data.table_1.14.99.tar.gz --html # Test C locale doesn't break test suite (#2771) echo LC_ALL=C > ~/.Renviron R Sys.getlocale()=="C" q("no") -R CMD check data.table_1.14.9.tar.gz +R CMD check data.table_1.14.99.tar.gz rm ~/.Renviron # Test non-English does not break test.data.table() due to translation of messages; #3039, #630 @@ -220,9 +220,9 @@ q("no") # User supplied PKG_CFLAGS and PKG_LIBS passed through, #4664 # Next line from https://mac.r-project.org/openmp/. Should see the arguments passed through and then fail with gcc on linux. 
-PKG_CFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_1.14.9.tar.gz +PKG_CFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_1.14.99.tar.gz # Next line should work on Linux, just using superfluous and duplicate but valid parameters here to see them retained and work -PKG_CFLAGS='-fopenmp' PKG_LIBS=-lz R CMD INSTALL data.table_1.14.9.tar.gz +PKG_CFLAGS='-fopenmp' PKG_LIBS=-lz R CMD INSTALL data.table_1.14.99.tar.gz R remove.packages("xml2") # we checked the URLs; don't need to do it again (many minutes) @@ -266,7 +266,7 @@ alias R310=~/build/R-3.1.0/bin/R ### END ONE TIME BUILD cd ~/GitHub/data.table -R310 CMD INSTALL ./data.table_1.14.9.tar.gz +R310 CMD INSTALL ./data.table_1.14.99.tar.gz R310 require(data.table) test.data.table(script="*.Rraw") @@ -278,7 +278,7 @@ test.data.table(script="*.Rraw") vi ~/.R/Makevars # Make line SHLIB_OPENMP_CFLAGS= active to remove -fopenmp R CMD build . -R CMD INSTALL data.table_1.14.9.tar.gz # ensure that -fopenmp is missing and there are no warnings +R CMD INSTALL data.table_1.14.99.tar.gz # ensure that -fopenmp is missing and there are no warnings R require(data.table) # observe startup message about no OpenMP detected test.data.table() @@ -286,7 +286,7 @@ q("no") vi ~/.R/Makevars # revert change above R CMD build . -R CMD check data.table_1.14.9.tar.gz +R CMD check data.table_1.14.99.tar.gz ##################################################### @@ -341,11 +341,11 @@ alias Rdevel-strict-gcc='~/build/R-devel-strict-gcc/bin/R --vanilla' alias Rdevel-strict-clang='~/build/R-devel-strict-clang/bin/R --vanilla' cd ~/GitHub/data.table -Rdevel-strict-[gcc|clang] CMD INSTALL data.table_1.14.9.tar.gz +Rdevel-strict-[gcc|clang] CMD INSTALL data.table_1.14.99.tar.gz # Check UBSAN and ASAN flags appear in compiler output above. Rdevel was compiled with them so they should be # passed through to here. 
However, our configure script seems to get in the way and gets them from {R_HOME}/bin/R # So I needed to edit my ~/.R/Makevars to get CFLAGS the way I needed. -Rdevel-strict-[gcc|clang] CMD check data.table_1.14.9.tar.gz +Rdevel-strict-[gcc|clang] CMD check data.table_1.14.99.tar.gz # Use the (failed) output to get the list of currently needed packages and install them Rdevel-strict-[gcc|clang] isTRUE(.Machine$sizeof.longdouble==0) # check noLD is being tested @@ -354,7 +354,7 @@ install.packages(c("bit64", "bit", "curl", "R.utils", "xts","nanotime", "zoo", " Ncpus=4) # Issue #5491 showed that CRAN is running UBSAN on .Rd examples which found an error so we now run full R CMD check q("no") -Rdevel-strict-[gcc|clang] CMD check data.table_1.14.9.tar.gz +Rdevel-strict-[gcc|clang] CMD check data.table_1.14.99.tar.gz # UBSAN errors occur on stderr and don't affect R CMD check result. Made many failed attempts to capture them. So grep for them. find data.table.Rcheck -name "*Rout*" -exec grep -H "runtime error" {} \; @@ -391,7 +391,7 @@ cd R-devel-valgrind make cd ~/GitHub/data.table vi ~/.R/Makevars # make the -O2 -g line active, for info on source lines with any problems -Rdevel-valgrind CMD INSTALL data.table_1.14.9.tar.gz +Rdevel-valgrind CMD INSTALL data.table_1.14.99.tar.gz R_DONT_USE_TK=true Rdevel-valgrind -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --show-leak-kinds=definite,possible --gen-suppressions=all --suppressions=./.dev/valgrind.supp -s" # the default for --show-leak-kinds is 'definite,possible' which we're setting explicitly here as a reminder. CRAN uses the default too. # including 'reachable' (as 'all' does) generates too much output from R itself about by-design permanent blocks @@ -429,7 +429,7 @@ cd ~/build/rchk/trunk . ../scripts/config.inc . 
../scripts/cmpconfig.inc vi ~/.R/Makevars # set CFLAGS=-O0 -g so that rchk can provide source line numbers -echo 'install.packages("~/GitHub/data.table/data.table_1.14.9.tar.gz",repos=NULL)' | ./bin/R --slave +echo 'install.packages("~/GitHub/data.table/data.table_1.14.99.tar.gz",repos=NULL)' | ./bin/R --slave # objcopy warnings (if any) can be ignored: https://github.com/kalibera/rchk/issues/17#issuecomment-497312504 . ../scripts/check_package.sh data.table cat packages/lib/data.table/libs/*check @@ -594,7 +594,7 @@ du -k inst/tests # 0.75MB after R CMD build . export GITHUB_PAT="f1c.. github personal access token ..7ad" Rdevel -q -e "packageVersion('xml2')" # ensure installed -Rdevel CMD check data.table_1.14.10.tar.gz --as-cran # use latest Rdevel as it may have extra checks +Rdevel CMD check data.table_1.16.0.tar.gz --as-cran # use latest Rdevel as it may have extra checks # bunzip2 inst/tests/*.Rraw.bz2 # decompress *.Rraw again so as not to commit compressed *.Rraw to git # @@ -614,15 +614,30 @@ If it's evening, SLEEP. It can take a few days for CRAN's checks to run. If any issues arise, backport locally. Resubmit the same even version to CRAN. CRAN's first check is automatic and usually received within an hour. WAIT FOR THAT EMAIL. When CRAN's email contains "Pretest results OK pending a manual inspection" (or similar), or if not and it is known why not and ok, then bump dev. -###### Bump dev -0. Close milestone to prevent new issues being tagged with it. Update its name to the even release. The final 'release checks' issue can be left open in a closed milestone. + +###### Bump dev for NON-PATCH RELEASE +0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone. 1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd -2. Bump version in DESCRIPTION to next odd number. 
Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. +2. Bump minor version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. 3. Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on )" on the released heading. -4. Bump dllVersion() in init.c -5. Bump 3 version numbers in Makefile +4. Bump minor version in dllVersion() in init.c +5. Bump 3 minor version numbers in Makefile +6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.99 to 1.15.99 inc below, 1.15.0 to 1.16.0 above, 1.14.0 to 1.15.0 below +7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) +8. Push to master with this consistent commit message: "1.15.0 on CRAN. Bump to 1.15.99" +9. Take sha from step 8 and run `git tag 1.15.0 96c..sha..d77` then `git push origin 1.15.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) +###### + +###### Bump dev for PATCH RELEASE +## WARNING: review this process during the next first patch release (x.y.2) from a regular release (x.y.0), possibly during 1.15.2 release. +0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone. +1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd +2. Bump patch version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. +3. Add new heading in NEWS for the next dev PATCH version. Add "(submitted to CRAN on )" on the released heading. +4. Bump patch version in dllVersion() in init.c +5. Bump 3 patch version numbers in Makefile 6.
Search and replace this .dev/CRAN_Release.cmd to update 1.14.9 to 1.14.11 inc below, 1.14.10 to 1.14.12 above, 1.14.8 to 1.14.10 below 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) 8. Push to master with this consistent commit message: "1.14.8 on CRAN. Bump to 1.14.10" 9. Take sha from step 8 and run `git tag 1.14.8 96c..sha..d77` then `git push origin 1.14.8` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) -###### +###### \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 6ba6d9226..8942c48d1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: data.table -Version: 1.14.9 +Version: 1.14.99 Title: Extension of `data.frame` Depends: R (>= 3.1.0) Imports: methods diff --git a/Makefile b/Makefile index b4d8517df..45fb6203b 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ some: .PHONY: clean clean: - $(RM) data.table_1.14.9.tar.gz + $(RM) data.table_1.14.99.tar.gz $(RM) src/*.o $(RM) src/*.so @@ -28,7 +28,7 @@ build: .PHONY: install install: - $(R) CMD INSTALL data.table_1.14.9.tar.gz + $(R) CMD INSTALL data.table_1.14.99.tar.gz .PHONY: uninstall uninstall: @@ -40,7 +40,7 @@ test: .PHONY: check check: - _R_CHECK_CRAN_INCOMING_REMOTE_=false $(R) CMD check data.table_1.14.9.tar.gz --as-cran --ignore-vignettes --no-stop-on-test-error + _R_CHECK_CRAN_INCOMING_REMOTE_=false $(R) CMD check data.table_1.14.99.tar.gz --as-cran --ignore-vignettes --no-stop-on-test-error .PHONY: revision revision: diff --git a/NEWS.md b/NEWS.md index 0aaa2e436..a9ac6e8a9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ **If you are viewing this file on CRAN, please check [latest news on GitHub](https://github.com/Rdatatable/data.table/blob/master/NEWS.md) where the formatting is also better.** -# data.table [v1.14.9](https://github.com/Rdatatable/data.table/milestone/20) (in development) +# data.table 
[v1.14.99](https://github.com/Rdatatable/data.table/milestone/20) (in development) ## NEW FEATURES diff --git a/src/init.c b/src/init.c index 7369141c9..c8e8452ec 100644 --- a/src/init.c +++ b/src/init.c @@ -353,6 +353,6 @@ SEXP initLastUpdated(SEXP var) { SEXP dllVersion(void) { // .onLoad calls this and checks the same as packageVersion() to ensure no R/C version mismatch, #3056 - return(ScalarString(mkChar("1.14.9"))); + return(ScalarString(mkChar("1.14.99"))); } From ae215c70ef804f98dc66a01bb373d2669ffa923a Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 16:01:53 +0100 Subject: [PATCH 52/88] minor programming vignette fixes (#5432) * minor vignette fixes * update version and add link --- man/data.table.Rd | 2 +- vignettes/datatable-programming.Rmd | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/data.table.Rd b/man/data.table.Rd index 502595d7c..4f8d402fc 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -177,7 +177,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac See examples as well as \href{../doc/datatable-secondary-indices-and-auto-indexing.html}{\code{vignette("datatable-secondary-indices-and-auto-indexing")}}. } - \item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. } + \item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. For more details see \href{../doc/datatable-programming.html}{\code{vignette("datatable-programming")}}. 
} } \details{ \code{data.table} builds on base \R functionality to reduce 2 types of time:\cr diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index fc3ad726d..89d129201 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -122,7 +122,7 @@ Though these can be helpful, we will be discussing a `data.table`-unique approac Now that we've established the proper way to parameterize code that uses *lazy evaluation*, we can move on to the main subject of this vignette, *programming on data.table*. -Starting from version 1.14.2, data.table provides a robust mechanism for parameterizing expressions passed to the `i`, `j`, and `by` (or `keyby`) arguments of `[.data.table`. It is built upon the base R `substitute` function, and mimics its interface. Here, we introduce `substitute2` as a more robust and more user-friendly version of base R's `substitute`. For a complete list of differences between `base::substitute` and `data.table::substitute2` please read the [`substitute2` manual](https://rdatatable.gitlab.io/data.table/library/data.table/html/substitute2.html). +Starting from version 1.15.0, data.table provides a robust mechanism for parameterizing expressions passed to the `i`, `j`, and `by` (or `keyby`) arguments of `[.data.table`. It is built upon the base R `substitute` function, and mimics its interface. Here, we introduce `substitute2` as a more robust and more user-friendly version of base R's `substitute`. For a complete list of differences between `base::substitute` and `data.table::substitute2` please read the [`substitute2` manual](https://rdatatable.gitlab.io/data.table/library/data.table/html/substitute2.html). ### Substituting variables and names @@ -203,7 +203,7 @@ DT[filter_col %in% filter_val, ### Substitute variables and character values -In the above example, we have seen a convenient feature of `substitute2`: automatic conversion from strings into names/symbols. 
An obvious question arises: what if we actually want to substitute a parameter with a *character* value, so as to have base R `substitute` behaviour. We provide a mechanism to escape automatic conversion by wrapping the elements into base R `I()` call. The `I` function marks an object as *AsIs*, preventing its arguments from substitution. (Read the `?AsIs` documentation for more details.) If base R behaviour is desired for the whole `env` argument, then it's best to wrap the whole argument in `I()`. Alternatively, each list element can be wrapped in `I()` individually. Let's explore both cases below. +In the above example, we have seen a convenient feature of `substitute2`: automatic conversion from strings into names/symbols. An obvious question arises: what if we actually want to substitute a parameter with a *character* value, so as to have base R `substitute` behaviour. We provide a mechanism to escape automatic conversion by wrapping the elements into base R `I()` call. The `I` function marks an object as *AsIs*, preventing its arguments from character-to-symbol automatic conversion. (Read the `?AsIs` documentation for more details.) If base R behaviour is desired for the whole `env` argument, then it's best to wrap the whole argument in `I()`. Alternatively, each list element can be wrapped in `I()` individually. Let's explore both cases below. ```{r rank} substitute( # base R behaviour @@ -253,7 +253,7 @@ DT[, list(Sepal.Length, Sepal.Width)] ``` *Splicing* is an operation where a list of objects have to be inlined into an expression as a sequence of arguments to call. -In base R, splicing `cols` into a `list` can be achieved using `as.call(c(quote(list), cols))`. Additionally, starting from R 4.0.0, there is new interface for such an operation in the `bquote` function. +In base R, splicing `cols` into a `list` can be achieved using `as.call(c(quote(list), lapply(cols, as.name)))`. 
Additionally, starting from R 4.0.0, there is new interface for such an operation in the `bquote` function. In data.table, we make it easier by automatically _enlist_-ing a list of objects into a list call with those objects. This means that any `list` object inside the `env` list argument will be turned into list `call`, making the API for that use case as simple as presented below. From 4e7d46bcb515679e1af18f0205b3bc3eb3588582 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 16:05:15 +0100 Subject: [PATCH 53/88] notin docs #5481 (#5729) --- man/notin.Rd | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/man/notin.Rd b/man/notin.Rd index d84bb2024..e041ff5cb 100644 --- a/man/notin.Rd +++ b/man/notin.Rd @@ -1,33 +1,30 @@ \name{notin} \alias{\%notin\%} - \title{ Convenience operator for checking if an example is not in a set of elements } - \description{ -Check whether an object is absent from a table, i.e., the logical inverse of \code{\link[=base]{in}}. +Check whether an object is absent from a table, i.e., the logical inverse of \code{\link[=base]{in}}. See examples on how missing values are being handled. } - \usage{ x \%notin\% table } - \arguments{ \item{x}{ Vector or \code{NULL}: the values to be matched. } \item{table}{ Vector or \code{NULL}: the values to be matched against. } } - - \value{ Logical vector, \code{TRUE} for each element of \code{x} \emph{absent} from \code{table}, and \code{FALSE} for each element of \code{x} \emph{present} in \code{table}. 
} - \seealso{ \code{\link[base]{match}}, \code{\link[data.table]{chmatch}} } - - \examples{ 11 \%notin\% 1:10 # TRUE "a" \%notin\% c("a", "b") # FALSE -} + ## NAs on the LHS + NA \%in\% 1:2 + NA \%notin\% 1:2 + ## NAs on the RHS + NA \%in\% c(1:2,NA) + NA \%notin\% c(1:2,NA) +} From 05a1be88a2b03cef336490a8963c6c9ed85ed154 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 8 Dec 2023 16:09:13 +0100 Subject: [PATCH 54/88] add case for missing values NA (all types) (#5423) --- NEWS.md | 2 +- inst/tests/tests.Rraw | 20 ++++++++++---------- src/idatetime.c | 6 ++++++ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/NEWS.md b/NEWS.md index a9ac6e8a9..a1fee2ac6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -290,7 +290,7 @@ # 2: 2 10 ``` -40. New functions `yearmon()` and `yearqtr` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR. +40. New functions `yearmon()` and `yearqtr` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR. Thanks to @berg-michael for testing dev and filing a bug report for special case of missing values which was fixed before release. 41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. 
`%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a5c0ce3b0..825a7e73f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18073,16 +18073,16 @@ test(2235.1, copy(DT)[, c("z", "x") := {x = NULL; list(2, NULL)}], data.table(z test(2235.2, copy(DT)[, c("z", "x") := {list(2, NULL)}], data.table(z = 2)) # move IDate from POSIXlt to C, add yearquarter; #649 -x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "2020-02-29", "2020-03-01", "2020-12-31", "2040-01-01", "2040-12-31", "2100-03-01") -test(2236.1, yday(x), c(315L, 1L, 59L, 60L, 365L, 60L, 61L, 366L, 1L, 366L, 60L)) -test(2236.2, mday(x), c(11L, 1L, 28L, 1L, 31L, 29L, 1L, 31L, 1L, 31L, 1L)) -test(2236.3, wday(x), c(7L, 3L, 5L, 6L, 3L, 7L, 1L, 5L, 1L, 2L, 2L)) -test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L)) -test(2236.5, month(x), c(11L, 1L, 2L, 3L, 12L, 2L, 3L, 12L, 1L, 12L, 3L)) -test(2236.6, quarter(x), c(4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 1L)) -test(2236.7, year(x), c(1111L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2040L, 2040L, 2100L)) -test(2236.8, yearmon(x), c(1111+10/12, 2019, 2019+1/12, 2019+2/12, 2019+11/12, 2020+1/12, 2020+2/12, 2020+11/12, 2040, 2040+11/12, 2100+2/12)) -test(2236.9, yearqtr(x), c(1111.75, 2019, 2019, 2019, 2019.75, 2020, 2020, 2020.75, 2040, 2040.75, 2100)) +x = c("1111-11-11", "2019-01-01", "2019-02-28", "2019-03-01", "2019-12-31", "2020-02-29", "2020-03-01", "2020-12-31", "2040-01-01", "2040-12-31", "2100-03-01", NA) +test(2236.1, yday(x), c(315L, 1L, 59L, 60L, 365L, 60L, 61L, 366L, 1L, 366L, 60L, NA)) +test(2236.2, mday(x), c(11L, 1L, 28L, 1L, 31L, 29L, 1L, 31L, 1L, 31L, 1L, 
NA)) +test(2236.3, wday(x), c(7L, 3L, 5L, 6L, 3L, 7L, 1L, 5L, 1L, 2L, 2L, NA)) +test(2236.4, week(x), c(46L, 1L, 9L, 9L, 53L, 9L, 9L, 53L, 1L, 53L, 9L, NA)) +test(2236.5, month(x), c(11L, 1L, 2L, 3L, 12L, 2L, 3L, 12L, 1L, 12L, 3L, NA)) +test(2236.6, quarter(x), c(4L, 1L, 1L, 1L, 4L, 1L, 1L, 4L, 1L, 4L, 1L, NA)) +test(2236.7, year(x), c(1111L, 2019L, 2019L, 2019L, 2019L, 2020L, 2020L, 2020L, 2040L, 2040L, 2100L, NA)) +test(2236.8, yearmon(x), c(1111+10/12, 2019, 2019+1/12, 2019+2/12, 2019+11/12, 2020+1/12, 2020+2/12, 2020+11/12, 2040, 2040+11/12, 2100+2/12, NA)) +test(2236.9, yearqtr(x), c(1111.75, 2019, 2019, 2019, 2019.75, 2020, 2020, 2020.75, 2040, 2040.75, 2100, NA)) # as.data.table() no longer ignores row.names=, #5319 dt = data.table(a=1:2, b=3:4) diff --git a/src/idatetime.c b/src/idatetime.c index c70df3b05..c25e9ec9c 100644 --- a/src/idatetime.c +++ b/src/idatetime.c @@ -16,6 +16,12 @@ void convertSingleDate(int x, datetype type, void *out) static const char months[] = {31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31, 29}; static const int quarter[] = {31, 91, 92, 92, 60}; + if (x == NA_INTEGER) { + if (type == YEARMON || type == YEARQTR) *(double *)out = NA_REAL; + else *(int *)out = NA_INTEGER; + return; + } + if (type == WDAY) { int wday = (x + 4) % 7; if (wday < 0) wday += 7; From 1b130efafebd362b14b9bbd5520e8723b333c27f Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 8 Dec 2023 16:11:35 +0100 Subject: [PATCH 55/88] rbindlist segfault for fill=TRUE and usenames=FALSE (#5468) * add fix for fill=TRUE and usenames=FALSE --- NEWS.md | 8 ++++---- inst/tests/tests.Rraw | 5 ++++- src/rbindlist.c | 16 +++++++++------- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/NEWS.md b/NEWS.md index a1fee2ac6..28aba0f96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -205,7 +205,7 @@ # v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100 ``` -31. 
`rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.` +31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release. ```R DT1 @@ -249,7 +249,7 @@ # 3: 3 NA # 4: 4 NA ``` - + 32. `fread()` already made a good guess as to whether column names are present by comparing the type of the fields in row 1 to the type of the fields in the sample. This guess is now improved when a column contains a string in row 1 (i.e. a potential column name) but all blank in the sample rows, [#2526](https://github.com/Rdatatable/data.table/issues/2526). Thanks @st-pasha for reporting, and @ben-schwen for the PR. 33. `fread()` can now read `.zip` and `.tar` directly, [#3834](https://github.com/Rdatatable/data.table/issues/3834). Moreover, if a compressed file name is missing its extension, `fread()` now attempts to infer the correct filetype from its magic bytes. Thanks to Michael Chirico for the idea, and Benjamin Schwendinger for the PR. @@ -265,7 +265,7 @@ # 1: 1 3 a # 2: 2 4 b ``` - + 35. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR. 36. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. 
Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix. @@ -282,7 +282,7 @@ # # 1: 3 5 # 2: 4 6 - + DT[, sum(.SD), by=.I] # I V1 # diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 825a7e73f..8015439f5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14336,7 +14336,10 @@ test(2003.3, rbindlist(list(data.table(a=1:2), data.table(b=3:4)), fill=TRUE, us test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TRUE, use.names=FALSE), data.table(a=c(1:4), c=INT(5,6,NA,NA))) test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE), - data.table(a=c(1:4), V1=INT(NA,NA,5,6))) + data.table(a=c(1:4), c=INT(NA,NA,5,6))) +# rbindlist segfault with fill=TRUE and usenames=FALSE #5444 +test(2003.6, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3))) +test(2003.7, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3)))) # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111 x1 = "fa\xE7ile" diff --git a/src/rbindlist.c b/src/rbindlist.c index 2ffff3af8..ba19d2c38 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -282,7 +282,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) for (int i=0; i Date: Fri, 8 Dec 2023 08:12:33 -0700 Subject: [PATCH 56/88] add CODEOWNERS file (#5629) * add CODEOWNERS * Add jangorecki to codeowners * Add new env arg files * add ben-schwen to codeowners * add commented sections, vignette * comments * Set @michaelchirico ownership for some file * co-owner for shift & IDateTime * Also printing --------- Co-authored-by: Jan Gorecki Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Co-authored-by: Michael Chirico --- CODEOWNERS | 
45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 CODEOWNERS diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..5d98e0242 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,45 @@ +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +* @mattdowle + +# melt +/R/fmelt.R @tdhock +/src/fmelt.c @tdhock +/man/melt.data.table.Rd @tdhock +/vignettes/datatable-reshape.Rmd @tdhock + +# rolling statistics +/R/froll.R @jangorecki +/man/froll.Rd @jangorecki +/src/froll.c @jangorecki +/src/frollR.c @jangorecki +/src/frolladaptive.c @jangorecki + +# meta-programming +/R/programming.R @jangorecki +/man/substitute2.Rd @jangorecki +/src/programming.c @jangorecki +/vignettes/datatable-programming.Rmd @jangorecki + +# GForce groupby +/src/gsumm.c @ben-schwen +# datetime classes +/R/IDateTime.R @ben-schwen @michaelchirico +/src/idatetime.c @ben-schwen @michaelchirico +/man/IDateTime.Rd @ben-schwen @michaelchirico + +# shift +/R/shift.R @ben-schwen @michaelchirico +/src/shift.c @ben-schwen @michaelchirico +/man/shift.Rd @ben-schwen @michaelchirico + +# translations +/inst/po/ @michaelchirico +/po/ @michaelchirico +/R/translation.R @michaelchirico +/src/po.h @michaelchirico + +# printing +/R/print.data.table.R @michaelchirico + +# .SD vignette +/vignettes/datatable-sd-usage.Rmd @michaelchirico From 2a554646ea13a66ed98afd2f83bda42e3058653c Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 16:36:34 +0100 Subject: [PATCH 57/88] update maintainer (#5724) --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8942c48d1..a59298fcb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,8 @@ BugReports: https://github.com/Rdatatable/data.table/issues VignetteBuilder: knitr ByteCompile: TRUE Authors@R: c( - person("Matt","Dowle", role=c("aut","cre"), 
email="mattjdowle@gmail.com"), + person("Tyson","Barrett", role=c("aut","cre"), email="t.barrett88@gmail.com"), + person("Matt","Dowle", role="aut", email="mattjdowle@gmail.com"), person("Arun","Srinivasan", role="aut", email="asrini@pm.me"), person("Jan","Gorecki", role="ctb"), person("Michael","Chirico", role="ctb"), @@ -59,7 +60,6 @@ Authors@R: c( person("Davis","Vaughan", role="ctb"), person("Toby","Hocking", role="ctb"), person("Leonardo","Silvestri", role="ctb"), - person("Tyson","Barrett", role="ctb"), person("Jim","Hester", role="ctb"), person("Anthony","Damico", role="ctb"), person("Sebastian","Freundt", role="ctb"), From 3ad0e8e5c67dfb4a9ff72990d35fce2adcccda88 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 16:49:02 +0100 Subject: [PATCH 58/88] update CRAN release procedure for less deps (#5810) --- .dev/CRAN_Release.cmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index 6134d923e..94a4a17ec 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -350,7 +350,7 @@ Rdevel-strict-[gcc|clang] CMD check data.table_1.14.99.tar.gz Rdevel-strict-[gcc|clang] isTRUE(.Machine$sizeof.longdouble==0) # check noLD is being tested options(repos = "http://cloud.r-project.org") -install.packages(c("bit64", "bit", "curl", "R.utils", "xts","nanotime", "zoo", "yaml", "knitr", "rmarkdown", "markdown"), +install.packages(c("bit64", "bit", "R.utils", "xts", "zoo", "yaml", "knitr", "markdown"), Ncpus=4) # Issue #5491 showed that CRAN is running UBSAN on .Rd examples which found an error so we now run full R CMD check q("no") @@ -372,7 +372,7 @@ print(Sys.time()); started.at<-proc.time(); try(test.data.table()); print(Sys.ti ## apt-get update ## apt-get install libc6:i386 libstdc++6:i386 gcc-multilib g++-multilib gfortran-multilib libbz2-dev:i386 liblzma-dev:i386 libpcre3-dev:i386 libcurl3-dev:i386 libstdc++-7-dev:i386 ## sudo apt-get purge libcurl4-openssl-dev # cannot coexist, it 
seems -## sudo apt-get install libcurl4-openssl-dev:i386 +## sudo apt-get install libcurl4-openssl-dev:i386 ## may not be needed anymore as we dropped dependency on curl, try and update when reproducing ## cd ~/build/32bit/R-devel ## ./configure --without-recommended-packages --disable-byte-compiled-packages --disable-openmp --without-readline --without-x CC="gcc -m32" CXX="g++ -m32" F77="gfortran -m32" FC=${F77} OBJC=${CC} LDFLAGS="-L/usr/local/lib" LIBnn=lib LIBS="-lpthread" CFLAGS="-O0 -g -Wall -pedantic" ## From 6bde0083f4274c55b296abdb0305c3f634b47e8f Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 17:05:33 +0100 Subject: [PATCH 59/88] unexport and undocument DT(), closes #5472 (#5730) * unexport and undocument DT(), closes #5472 * handle DT() in tests * Michael feedback --- NAMESPACE | 2 +- NEWS.md | 47 ++++++++++++++++++------------------------- inst/tests/tests.Rraw | 6 +++++- man/data.table.Rd | 8 -------- 4 files changed, 26 insertions(+), 37 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ef0aa2d17..ac5415082 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,7 +57,7 @@ export(setnafill) export(.Last.updated) export(fcoalesce) export(substitute2) -export(DT) # mtcars |> DT(i,j,by) #4872 +#export(DT) # mtcars |> DT(i,j,by) #4872 #5472 S3method("[", data.table) export("[.data.table") # so that functional DT() finds it; PR#5176 diff --git a/NEWS.md b/NEWS.md index 28aba0f96..bf4250b16 100644 --- a/NEWS.md +++ b/NEWS.md @@ -107,15 +107,8 @@ 21. `melt()` was pseudo generic in that `melt(DT)` would dispatch to the `melt.data.table` method but `melt(not-DT)` would explicitly redirect to `reshape2`. Now `melt()` is standard generic so that methods can be developed in other packages, [#4864](https://github.com/Rdatatable/data.table/pull/4864). Thanks to @odelmarcelle for suggesting and implementing. -22. `DT(i, j, by, ...)` has been added, i.e. 
functional form of a `data.table` query, [#641](https://github.com/Rdatatable/data.table/issues/641) [#4872](https://github.com/Rdatatable/data.table/issues/4872). Thanks to Yike Lu and Elio Campitelli for filing requests, many others for comments and suggestions, and Matt Dowle for the PR. This enables the `data.table` general form query to be invoked on a `data.frame` without converting it to a `data.table` first. The class of the input object is retained. Thanks to Mark Fairbanks and Boniface Kamgang for testing and reporting problems that have been fixed before release, [#5106](https://github.com/Rdatatable/data.table/issues/5106) [#5107](https://github.com/Rdatatable/data.table/issues/5107). - ```R - mtcars |> DT(mpg>20, .(mean_hp=mean(hp)), by=cyl) - ``` - - When `data.table` queries (either `[...]` or `|> DT(...)`) receive a `data.table`, the operations maintain `data.table`'s attributes such as its key and any indices. For example, if a `data.table` is reordered by `data.table`, or a key column has a value changed by `:=` in `data.table`, its key and indices will either be dropped or reordered appropriately. Some `data.table` operations automatically add and store an index on a `data.table` for reuse in future queries, if `options(datatable.auto.index=TRUE)`, which is `TRUE` by default. `data.table`'s are also over-allocated, which means there are spare column pointer slots allocated in advance so that a `data.table` in the `.GlobalEnv` can have a column added to it truly by reference, like an in-memory database with multiple client sessions connecting to one server R process, as a `data.table` video has shown in the past. But because R and other packages don't maintain `data.table`'s attributes or over-allocation (e.g. 
a subset or reorder by R or another package will create invalid `data.table` attributes) `data.table` cannot use these attributes when it detects that base R or another package has touched the `data.table` in the meantime, even if the attributes may sometimes still be valid. So, please realize that, `DT()` on a `data.table` should realize better speed and memory usage than `DT()` on a `data.frame`. `DT()` on a `data.frame` may still be useful to use `data.table`'s syntax (e.g. sub-queries within group: `|> DT(i, .SD[sub-query], by=grp)`) without needing to convert to a `data.table` first. - -23. `DT[i, nomatch=NULL]` where `i` contains row numbers now excludes `NA` and any outside the range [1,nrow], [#3109](https://github.com/Rdatatable/data.table/issues/3109) [#3666](https://github.com/Rdatatable/data.table/issues/3666). Before, `NA` rows were returned always for such values; i.e. `nomatch=0|NULL` was ignored. Thanks Michel Lang and Hadley Wickham for the requests, and Jan Gorecki for the PR. Using `nomatch=0` in this case when `i` is row numbers generates the warning `Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)`. +22. `DT[i, nomatch=NULL]` where `i` contains row numbers now excludes `NA` and any outside the range [1,nrow], [#3109](https://github.com/Rdatatable/data.table/issues/3109) [#3666](https://github.com/Rdatatable/data.table/issues/3666). Before, `NA` rows were returned always for such values; i.e. `nomatch=0|NULL` was ignored. Thanks Michel Lang and Hadley Wickham for the requests, and Jan Gorecki for the PR. Using `nomatch=0` in this case when `i` is row numbers generates the warning `Please use nomatch=NULL instead of nomatch=0; see news item 5 in v1.12.0 (Jan 2019)`. ```R DT = data.table(A=1:3) @@ -133,13 +126,13 @@ # 2: 3 ``` -24. 
`DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR. +23. `DT[, head(.SD,n), by=grp]` and `tail` are now optimized when `n>1`, [#5060](https://github.com/Rdatatable/data.table/issues/5060) [#523](https://github.com/Rdatatable/data.table/issues/523#issuecomment-162934391). `n==1` was already optimized. Thanks to Jan Gorecki and Michael Young for requesting, and Benjamin Schwendinger for the PR. -25. `setcolorder()` gains `before=` and `after=`, [#4385](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. +24. `setcolorder()` gains `before=` and `after=`, [#4385](https://github.com/Rdatatable/data.table/issues/4358). Thanks to Matthias Gomolka for the request, and both Benjamin Schwendinger and Xianghui Dong for implementing. -26. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, Boniface Kamgang and Martin Binder for testing, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added. +25. `base::droplevels()` gains a fast method for `data.table`, [#647](https://github.com/Rdatatable/data.table/issues/647). Thanks to Steve Lianoglou for requesting, Boniface Kamgang and Martin Binder for testing, and Jan Gorecki and Benjamin Schwendinger for the PR. `fdroplevels()` for use on vectors has also been added. -27. `shift()` now also supports `type="cyclic"`, [#4451](https://github.com/Rdatatable/data.table/issues/4451). 
Arguments that are normally pushed out by `type="lag"` or `type="lead"` are re-introduced at this type at the first/last positions. Thanks to @RicoDiel for requesting, and Benjamin Schwendinger for the PR. +26. `shift()` now also supports `type="cyclic"`, [#4451](https://github.com/Rdatatable/data.table/issues/4451). Arguments that are normally pushed out by `type="lag"` or `type="lead"` are re-introduced at this type at the first/last positions. Thanks to @RicoDiel for requesting, and Benjamin Schwendinger for the PR. ```R # Usage @@ -167,11 +160,11 @@ # c(tail(x, 1), head(x, -1)) 6.96 7.16 7.49 7.32 7.64 8.60 10 ``` -28. `fread()` now supports "0" and "1" in `na.strings`, [#2927](https://github.com/Rdatatable/data.table/issues/2927). Previously this was not permitted since "0" and "1" can be recognized as boolean values. Note that it is still not permitted to use "0" and "1" in `na.strings` in combination with `logical01 = TRUE`. Thanks to @msgoussi for the request, and Benjamin Schwendinger for the PR. +27. `fread()` now supports "0" and "1" in `na.strings`, [#2927](https://github.com/Rdatatable/data.table/issues/2927). Previously this was not permitted since "0" and "1" can be recognized as boolean values. Note that it is still not permitted to use "0" and "1" in `na.strings` in combination with `logical01 = TRUE`. Thanks to @msgoussi for the request, and Benjamin Schwendinger for the PR. -29. `setkey()` now supports type `raw` as value columns (not as key columns), [#5100](https://github.com/Rdatatable/data.table/issues/5100). Thanks Hugh Parsonage for requesting, and Benjamin Schwendinger for the PR. +28. `setkey()` now supports type `raw` as value columns (not as key columns), [#5100](https://github.com/Rdatatable/data.table/issues/5100). Thanks Hugh Parsonage for requesting, and Benjamin Schwendinger for the PR. -30. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). 
Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. +29. `shift()` is now optimised by group, [#1534](https://github.com/Rdatatable/data.table/issues/1534). Thanks to Gerhard Nachtmann for requesting, and Benjamin Schwendinger for the PR. ```R N = 1e7 @@ -205,7 +198,7 @@ # v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100 ``` -31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release. +30. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release. ```R DT1 @@ -250,11 +243,11 @@ # 4: 4 NA ``` -32. `fread()` already made a good guess as to whether column names are present by comparing the type of the fields in row 1 to the type of the fields in the sample. This guess is now improved when a column contains a string in row 1 (i.e. a potential column name) but all blank in the sample rows, [#2526](https://github.com/Rdatatable/data.table/issues/2526). Thanks @st-pasha for reporting, and @ben-schwen for the PR. +31. `fread()` already made a good guess as to whether column names are present by comparing the type of the fields in row 1 to the type of the fields in the sample. This guess is now improved when a column contains a string in row 1 (i.e. a potential column name) but all blank in the sample rows, [#2526](https://github.com/Rdatatable/data.table/issues/2526). Thanks @st-pasha for reporting, and @ben-schwen for the PR. -33. 
`fread()` can now read `.zip` and `.tar` directly, [#3834](https://github.com/Rdatatable/data.table/issues/3834). Moreover, if a compressed file name is missing its extension, `fread()` now attempts to infer the correct filetype from its magic bytes. Thanks to Michael Chirico for the idea, and Benjamin Schwendinger for the PR. +32. `fread()` can now read `.zip` and `.tar` directly, [#3834](https://github.com/Rdatatable/data.table/issues/3834). Moreover, if a compressed file name is missing its extension, `fread()` now attempts to infer the correct filetype from its magic bytes. Thanks to Michael Chirico for the idea, and Benjamin Schwendinger for the PR. -34. `DT[, let(...)]` is a new alias for the functional form of `:=`; i.e. `DT[, ':='(...)]`, [#3795](https://github.com/Rdatatable/data.table/issues/3795). Thanks to Elio Campitelli for requesting, and Benjamin Schwendinger for the PR. +33. `DT[, let(...)]` is a new alias for the functional form of `:=`; i.e. `DT[, ':='(...)]`, [#3795](https://github.com/Rdatatable/data.table/issues/3795). Thanks to Elio Campitelli for requesting, and Benjamin Schwendinger for the PR. ```R DT = data.table(A=1:2) @@ -266,15 +259,15 @@ # 2: 2 4 b ``` -35. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR. +34. `weighted.mean()` is now optimised by group, [#3977](https://github.com/Rdatatable/data.table/issues/3977). Thanks to @renkun-ken for requesting, and Benjamin Schwendinger for the PR. -36. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. 
Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix. +35. `as.xts.data.table()` now supports non-numeric xts coredata matrixes, [5268](https://github.com/Rdatatable/data.table/issues/5268). Existing numeric only functionality is supported by a new `numeric.only` parameter, which defaults to `TRUE` for backward compatability and the most common use case. To convert non-numeric columns, set this parameter to `FALSE`. Conversions of `data.table` columns to a `matrix` now uses `data.table::as.matrix`, with all its performance benefits. Thanks to @ethanbsmith for the report and fix. -37. `unique.data.table()` gains `cols` to specify a subset of columns to include in the resulting `data.table`, [#5243](https://github.com/Rdatatable/data.table/issues/5243). This saves the memory overhead of subsetting unneeded columns, and provides a cleaner API for a common operation previously needing more convoluted code. Thanks to @MichaelChirico for the suggestion & implementation. +36. `unique.data.table()` gains `cols` to specify a subset of columns to include in the resulting `data.table`, [#5243](https://github.com/Rdatatable/data.table/issues/5243). This saves the memory overhead of subsetting unneeded columns, and provides a cleaner API for a common operation previously needing more convoluted code. Thanks to @MichaelChirico for the suggestion & implementation. -38. `:=` is now optimized by group, [#1414](https://github.com/Rdatatable/data.table/issues/1414). Thanks to Arun Srinivasan for suggesting, and Benjamin Schwendinger for the PR. Thanks to @clerousset, @dcaseykc, @OfekShilon, and @SeanShao98 for testing dev and filing detailed bug reports which were fixed before release and their tests added to the test suite. +37. `:=` is now optimized by group, [#1414](https://github.com/Rdatatable/data.table/issues/1414). 
Thanks to Arun Srinivasan for suggesting, and Benjamin Schwendinger for the PR. Thanks to @clerousset, @dcaseykc, @OfekShilon, and @SeanShao98 for testing dev and filing detailed bug reports which were fixed before release and their tests added to the test suite. -39. `.I` is now available in `by` for rowwise operations, [#1732](https://github.com/Rdatatable/data.table/issues/1732). Thanks to Rafael H. M. Pereira for requesting, and Benjamin Schwendinger for the PR. +38. `.I` is now available in `by` for rowwise operations, [#1732](https://github.com/Rdatatable/data.table/issues/1732). Thanks to Rafael H. M. Pereira for requesting, and Benjamin Schwendinger for the PR. ```R DT @@ -290,11 +283,11 @@ # 2: 2 10 ``` -40. New functions `yearmon()` and `yearqtr` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR. Thanks to @berg-michael for testing dev and filing a bug report for special case of missing values which was fixed before release. +39. New functions `yearmon()` and `yearqtr` give a combined representation of `year()` and `month()`/`quarter()`. These and also `yday`, `wday`, `mday`, `week`, `month` and `year` are now optimized for memory and compute efficiency by removing the `POSIXlt` dependency, [#649](https://github.com/Rdatatable/data.table/issues/649). Thanks to Matt Dowle for the request, and Benjamin Schwendinger for the PR. Thanks to @berg-michael for testing dev and filing a bug report for special case of missing values which was fixed before release. -41. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). 
Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. +40. New function `%notin%` provides a convenient alternative to `!(x %in% y)`, [#4152](https://github.com/Rdatatable/data.table/issues/4152). Thanks to Jan Gorecki for suggesting and Michael Czekanski for the PR. `%notin%` uses half the memory because it computes the result directly as opposed to `!` which allocates a new vector to hold the negated result. If `x` is long enough to occupy more than half the remaining free memory, this can make the difference between the operation working, or failing with an out-of-memory error. -42. `tables()` is faster by default by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). `mb=` now accepts any function which accepts a `data.table` and returns a higher and better estimate of its size in bytes, albeit more slowly; e.g. `mb = utils::object.size`. +41. `tables()` is faster by default by excluding the size of character strings in R's global cache (which may be shared) and excluding the size of list column items (which also may be shared). `mb=` now accepts any function which accepts a `data.table` and returns a higher and better estimate of its size in bytes, albeit more slowly; e.g. `mb = utils::object.size`. 
## BUG FIXES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8015439f5..98d81fe2b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8,6 +8,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { if ((tt<-compiler::enableJIT(-1))>0) cat("This is dev mode and JIT is enabled (level ", tt, ") so there will be a brief pause around the first test.\n", sep="") rm_all = function() {} + DTfun = DT ## otherwise DT would be re-defined by many tests } else { require(data.table) # Make symbols to the installed version's ::: so that we can i) test internal-only not-exposed R functions @@ -32,6 +33,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { compactprint = data.table:::compactprint cube.data.table = data.table:::cube.data.table dcast.data.table = data.table:::dcast.data.table + DTfun = data.table:::DT endsWith = data.table:::endsWith endsWithAny = data.table:::endsWithAny forder = data.table:::forder @@ -349,7 +351,7 @@ test(83, TESTDT[,list("a","b")], data.table(V1="a",V2="b")) test(83.1, TESTDT[,list("sum(a),sum(b)")], data.table("sum(a),sum(b)")) test(83.2, TESTDT[,list("sum(a),sum(b)"),by=a], {tt=data.table(a=c("a","c","d","g"),V1="sum(a),sum(b)",key="a");tt$V1=as.character(tt$V1);tt}) test(84, TESTDT[1:2,list(a,b)], data.table(a=c("a","c"), b=c("e","e"), key = 'a,b')) -# test(85, TESTDT[1:2,DT(a,b)], data.table(a=c("a","c"), b=c("e","e"))) #DT() now deprecated +# test(85, TESTDT[1:2,DT(a,b)], data.table(a=c("a","c"), b=c("e","e"))) #DT() now deprecated ## this is very old DT() functionality, completely different than DT() discussed in 2023 test(86, TESTDT[,sum(v),by="b"], data.table(b=c("e","f","i","b"),V1=INT(3,7,11,7))) # TESTDT is key'd by a,b, so correct that grouping by b should not be key'd in the result by default test(87, TESTDT[,list(MySum=sum(v)),by="b"], data.table(b=c("e","f","i","b"),MySum=INT(3,7,11,7))) @@ -17587,6 +17589,7 @@ for (col in c("a","b","c")) { # DT() functional form, #4872 #5106 
#5107 #5129 if (base::getRversion() >= "4.1.0") { + DT = DTfun # we have to EVAL "|>" here too otherwise this tests.Rraw file won't parse in R<4.1.0 droprn = function(df) { rownames(df)=NULL; df } # TODO: could retain rownames where droprn is currently used below test(2212.011, EVAL("mtcars |> DT(mpg>20, .(mean_hp=round(mean(hp),2)), by=cyl)"), @@ -17638,6 +17641,7 @@ if (base::getRversion() >= "4.1.0") { test(2212.52, EVAL("D |> DT(D[, .I[which.max(mpg)], by=cyl]$V1)"), ans) test(2212.53, EVAL("filter |> DT(filter[, .I[which.max(mpg)], by=cyl]$V1)"), error="unused.*argument.*by.*cyl") # R's [.data.frame error on filter[...] test(2212.54, EVAL("filter |> DT((filter |> DT(, .I[which.max(mpg)], by=cyl))$V1)"), as.data.frame(ans)) + rm(DT) } # precision powers of 10^(-n), #4461 diff --git a/man/data.table.Rd b/man/data.table.Rd index 4f8d402fc..b8011b422 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -5,7 +5,6 @@ \alias{Ops.data.table} \alias{is.na.data.table} \alias{[.data.table} -\alias{DT} \alias{.} \alias{.(} \alias{.()} @@ -435,13 +434,6 @@ dev.off() # using rleid, get max(y) and min of all cols in .SDcols for each consecutive run of 'v' DT[, c(.(y=max(y)), lapply(.SD, min)), by=rleid(v), .SDcols=v:b] -# functional query DT(...) 
-\dontshow{ #dontrun to pass R CMD check prior to R 4.1.0 when |> was added - # an if getRVersion()>"4.1.0" still has its code parsed } -\dontrun{ -mtcars |> DT(mpg>20, .(mean_hp=mean(hp)), by=cyl) -} - # Support guide and links: # https://github.com/Rdatatable/data.table/wiki/Support From a40ec8ed8aa9bdab8cfc46598b91c2062978d9f2 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 17:23:07 +0100 Subject: [PATCH 60/88] shift on matrix: news and improve error (#5462) * news and improve error * Michael feedback, actionable error --- NEWS.md | 4 ++++ inst/tests/tests.Rraw | 3 +++ src/shift.c | 2 ++ src/utils.c | 2 +- 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index bf4250b16..1cfd582f7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,6 +2,10 @@ # data.table [v1.14.99](https://github.com/Rdatatable/data.table/milestone/20) (in development) +## BREAKING CHANGE + +1. `shift` and `nafill` will now raise the error `input must not be matrix or array` when a `matrix` or `array` is provided as input, rather than giving a useless result, [#5287](https://github.com/Rdatatable/data.table/issues/5287). Thanks to @ethanbsmith for reporting. + ## NEW FEATURES 1. `nafill()` now applies `fill=` to the front/back of the vector when `type="locf|nocb"`, [#3594](https://github.com/Rdatatable/data.table/issues/3594). Thanks to @ben519 for the feature request. It also now returns a named object based on the input names. Note that if you are considering joining and then using `nafill(...,type='locf|nocb')` afterwards, please review `roll=`/`rollends=` which should achieve the same result in one step more efficiently. `nafill()` is for when filling-while-joining (i.e. `roll=`/`rollends=`/`nomatch=`) cannot be applied.
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 98d81fe2b..8eeb8f7ee 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18108,3 +18108,6 @@ test(2238.6, "a" %notin% integer(), TRUE) test(2238.7, "a" %notin% NULL, TRUE) test(2238.8, NA %notin% 1:5, TRUE) test(2238.9, NA %notin% c(1:5, NA), FALSE) + +# shift actionable error on matrix input #5287 +test(2239.1, shift(matrix(1:10, ncol = 1)), error="consider wrapping") diff --git a/src/shift.c b/src/shift.c index dba598fe5..30c13a547 100644 --- a/src/shift.c +++ b/src/shift.c @@ -8,6 +8,8 @@ SEXP shift(SEXP obj, SEXP k, SEXP fill, SEXP type) if (!xlength(obj)) return(obj); // NULL, list() SEXP x; if (isVectorAtomic(obj)) { + if (!isNull(getAttrib(obj, R_DimSymbol))) + error(_("shift input must not be matrix or array, consider wrapping it into data.table() or c()")); x = PROTECT(allocVector(VECSXP, 1)); nprotect++; SET_VECTOR_ELT(x, 0, obj); } else { diff --git a/src/utils.c b/src/utils.c index 3dfd8bcc6..e5e343ac9 100644 --- a/src/utils.c +++ b/src/utils.c @@ -348,7 +348,7 @@ SEXP coerceAs(SEXP x, SEXP as, SEXP copyArg) { if (!isNull(getAttrib(x, R_DimSymbol))) error(_("'x' must not be matrix or array")); if (!isNull(getAttrib(as, R_DimSymbol))) - error(_("'as' must not be matrix or array")); + error(_("input must not be matrix or array")); bool verbose = GetVerbose()>=2; // verbose level 2 required if (!LOGICAL(copyArg)[0] && TYPEOF(x)==TYPEOF(as) && class1(x)==class1(as)) { if (verbose) From 2800a616ea4dfd1b9c5ac2a0911f0cd140a6f239 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 17:29:04 +0100 Subject: [PATCH 61/88] add CODEOWNERS to Rbuildignore (#5811) --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 08508569d..22a3a807f 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -23,6 +23,7 @@ ^NEWS\.0\.md$ ^_pkgdown\.yml$ ^src/Makevars$ +^CODEOWNERS$ ^\.RData$ ^\.Rhistory$ From 9b3b251d973a84c3304e0011ea1727faa7eb9f40 
Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 20:44:39 +0100 Subject: [PATCH 62/88] Pull 1.14.10 into master (#5814) * updated NEWS and urls fixed url issues and added final details for patch release in news * add method for IDate Added `S3method(as.IDate, IDate)`. This is related to #4777 as discussed in NEWS.md. * Add `setDTthreads(1)` to vignettes To reduce runtime on building vignettes. * reset setDTthreads at end of vignettes * reset threads at end of vignettes --------- Co-authored-by: Tyson Barrett --- NAMESPACE | 1 + NEWS.md | 19 ++++++++++++++++--- README.md | 2 +- vignettes/datatable-faq.Rmd | 4 ++++ vignettes/datatable-intro.Rmd | 4 ++++ vignettes/datatable-keys-fast-subset.Rmd | 6 ++++++ vignettes/datatable-reference-semantics.Rmd | 5 +++++ vignettes/datatable-reshape.Rmd | 5 +++++ vignettes/datatable-sd-usage.Rmd | 5 +++++ ...le-secondary-indices-and-auto-indexing.Rmd | 6 ++++++ 10 files changed, 53 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ac5415082..75b490068 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -158,6 +158,7 @@ S3method(as.IDate, Date) S3method(as.IDate, POSIXct) S3method(as.IDate, default) S3method(as.IDate, numeric) +S3method(as.IDate, IDate) S3method(as.ITime, character) S3method(as.ITime, default) S3method(as.ITime, POSIXct) diff --git a/NEWS.md b/NEWS.md index 1cfd582f7..513ac9bc5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ **If you are viewing this file on CRAN, please check [latest news on GitHub](https://github.com/Rdatatable/data.table/blob/master/NEWS.md) where the formatting is also better.** -# data.table [v1.14.99](https://github.com/Rdatatable/data.table/milestone/20) (in development) +# data.table [v1.14.99](https://github.com/Rdatatable/data.table/milestone/29) (in development) ## BREAKING CHANGE @@ -610,6 +610,19 @@ 15. Thanks to @ssh352, Václav Tlapák, Cole Miller, András Svraka and Toby Dylan Hocking for reporting and bisecting a significant performance regression in dev. 
This was fixed before release thanks to a PR by Jan Gorecki, [#5463](https://github.com/Rdatatable/data.table/pull/5463). +# data.table [v1.14.10](https://github.com/Rdatatable/data.table/milestone/20?closed=1) (8 Dec 2023) + +## NOTES + +1. Maintainer of the package for CRAN releases is from now on Tyson Barrett (@tysonstanley), [#5710](https://github.com/Rdatatable/data.table/issues/5710). + +2. Updated internal code for breaking change of `is.atomic(NULL)` in R-devel, [#5691](https://github.com/Rdatatable/data.table/pull/5691). Thanks to Martin Maechler for the patch. + +3. Fix multiple tests concerning coercion to missing complex numbers, [#5695](https://github.com/Rdatatable/data.table/issues/5695) and [#5748](https://github.com/Rdatatable/data.table/issues/5748). Thanks to @MichaelChirico and @ben-schwen for the patches. + +4. Fix multiple format warnings (e.g., -Wformat) [#5712](https://github.com/Rdatatable/data.table/pull/5712), [#5781](https://github.com/Rdatatable/data.table/pull/5781), [#5800](https://github.com/Rdatatable/data.table/pull/5800), [#5786](https://github.com/Rdatatable/data.table/pull/5786). Thanks to @MichaelChirico and @jangorecki for the patches. + + # data.table [v1.14.8](https://github.com/Rdatatable/data.table/milestone/28?closed=1) (17 Feb 2023) ## NOTES @@ -736,7 +749,7 @@ ## NOTES -1. Continuous daily testing by CRAN using latest daily R-devel revealed, within one day of the change to R-devel, that a future version of R would break one of our tests, [#4769](https://github.com/Rdatatable/data.table/issues/4769). The characters "-alike" were added into one of R's error messages, so our too-strict test which expected the error `only defined on a data frame with all numeric variables` will fail when it sees the new error message `only defined on a data frame with all numeric-alike variables`. We have relaxed the pattern the test looks for to `data.*frame.*numeric` well in advance of the future version of R being released.
Readers are reminded that CRAN is not just a host for packages. It is also a giant test suite for R-devel. For more information, [behind the scenes of cran, 2016](https://h2o.ai/blog/behind-the-scenes-of-cran/). +1. Continuous daily testing by CRAN using latest daily R-devel revealed, within one day of the change to R-devel, that a future version of R would break one of our tests, [#4769](https://github.com/Rdatatable/data.table/issues/4769). The characters "-alike" were added into one of R's error messages, so our too-strict test which expected the error `only defined on a data frame with all numeric variables` will fail when it sees the new error message `only defined on a data frame with all numeric-alike variables`. We have relaxed the pattern the test looks for to `data.*frame.*numeric` well in advance of the future version of R being released. Readers are reminded that CRAN is not just a host for packages. It is also a giant test suite for R-devel. For more information, [behind the scenes of cran, 2016](https://h2o.ai/blog/2016/behind-the-scenes-of-cran/). 2. `as.Date.IDate` is no longer exported as a function to solve a new error in R-devel `S3 method lookup found 'as.Date.IDate' on search path`, [#4777](https://github.com/Rdatatable/data.table/issues/4777). The S3 method is still exported; i.e. `as.Date(x)` will still invoke the `as.Date.IDate` method when `x` is class `IDate`. The function had been exported, in addition to exporting the method, to solve a compatibility issue with `zoo` (and `xts` which uses `zoo`) because `zoo` exports `as.Date` which masks `base::as.Date`. Happily, since zoo 1.8-1 (Jan 2018) made a change to its `as.IDate`, the workaround is no longer needed. @@ -1008,7 +1021,7 @@ has a better chance of working on Mac. 
* `colClasses` now supports `'complex'`, `'raw'`, `'Date'`, `'POSIXct'`, and user-defined classes (so long as an `as.` method exists), [#491](https://github.com/Rdatatable/data.table/issues/491) [#1634](https://github.com/Rdatatable/data.table/issues/1634) [#2610](https://github.com/Rdatatable/data.table/issues/2610). Any error during coercion results in a warning and the column is left as the default type (probably `"character"`). Thanks to @hughparsonage for the PR. * `stringsAsFactors=0.10` will factorize any character column containing under `0.10*nrow` unique strings, [#2025](https://github.com/Rdatatable/data.table/issues/2025). Thanks to @hughparsonage for the PR. * `colClasses=list(numeric=20:30, numeric="ID")` will apply the `numeric` type to column numbers `20:30` as before and now also column name `"ID"`; i.e. all duplicate class names are now respected rather than only the first. This need may arise when specifying some columns by name and others by number, as in this example. Thanks to @hughparsonage for the PR. - * gains `yaml` (default `FALSE`) and the ability to parse CSVY-formatted input files; i.e., csv files with metadata in a header formatted as YAML (https://csvy.org/), [#1701](https://github.com/Rdatatable/data.table/issues/1701). See `?fread` and files in `/inst/tests/csvy/` for sample formats. Please provide feedback if you find this feature useful and would like extended capabilities. For now, consider it experimental, meaning the API/arguments may change. Thanks to @leeper at [`rio`](https://github.com/leeper/rio) for the inspiration and @MichaelChirico for implementing. + * gains `yaml` (default `FALSE`) and the ability to parse CSVY-formatted input files; i.e., csv files with metadata in a header formatted as YAML (https://csvy.org/), [#1701](https://github.com/Rdatatable/data.table/issues/1701). See `?fread` and files in `/inst/tests/csvy/` for sample formats. 
Please provide feedback if you find this feature useful and would like extended capabilities. For now, consider it experimental, meaning the API/arguments may change. Thanks to @leeper at [`rio`](https://github.com/gesistsa/rio) for the inspiration and @MichaelChirico for implementing. * `select` can now be used to specify types for just the columns selected, [#1426](https://github.com/Rdatatable/data.table/issues/1426). Just like `colClasses` it can be a named vector of `colname=type` pairs, or a named `list` of `type=col(s)` pairs. For example: ```R diff --git a/README.md b/README.md index 8455602f1..562799db4 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![CRAN status](https://badges.cranchecks.info/flavor/release/data.table.svg)](https://cran.r-project.org/web/checks/check_results_data.table.html) [![R-CMD-check](https://github.com/Rdatatable/data.table/workflows/R-CMD-check/badge.svg)](https://github.com/Rdatatable/data.table/actions) [![AppVeyor build status](https://ci.appveyor.com/api/projects/status/kayjdh5qtgymhoxr/branch/master?svg=true)](https://ci.appveyor.com/project/Rdatatable/data-table) -[![Codecov test coverage](https://codecov.io/github/Rdatatable/data.table/coverage.svg?branch=master)](https://codecov.io/github/Rdatatable/data.table?branch=master) +[![Codecov test coverage](https://codecov.io/github/Rdatatable/data.table/coverage.svg?branch=master)](https://app.codecov.io/github/Rdatatable/data.table?branch=master) [![GitLab CI build status](https://gitlab.com/Rdatatable/data.table/badges/master/pipeline.svg)](https://gitlab.com/Rdatatable/data.table/-/pipelines) [![downloads](https://cranlogs.r-pkg.org/badges/data.table)](https://www.rdocumentation.org/trends) [![CRAN usage](https://jangorecki.gitlab.io/rdeps/data.table/CRAN_usage.svg?sanitize=true)](https://gitlab.com/jangorecki/rdeps) diff --git a/vignettes/datatable-faq.Rmd b/vignettes/datatable-faq.Rmd index f1deaba78..a2de14a2f 100644 --- a/vignettes/datatable-faq.Rmd +++ 
b/vignettes/datatable-faq.Rmd @@ -29,6 +29,7 @@ knitr::opts_chunk$set( tidy = FALSE, cache = FALSE, collapse = TRUE) +.old.th = setDTthreads(1) ``` The first section, Beginner FAQs, is intended to be read in order, from start to finish. It's just written in a FAQ style to be digested more easily. It isn't really the most frequently asked questions. A better measure for that is looking on Stack Overflow. @@ -615,3 +616,6 @@ Sure. You're more likely to get a faster answer from the Issues page or Stack Ov Please see [this answer](https://stackoverflow.com/a/10529888/403310). +```{r, echo=FALSE} +setDTthreads(.old.th) +``` \ No newline at end of file diff --git a/vignettes/datatable-intro.Rmd b/vignettes/datatable-intro.Rmd index 04fd79e50..3624a7c5b 100644 --- a/vignettes/datatable-intro.Rmd +++ b/vignettes/datatable-intro.Rmd @@ -18,6 +18,7 @@ knitr::opts_chunk$set( cache = FALSE, collapse = TRUE ) +.old.th = setDTthreads(1) ``` This vignette introduces the `data.table` syntax, its general form, how to *subset* rows, *select and compute* on columns, and perform aggregations *by group*. Familiarity with `data.frame` data structure from base R is useful, but not essential to follow this vignette. @@ -651,3 +652,6 @@ We will see how to *add/update/delete* columns *by reference* and how to combine *** +```{r, echo=FALSE} +setDTthreads(.old.th) +``` \ No newline at end of file diff --git a/vignettes/datatable-keys-fast-subset.Rmd b/vignettes/datatable-keys-fast-subset.Rmd index 3e9a4f23c..e73b71b92 100644 --- a/vignettes/datatable-keys-fast-subset.Rmd +++ b/vignettes/datatable-keys-fast-subset.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( tidy = FALSE, cache = FALSE, collapse = TRUE) +.old.th = setDTthreads(1) ``` This vignette is aimed at those who are already familiar with *data.table* syntax, its general form, how to subset rows in `i`, select and compute on columns, add/modify/delete columns *by reference* in `j` and group by using `by`. 
If you're not familiar with these concepts, please read the *"Introduction to data.table"* and *"Reference semantics"* vignettes first. @@ -494,3 +495,8 @@ In this vignette, we have learnt another method to subset rows in `i` by keying * combine key based subsets with `j` and `by`. Note that the `j` and `by` operations are exactly the same as before. Key based subsets are **incredibly fast** and are particularly useful when the task involves *repeated subsetting*. But it may not be always desirable to set key and physically reorder the *data.table*. In the next vignette, we will address this using a *new* feature -- *secondary indexes*. + + +```{r, echo=FALSE} +setDTthreads(.old.th) +``` \ No newline at end of file diff --git a/vignettes/datatable-reference-semantics.Rmd b/vignettes/datatable-reference-semantics.Rmd index c96ed090f..7a9990ba4 100644 --- a/vignettes/datatable-reference-semantics.Rmd +++ b/vignettes/datatable-reference-semantics.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( tidy = FALSE, cache = FALSE, collapse = TRUE) +.old.th = setDTthreads(1) ``` This vignette discusses *data.table*'s reference semantics which allows to *add/update/delete* columns of a *data.table by reference*, and also combine them with `i` and `by`. It is aimed at those who are already familiar with *data.table* syntax, its general form, how to subset rows in `i`, select and compute on columns, and perform aggregations by group. If you're not familiar with these concepts, please read the *"Introduction to data.table"* vignette first. @@ -348,6 +349,10 @@ However we could improve this functionality further by *shallow* copying instead * We can use `:=` for its side effect or use `copy()` to not modify the original object while updating by reference. +```{r, echo=FALSE} +setDTthreads(.old.th) +``` + # So far we have seen a whole lot in `j`, and how to combine it with `by` and little of `i`. 
Let's turn our attention back to `i` in the next vignette *"Keys and fast binary search based subset"* to perform *blazing fast subsets* by *keying data.tables*. diff --git a/vignettes/datatable-reshape.Rmd b/vignettes/datatable-reshape.Rmd index 0b5d7a57d..d282bc7de 100644 --- a/vignettes/datatable-reshape.Rmd +++ b/vignettes/datatable-reshape.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( tidy = FALSE, cache = FALSE, collapse = TRUE) +.old.th = setDTthreads(1) ``` This vignette discusses the default usage of reshaping functions `melt` (wide to long) and `dcast` (long to wide) for *data.tables* as well as the **new extended functionalities** of melting and casting on *multiple columns* available from `v1.9.6`. @@ -314,6 +315,10 @@ DT.c2 You can also provide *multiple functions* to `fun.aggregate` to `dcast` for *data.tables*. Check the examples in `?dcast` which illustrates this functionality. +```{r, echo=FALSE} +setDTthreads(.old.th) +``` + # *** diff --git a/vignettes/datatable-sd-usage.Rmd b/vignettes/datatable-sd-usage.Rmd index e7b08650e..ae0b5a84a 100644 --- a/vignettes/datatable-sd-usage.Rmd +++ b/vignettes/datatable-sd-usage.Rmd @@ -25,6 +25,7 @@ knitr::opts_chunk$set( out.width = '100%', dpi = 144 ) +.old.th = setDTthreads(1) ``` This vignette will explain the most common ways to use the `.SD` variable in your `data.table` analyses. It is an adaptation of [this answer](https://stackoverflow.com/a/47406952/3576984) given on StackOverflow. @@ -254,3 +255,7 @@ abline(v = overall_coef, lty = 2L, col = 'red') While there is indeed a fair amount of heterogeneity, there's a distinct concentration around the observed overall value. The above is just a short introduction of the power of `.SD` in facilitating beautiful, efficient code in `data.table`! 
+ +```{r, echo=FALSE} +setDTthreads(.old.th) +``` \ No newline at end of file diff --git a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd index 6f2474c11..ff50ba97e 100644 --- a/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd +++ b/vignettes/datatable-secondary-indices-and-auto-indexing.Rmd @@ -17,6 +17,7 @@ knitr::opts_chunk$set( tidy = FALSE, cache = FALSE, collapse = TRUE) +.old.th = setDTthreads(1) ``` This vignette assumes that the reader is familiar with data.table's `[i, j, by]` syntax, and how to perform fast key based subsets. If you're not familiar with these concepts, please read the *"Introduction to data.table"*, *"Reference semantics"* and *"Keys and fast binary search based subset"* vignettes first. @@ -325,3 +326,8 @@ In recent version we extended auto indexing to expressions involving more than o We will discuss fast *subsets* using keys and secondary indices to *joins* in the next vignette, *"Joins and rolling joins"*. *** + +```{r, echo=FALSE} +setDTthreads(.old.th) +``` + From 537688106718b72e04ddf2859c3ec61a5aed2dc0 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 8 Dec 2023 20:52:40 +0100 Subject: [PATCH 63/88] cutoff news (#5815) --- NEWS.1.md | 1549 +++++++++++++++++++++++++++++++++++++++++++++++++++++ NEWS.md | 1547 +--------------------------------------------------- 2 files changed, 1550 insertions(+), 1546 deletions(-) create mode 100644 NEWS.1.md diff --git a/NEWS.1.md b/NEWS.1.md new file mode 100644 index 000000000..249f34992 --- /dev/null +++ b/NEWS.1.md @@ -0,0 +1,1549 @@ + +**This is OLD NEWS. Latest news is on GitHub [here](https://github.com/Rdatatable/data.table/blob/master/NEWS.md).** + +# data.table [v1.14.10](https://github.com/Rdatatable/data.table/milestone/20?closed=1) (8 Dec 2023) + +## NOTES + +1. 
Maintainer of the package for CRAN releases is from now on Tyson Barrett (@tysonstanley), [#5710](https://github.com/Rdatatable/data.table/issues/5710). + +2. Updated internal code for breaking change of `is.atomic(NULL)` in R-devel, [#5691](https://github.com/Rdatatable/data.table/pull/5691). Thanks to Martin Maechler for the patch. + +3. Fix multiple tests concerning coercion to missing complex numbers, [#5695](https://github.com/Rdatatable/data.table/issues/5695) and [#5748](https://github.com/Rdatatable/data.table/issues/5748). Thanks to @MichaelChirico and @ben-schwen for the patches. + +4. Fix multiple format warnings (e.g., -Wformat) [#5712](https://github.com/Rdatatable/data.table/pull/5712), [#5781](https://github.com/Rdatatable/data.table/pull/5781), [#5800](https://github.com/Rdatatable/data.table/pull/5800), [#5786](https://github.com/Rdatatable/data.table/pull/5786). Thanks to @MichaelChirico and @jangorecki for the patches. + + +# data.table [v1.14.8](https://github.com/Rdatatable/data.table/milestone/28?closed=1) (17 Feb 2023) + +## NOTES + +1. Test 1613.605 now passes changes to `as.data.frame()` in R-devel, [#5597](https://github.com/Rdatatable/data.table/pull/5597). Thanks to Avraham Adler for reporting. + +2. An out of bounds read when combining non-equi join with `by=.EACHI` has been found and fixed thanks to clang ASAN, [#5598](https://github.com/Rdatatable/data.table/issues/5598). There was no bug or consequence because the read was followed (now preceded) by a bounds test. + +3. `.rbind.data.table` (note the leading `.`) is no longer exported when `data.table` is installed in R>=4.0.0 (Apr 2020), [#5600](https://github.com/Rdatatable/data.table/pull/5600). It was never documented which R-devel now detects and warns about. It is only needed by `data.table` internals to support R<4.0.0; see note 1 in v1.12.6 (Oct 2019) below in this file for more details.
+ + +# data.table [v1.14.6](https://github.com/Rdatatable/data.table/milestone/27?closed=1) (16 Nov 2022) + +## BUG FIXES + +1. `fread()` could leak memory, [#3292](https://github.com/Rdatatable/data.table/issues/3292). Thanks to @patrickhowerter for reporting, and Jim Hester for the fix. The fix requires R 3.4.0 or later. Loading `data.table` in earlier versions now highlights this issue on startup, asks users to upgrade R, and warns that we intend to upgrade `data.table`'s dependency from 8 year old R 3.1.0 (April 2014) to 5 year old R 3.4.0 (April 2017). + +## NOTES + +1. Test 1962.098 has been modified to pass latest changes to `POSIXt` in R-devel. + +2. `test.data.table()` no longer creates `DT` in `.GlobalEnv`, a CRAN policy violation, [#5514](https://github.com/Rdatatable/data.table/issues/5514). No other writes occurred to `.GlobalEnv` and release procedures have been improved to prevent this happening again. + +3. The memory usage of the test suite has been halved, [#5507](https://github.com/Rdatatable/data.table/issues/5507). + + +# data.table [v1.14.4](https://github.com/Rdatatable/data.table/milestone/26?closed=1) (17 Oct 2022) + +## NOTES + +1. gcc 12.1 (May 2022) now detects and warns about an always-false condition (`-Waddress`) in `fread` which caused a small efficiency saving never to be invoked, [#5476](https://github.com/Rdatatable/data.table/pull/5476). Thanks to CRAN for testing latest versions of compilers. + +2. `update.dev.pkg()` has been renamed `update_dev_pkg()` to get out of the way of the `stats::update` generic function, [#5421](https://github.com/Rdatatable/data.table/pull/5421). This is a utility function which upgrades the version of `data.table` to the latest commit in development which has passed all tests. As such we don't expect any backwards compatibility concerns. Its manual page was causing an intermittent hang/crash from `R CMD check` on Windows-only on CRAN which we hope will be worked around by changing its name. + +3. 
Internal C code now passes `-Wstrict-prototypes` to satisfy the warnings now displayed on CRAN, [#5477](https://github.com/Rdatatable/data.table/pull/5477). + +4. `write.csv` in R-devel no longer responds to `getOption("digits.secs")` for `POSIXct`, [#5478](https://github.com/Rdatatable/data.table/issues/5478). This caused our tests of `fwrite(, dateTimeAs="write.csv")` to fail on CRAN's daily checks using latest daily R-devel. While R-devel discussion continues, and currently it seems like the change is intended with further changes possible, this `data.table` release massages our tests to pass on latest R-devel. The idea is to try to get out of the way of R-devel changes in this regard until the new behavior of `write.csv` is released and confirmed. Package updates are not accepted on CRAN if they do not pass the latest daily version of R-devel, even if R-devel changes after the package update is submitted. If the change to `write.csv()` stands, then a future release of `data.table` will be needed to make `fwrite(, dateTimeAs="write.csv")` match `write.csv()` output again in that future version of R onwards. If you use an older version of `data.table` than said future one in the said future version of R, then `fwrite(, dateTimeAs="write.csv")` may not match `write.csv()` if you are using `getOption("digits.secs")` too. However, you can always check that your installation of `data.table` works in your version of R on your platform by simply running `test.data.table()` yourself. Doing so would detect such a situation for you: test 1741 would fail in this case. `test.data.table()` runs the entire suite of tests and is always available to you locally. This way you do not need to rely on our statements about which combinations of versions of R and `data.table` on which platforms we have tested and support; just run `test.data.table()` yourself. 
Having said that, because test 1741 has been relaxed in this release in order to be accepted on CRAN to pass latest R-devel, this won't be true for this particular release in regard to this particular test. + + ```R + $ R --vanilla + R version 4.2.1 (2022-06-23) -- "Funny-Looking Kid" + > DF = data.frame(A=as.POSIXct("2022-10-01 01:23:45.012")) + > options(digits.secs=0) + > write.csv(DF) + "","A" + "1",2022-10-01 01:23:45 + > options(digits.secs=3) + > write.csv(DF) + "","A" + "1",2022-10-01 01:23:45.012 + + $ Rdevel --vanilla + R Under development (unstable) (2022-10-06 r83040) -- "Unsuffered Consequences" + > DF = data.frame(A=as.POSIXct("2022-10-01 01:23:45.012")) + > options(digits.secs=0) + > write.csv(DF) + "","A" + "1",2022-10-01 01:23:45.012 + ``` + +5. Many thanks to Kurt Hornik for investigating potential impact of a possible future change to `base::intersect()` on empty input, providing a patch so that `data.table` won't break if the change is made to R, and giving us plenty of notice, [#5183](https://github.com/Rdatatable/data.table/pull/5183). + +6. `datatable.[dll|so]` has changed name to `data_table.[dll|so]`, [#4442](https://github.com/Rdatatable/data.table/pull/4442). Thanks to Jan Gorecki for the PR. We had previously removed the `.` since `.` is not allowed by the following paragraph in the Writing-R-Extensions manual. Replacing `.` with `_` instead now seems more consistent with the last sentence. + + > ... the basename of the DLL needs to be both a valid file name and valid as part of a C entry point (e.g. it cannot contain ‘.’): for portable code it is best to confine DLL names to be ASCII alphanumeric plus underscore. If entry point R_init_lib is not found it is also looked for with ‘.’ replaced by ‘_’. + + +# data.table [v1.14.2](https://github.com/Rdatatable/data.table/milestone/24?closed=1) (27 Sep 2021) + +## NOTES + +1. 
clang 13.0.0 (Sep 2021) requires the system header `omp.h` to be included before R's headers, [#5122](https://github.com/Rdatatable/data.table/issues/5122). Many thanks to Prof Ripley for testing and providing a patch file. + + +# data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (21 Feb 2021) + +## POTENTIALLY BREAKING CHANGES + +1. In v1.13.0 (July 2020) native parsing of datetime was added to `fread` by Michael Chirico which dramatically improved performance. Before then datetime was read as type character by default which was slow. Since v1.13.0, UTC-marked datetime (e.g. `2020-07-24T10:11:12.134Z` where the final `Z` is present) has been read automatically as POSIXct and quickly. We provided the migration option `datatable.old.fread.datetime.character` to revert to the previous slow character behavior. We also added the `tz=` argument to control unmarked datetime; i.e. where the `Z` (or equivalent UTC postfix) is missing in the data. The default `tz=""` reads unmarked datetime as character as before, slowly. We gave you the ability to set `tz="UTC"` to turn on the new behavior and read unmarked datetime as UTC, quickly. R sessions that are running in UTC by setting the TZ environment variable, as is good practice and common in production, have also been reading unmarked datetime as UTC since v1.13.0, much faster. Note 1 of v1.13.0 (below in this file) ended `In addition to convenience, fread is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when tz="UTC" is provided.`. + + At `rstudio::global(2021)`, Neal Richardson, Director of Engineering at Ursa Labs, compared Arrow CSV performance to `data.table` CSV performance, [Bigger Data With Ease Using Apache Arrow](https://posit.co/resources/videos/bigger-data-with-ease-using-apache-arrow/). He opened by comparing to `data.table` as his main point. Arrow was presented as 3 times faster than `data.table`. 
He talked at length about this result. However, no reproducible code was provided and we were not contacted in advance in case we had any comments. He mentioned New York Taxi data in his talk which is a dataset known to us as containing unmarked datetime. [Rebuttal](https://twitter.com/MattDowle/status/1360073970498875394). + + `tz=`'s default is now changed from `""` to `"UTC"`. If you have been using `tz=` explicitly then there should be no change. The change to read UTC-marked datetime as POSIXct rather than character already happened in v1.13.0. The change now is that unmarked datetimes are now read as UTC too by default without needing to set `tz="UTC"`. None of the 1,017 CRAN packages directly using `data.table` are affected. As before, the migration option `datatable.old.fread.datetime.character` can still be set to TRUE to revert to the old character behavior. This migration option is temporary and will be removed in the near future. + + The community was consulted in [this tweet](https://twitter.com/MattDowle/status/1358011599336931328) before release. + +## BUG FIXES + +1. If `fread()` discards a single line footer, the warning message which includes the discarded text now displays any non-ASCII characters correctly on Windows, [#4747](https://github.com/Rdatatable/data.table/issues/4747). Thanks to @shrektan for reporting and the PR. + +2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. + +## NOTES + +1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. 
Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN. + +2. `r-datatable.com` continues to be the short, canonical and long-standing URL which forwards to the current homepage. The homepage domain has changed a few times over the years but those using `r-datatable.com` did not need to change their links. For example, we use `r-datatable.com` in messages (and translated messages) in preference to the word 'homepage' to save users time in searching for the current homepage. The web forwarding was provided by Domain Monster but they do not support `https://r-datatable.com`, only `http://r-datatable.com`, despite the homepage being forwarded to being `https:` for many years. Meanwhile, CRAN submission checks now require all URLs to be `https:`, rejecting `http:`. Therefore we have moved to [gandi.net](https://www.gandi.net) who do support `https:` web forwarding and so [https://r-datatable.com](https://r-datatable.com) now forwards correctly. Thanks to Dirk Eddelbuettel for suggesting Gandi. Further, Gandi allows the web-forward to be marked 301 (permanent) or 302 (temporary). Since the very point of `https://r-datatable.com` is to be a forward, 302 is appropriate in this case. This enables us to link to it in DESCRIPTION, README, and this NEWS item. Otherwise, CRAN submission checks would require the 301 forward to be followed; i.e. the forward replaced with where it points to and the package resubmitted. Thanks to Uwe Ligges for explaining this distinction. + + +# data.table [v1.13.6](https://github.com/Rdatatable/data.table/milestone/22?closed=1) (30 Dec 2020) + +## BUG FIXES + +1. 
Grouping could throw an error `Failed to allocate counts or TMP` with more than 1e9 rows even with sufficient RAM due to an integer overflow, [#4295](https://github.com/Rdatatable/data.table/issues/4295) [#4818](https://github.com/Rdatatable/data.table/issues/4818). Thanks to @renkun-ken and @jangorecki for reporting, and @shrektan for fixing. + +2. `fwrite()`'s multithreaded `gzip` compression failed on Solaris with Z_STREAM_ERROR, [#4099](https://github.com/Rdatatable/data.table/issues/4099). Since this feature was released in Oct 2019 (see item 3 in v1.12.4 below in this news file) there have been no known problems with it on Linux, Windows or Mac. For Solaris, we have been successively adding more and more detailed tracing to the output in each release, culminating in tracing `zlib` internals at byte level by reading `zlib`'s source. The problem did not manifest itself on [R-hub](https://builder.r-hub.io/)'s Solaris instances, so we had to work via CRAN output. If `zlib`'s `z_stream` structure is declared inside a parallel region but before a parallel for, it appears that the particular OpenMP implementation used by CRAN's Solaris moves the structure to a new address on entering the parallel for. Ordinarily this memory move would not matter, however, `zlib` internals have a self reference pointer to the parent, and check that the pointers match. This mismatch caused the -2 (Z_STREAM_ERROR). Allocating an array of structures, one for each thread, before the parallel region avoids the memory move with no cost. + + It should be carefully noted that we cannot be sure it really is a problem unique to CRAN's Solaris. Even if it seems that way after one year of observations. For example, it could be compiler flags, or particular memory circumstances, either of which could occur on other operating systems too. However, we are unaware of why it would make sense for the OpenMP implementation to move the structure at that point.
Any optimizations such as aligning the set of structures to cache line boundaries could be performed at the start of the parallel region, not after the parallel for. If anyone reading this knows more, please let us know. + +## NOTES + +1. The last release took place at the same time as several breaking changes were made to R-devel. The CRAN submissions process runs against latest daily R-devel so we had to keep up with those latest changes by making several resubmissions. Then each resubmission reruns against the new latest R-devel again. Overall it took 7 days. For example, we added the new `environments=FALSE` to our `all.equal` call. Then about 4 hours after 1.13.4 was accepted, the `s` was dropped and we now need to resubmit with `environment=FALSE`. In any case, we have suggested that the default should be FALSE first to give packages some notice, as opposed to generating errors in the CRAN submissions process within hours. Then the default for `environment=` could be TRUE in 6 months time after packages have had some time to update in advance of the default change. Readers of this NEWS file will be familiar with `data.table`'s approach to change control and know that we do this ourselves. + + +# data.table [v1.13.4](https://github.com/Rdatatable/data.table/milestone/21?closed=1) (08 Dec 2020) + +## BUG FIXES + +1. `as.matrix()` now retains the column type for the empty matrix result, [#4762](https://github.com/Rdatatable/data.table/issues/4762). Thus, for example, `min(DT[0])` where DT's columns are numeric, is now consistent with non-empty all-NA input and returns `Inf` with R's warning `no non-missing arguments to min; returning Inf` rather than R's error `only defined on a data frame with all numeric[-alike] variables`. Thanks to @mb706 for reporting. + +2. `fsort()` could crash when compiled using `clang-11` (Oct 2020), [#4786](https://github.com/Rdatatable/data.table/issues/4786). 
Multithreaded debugging revealed that threads are no longer assigned iterations monotonically by the dynamic schedule. Although never guaranteed by the OpenMP standard, in practice monotonicity could be relied on as far as we knew, until now. We rely on monotonicity in the `fsort` implementation. Happily, a schedule modifier `monotonic:dynamic` was added in OpenMP 4.5 (Nov 2015) which we now use if available (e.g. gcc 6+, clang 3.9+). If you have an old compiler which does not support OpenMP 4.5, it's probably the case that the unmodified dynamic schedule is monotonic anyway, so `fsort` now checks that threads are receiving iterations monotonically and emits a graceful error if not. It may be that `clang` prior to version 11, and `gcc` too, exhibit the same crash. It was just that `clang-11` was the first report. To know which version of OpenMP `data.table` is using, `getDTthreads(verbose=TRUE)` now reports the `YYYYMM` value `_OPENMP`; e.g. 201511 corresponds to v4.5, and 201811 corresponds to v5.0. Oddly, the `x.y` version number is not provided by the OpenMP API. OpenMP 4.5 may be enabled in some compilers using `-fopenmp-version=45`. Otherwise, if you need to upgrade compiler, https://www.openmp.org/resources/openmp-compilers-tools/ may be helpful. + +3. Columns containing functions that don't inherit the class `'function'` would fail to group, [#4814](https://github.com/Rdatatable/data.table/issues/4814). Thanks @mb706 for reporting, @ecoRoland2 for helping investigate, and @Coorsaa for a follow-up example involving environments. + +## NOTES + +1. Continuous daily testing by CRAN using latest daily R-devel revealed, within one day of the change to R-devel, that a future version of R would break one of our tests, [#4769](https://github.com/Rdatatable/data.table/issues/4769). 
The characters "-alike" were added into one of R's error messages, so our too-strict test which expected the error `only defined on a data frame with all numeric variables` will fail when it sees the new error message `only defined on a data frame with all numeric-alike variables`. We have relaxed the pattern the test looks for to `data.*frame.*numeric` well in advance of the future version of R being released. Readers are reminded that CRAN is not just a host for packages. It is also a giant test suite for R-devel. For more information, [behind the scenes of cran, 2016](https://h2o.ai/blog/2016/behind-the-scenes-of-cran/). + +2. `as.Date.IDate` is no longer exported as a function to solve a new error in R-devel `S3 method lookup found 'as.Date.IDate' on search path`, [#4777](https://github.com/Rdatatable/data.table/issues/4777). The S3 method is still exported; i.e. `as.Date(x)` will still invoke the `as.Date.IDate` method when `x` is class `IDate`. The function had been exported, in addition to exporting the method, to solve a compatibility issue with `zoo` (and `xts` which uses `zoo`) because `zoo` exports `as.Date` which masks `base::as.Date`. Happily, since zoo 1.8-1 (Jan 2018) made a change to its `as.IDate`, the workaround is no longer needed. + +3. Thanks to @fredguinog for testing `fcase` in development before 1.13.0 was released and finding a segfault, [#4378](https://github.com/Rdatatable/data.table/issues/4378). It was found separately by the `rchk` tool (which uses static code analysis) in release procedures and fixed before `fcase` was released, but the reproducible example has now been added to the test suite for completeness. Thanks also to @shrektan for investigating, proposing a very similar fix at C level, and a different reproducible example which has also been added to the test suite. + + +# data.table [v1.13.2](https://github.com/Rdatatable/data.table/milestone/19?closed=1) (19 Oct 2020) + +## BUG FIXES + +1. 
`test.data.table()` could fail the 2nd time it is run by a user in the same R session on Windows due to not resetting locale properly after testing Chinese translation, [#4630](https://github.com/Rdatatable/data.table/pull/4630). Thanks to Cole Miller for investigating and fixing. + +2. A regression in v1.13.0 resulted in installation on Mac often failing with `shared object 'datatable.so' not found`, and FreeBSD always failing with `expr: illegal option -- l`, [#4652](https://github.com/Rdatatable/data.table/issues/4652) [#4640](https://github.com/Rdatatable/data.table/issues/4640) [#4650](https://github.com/Rdatatable/data.table/issues/4650). Thanks to many for assistance including Simon Urbanek, Brian Ripley, Wes Morgan, and @ale07alvarez. There were no installation problems on Windows or Linux. + +3. Operating on columns of type `list`, e.g. `dt[, listCol[[1]], by=id]`, suffered a performance regression in v1.13.0, [#4646](https://github.com/Rdatatable/data.table/issues/4646) [#4658](https://github.com/Rdatatable/data.table/issues/4658). Thanks to @fabiocs8 and @sandoronodi for the detailed reports, and to Cole Miller for substantial debugging, investigation and proposals at C level which enabled the root cause to be fixed. Related, and also fixed, was a segfault revealed by package POUMM, [#4746](https://github.com/Rdatatable/data.table/issues/4746), when grouping a list column where each item has an attribute; e.g., `coda::mcmc.list`. Detected thanks to CRAN's ASAN checks, and thanks to Venelin Mitov for assistance in tracing the memory fault. Thanks also to Hongyuan Jia and @ben-schwen for assistance in debugging the fix in dev to pass reverse dependency testing which highlighted, before release, that package `eplusr` would fail. Its good usage has been added to `data.table`'s test suite. + +4. 
`fread("1.2\n", colClasses='integer')` (note no column names in the data) would segfault when creating a warning message, [#4644](https://github.com/Rdatatable/data.table/issues/4644). It now warns with `Attempt to override column 1 of inherent type 'float64' down to 'int32' ignored.` When column names are present however, the warning message includes the name as before; i.e., `fread("A\n1.2\n", colClasses='integer')` produces `Attempt to override column 1 <<A>> of inherent type 'float64' down to 'int32' ignored.`. Thanks to Kun Ren for reporting. + +5. `dplyr::mutate(setDT(as.list(1:64)), V1=11)` threw error `can't set ALTREP truelength`, [#4734](https://github.com/Rdatatable/data.table/issues/4734). Thanks to @etryn for the reproducible example, and to Cole Miller for refinements. + +## NOTES + +1. `bit64` v4.0.2 and `bit` v4.0.3, both released on 30th July, correctly broke `data.table`'s tests. Like other packages on our `Suggest` list, we check `data.table` works with `bit64` in our tests. The first break was because `all.equal` always returned `TRUE` in previous versions of `bit64`. Now that `all.equal` works for `integer64`, the incorrect test comparison was revealed. If you use `bit64`, or `nanotime` which uses `bit64`, it is highly recommended to upgrade to the latest `bit64` version. Thanks to Cole Miller for the PR to accommodate `bit64`'s update. + + The second break caused by `bit` was the addition of a `copy` function. We did not ask, but the `bit` package kindly offered to change to a different name since `data.table::copy` is long standing. `bit` v4.0.4 released 4th August renamed `copy` to `copy_vector`. Otherwise, users of `data.table` would have needed to prefix every occurrence of `copy` with `data.table::copy` if they use `bit64` too, since `bit64` depends on (rather than importing) `bit`. Again, this impacted `data.table`'s tests which mimic a user's environment; not `data.table` itself per se.
+ + We have requested that CRAN policy be modified to require that reverse dependency testing include packages which `Suggest` the package. Had this been the case, reverse dependency testing of `bit64` would have caught the impact on `data.table` before release. + +2. `?.NGRP` now displays the help page as intended, [#4649](https://github.com/Rdatatable/data.table/issues/4649). Thanks to @KyleHaynes for posting the issue, and Cole Miller for the fix. `.NGRP` is a symbol new in v1.13.0; see below in this file. + +3. `test.data.table()` failed in non-English locales such as `LC_TIME=fr_FR.UTF-8` due to `Jan` vs `janv.` in tests 168 and 2042, [#3450](https://github.com/Rdatatable/data.table/issues/3450). Thanks to @shrektan for reporting, and @tdhock for making the tests locale-aware. + +4. User-supplied `PKG_LIBS` and `PKG_CFLAGS` are now retained and the suggestion in https://mac.r-project.org/openmp/; i.e., + `PKG_CPPFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_<ver>.tar.gz` +has a better chance of working on Mac. + + +# data.table [v1.13.0](https://github.com/Rdatatable/data.table/milestone/17?closed=1) (24 Jul 2020) + +## POTENTIALLY BREAKING CHANGES + +1. `fread` now supports native parsing of `%Y-%m-%d`, and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `%Y-%m-%dT%H:%M:%OS%z`, [#4464](https://github.com/Rdatatable/data.table/pull/4464). Dates are returned as `data.table`'s `integer`-backed `IDate` class (see `?IDate`), and datetimes are returned as `POSIXct` provided either `Z` or the offset from `UTC` is present; e.g. `fwrite()` outputs UTC by default including the final `Z`. Reminder that `IDate` inherits from R's `Date` and is identical other than it uses the `integer` type where (oddly) R uses the `double` type for dates (8 bytes instead of 4). `fread()` gains a `tz` argument to control datetime values that are missing a Z or UTC-offset (now referred to as *unmarked* datetimes); e.g. as written by `write.csv`.
By default `tz=""` means, as in R, read the unmarked datetime in local time. Unless the timezone of the R session is UTC (e.g. the TZ environment variable is set to `"UTC"`, or `""` on non-Windows), unmarked datetime will then be read by `fread` as character, as before. If you have been using `colClasses="POSIXct"` that will still work using R's `as.POSIXct()` which will interpret the unmarked datetime in local time, as before, and still slowly. You can tell `fread` to read unmarked datetime as UTC, and quickly, by passing `tz="UTC"` which may be appropriate in many circumstances. Note that the default behaviour of R to read and write csv using unmarked datetime can lead to different research results when the csv file has been saved in one timezone and read in another due to observations being shifted to a different date. If you have been using `colClasses="POSIXct"` for UTC-marked datetime (e.g. as written by `fwrite` including the final `Z`) then it will automatically speed up with no changes needed. + + Since this is a potentially breaking change, i.e. existing code may depend on dates and datetimes being read as type character as before, a temporary option is provided to restore the old behaviour: `options(datatable.old.fread.datetime.character=TRUE)`. However, in most cases, we expect existing code to still work with no changes. + + The minor version number is bumped from 12 to 13, i.e. `v1.13.0`, where the `.0` conveys 'be-aware' as is common practice. As with any new feature, there may be bugs to fix and changes to defaults required in future. In addition to convenience, `fread` is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when `tz="UTC"` is provided. + +## NEW FEATURES + +1. `%chin%` and `chmatch(x, table)` are faster when `x` is length 1, `table` is long, and `x` occurs near the start of `table`.
Thanks to Michael Chirico for the suggestion, [#4117](https://github.com/Rdatatable/data.table/pull/4117#discussion_r358378409). + +2. `CsubsetDT` C function is now exported for use by other packages, [#3751](https://github.com/Rdatatable/data.table/issues/3751). Thanks to Leonardo Silvestri for the request and the PR. This uses R's `R_RegisterCCallable` and `R_GetCCallable` mechanism, [R-exts§5.4.3](https://cran.r-project.org/doc/manuals/r-devel/R-exts.html#Linking-to-native-routines-in-other-packages) and [`?cdt`](https://rdatatable.gitlab.io/data.table/reference/cdt.html). Note that organization of our C interface will be changed in future. + +3. `print` method for `data.table` gains `trunc.cols` argument (and corresponding option `datatable.print.trunc.cols`, default `FALSE`), [#1497](https://github.com/Rdatatable/data.table/issues/1497), part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). This prints only as many columns as fit in the console without wrapping to new lines (e.g., the first 5 of 80 columns) and a message that states the count and names of the variables not shown. When `class=TRUE` the message also contains the classes of the variables. `data.table` has always automatically truncated _rows_ of a table for efficiency (e.g. printing 10 rows instead of 10 million); in the future, we may do the same for _columns_ (e.g., 10 columns instead of 20,000) by changing the default for this argument. Thanks to @nverno for the initial suggestion and to @TysonStanley for the PR. + +4. `setnames(DT, new=new_names)` (i.e. explicitly named `new=` argument) now works as expected rather than an error message requesting that `old=` be supplied too, [#4041](https://github.com/Rdatatable/data.table/issues/4041). Thanks @Kodiologist for the suggestion. + +5. `nafill` and `setnafill` gain `nan` argument to say whether `NaN` should be considered the same as `NA` for filling purposes, [#4020](https://github.com/Rdatatable/data.table/issues/4020). 
Prior versions had an implicit value of `nan=NaN`; the default is now `nan=NA`, i.e., `NaN` is treated as if it's missing. Thanks @AnonymousBoba for the suggestion. Also, while `nafill` still respects `getOption('datatable.verbose')`, the `verbose` argument has been removed. + +6. New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when` however it evaluates its arguments in a lazy way (i.e. only when needed) as shown below. Please see `?fcase` for more details. + + ```R + # Lazy evaluation + x = 1:10 + data.table::fcase( + x < 5L, 1L, + x >= 5L, 3L, + x == 5L, stop("provided value is an unexpected one!") + ) + # [1] 1 1 1 1 3 3 3 3 3 3 + + dplyr::case_when( + x < 5L ~ 1L, + x >= 5L ~ 3L, + x == 5L ~ stop("provided value is an unexpected one!") + ) + # Error in eval_tidy(pair$rhs, env = default_env) : + # provided value is an unexpected one! + + # Benchmark + x = sample(1:100, 3e7, replace = TRUE) # 114 MB + microbenchmark::microbenchmark( + dplyr::case_when( + x < 10L ~ 0L, + x < 20L ~ 10L, + x < 30L ~ 20L, + x < 40L ~ 30L, + x < 50L ~ 40L, + x < 60L ~ 50L, + x > 60L ~ 60L + ), + data.table::fcase( + x < 10L, 0L, + x < 20L, 10L, + x < 30L, 20L, + x < 40L, 30L, + x < 50L, 40L, + x < 60L, 50L, + x > 60L, 60L + ), + times = 5L, + unit = "s") + # Unit: seconds + # expr min lq mean median uq max neval + # dplyr::case_when 11.57 11.71 12.22 11.82 12.00 14.02 5 + # data.table::fcase 1.49 1.55 1.67 1.71 1.73 1.86 5 + ``` + +7. `.SDcols=is.numeric` now works; i.e., `SDcols=` accepts a function which is used to select the columns of `.SD`, [#3950](https://github.com/Rdatatable/data.table/issues/3950). 
Any function (even _ad hoc_) that returns scalar `TRUE`/`FALSE` for each column will do; e.g., `.SDcols=!is.character` will return _non_-character columns (_a la_ `Negate()`). Note that `.SDcols=patterns(...)` can still be used for filtering based on the column names. + +8. Compiler support for OpenMP is now detected during installation, which allows `data.table` to compile from source (in single threaded mode) on macOS which, frustratingly, does not include OpenMP support by default, [#2161](https://github.com/Rdatatable/data.table/issues/2161), unlike Windows and Linux. A helpful message is emitted during installation from source, and on package startup as before. Many thanks to @jimhester for the PR. + +9. `rbindlist` now supports columns of type `expression`, [#546](https://github.com/Rdatatable/data.table/issues/546). Thanks @jangorecki for the report. + +10. The dimensions of objects in a `list` column are now displayed, [#3671](https://github.com/Rdatatable/data.table/issues/3671). Thanks to @randomgambit for the request, and Tyson Barrett for the PR. + +11. `frank` gains `ties.method='last'`, paralleling the same in `base::order` which has been available since R 3.3.0 (April 2016), [#1689](https://github.com/Rdatatable/data.table/issues/1689). Thanks @abudis for the encouragement to accommodate this. + +12. The `keep.rownames` argument in `as.data.table.xts` now accepts a string, which can be used for specifying the column name of the index of the xts input, [#4232](https://github.com/Rdatatable/data.table/issues/4232). Thanks to @shrektan for the request and the PR. + +13. New symbol `.NGRP` available in `j`, [#1206](https://github.com/Rdatatable/data.table/issues/1206). `.GRP` (the group number) was already available taking values from `1` to `.NGRP`. The number of groups, `.NGRP`, might be useful in `j` to calculate a percentage of groups processed so far, or to do something different for the last or penultimate group, for example. + +14. 
Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. + +15. A new throttle feature has been introduced to speed up small data tasks that are repeated in a loop, [#3175](https://github.com/Rdatatable/data.table/issues/3175) [#3438](https://github.com/Rdatatable/data.table/issues/3438) [#3205](https://github.com/Rdatatable/data.table/issues/3205) [#3735](https://github.com/Rdatatable/data.table/issues/3735) [#3739](https://github.com/Rdatatable/data.table/issues/3739) [#4284](https://github.com/Rdatatable/data.table/issues/4284) [#4527](https://github.com/Rdatatable/data.table/issues/4527) [#4294](https://github.com/Rdatatable/data.table/issues/4294) [#1120](https://github.com/Rdatatable/data.table/issues/1120). The default throttle of 1024 means that a single thread will be used when nrow<=1024, two threads when nrow<=2048, etc. To change the default, use `setDTthreads(throttle=)`. Or use the new environment variable `R_DATATABLE_THROTTLE`. If you use `Sys.setenv()` in a running R session to change this environment variable, be sure to run an empty `setDTthreads()` call afterwards for the change to take effect; see `?setDTthreads`. The word *throttle* is used to convey that the number of threads is restricted (throttled) for small data tasks. Reducing throttle to 1 will turn off throttling and should revert behaviour to past versions (i.e. using many threads even for small data). Increasing throttle to, say, 65536 will utilize multi-threading only for larger datasets. The value 1024 is a guess. We welcome feedback and test results indicating what the best default should be. + +## BUG FIXES + +1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). + +2. 
`DT[i]` could segfault when `i` is a zero-column `data.table`, [#4060](https://github.com/Rdatatable/data.table/issues/4060). Thanks @shrektan for reporting and fixing. + +3. Dispatch of `first` and `last` functions now properly works again for `xts` objects, [#4053](https://github.com/Rdatatable/data.table/issues/4053). Thanks to @ethanbsmith for reporting. + +4. If `.SD` is returned as-is during grouping, it is now unlocked for downstream usage, part of [#4159](https://github.com/Rdatatable/data.table/issues/4159). Thanks also to @mllg for detecting a problem with the initial fix here during the dev release [#4173](https://github.com/Rdatatable/data.table/issues/4173). + +5. `GForce` is deactivated for `[[` on non-atomic input, part of [#4159](https://github.com/Rdatatable/data.table/issues/4159). Thanks @hongyuanjia and @ColeMiller1 for helping debug an issue in dev with the original fix before release, [#4612](https://github.com/Rdatatable/data.table/issues/4612). + +6. `all.equal(DT, y)` no longer errors when `y` is not a data.table, [#4042](https://github.com/Rdatatable/data.table/issues/4042). Thanks to @d-sci for reporting and the PR. + +7. A length 1 `colClasses=NA_character_` would cause `fread` to incorrectly coerce all columns to character, [#4237](https://github.com/Rdatatable/data.table/issues/4237). + +8. An `fwrite` error message could include a garbled number and cause test 1737.5 to fail, [#3492](https://github.com/Rdatatable/data.table/issues/3492). Thanks to @QuLogic for debugging the issue on ARMv7hl, and the PR fixing it. + +9. `fread` improves handling of very small (<1e-300) or very large (>1e+300) floating point numbers on non-x86 architectures (specifically ppc64le and armv7hl). Thanks to @QuLogic for reporting and fixing, [PR#4165](https://github.com/Rdatatable/data.table/pull/4165). + +10. 
When updating by reference, the use of `get` could result in columns being re-ordered silently, [#4089](https://github.com/Rdatatable/data.table/issues/4089). Thanks to @dmongin for reporting and Cole Miller for the fix. + +11. `copy()` now overallocates deeply nested lists of `data.table`s, [#4205](https://github.com/Rdatatable/data.table/issues/4205). Thanks to @d-sci for reporting and the PR. + +12. `rbindlist` no longer errors when coercing complex vectors to character vectors, [#4202](https://github.com/Rdatatable/data.table/issues/4202). Thanks to @sritchie73 for reporting and the PR. + +13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). + +14. Selecting key columns could incur a large speed penalty, [#4498](https://github.com/Rdatatable/data.table/issues/4498). Thanks to @Jesper on Stack Overflow for the report. + +15. `all.equal(DT1, DT2, ignore.row.order=TRUE)` could return TRUE incorrectly in the presence of NAs, [#4422](https://github.com/Rdatatable/data.table/issues/4422). + +16. Non-equi joins now automatically set `allow.cartesian=TRUE`, [#4489](https://github.com/Rdatatable/data.table/issues/4489). Thanks to @Henrik-P for reporting. + +17. `X[Y, on=character(0)]` and `merge(X, Y, by.x=character(0), by.y=character(0))` no longer crash, [#4272](https://github.com/Rdatatable/data.table/pull/4272). Thanks to @tlapak for the PR. + +18. `by=col1:col4` gave an incorrect result if `key(DT)==c("col1","col4")`, [#4285](https://github.com/Rdatatable/data.table/issues/4285). Thanks to @cbilot for reporting, and Cole Miller for the PR. + +19. Matrices resulting from logical operators or comparisons on `data.table`s, e.g. in `dta == dtb`, can no longer have their colnames changed by reference later, [#4323](https://github.com/Rdatatable/data.table/issues/4323). Thanks to @eyherabh for reporting and @tlapak for the PR. + +20.
The environment variable `R_DATATABLE_NUM_THREADS` was being limited by `R_DATATABLE_NUM_PROCS_PERCENT` (by default 50%), [#4514](https://github.com/Rdatatable/data.table/issues/4514). It is now consistent with `setDTthreads()` and only limited by the full number of logical CPUs. For example, on a machine with 8 logical CPUs, `R_DATATABLE_NUM_THREADS=6` now results in 6 threads rather than 4 (50% of 8). + +## NOTES + +0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. + +1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). + +2. `set2key`, `set2keyv`, and `key2` have been removed, as they have been warning since v1.9.8 (Nov 2016) and halting with helpful message since v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' and quickly superseded by `setindex` and `indices`. + +3. 
`data.table` now supports messaging in simplified Chinese (locale `zh_CN`). This was the result of a monumental collaboration to translate `data.table`'s roughly 1400 warnings, errors, and verbose messages (about 16,000 words/100,000 characters) over the course of two months from volunteer translators in at least 4 time zones, most of whom are first-time `data.table` contributors and many of whom are first-time OSS contributors! + + A big thanks goes out to @fengqifang, @hongyuanjia, @biobai, @zhiiiyang, @Leo-Lee15, @soappp9527, @amy17519, @Zachary-Wu, @caiquanyou, @dracodoc, @JulianYlli12, @renkun-ken, @Xueliang24, @koohoko, @KingdaShi, @gaospecial, @shrektan, @sunshine1126, @shawnchen1996, @yc0802, @HesperusArcher, and @Emberwhirl, all of whom took time from their busy schedules to translate and review others' translations. Especial thanks goes to @zhiiiyang and @hongyuanjia who went above and beyond in helping to push the project over the finish line, and to @GuangchuangYu who helped to organize the volunteer pool. + + `data.table` joins `lubridate` and `nlme` as the only of the top 200 most-downloaded community packages on CRAN to offer non-English messaging, and is the only of the top 50 packages to offer complete support of all messaging. We hope this is a first step in broadening the reach and accessibility of the R ecosystem to more users globally and look forward to working with other maintainers looking to bolster the portability of their packages by offering advice on learnings from this undertaking. + + We would be remiss not to mention the laudable lengths to which the R core team goes to maintain the _much_ larger repository (about 6,000 messages in more than 10 languages) of translations for R itself. + + We will evaluate the feasibility (in terms of maintenance difficulty and CRAN package size limits) of offering support for other languages in later releases. + +4. 
`fifelse` and `fcase` now notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. + +5. `frank(..., ties.method="random", na.last=NA)` now returns the same random ordering that `base::rank` does, [#4243](https://github.com/Rdatatable/data.table/pull/4243). + +6. The error message when mistakenly using `:=` in `i` instead of `j` has been much improved, [#4227](https://github.com/Rdatatable/data.table/issues/4227). Thanks to Hugh Parsonage for the detailed suggestion. + + ```R + > DT = data.table(A=1:2) + > DT[B:=3] + Error: Operator := detected in i, the first argument inside DT[...], but is only valid in + the second argument, j. Most often, this happens when forgetting the first comma + (e.g. DT[newvar:=5] instead of DT[, new_var:=5]). Please double-check the + syntax. Run traceback(), and debugger() to get a line number. + > DT[, B:=3] + > DT + A B + + 1: 1 3 + 2: 2 3 + ``` + +7. Added more explanation/examples to `?data.table` for how to use `.BY`, [#1363](https://github.com/Rdatatable/data.table/issues/1363). + +8. Changes upstream in R have been accommodated; e.g. `c.POSIXct` now raises `'origin' must be supplied` which impacted `foverlaps`, [#4428](https://github.com/Rdatatable/data.table/pull/4428). + +9. `data.table::update.dev.pkg()` now unloads the `data.table` namespace to alleviate a DLL lock issue on Windows, [#4403](https://github.com/Rdatatable/data.table/issues/4403). Thanks to @drag5 for reporting. + +10. `data.table` package binaries built by R version 3 (R3) should only be installed in R3, and similarly `data.table` package binaries built by R4 should only be installed in R4. Otherwise, `package ‘data.table’ was built under R version...` warning will occur which should not be ignored.
This is due to a very welcome change to `rbind` and `cbind` in R 4.0.0 which enabled us to remove workarounds, see news item in v1.12.6 below in this file. To continue to support both R3 and R4, `data.table`'s NAMESPACE file contains a condition on the R major version (3 or 4) and this is what gives rise to the requirement that the major version used to build `data.table` must match the major version used to install it. Thanks to @vinhdizzo for reporting, [#4528](https://github.com/Rdatatable/data.table/issues/4528). + +11. Internal function `shallow()` no longer makes a deep copy of secondary indices. This eliminates a relatively small time and memory overhead when indices are present that added up significantly when performing many operations, such as joins, in a loop or when joining in `j` by group, [#4311](https://github.com/Rdatatable/data.table/issues/4311). Many thanks to @renkun-ken for the report, and @tlapak for the investigation and PR. + +12. The `datatable.old.unique.by.key` option has been removed as per the 4 year schedule detailed in note 10 of v1.12.4 (Oct 2019), note 10 of v1.11.0 (May 2018), and note 1 of v1.9.8 (Nov 2016). It has been generating a helpful warning for 2 years, and helpful error for 1 year. + + +# data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) + +## NEW FEATURES + +1. `DT[, {...; .(A,B)}]` (i.e. when `.()` is the final item of a multi-statement `{...}`) now auto-names the columns `A` and `B` (just like `DT[, .(A,B)]`) rather than `V1` and `V2`, [#2478](https://github.com/Rdatatable/data.table/issues/2478) [#609](https://github.com/Rdatatable/data.table/issues/609). Similarly, `DT[, if (.N>1) .(B), by=A]` now auto-names the column `B` rather than `V1`. Explicit names are unaffected; e.g. `DT[, {... y= ...; .(A=C+y)}, by=...]` named the column `A` before, and still does. 
Thanks also to @renkun-ken for his go-first strong testing which caught an issue not caught by the test suite or by revdep testing, related to NULL being the last item, [#4061](https://github.com/Rdatatable/data.table/issues/4061). + +## BUG FIXES + +1. `frollapply` could segfault and exceed R's C protect limits, [#3993](https://github.com/Rdatatable/data.table/issues/3993). Thanks to @DavisVaughan for reporting and fixing. + +2. `DT[, sum(grp), by=grp]` (i.e. aggregating the same column being grouped) could error with `object 'grp' not found`, [#3103](https://github.com/Rdatatable/data.table/issues/3103). Thanks to @cbailiss for reporting. + +## NOTES + +1. Links in the manual were creating warnings when installing HTML, [#4000](https://github.com/Rdatatable/data.table/issues/4000). Thanks to Morgan Jacob. + +2. Adjustments for R-devel (R 4.0.0) which now has reference counting turned on, [#4058](https://github.com/Rdatatable/data.table/issues/4058) [#4093](https://github.com/Rdatatable/data.table/issues/4093). This motivated early release to CRAN because every day CRAN tests every package using the previous day's changes in R-devel; a much valued feature of the R ecosystem. It helps R-core if packages can pass changes in R-devel as soon as possible. Thanks to Luke Tierney for the notice, and for implementing reference counting which we look forward to very much. + +3. C internals have been standardized to use `PRI[u|d]64` to print `[u]int64_t`. This solves new warnings from `gcc-8` on Windows with `%lld`, [#4062](https://github.com/Rdatatable/data.table/issues/4062), in many cases already working around `snprintf` on Windows not supporting `%zu`. Release procedures have been augmented to prevent any internal use of `llu`, `lld`, `zu` or `zd`. + +4. `test.data.table()` gains `showProgress=interactive()` to suppress the thousands of `Running test id ...` lines displayed by CRAN checks when there are warnings or errors. 
+ + +# data.table [v1.12.6](https://github.com/Rdatatable/data.table/milestone/18?closed=1) (18 Oct 2019) + +## BUG FIXES + +1. `shift()` on a `nanotime` with the default `fill=NA` now fills a `nanotime` missing value correctly, [#3945](https://github.com/Rdatatable/data.table/issues/3945). Thanks to @mschubmehl for reporting and fixing in PR [#3942](https://github.com/Rdatatable/data.table/pull/3942). + +2. Compilation failed on CRAN's MacOS due to an older version of `zlib.h/zconf.h` which did not have `z_const` defined, [#3939](https://github.com/Rdatatable/data.table/issues/3939). Other open-source projects unrelated to R have experienced this problem on MacOS too. We have followed the common practice of removing `z_const` to support the older `zlib` versions, and data.table's release procedures have gained a `grep` to ensure `z_const` isn't used again by accident in future. The library `zlib` is used for `fwrite`'s new feature of multithreaded compression on-the-fly; see item 3 of 1.12.4 below. + +3. A runtime error in `fwrite`'s compression, but only observed so far on Solaris 10 32bit with zlib 1.2.8 (Apr 2013), [#3931](https://github.com/Rdatatable/data.table/issues/3931): `Error -2: one or more threads failed to allocate buffers or there was a compression error.` In case it happens again, this area has been made more robust and the error more detailed. As is often the case, investigating the Solaris problem revealed secondary issues in the same area of the code. In this case, some `%d` in verbose output should have been `%lld`. This obliquity that CRAN's Solaris provides is greatly appreciated. + +4. A leak could occur in the event of an unsupported column type error, or if working memory could only partially be allocated; [#3940](https://github.com/Rdatatable/data.table/issues/3940). Found thanks to `clang`'s Leak Sanitizer (prompted by CRAN's diligent use of latest tools), and two tests in the test suite which tested the unsupported-type error. 
+ +## NOTES + +1. Many thanks to Kurt Hornik for fixing R's S3 dispatch of `rbind` and `cbind` methods, [#3948](https://github.com/Rdatatable/data.table/issues/3948). With `R>=4.0.0` (current R-devel), `data.table` now registers the S3 methods `cbind.data.table` and `rbind.data.table`, and no longer applies the workaround documented in FAQ 2.24. + + +# data.table [v1.12.4](https://github.com/Rdatatable/data.table/milestone/16?closed=1) (03 Oct 2019) + +## NEW FEATURES + +1. `rleid()` functions now support long vectors (length > 2 billion). + +2. `fread()`: + * now skips embedded `NUL` (`\0`), [#3400](https://github.com/Rdatatable/data.table/issues/3400). Thanks to Marcus Davy for reporting with examples, Roy Storey for the initial PR, and Bingjie Qian for testing this feature on a very complicated real-world file. + * `colClasses` now supports `'complex'`, `'raw'`, `'Date'`, `'POSIXct'`, and user-defined classes (so long as an `as.` method exists), [#491](https://github.com/Rdatatable/data.table/issues/491) [#1634](https://github.com/Rdatatable/data.table/issues/1634) [#2610](https://github.com/Rdatatable/data.table/issues/2610). Any error during coercion results in a warning and the column is left as the default type (probably `"character"`). Thanks to @hughparsonage for the PR. + * `stringsAsFactors=0.10` will factorize any character column containing under `0.10*nrow` unique strings, [#2025](https://github.com/Rdatatable/data.table/issues/2025). Thanks to @hughparsonage for the PR. + * `colClasses=list(numeric=20:30, numeric="ID")` will apply the `numeric` type to column numbers `20:30` as before and now also column name `"ID"`; i.e. all duplicate class names are now respected rather than only the first. This need may arise when specifying some columns by name and others by number, as in this example. Thanks to @hughparsonage for the PR. 
+ * gains `yaml` (default `FALSE`) and the ability to parse CSVY-formatted input files; i.e., csv files with metadata in a header formatted as YAML (https://csvy.org/), [#1701](https://github.com/Rdatatable/data.table/issues/1701). See `?fread` and files in `/inst/tests/csvy/` for sample formats. Please provide feedback if you find this feature useful and would like extended capabilities. For now, consider it experimental, meaning the API/arguments may change. Thanks to @leeper at [`rio`](https://github.com/gesistsa/rio) for the inspiration and @MichaelChirico for implementing. + * `select` can now be used to specify types for just the columns selected, [#1426](https://github.com/Rdatatable/data.table/issues/1426). Just like `colClasses` it can be a named vector of `colname=type` pairs, or a named `list` of `type=col(s)` pairs. For example: + + ```R + fread(file, select=c(colD="character", # returns 2 columns: colD,colA + colA="integer64")) + fread(file, select=list(character="colD", # returns 5 columns: colD,8,9,10,colA + integer= 8:10, + character="colA")) + ``` + * gains `tmpdir=` argument which is passed to `tempfile()` whenever a temporary file is needed. Thanks to @mschubmehl for the PR. As before, setting `TMPDIR` (to `/dev/shm` for example) before starting the R session still works too; see `?base::tempdir`. + +3. `fwrite()`: + * now writes compressed `.gz` files directly, [#2016](https://github.com/Rdatatable/data.table/issues/2016). Compression, like `fwrite()`, is multithreaded and compresses each chunk on-the-fly (a full size intermediate file is not created). Use a ".gz" extension, or the new `compress=` option. Many thanks to Philippe Chataignon for the significant PR. For example: + + ```R + DT = data.table(A=rep(1:2, 100e6), B=rep(1:4, 50e6)) + fwrite(DT, "data.csv") # 763MB; 1.3s + fwrite(DT, "data.csv.gz") # 2MB; 1.6s + identical(fread("data.csv.gz"), DT) + ``` + + Note that compression is handled using `zlib` library. 
In the unlikely event of missing `zlib.h`, on a machine that is compiling `data.table` from sources, one may get `fwrite.c` compilation error `zlib.h: No such file or directory`. As of now, the easiest solution is to install missing library using `sudo apt install zlib1g-dev` (Debian/Ubuntu). Installing R (`r-base-dev`) depends on `zlib1g-dev` so this should be rather uncommon. If it happens to you please upvote related issue [#3872](https://github.com/Rdatatable/data.table/issues/3872). + + * Gains `yaml` argument matching that of `fread`, [#3534](https://github.com/Rdatatable/data.table/issues/3534). See the item in `fread` for a bit more detail; here, we'd like to reiterate that feedback is appreciated in the initial phase of rollout for this feature. + + * Gains `bom` argument to add a *byte order mark* (BOM) at the beginning of the file to signal that the file is encoded in UTF-8, [#3488](https://github.com/Rdatatable/data.table/issues/3488). Thanks to Stefan Fleck for requesting and Philippe Chataignon for implementing. + + * Now supports type `complex`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). + + * Gains `scipen` [#2020](https://github.com/Rdatatable/data.table/issues/2020), the number 1 most-requested feature [#3189](https://github.com/Rdatatable/data.table/issues/3189). The default is `getOption("scipen")` so that `fwrite` will now respect R's option in the same way as `base::write.csv` and `base::format`, as expected. The parameter and option name have been kept the same as base R's `scipen` for consistency and to aid online search. It stands for 'scientific penalty'; i.e., the number of characters to add to the width within which non-scientific number format is used if it will fit. A high penalty essentially turns off scientific format. 
We believe that common practice is to use a value of 999, however, if you do use 999, because your data _might_ include very long numbers such as `10^300`, `fwrite` needs to account for the worst case field width in its buffer allocation per thread. This may impact space or time. If you experience slowdowns or unacceptable memory usage, please pass `verbose=TRUE` to `fwrite`, inspect the output, and report the issue. A workaround, until we can determine the best strategy, may be to pass a smaller value to `scipen`, such as 50. We have observed that `fwrite(DT, scipen=50)` appears to write `10^50` accurately, unlike base R. However, this may be a happy accident and not apply generally. Further work may be needed in this area. + + ```R + DT = data.table(a=0.0001, b=1000000) + fwrite(DT) + # a,b + # 1e-04,1e+06 + fwrite(DT,scipen=1) + # a,b + # 0.0001,1e+06 + fwrite(DT,scipen=2) + # a,b + # 0.0001,1000000 + + 10^50 + # [1] 1e+50 + options(scipen=50) + 10^50 + # [1] 100000000000000007629769841091887003294964970946560 + fwrite(data.table(A=10^50)) + # A + # 100000000000000000000000000000000000000000000000000 + ``` + +4. Assigning to one item of a list column no longer requires the RHS to be wrapped with `list` or `.()`, [#950](https://github.com/Rdatatable/data.table/issues/950). + + ```R + > DT = data.table(A=1:3, B=list(1:2,"foo",3:5)) + > DT + A B + + 1: 1 1,2 + 2: 2 foo + 3: 3 3,4,5 + > + # The following all accomplish the same assignment: + > DT[2, B:=letters[9:13]] # was error, now works + > DT[2, B:=.(letters[9:13])] # was error, now works + > DT[2, B:=.(list(letters[9:13]))] # .(list()) was needed, still works + > DT + A B + + 1: 1 1,2 + 2: 2 i,j,k,l,m + 3: 3 3,4,5 + ``` + +5. `print.data.table()` gains an option to display the timezone of `POSIXct` columns when available, [#2842](https://github.com/Rdatatable/data.table/issues/2842). Thanks to Michael Chirico for reporting and Felipe Parages for the PR. + +6. 
New functions `nafill` and `setnafill`, [#854](https://github.com/Rdatatable/data.table/issues/854). Thanks to Matthieu Gomez for the request and Jan Gorecki for implementing. + + ```R + DT = setDT(lapply(1:100, function(i) sample(c(rnorm(9e6), rep(NA_real_, 1e6))))) + format(object.size(DT), units="GB") ## 7.5 Gb + zoo::na.locf(DT, na.rm=FALSE) ## zoo 53.518s + setDTthreads(1L) + nafill(DT, "locf") ## DT 1 thread 7.562s + setDTthreads(0L) + nafill(DT, "locf") ## DT 40 threads 0.605s + setnafill(DT, "locf") ## DT in-place 0.367s + ``` + +7. New variable `.Last.updated` (similar to R's `.Last.value`) contains the number of rows affected by the most recent `:=` or `set()`, [#1885](https://github.com/Rdatatable/data.table/issues/1885). For details see `?.Last.updated`. + +8. `between()` and `%between%` are faster for `POSIXct`, [#3519](https://github.com/Rdatatable/data.table/issues/3519), and now support the `.()` alias, [#2315](https://github.com/Rdatatable/data.table/issues/2315). Thanks to @Henrik-P for the reports. There is now also support for `bit64`'s `integer64` class and more robust coercion of types, [#3517](https://github.com/Rdatatable/data.table/issues/3517). `between()` gains `check=` which checks `any(lower>upper)`; off by default for speed in particular for type character. + +9. New convenience functions `%ilike%` and `%flike%` which map to new `like()` arguments `ignore.case` and `fixed` respectively, [#3333](https://github.com/Rdatatable/data.table/issues/3333). `%ilike%` is for case-insensitive pattern matching. `%flike%` is for more efficient matching of fixed strings. Thanks to @andreasLD for providing most of the core code. + +10. `on=.NATURAL` (or alternatively `X[on=Y]` [#3621](https://github.com/Rdatatable/data.table/issues/3621)) joins two tables on their common column names, so called _natural join_, [#629](https://github.com/Rdatatable/data.table/issues/629). Thanks to David Kulp for request. 
As before, when `on=` is not provided, `X` must have a key and the key columns are used to join (like rownames, but multi-column and multi-type). + +11. `as.data.table` gains `key` argument mirroring its use in `setDT` and `data.table`, [#890](https://github.com/Rdatatable/data.table/issues/890). As a byproduct, the arguments of `as.data.table.array` have changed order, which could affect code relying on positional arguments to this method. Thanks @cooldome for the suggestion and @MichaelChirico for implementation. + +12. `merge.data.table` is now exported, [#2618](https://github.com/Rdatatable/data.table/pull/2618). We realize that S3 methods should not ordinarily be exported. Rather, the method should be invoked via S3 dispatch. But users continue to request its export, perhaps because of intricacies relating to the fact that data.table inherits from data.frame, there are two arguments to `merge()` but S3 dispatch applies just to the first, and a desire to explicitly call `data.table::merge.data.table` from package code. Thanks to @AndreMikulec for the most recent request. + +13. New rolling function to calculate rolling sum has been implemented and exported, see `?frollsum`, [#2778](https://github.com/Rdatatable/data.table/issues/2778). + +14. `setkey` to an existing index now uses the index, [#2889](https://github.com/Rdatatable/data.table/issues/2889). Thanks to @MichaelChirico for suggesting and @saraswatmks for the PR. + +15. `DT[order(col)[1:5], ...]` (i.e. where `i` is a compound expression involving `order()`) is now optimized to use `data.table`'s multithreaded `forder`, [#1921](https://github.com/Rdatatable/data.table/issues/1921). This example is not a fully optimal top-N query since the full ordering is still computed. The improvement is that the call to `order()` is computed faster for any `i` expression using `order`. + +16. `as.data.table` now unpacks columns in a `data.frame` which are themselves a `data.frame` or `matrix`. 
This need arises when parsing JSON, a corollary in [#3369](https://github.com/Rdatatable/data.table/issues/3369#issuecomment-462662752). Bug fix 19 in v1.12.2 (see below) added a helpful error (rather than segfault) to detect such invalid `data.table`, and promised that `as.data.table()` would unpack these columns in the next release (i.e. this release) so that the invalid `data.table` is not created in the first place. Further, `setDT` now warns if it observes such columns and suggests using `as.data.table` instead, [#3760](https://github.com/Rdatatable/data.table/issues/3760). + +17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit, but, as in many `data.table` operations, factors benefit more than character. + + ```R + # default 4 threads on a laptop with 16GB RAM and 8 logical CPU + + ids = as.vector(outer(LETTERS, LETTERS, paste0)) + system.time( CJ(ids, 1:500000) ) # 3.9GB; 340m rows + # user system elapsed (seconds) + # 3.000 0.817 3.798 # was + # 1.800 0.832 2.190 # now + + # ids = as.factor(ids) + system.time( CJ(ids, 1:500000) ) # 2.6GB; 340m rows + # user system elapsed (seconds) + # 1.779 0.534 2.293 # was + # 0.357 0.763 0.292 # now + ``` + +18. New function `fcoalesce(...)` has been written in C, and is multithreaded for `numeric` and `factor`. It replaces missing values according to a prioritized list of candidates (as per SQL COALESCE, `dplyr::coalesce`, and `hutils::coalesce`), [#3424](https://github.com/Rdatatable/data.table/issues/3424). It accepts any number of vectors in several forms. For example, given three vectors `x`, `y`, and `z`, where each `NA` in `x` is to be replaced by the corresponding value in `y` if that is non-NA, else the corresponding value in `z`, the following equivalent forms are all accepted: `fcoalesce(x,y,z)`, `fcoalesce(x,list(y,z))`, and `fcoalesce(list(x,y,z))`. 
Being a new function, its behaviour is subject to change particularly for type `list`, [#3712](https://github.com/Rdatatable/data.table/issues/3712). + + ```R + # default 4 threads on a laptop with 16GB RAM and 8 logical CPU + N = 100e6 + x = replicate(5, {x=sample(N); x[sample(N, N/2)]=NA; x}, simplify=FALSE) # 2GB + y1 = do.call(dplyr::coalesce, x) + y2 = do.call(hutils::coalesce, x) + y3 = do.call(data.table::fcoalesce, x) + # user system elapsed (seconds) + # 4.935 1.876 6.810 # dplyr::coalesce + # 3.122 0.831 3.956 # hutils::coalesce + # 0.915 0.099 0.379 # data.table::fcoalesce + identical(y1,y2) && identical(y1,y3) + # TRUE + ``` + +19. Type `complex` is now supported by `setkey`, `setorder`, `:=`, `by=`, `keyby=`, `shift`, `dcast`, `frank`, `rowid`, `rleid`, `CJ`, `fcoalesce`, `unique`, and `uniqueN`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). Thanks to Gareth Ward and Elio Campitelli for their reports and input. Sorting `complex` is achieved the same way as base R; i.e., first by the real part then by the imaginary part (as if the `complex` column were two separate columns of `double`). There is no plan to support joining/merging on `complex` columns until a user demonstrates a need for that. + +20. `setkey`, `[key]by=` and `on=` in verbose mode (`options(datatable.verbose=TRUE)`) now detect any columns inheriting from `Date` which are stored as 8 byte double, test if any fractions are present, and if not suggest using a 4 byte integer instead (such as `data.table::IDate`) to save space and time, [#1738](https://github.com/Rdatatable/data.table/issues/1738). In future this could be upgraded to `message` or `warning` depending on feedback. + +21. New function `fifelse(test, yes, no, na)` has been implemented in C by Morgan Jacob, [#3657](https://github.com/Rdatatable/data.table/issues/3657) and [#3753](https://github.com/Rdatatable/data.table/issues/3753).
It is comparable to `base::ifelse`, `dplyr::if_else`, `hutils::if_else`, and (forthcoming) [`vctrs::if_else()`](https://vctrs.r-lib.org/articles/stability.html#ifelse). It returns a vector of the same length as `test` but unlike `base::ifelse` the output type is consistent with those of `yes` and `no`. Please see `?data.table::fifelse` for more details. + + ```R + # default 4 threads on a laptop with 16GB RAM and 8 logical CPU + x = sample(c(TRUE,FALSE), 3e8, replace=TRUE) # 1GB + microbenchmark::microbenchmark( + base::ifelse(x, 7L, 11L), + dplyr::if_else(x, 7L, 11L), + hutils::if_else(x, 7L, 11L), + data.table::fifelse(x, 7L, 11L), + times = 5L, unit="s" + ) + # Unit: seconds + # expr min med max neval + # base::ifelse(x, 7L, 11L) 8.5 8.6 8.8 5 + # dplyr::if_else(x, 7L, 11L) 9.4 9.5 9.7 5 + # hutils::if_else(x, 7L, 11L) 2.6 2.6 2.7 5 + # data.table::fifelse(x, 7L, 11L) 1.5 1.5 1.6 5 # setDTthreads(1) + # data.table::fifelse(x, 7L, 11L) 0.8 0.8 0.9 5 # setDTthreads(2) + # data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4) + ``` + +22. `transpose` gains `keep.names=` and `make.names=` arguments, [#1886](https://github.com/Rdatatable/data.table/issues/1886). Previously, column names were dropped and there was no way to keep them. `keep.names="rn"` keeps the column names and puts them in the `"rn"` column of the result. Similarly, `make.names="rn"` uses column `"rn"` as the column names of the result. Both arguments are `NULL` by default for backwards compatibility. As these arguments are new, they are subject to change in future according to community feedback. Thanks to @ghost for the request. + +23. Added a `data.table` method for `utils::edit` to ensure a `data.table` is returned, for convenience, [#593](https://github.com/Rdatatable/data.table/issues/593). + +24. More efficient optimization of many columns in `j` (e.g. from `.SD`), [#1470](https://github.com/Rdatatable/data.table/issues/1470). Thanks @Jorges1000 for the report. + +25.
`setnames(DT, old, new)` now omits any `old==new` to save redundant key and index name updates, [#3783](https://github.com/Rdatatable/data.table/issues/3783). `setnames(DT, new)` (i.e. not providing `old`) already omitted any column name updates where `names(DT)==new`; e.g. `setnames(DT, gsub('^_', '', names(DT)))` exits early if no columns start with `_`. + +26. `[[` by group is now optimized for regular vectors (not type list), [#3209](https://github.com/Rdatatable/data.table/issues/3209). Thanks @renkun-ken for the suggestion. `[` by group was already optimized. Please file a feature request if you would like this optimization for list columns. + +27. New function `frollapply` for rolling computation of arbitrary R functions (caveat: input `x` is coerced to numeric beforehand, and the function must return a scalar numeric value). The API is consistent with the extant rolling functions `frollmean` and `frollsum`; note that it will generally be slower than those functions because (1) the known functions use our optimized internal C implementation and (2) there is no thread-safe API to R's C `eval`. Nevertheless `frollapply` is faster than corresponding `base`-only and `zoo` versions: + + ```R + set.seed(108) + x = rnorm(1e6); n = 1e3 + base_rollapply = function(x, n, FUN) { + nx = length(x) + ans = rep(NA_real_, nx) + for (i in n:nx) ans[i] = FUN(x[(i-n+1):i]) + ans + } + system.time(base_rollapply(x, n, mean)) + system.time(zoo::rollapplyr(x, n, function(x) mean(x), fill=NA)) + system.time(zoo::rollmeanr(x, n, fill=NA)) + system.time(frollapply(x, n, mean)) + system.time(frollmean(x, n)) + + ### fun mean sum median + # base_rollapply 8.815 5.151 60.175 + # zoo::rollapply 34.373 27.837 88.552 + # zoo::roll[fun] 0.215 0.185 NA ## median not fully supported + # frollapply 5.404 1.419 56.475 + # froll[fun] 0.003 0.002 NA ## median not yet supported + ``` + +28.
`setnames()` now accepts functions in `old=` and `new=`, [#3703](https://github.com/Rdatatable/data.table/issues/3703). Thanks @smingerson for the feature request and @shrektan for the PR. + + ```R + DT = data.table(a=1:3, b=4:6, c=7:9) + setnames(DT, toupper) + names(DT) + # [1] "A" "B" "C" + setnames(DT, c(1,3), tolower) + names(DT) + # [1] "a" "B" "c" + ``` + +29. `:=` and `set()` now use zero-copy type coercion. Accordingly, `DT[..., integerColumn:=0]` and `set(DT,i,j,0)` no longer warn about the `0` ('numeric') needing to be `0L` ('integer') because there is no longer any time or space used for this coercion. The old long warning was off-putting to new users ("what and why L?"), whereas advanced users appreciated the old warning so they could avoid the coercion. Although the time and space for one coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose is low-overhead looping). Further, when assigning a value across columns of varying types, it could be inconvenient to supply the correct type for every column. Hence, zero-copy coercion was introduced to satisfy all these requirements. A warning is still issued, as before, when fractional data is discarded; e.g. when 3.14 is assigned to an integer column. Zero-copy coercion applies to length>1 vectors as well as length-1 vectors. + +## BUG FIXES + +1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. + +2. `keyby=colName` could use the wrong index and return incorrect results if both `colName` and `colNameExtra` (where `colName` is a leading subset of characters of `colNameExtra`) are column names and an index exists on `colNameExtra`, [#3498](https://github.com/Rdatatable/data.table/issues/3498). 
Thanks to Xianying Tan for the detailed report and pinpointing the source line at fault. + +3. A missing item in `j` such as `j=.(colA, )` now gives a helpful error (`Item 2 of the .() or list() passed to j is missing`) rather than the unhelpful error `argument "this_jsub" is missing, with no default` (v1.12.2) or `argument 2 is empty` (v1.12.0 and before), [#3507](https://github.com/Rdatatable/data.table/issues/3507). Thanks to @eddelbuettel for the report. + +4. `fwrite()` could crash when writing very long strings such as 30 million characters, [#2974](https://github.com/Rdatatable/data.table/issues/2974), and could be unstable in memory constrained environments, [#2612](https://github.com/Rdatatable/data.table/issues/2612). Thanks to @logworthy and @zachokeeffe for reporting and Philippe Chataignon for fixing in PR [#3288](https://github.com/Rdatatable/data.table/pull/3288). + +5. `fread()` could crash if `quote=""` (i.e. ignore quotes), the last line is too short, and `fill=TRUE`, [#3524](https://github.com/Rdatatable/data.table/pull/3524). Thanks to Jiucang Hao for the report and reproducible example. + +6. Printing could occur unexpectedly when code is run with `source`, [#2369](https://github.com/Rdatatable/data.table/issues/2369). Thanks to @jan-glx for the report and reproducible example. + +7. Grouping by `NULL` on a zero-row `data.table` now behaves consistently with a non-zero-row `data.table`, [#3530](https://github.com/Rdatatable/data.table/issues/3530). Thanks to @SymbolixAU for the report and reproducible example. + +8. GForce optimization of `median` did not retain the class; e.g. `median` of `Date` or `POSIXct` would return a raw number rather than retain the date class, [#3079](https://github.com/Rdatatable/data.table/issues/3079). Thanks to @Henrik-P for reporting. + +9. `DT[, format(mean(date),"%b-%Y"), by=group]` could fail with `invalid 'trim' argument`, [#1876](https://github.com/Rdatatable/data.table/issues/1876).
Thanks to Ross Holmberg for reporting. + +10. `externalVar=1:5; DT[, mean(externalVar), by=group]` could return incorrect results rather than a constant (`3` in this example) for each group, [#875](https://github.com/Rdatatable/data.table/issues/875). GForce optimization was being applied incorrectly to the `mean` without realizing `externalVar` was not a column. + +11. `test.data.table()` now passes in non-English R sessions, [#630](https://github.com/Rdatatable/data.table/issues/630) [#3039](https://github.com/Rdatatable/data.table/issues/3039). Each test still checks that the number of warnings and/or errors produced is correct. However, a message is displayed suggesting to restart R with `LANGUAGE=en` in order to test that the text of the warning and/or error messages are as expected, too. + +12. Joining a double column in `i` containing say 1.3, with an integer column in `x` containing say 1, would result in the 1.3 matching to 1, [#2592](https://github.com/Rdatatable/data.table/issues/2592), and joining a factor column to an integer column would match the factor's integers rather than error. The type coercion logic has been revised and strengthened. Many thanks to @MarkusBonsch for reporting and fixing. Joining a character column in `i` to a factor column in `x` is now faster and retains the character column in the result rather than coercing it to factor. Joining an integer column in `i` to a double column in `x` now retains the integer type in the result rather than coercing the integers into the double type. Logical columns may now only be joined to logical columns, other than all-NA columns which are coerced to the matching column's type. All coercions are reported in verbose mode: `options(datatable.verbose=TRUE)`. + +13. 
Attempting to recycle 2 or more items into an existing `list` column now gives the intended helpful error rather than `Internal error: recycle length error not caught earlier.`, [#3543](https://github.com/Rdatatable/data.table/issues/3543). Thanks to @MichaelChirico for finding and reporting. + +14. Subassigning using `$<-` to a `data.table` embedded in a list column of a single-row `data.table` could fail, [#3474](https://github.com/Rdatatable/data.table/issues/3474). Note that `$<-` is not recommended; please use `:=` instead which already worked in this case. Thanks to Jakob Richter for reporting. + +15. `rbind` and `rbindlist` of zero-row items now retain (again) the unused levels of any (zero-length) factor columns, [#3508](https://github.com/Rdatatable/data.table/issues/3508). This was a regression in v1.12.2 just for zero-row items. Unused factor levels were already retained for items having `nrow>=1`. Thanks to Gregory Demin for reporting. + +16. `rbind` and `rbindlist` of an item containing an ordered factor with levels containing an `NA` (as opposed to an NA integer) could segfault, [#3601](https://github.com/Rdatatable/data.table/issues/3601). This was a regression in v1.12.2. Thanks to Damian Betebenner for reporting. Also a related segfault when recycling a length-1 factor column, [#3662](https://github.com/Rdatatable/data.table/issues/3662). + +17. `example(":=", local=TRUE)` now works rather than error, [#2972](https://github.com/Rdatatable/data.table/issues/2972). Thanks @vlulla for the report. + +18. `rbind.data.frame` on `IDate` columns changed the column from `integer` to `double`, [#2008](https://github.com/Rdatatable/data.table/issues/2008). Thanks to @rmcgehee for reporting. + +19. `merge.data.table` now retains any custom classes of the first argument, [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening. + +20.
`c`, `seq` and `mean` of `ITime` objects now retain the `ITime` class via new `ITime` methods, [#3628](https://github.com/Rdatatable/data.table/issues/3628). Thanks @UweBlock for reporting. The `cut` and `split` methods for `ITime` have been removed since the default methods work, [#3630](https://github.com/Rdatatable/data.table/pull/3630). + +21. `as.data.table.array` now handles the case when some of the array's dimension names are `NULL`, [#3636](https://github.com/Rdatatable/data.table/issues/3636). + +22. Adding a `list` column using `cbind`, `as.data.table`, or `data.table` now works rather than treating the `list` as if it were a set of columns and introducing an invalid NA column name, [#3471](https://github.com/Rdatatable/data.table/pull/3471). However, please note that using `:=` to add columns is preferred. + + ```R + cbind( data.table(1:2), list(c("a","b"),"a") ) + # V1 V2 NA # v1.12.2 and before + # + # 1: 1 a a + # 2: 2 b a + # + # V1 V2 # v1.12.4+ + # + # 1: 1 a,b + # 2: 2 a + ``` + +23. Incorrect sorting/grouping results due to a bug in Intel's `icc` compiler 2019 (Version 19.0.4.243 Build 20190416) has been worked around thanks to a report and fix by Sebastian Freundt, [#3647](https://github.com/Rdatatable/data.table/issues/3647). Please run `data.table::test.data.table()`. If that passes, your installation does not have the problem. + +24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. + +25. Slight fix to the logic for auto-naming the `by` clause for using a custom function like `evaluate` to now be named `evaluate` instead of the name of the first symbolic argument, [#3758](https://github.com/Rdatatable/data.table/issues/3758). + +26. Column binding of zero column `data.table` will now work as expected, [#3334](https://github.com/Rdatatable/data.table/issues/3334). Thanks to @kzenstratus for the report. + +27. 
`integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). Thanks to @mlandry22-h2o for the report. + +28. From v1.12.0 `between()` and `%between%` interpret missing values in `lower=` or `upper=` as unlimited bounds. A new parameter `NAbounds` has been added to achieve the old behaviour of returning `NA`, [#3522](https://github.com/Rdatatable/data.table/issues/3522). Thanks @cguill95 for reporting. This is now consistent for character input, [#3667](https://github.com/Rdatatable/data.table/issues/3667) (thanks @AnonymousBoba), and class `nanotime` is now supported too. + +29. `integer64` defined on a subset of a new column would leave "gibberish" on the remaining rows, [#3723](https://github.com/Rdatatable/data.table/issues/3723). A bug in `rbindlist` with the same root cause was also fixed, [#1459](https://github.com/Rdatatable/data.table/issues/1459). Thanks @shrektan and @jangorecki for the reports. + +30. `groupingsets` functions now properly handle alone special symbols when using an empty set to group by, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report. + +31. A `data.table` created using `setDT()` on a `data.frame` containing identical columns referencing each other would cause `setkey()` to return incorrect results, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). Thanks @kirillmayantsev and @alex46015 for reporting, and @jaapwalhout and @Atrebas for helping to debug and isolate the issue. + +32. `x[, round(.SD, 1)]` and similar operations on the whole of `.SD` could return a locked result, incorrectly preventing `:=` on the result, [#2245](https://github.com/Rdatatable/data.table/issues/2245). Thanks @grayskripko for raising. + +33. 
Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), and [#2036](https://github.com/Rdatatable/data.table/issues/2036). Thanks @franknarf1, @MichaelChirico, and @TonyBonen, for the reports. + +34. `DT[, i-1L, with=FALSE]` would misinterpret the minus sign and return an incorrect result, [#2019](https://github.com/Rdatatable/data.table/issues/2109). Thanks @cguill95 for the report. + +35. `DT[id==1, DT2[.SD, on="id"]]` (i.e. joining from `.SD` in `j`) could incorrectly fail in some cases due to `.SD` being locked, [#1926](https://github.com/Rdatatable/data.table/issues/1926), and when updating-on-join with factors [#3559](https://github.com/Rdatatable/data.table/issues/3559) [#2099](https://github.com/Rdatatable/data.table/issues/2099). Thanks @franknarf1 and @Henrik-P for the reports and for diligently tracking use cases for almost 3 years! + +36. `as.IDate.POSIXct` returned `NA` for UTC times before Dec 1901 and after Jan 2038, [#3780](https://github.com/Rdatatable/data.table/issues/3780). Thanks @gschett for the report. + +37. `rbindlist` now returns correct idcols for lists with different length vectors, [#3785](https://github.com/Rdatatable/data.table/issues/3785), [#3786](https://github.com/Rdatatable/data.table/pull/3786). Thanks to @shrektan for the report and fix. + +38. `DT[ , !rep(FALSE, ncol(DT)), with=FALSE]` correctly returns the full table, [#3013](https://github.com/Rdatatable/data.table/issues/3013) and [#2917](https://github.com/Rdatatable/data.table/issues/2917). Thanks @alexnss and @DavidArenburg for the reports. + +39. `shift(x, 0:1, type='lead', give.names=TRUE)` uses `lead` in all returned column names, [#3832](https://github.com/Rdatatable/data.table/issues/3832). Thanks @daynefiler for the report. + +40. 
Subtracting two `POSIXt` objects by group could lead to incorrect results because the `base` method internally calls `difftime` with `units='auto'`; `data.table` does not notice if the chosen units differ by group and only the last group's `units` attribute was retained, [#3694](https://github.com/Rdatatable/data.table/issues/3694) and [#761](https://github.com/Rdatatable/data.table/issues/761). To surmount this, we now internally force `units='secs'` on all `POSIXt-POSIXt` calls (reported when `verbose=TRUE`); generally we recommend calling `difftime` directly instead. Thanks @oliver-oliver and @boethian for the reports. + +41. Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), [#2036](https://github.com/Rdatatable/data.table/issues/2036), and [#2946](https://github.com/Rdatatable/data.table/issues/2946). Thanks @franknarf1, @MichaelChirico, @TonyBonen, and Steffen J. (StackOverflow) for the reports. + +42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report. + +43. `:=` could change a `data.table` creation statement in the body of the function calling it, or a variable in calling scope, [#3890](https://github.com/Rdatatable/data.table/issues/3890). Many thanks to @kirillmayantsev for the detailed reports. + +44. Grouping could create a `malformed factor` and/or segfault when the factors returned by each group did not have identical levels, [#2199](https://github.com/Rdatatable/data.table/issues/2199) and [#2522](https://github.com/Rdatatable/data.table/issues/2522). Thanks to Václav Hausenblas, @franknarf1, @ben519, and @Henrik-P for reporting. + +45. 
`rbindlist` (and printing a `data.table` with over 100 rows because that uses `rbindlist(head, tail)`) could error with `malformed factor` for unordered factor columns containing a used `NA_character_` level, [#3915](https://github.com/Rdatatable/data.table/issues/3915). This is an unusual input for unordered factors because NA_integer_ is recommended by default in R. Thanks to @sindribaldur for reporting. + +46. Adding a `list` column containing an item of type `list` to a one row `data.table` could fail, [#3626](https://github.com/Rdatatable/data.table/issues/3626). Thanks to Jakob Richter for reporting. + +## NOTES + +1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. + +2. Adding a new column by reference using `set()` on a `data.table` loaded from binary file now gives a more helpful error message, [#2996](https://github.com/Rdatatable/data.table/issues/2996). Thanks to Joseph Burling for reporting. + + ``` + This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed + manually (e.g. using structure()). Please run setDT() or alloc.col() on it first (to pre-allocate + space for new columns) before adding new columns by reference to it. + ``` + +3. `setorder` on a superset of a keyed `data.table`'s key now retains its key, [#3456](https://github.com/Rdatatable/data.table/issues/3456). For example, if `a` is the key of `DT`, `setorder(DT, a, -v)` will leave `DT` keyed by `a`. + +4. New option `options(datatable.quiet = TRUE)` turns off the package startup message, [#3489](https://github.com/Rdatatable/data.table/issues/3489). `suppressPackageStartupMessages()` continues to work too. Thanks to @leobarlach for the suggestion inspired by `options(tidyverse.quiet = TRUE)`.
We don't know of a way to make a package respect the `quietly=` option of `library()` and `require()` because the `quietly=` isn't passed through for use by the package's own `.onAttach`. If you can see how to do that, please submit a patch to R. + +5. When loading a `data.table` from disk (e.g. with `readRDS`), best practice is to run `setDT()` on the new object to assure it is correctly allocated memory for new column pointers. Barring this, unexpected behavior can follow; for example, if you assign a new column to `DT` from a function `f`, the new columns will only be assigned within `f` and `DT` will be unchanged. The `verbose` messaging in this situation is now more helpful, [#1729](https://github.com/Rdatatable/data.table/issues/1729). Thanks @vspinu for sharing his experience to spur this. + +6. New vignette _Using `.SD` for Data Analysis_, a deep dive into use cases for the `.SD` variable to help illuminate this topic which we've found to be a sticking point for beginning and intermediate `data.table` users, [#3412](https://github.com/Rdatatable/data.table/issues/3412). + +7. Added a note to `?frank` clarifying that ranking is being done according to C sorting (i.e., like `forder`), [#2328](https://github.com/Rdatatable/data.table/issues/2328). Thanks to @cguill95 for the request. + +8. Historically, `dcast` and `melt` were built as enhancements to `reshape2`'s own `dcast`/`melt`. We removed dependency on `reshape2` in v1.9.6 but maintained some backward compatibility. As that package has been superseded since December 2017, we will begin to formally complete the split from `reshape2` by removing some last vestiges. In particular we now warn when redirecting to `reshape2` methods and will later error before ultimately completing the split; see [#3549](https://github.com/Rdatatable/data.table/issues/3549) and [#3633](https://github.com/Rdatatable/data.table/issues/3633). 
We thank the `reshape2` authors for their original inspiration for these functions, and @ProfFancyPants for testing and reporting regressions in dev which have been fixed before release. + +9. `DT[col]` where `col` is a column containing row numbers of itself to select, now suggests the correct syntax (`DT[(col)]` or `DT[DT$col]`), [#697](https://github.com/Rdatatable/data.table/issues/697). This expands the message introduced in [#1884](https://github.com/Rdatatable/data.table/issues/1884) for the case where `col` is type `logical` and `DT[col==TRUE]` is suggested. + +10. The `datatable.old.unique.by.key` option has been warning for 1 year that it is deprecated: `... Please stop using it and pass by=key(DT) instead for clarity ...`. This warning is now upgraded to error as per the schedule in note 10 of v1.11.0 (May 2018), and note 1 of v1.9.8 (Nov 2016). In June 2020 the option will be removed. + +11. We intend to deprecate the `datatable.nomatch` option, [more info](https://github.com/Rdatatable/data.table/pull/3578/files). A message is now printed upon use of the option (once per session) as a first step. It asks you to please stop using the option and to pass `nomatch=NULL` explicitly if you require inner join. Outer join (`nomatch=NA`) has always been the default because it is safer; it does not drop missing data silently. The problem is that the option is global; i.e., if a user changes the default using this option for their own use, that can change the behavior of joins inside packages that use `data.table` too. This is the only `data.table` option with this concern. + +12. The test suite of 9k tests now runs with three R options on: `warnPartialMatchArgs`, `warnPartialMatchAttr`, and `warnPartialMatchDollar`. This ensures that we don't rely on partial argument matching in internal code, for robustness and efficiency, and so that users can turn these options on for their code in production, [#3664](https://github.com/Rdatatable/data.table/issues/3664). 
Thanks to Vijay Lulla for the suggestion, and Michael Chirico for fixing 48 internal calls to `attr()` which were missing `exact=TRUE`, for example. Thanks to R-core for adding these options to R 2.6.0 (Oct 2007). + +13. `test.data.table()` could fail if the `datatable.integer64` user option was set, [#3683](https://github.com/Rdatatable/data.table/issues/3683). Thanks @xiaguoxin for reporting. + +14. The warning message when using `keyby=` together with `:=` is clearer, [#2763](https://github.com/Rdatatable/data.table/issues/2763). Thanks to @eliocamp. + +15. `first` and `last` gain an explicit `n=1L` argument so that it's clear the default is 1, and their almost identical manual pages have been merged into one. + +16. Rolling functions (`?froll`) coerce `logical` input to `numeric` (instead of failing) to mimic the behavior of `integer` input. + +17. The warning message when using `strptime` in `j` has been improved, [#2068](https://github.com/Rdatatable/data.table/issues/2068). Thanks to @tdhock for the report. + +18. Added a note to `?setkey` clarifying that `setkey` always uses C-locale sorting (as has been noted in `?setorder`). Thanks @JBreidaks for the report in [#2114](https://github.com/Rdatatable/data.table/issues/2114). + +19. `hour()`/`minute()`/`second()` are much faster for `ITime` input, [#3518](https://github.com/Rdatatable/data.table/issues/3158). + +20. New alias `setalloccol` for `alloc.col`, [#3475](https://github.com/Rdatatable/data.table/issues/3475). For consistency with `set*` prefixes for functions that operate in-place (like `setkey`, `setorder`, etc.). `alloc.col` is not going to be deprecated but we recommend using `setalloccol`. + +21. `dcast` no longer emits a message when `value.var` is missing but `fun.aggregate` is explicitly set to `length` (since `value.var` is arbitrary in this case), [#2980](https://github.com/Rdatatable/data.table/issues/2980). + +22. 
Optimized `mean` of `integer` columns no longer warns about a coercion to numeric, [#986](https://github.com/Rdatatable/data.table/issues/986). Thanks @dgrtwo for his [YouTube tutorial at 3:01](https://youtu.be/AmE4LXPQErM?t=175) where the warning occurs. + +23. Using `first` and `last` function on `POSIXct` object no longer loads `xts` namespace, [#3857](https://github.com/Rdatatable/data.table/issues/3857). `first` on empty `data.table` returns empty `data.table` now [#3858](https://github.com/Rdatatable/data.table/issues/3858). + +24. Added some clarifying details about what happens when a shell command is used in `fread`, [#3877](https://github.com/Rdatatable/data.table/issues/3877). Thanks Brian for the StackOverflow question which highlighted the lack of explanation here. + +25. We continue to encourage packages to `Import` rather than `Depend` on `data.table`, [#3076](https://github.com/Rdatatable/data.table/issues/3076). To prevent the growth rate in new packages using `Depend`, we have requested that CRAN apply a small patch we provided to prevent new submissions using `Depend`. If this is accepted, the error under `--as-cran` will be as follows. The existing 73 packages using `Depend` will continue to pass OK until they next update, at which point they will be required to change from `Depend` to `Import`. + + ``` + R CMD check --as-cran + ... + * checking package dependencies ... ERROR + + data.table should be in Imports not Depends. Please contact its + maintainer for more information. + ``` + + +# data.table [v1.12.2](https://github.com/Rdatatable/data.table/milestone/14?closed=1) (07 Apr 2019) + +## NEW FEATURES + +1. `:=` no longer recycles length>1 RHS vectors. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). Consistent feedback for several years has been that recycling is more often a bug. 
In rare cases where you need to recycle a length>1 vector, please use `rep()` explicitly. Single values are still recycled silently as before. Early warning was given in [this tweet](https://twitter.com/MattDowle/status/1088544083499311104). The 774 CRAN and Bioconductor packages using `data.table` were tested and the maintainers of the 16 packages affected (2%) were consulted before going ahead, [#3310](https://github.com/Rdatatable/data.table/pull/3310). Upon agreement we went ahead. Many thanks to all those maintainers for already updating on CRAN, [#3347](https://github.com/Rdatatable/data.table/pull/3347). + +2. `foverlaps` now supports `type="equal"`, [#3416](https://github.com/Rdatatable/data.table/issues/3416) and part of [#3002](https://github.com/Rdatatable/data.table/issues/3002). + +3. The number of logical CPUs used by default has been reduced from 100% to 50%. The previous 100% default was reported to cause significant slow downs when other non-trivial processes were also running, [#3395](https://github.com/Rdatatable/data.table/issues/3395) [#3298](https://github.com/Rdatatable/data.table/issues/3298). Two new optional environment variables (`R_DATATABLE_NUM_PROCS_PERCENT` & `R_DATATABLE_NUM_THREADS`) control this default. `setDTthreads()` gains `percent=` and `?setDTthreads` has been significantly revised. The output of `getDTthreads(verbose=TRUE)` has been expanded. The environment variable `OMP_THREAD_LIMIT` is now respected ([#3300](https://github.com/Rdatatable/data.table/issues/3300)) in addition to `OMP_NUM_THREADS` as before. + +4. `rbind` and `rbindlist` now retain the position of duplicate column names rather than grouping them together [#3373](https://github.com/Rdatatable/data.table/issues/3373), fill length 0 columns (including NULL) with NA with warning [#1871](https://github.com/Rdatatable/data.table/issues/1871), and recycle length-1 columns [#524](https://github.com/Rdatatable/data.table/issues/524). 
Thanks to Kun Ren for the requests which arose when parsing JSON. + +5. `rbindlist`'s `use.names=` default has changed from `FALSE` to `"check"`. This emits a message if the column names of each item are not identical and then proceeds as if `use.names=FALSE` for backwards compatibility; i.e., bind by column position not by column name. The `rbind` method for `data.table` already sets `use.names=TRUE` so this change affects `rbindlist` only and not `rbind.data.table`. To stack differently named columns together silently (the previous default behavior of `rbindlist`), it is now necessary to specify `use.names=FALSE` for clarity to readers of your code. Thanks to Clayton Stanley who first raised the issue [here](https://lists.r-forge.r-project.org/pipermail/datatable-help/2014-April/002480.html). To aid pinpointing the calls to `rbindlist` that need attention, the message can be turned to error using `options(datatable.rbindlist.check="error")`. This option also accepts `"warning"`, `"message"` and `"none"`. In this release the message is suppressed for default column names (`"V[0-9]+"`); the next release will emit the message for those too. In 6 months the default will be upgraded from message to warning. There are two slightly different messages. They are helpful, include context and point to this news item : + + ``` + Column %d ['%s'] of item %d is missing in item %d. Use fill=TRUE to fill with + NA (NULL for list columns), or use.names=FALSE to ignore column names. + See news item 5 in v1.12.2 for options to control this message. + + Column %d ['%s'] of item %d appears in position %d in item %d. Set use.names=TRUE + to match by column name, or use.names=FALSE to ignore column names. + See news item 5 in v1.12.2 for options to control this message. + ``` + +6. `fread` gains `keepLeadingZeros`, [#2999](https://github.com/Rdatatable/data.table/issues/2999). 
By default `FALSE` so that, as before, a field containing `001` is interpreted as the integer 1, otherwise the character string `"001"`. The default may be changed using `options(datatable.keepLeadingZeros=TRUE)`. Many thanks to @marc-outins for the PR. + +## BUG FIXES + +1. `rbindlist()` of a malformed factor which is missing a levels attribute is now a helpful error rather than a cryptic error about `STRING_ELT`, [#3315](https://github.com/Rdatatable/data.table/issues/3315). Thanks to Michael Chirico for reporting. + +2. Forgetting `type=` in `shift(val, "lead")` would segfault, [#3354](https://github.com/Rdatatable/data.table/issues/3354). A helpful error is now produced to indicate `"lead"` is being passed to `n=` rather than the intended `type=` argument. Thanks to @SymbolixAU for reporting. + +3. The default print output (top 5 and bottom 5 rows) when ncol>255 could display the columns in the wrong order, [#3306](https://github.com/Rdatatable/data.table/issues/3306). Thanks to Kun Ren for reporting. + +4. Grouping by unusual column names such as `by='string_with_\\'` and `keyby="x y"` could fail, [#3319](https://github.com/Rdatatable/data.table/issues/3319) [#3378](https://github.com/Rdatatable/data.table/issues/3378). Thanks to @HughParsonage for reporting and @MichaelChirico for the fixes. + +5. `foverlaps()` could return incorrect results for `POSIXct <= 1970-01-01`, [#3349](https://github.com/Rdatatable/data.table/issues/3349). Thanks to @lux5 for reporting. + +6. `dcast.data.table` now handles functions passed to `fun.aggregate=` via a variable; e.g., `funs <- list(sum, mean); dcast(..., fun.aggregate=funs)`, [#1974](https://github.com/Rdatatable/data.table/issues/1974) [#1369](https://github.com/Rdatatable/data.table/issues/1369) [#2064](https://github.com/Rdatatable/data.table/issues/2064) [#2949](https://github.com/Rdatatable/data.table/issues/2949). Thanks to @sunbee, @Ping2016, @smidelius and @d0rg0ld for reporting. + +7. 
Some non-equijoin cases could segfault, [#3401](https://github.com/Rdatatable/data.table/issues/3401). Thanks to @Gayyam for reporting. + +8. `dcast.data.table` could sort rows containing `NA` incorrectly, [#2202](https://github.com/Rdatatable/data.table/issues/2202). Thanks to @Galileo-Galilei for the report. + +9. Sorting, grouping and finding unique values of a numeric column containing at most one finite value (such as `c(Inf,0,-Inf)`) could return incorrect results, [#3372](https://github.com/Rdatatable/data.table/issues/3372) [#3381](https://github.com/Rdatatable/data.table/issues/3381); e.g., `data.table(A=c(Inf,0,-Inf), V=1:3)[,sum(V),by=A]` would treat the 3 rows as one group. This was a regression in 1.12.0. Thanks to Nicolas Ampuero for reporting. + +10. `:=` with quoted expression and dot alias now works as expected, [#3425](https://github.com/Rdatatable/data.table/pull/3425). Thanks to @franknarf1 for raising and @jangorecki for the PR. + +11. A join's result could be incorrectly keyed when a single nomatch occurred at the very beginning while all other values matched, [#3441](https://github.com/Rdatatable/data.table/issues/3441). The incorrect key would cause incorrect results in subsequent queries. Thanks to @symbalex for reporting and @franknarf1 for pinpointing the root cause. + +12. `rbind` and `rbindlist(..., use.names=TRUE)` with over 255 columns could return the columns in a random order, [#3373](https://github.com/Rdatatable/data.table/issues/3373). The contents and name of each column were correct but the order that the columns appeared in the result might not have matched the original input. + +13. `rbind` and `rbindlist` now combine `integer64` columns together with non-`integer64` columns correctly [#1349](https://github.com/Rdatatable/data.table/issues/1349), and support `raw` columns [#2819](https://github.com/Rdatatable/data.table/issues/2819). + +14. 
`NULL` columns are caught and error appropriately rather than segfault in some cases, [#2303](https://github.com/Rdatatable/data.table/issues/2303) [#2305](https://github.com/Rdatatable/data.table/issues/2305). Thanks to Hugh Parsonage and @franknarf1 for reporting. + +15. `melt` would error with 'factor malformed' or segfault in the presence of duplicate column names, [#1754](https://github.com/Rdatatable/data.table/issues/1754). Many thanks to @franknarf1, William Marble, wligtenberg and Toby Dylan Hocking for reproducible examples. All examples have been added to the test suite. + +16. Removing a column from a null (0-column) data.table is now a (standard and simpler) warning rather than error, [#2335](https://github.com/Rdatatable/data.table/issues/2335). It is no longer an error to add a column to a null (0-column) data.table. + +17. Non-UTF8 strings were not always sorted correctly on Windows (a regression in v1.12.0), [#3397](https://github.com/Rdatatable/data.table/issues/3397) [#3451](https://github.com/Rdatatable/data.table/issues/3451). Many thanks to @shrektan for reporting and fixing. + +18. `cbind` with a null (0-column) `data.table` now works as expected, [#3445](https://github.com/Rdatatable/data.table/issues/3445). Thanks to @mb706 for reporting. + +19. Subsetting does a better job of catching a malformed `data.table` with error rather than segfault. A column may not be NULL, nor may a column be an object which has columns (such as a `data.frame` or `matrix`). Thanks to a comment and reproducible example in [#3369](https://github.com/Rdatatable/data.table/issues/3369) from Drew Abbot which demonstrated the issue which arose from parsing JSON. The next release will enable `as.data.table` to unpack columns which are `data.frame` to support this use case. + +## NOTES + +1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. 
We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background. + +2. v1.12.0 checked itself on loading using `tools::checkMD5sums("data.table")` but this check failed under the `packrat` package manager on Windows because `packrat` appears to modify the DESCRIPTION file of packages it has snapshot, [#3329](https://github.com/Rdatatable/data.table/issues/3329). This check is now removed. The `CdllVersion` check was introduced after the `checkMD5sums()` attempt and is better; e.g., reliable on all platforms. + +3. As promised in new feature 6 of v1.11.6 Sep 2018 (see below in this news file), the `datatable.CJ.names` option's default is now `TRUE`. In v1.13.0 it will be removed. + +4. Travis CI gains OSX using homebrew llvm for OpenMP support, [#3326](https://github.com/Rdatatable/data.table/issues/3326). Thanks @marcusklik for the PR. + +5. Calling `data.table:::print.data.table()` directly (i.e. bypassing method dispatch by using 3 colons) and passing it a 0-column `data.frame` (not `data.table`) now works, [#3363](https://github.com/Rdatatable/data.table/pull/3363). Thanks @heavywatal for the PR. + +6. v1.12.0 did not compile on Solaris 10 using Oracle Developer Studio 12.6, [#3285](https://github.com/Rdatatable/data.table/issues/3285). Many thanks to Prof Ripley for providing and testing a patch. For future reference and other package developers, a `const` variable should not be passed to OpenMP's `num_threads()` directive otherwise `left operand must be modifiable lvalue` occurs. This appears to be a compiler bug which is why the specific versions are mentioned in this note. + +7. `foverlaps` provides clearer error messages w.r.t. 
factor and POSIXct interval columns, [#2645](https://github.com/Rdatatable/data.table/issues/2645) [#3007](https://github.com/Rdatatable/data.table/issues/3007) [#1143](https://github.com/Rdatatable/data.table/issues/1143). Thanks to @sritchie73, @msummersgill and @DavidArenburg for the reports. + +8. `unique(DT)` checks up-front the types of all the columns and will fail if any column is type `list` even though those `list` columns may not be needed to establish uniqueness. Use `unique(DT, by=...)` to specify columns that are not type `list`. v1.11.8 and before would also correctly fail with the same error, but not when uniqueness had been established in prior columns: it would stop early, not look at the `list` column and return the correct result. Checking up-front was necessary for some internal optimizations and it's probably best to be explicit anyway. Thanks to James Lamb for reporting, [#3332](https://github.com/Rdatatable/data.table/issues/3332). The error message has been embellished : + + ``` + Column 2 of by= (2) is type 'list', not yet supported. Please use the by= argument to specify + columns with types that are supported. + ``` + +9. Reminder that note 11 in v1.11.0 (May 2018) warned that `set2key()` and `key2()` will be removed in May 2019. They have been warning since v1.9.8 (Nov 2016) and their warnings were upgraded to errors in v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental'. + +10. The `key(DT)<-` form of `setkey()` has been warning since at least 2012 to use `setkey()`. The warning is now stronger: `key(x)<-value is deprecated and not supported. Please change to use setkey().`. This warning will be upgraded to error in one year. + + +# data.table v1.12.0 (13 Jan 2019) + +## NEW FEATURES + +1. `setDTthreads()` gains `restore_after_fork=`, [#2885](https://github.com/Rdatatable/data.table/issues/2885). The default `NULL` leaves the internal option unchanged which by default is `TRUE`. 
`data.table` has always switched to single-threaded mode on fork. It used to restore multithreading after a fork too but problems were reported on Mac and Intel OpenMP library (see 1.10.4 notes below). We are now trying again thanks to suggestions and success reported by Kun Ren and Mark Klik in package `fst`. If you experience problems with multithreading after a fork, please restart R and call `setDTthreads(restore_after_fork=FALSE)`. + +2. Subsetting, ordering and grouping now use more parallelism. See benchmarks [here](https://h2oai.github.io/db-benchmark/) and Matt Dowle's presentation in October 2018 on YouTube [here](https://youtu.be/Ddr8N9STSuI). These internal changes gave rise to 4 regressions which were found before release thanks to Kun Ren, [#3211](https://github.com/Rdatatable/data.table/issues/3211). He kindly volunteers to 'go-first' and runs data.table through his production systems before release. We are looking for a 'go-second' volunteer please. A request to test before release was tweeted on 17 Dec [here](https://twitter.com/MattDowle/status/1074746218645938176). As usual, all CRAN and Bioconductor packages using data.table (currently 750) have been tested against this release, [#3233](https://github.com/Rdatatable/data.table/issues/3233). There are now 8,000 tests in 13,000 lines of test code; more lines of test code than there is code. Overall coverage has increased to 94% thanks to Michael Chirico. + +3. New `frollmean` has been added by Jan Gorecki to calculate _rolling mean_, see `?froll` for documentation. Function name and arguments are experimental. Related to [#2778](https://github.com/Rdatatable/data.table/issues/2778) (and [#624](https://github.com/Rdatatable/data.table/issues/624), [#626](https://github.com/Rdatatable/data.table/issues/626), [#1855](https://github.com/Rdatatable/data.table/issues/1855)). Other rolling statistics will follow. + +4. 
`fread()` can now read a remote compressed file in one step; `fread("https://domain.org/file.csv.bz2")`. The `file=` argument now supports `.gz` and `.bz2` too; i.e. `fread(file="file.csv.gz")` works now where only `fread("file.csv.gz")` worked in 1.11.8. + +5. `nomatch=NULL` now does the same as `nomatch=0L` in both `DT[...]` and `foverlaps()`; i.e. discards missing values silently (inner join). The default is still `nomatch=NA` (outer join) for statistical safety so that missing values are retained by default. After several years have elapsed, we will start to deprecate `0L`; please start using `NULL`. In future `nomatch=.(0)` (note that `.()` creates a `list` type and is different to `nomatch=0`) will fill with `0` to save replacing `NA` with `0` afterwards, [#857](https://github.com/Rdatatable/data.table/issues/857). + +6. `setnames()` gains `skip_absent` to skip names in `old` that aren't present, [#3030](https://github.com/Rdatatable/data.table/issues/3030). By default `FALSE` so that it is still an error, as before, to attempt to change a column name that is not present. Thanks to @MusTheDataGuy for the suggestion and the PR. + +7. `NA` in `between()` and `%between%`'s `lower` and `upper` are now taken as missing bounds and return `TRUE` rather than `NA`. This is now documented. + +8. `shift()` now interprets negative values of `n` to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). When `give.names=TRUE` the result is named using a positive `n` with the appropriate `type=`. Alternatively, a new `type="shift"` names the result using a signed `n` and constant type. + + ```R + shift(x, n=-5:5, give.names=TRUE) => "_lead_5" ... "_lag_5" + shift(x, n=-5:5, type="shift", give.names=TRUE) => "_shift_-5" ... "_shift_5" + ``` + +9. `fwrite()` now accepts `matrix`, [#2613](https://github.com/Rdatatable/data.table/issues/2613). Thanks to Michael Chirico for the suggestion and Felipe Parages for implementing. 
For now matrix input is converted to data.table (which can be costly) before writing. + +10. `fread()` and `fwrite()` can now handle file names in native and UTF-8 encoding, [#3078](https://github.com/Rdatatable/data.table/issues/3078). Thanks to Daniel Possenriede (@dpprdan) for reporting and fixing. + +11. `DT[i]` and `DT[i,cols]` now call internal parallel subsetting code, [#2951](https://github.com/Rdatatable/data.table/issues/2951). Subsetting is significantly faster (as are many other operations) with factor columns rather than character. + + ```R + N = 2e8 # 4GB data on 4-core CPU with 16GB RAM + DT = data.table(ID = sample(LETTERS,N,TRUE), + V1 = sample(5,N,TRUE), + V2 = runif(N)) + w = which(DT$V1 > 3) # select 40% of rows + # v1.12.0 v1.11.8 + system.time(DT[w]) # 0.8s 2.6s + DT[, ID := as.factor(ID)] + system.time(DT[w]) # 0.4s 2.3s + system.time(DT[w, c("ID","V2")]) # 0.3s 1.9s + ``` + +12. `DT[..., .SDcols=]` now accepts `patterns()`; e.g. `DT[..., .SDcols=patterns("^V")]`, for filtering columns according to a pattern (as in `melt.data.table`), [#1878](https://github.com/Rdatatable/data.table/issues/1878). Thanks to many people for pushing for this and @MichaelChirico for ultimately filing the PR. See `?data.table` for full details and examples. + +13. `split` data.table method will now preserve attributes, closes [#2047](https://github.com/Rdatatable/data.table/issues/2047). Thanks to @caneff for reporting. + +14. `DT[i,j]` now retains user-defined and inherited attributes, [#995](https://github.com/Rdatatable/data.table/issues/995); e.g. + + ```R + attr(datasets::BOD,"reference") # "A1.4, p. 270" + attr(as.data.table(datasets::BOD)[2],"reference") # was NULL now "A1.4, p. 270" + ``` + + If a superclass defines attributes that may not be valid after a `[` subset then the superclass should implement its own `[` method to manage those after calling `NextMethod()`. + +## BUG FIXES + +1. 
Providing an `i` subset expression when attempting to delete a column correctly failed with helpful error, but when the column was missing too created a new column full of `NULL` values, [#3089](https://github.com/Rdatatable/data.table/issues/3089). Thanks to Michael Chirico for reporting. + +2. Column names that look like expressions (e.g. `"a<=colB"`) caused an error when used in `on=` even when wrapped with backticks, [#3092](https://github.com/Rdatatable/data.table/issues/3092). Additionally, `on=` now supports white spaces around operators; e.g. `on = "colA == colB"`. Thanks to @mt1022 for reporting and to @MarkusBonsch for fixing. + +3. Unmatched `patterns` in `measure.vars` fail early and with feedback, [#3106](https://github.com/Rdatatable/data.table/issues/3106). + +4. `fread(..., skip=)` now skips non-standard `\r` and `\n\r` line endings properly again, [#3006](https://github.com/Rdatatable/data.table/issues/3006). Standard line endings (`\n` Linux/Mac and `\r\n` Windows) were skipped ok. Thanks to @brattono and @tbrycekelly for providing reproducible examples, and @st-pasha for fixing. + +5. `fread(..., colClasses=)` could return a corrupted result when a lower type was requested for one or more columns (e.g. reading "3.14" as integer), [#2922](https://github.com/Rdatatable/data.table/issues/2922) [#2863](https://github.com/Rdatatable/data.table/issues/2863) [#3143](https://github.com/Rdatatable/data.table/issues/3143). It now ignores the request as documented and the helpful message in verbose mode is upgraded to warning. In future, coercing to a lower type might be supported (with warning if any accuracy is lost). `"NULL"` is recognized again in both vector and list mode; e.g. `colClasses=c("integer","NULL","integer")` and `colClasses=list(NULL=2, integer=10:40)`. Thanks to Arun Srinivasan, Kun Ren, Henri Ståhl and @kszela24 for reporting. + +6. 
`cube()` will now produce expected order of results, [#3179](https://github.com/Rdatatable/data.table/issues/3179). Thanks to @Henrik-P for reporting. + +7. `groupingsets()` groups by empty column set and constant value in `j`, [#3173](https://github.com/Rdatatable/data.table/issues/3173). + +8. `split.data.table()` failed if `DT` had a factor column named `"x"`, [#3151](https://github.com/Rdatatable/data.table/issues/3151). Thanks to @tdeenes for reporting and fixing. + +9. `fsetequal` now handles properly datasets having last column a character, closes [#2318](https://github.com/Rdatatable/data.table/issues/2318). Thanks to @pschil and @franknarf1 for reporting. + +10. `DT[..., .SDcols=integer(0L)]` could fail, [#3185](https://github.com/Rdatatable/data.table/issues/3185). An empty `data.table` is now returned correctly. + +11. `as.data.table.default` method will now always copy its input, closes [#3230](https://github.com/Rdatatable/data.table/issues/3230). Thanks to @NikdAK for reporting. + +12. `DT[..., .SDcols=integer()]` failed with `.SDcols is numeric but has both +ve and -ve indices`, [#1789](https://github.com/Rdatatable/data.table/issues/1789) and [#3185](https://github.com/Rdatatable/data.table/issues/3185). It now functions as `.SDcols=character()` has done and creates an empty `.SD`. Thanks to Gabor Grothendieck and Hugh Parsonage for reporting. A related issue with empty `.SDcols` was fixed in development before release thanks to Kun Ren's testing, [#3211](https://github.com/Rdatatable/data.table/issues/3211). + +13. Multithreaded stability should be much improved with R 3.5+. Many thanks to Luke Tierney for pinpointing a memory issue with package `constellation` caused by `data.table` and his advice, [#3165](https://github.com/Rdatatable/data.table/issues/3165). Luke also added an extra check to R-devel when compiled with `--enable-strict-barrier`. 
The test suite is run through latest daily R-devel after every commit as usual, but now with `--enable-strict-barrier` on too via GitLab CI ("Extra" badge on the `data.table` homepage) thanks to Jan Gorecki. + +14. Fixed an edge-case bug of platform-dependent output of `strtoi("", base = 2L)` on which `groupingsets` had relied, [#3267](https://github.com/Rdatatable/data.table/issues/3267). + +## NOTES + +1. When data.table loads it now checks its DLL version against the version of its R level code. This is to detect installation issues on Windows when i) the DLL is in use by another R session and ii) the CRAN source version > CRAN binary which happens just after a new release (R prompts users to install from source until the CRAN binary is available). This situation can lead to a state where the package's new R code calls old C code in the old DLL; [R#17478](https://bugs.r-project.org/show_bug.cgi?id=17478), [#3056](https://github.com/Rdatatable/data.table/issues/3056). This broken state can persist until, hopefully, you experience a strange error caused by the mismatch. Otherwise, wrong results may occur silently. This situation applies to any R package with compiled code not just data.table, is Windows-only, and is long-standing. It has only recently been understood as it typically only occurs during the few days after each new release until binaries are available on CRAN. + +2. When `on=` is provided but not `i=`, a helpful error is now produced rather than silently ignoring `on=`. Thanks to Dirk Eddelbuettel for the idea. + +3. `.SDcols=` is more helpful when passed non-existent columns, [#3116](https://github.com/Rdatatable/data.table/issues/3116) and [#3118](https://github.com/Rdatatable/data.table/issues/3118). Thanks to Michael Chirico for the investigation and PR. + +4. `update.dev.pkg()` gains `type=` to specify if update should be made from binaries, sources or both. [#3148](https://github.com/Rdatatable/data.table/issues/3148). 
Thanks to Reino Bruner for the detailed suggestions. + +5. `setDT()` improves feedback when passed a ragged list (i.e. where all columns in the list are not the same length), [#3121](https://github.com/Rdatatable/data.table/issues/3121). Thanks @chuk-yong for highlighting. + +6. The one and only usage of `UNPROTECT_PTR()` has been removed, [#3232](https://github.com/Rdatatable/data.table/issues/3232). Thanks to Tomas Kalibera's investigation and advice here: https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html + + +# data.table v1.11.8 (30 Sep 2018) + +## NEW FEATURES + +1. `fread()` can now read `.gz` and `.bz2` files directly: `fread("file.csv.gz")`, [#717](https://github.com/Rdatatable/data.table/issues/717) [#3058](https://github.com/Rdatatable/data.table/issues/3058). It uses `R.utils::decompressFile` to decompress to a `tempfile()` which is then read by `fread()` in the usual way. For greater speed on large-RAM servers, it is recommended to use ramdisk for temporary files by setting `TMPDIR` to `/dev/shm` before starting R; see `?tempdir`. The decompressed temporary file is removed as soon as `fread` completes even if there is an error reading the file. Reading a remote compressed file in one step will be supported in the next version; e.g. `fread("https://domain.org/file.csv.bz2")`. + +## BUG FIXES + +1. Joining two keyed tables using `on=` to columns not forming a leading subset of `key(i)` could result in an invalidly keyed result, [#3061](https://github.com/Rdatatable/data.table/issues/3061). Subsequent queries on the result could then return incorrect results. A warning `longer object length is not a multiple of shorter object length` could also occur. Thanks to @renkun-ken for reporting and the PR. + +2. 
`keyby=` on columns for which an index exists now uses the index (new feature 7 in v1.11.6 below) but if an `i` subset is present in the same query then it could segfault, [#3062](https://github.com/Rdatatable/data.table/issues/3062). Again thanks to @renkun-ken for reporting. + +3. Assigning an out-of-range integer to an item in a factor column (a rare operation) correctly created an `NA` in that spot with warning, but now no longer also corrupts the variable being assigned, [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. Assigning a string which is missing from the factor levels continues to automatically append the string to the factor levels. + +4. Assigning a sequence to a column using base R methods (e.g. `DT[["foo"]] = 1:10`) could cause subsetting to fail with `Internal error in subset.c: column is an ALTREP vector`, [#3051](https://github.com/Rdatatable/data.table/issues/3051). Thanks to Michel Lang for reporting. + +5. `as.data.table` `matrix` method now properly handles rownames for 0 column data.table output. Thanks @mllg for reporting. Closes [#3149](https://github.com/Rdatatable/data.table/issues/3149). + +## NOTES + +1. The test suite now turns on R's new _R_CHECK_LENGTH_1_LOGIC2_ to catch when internal use of `&&` or `||` encounter arguments of length more than one. Thanks to Hugh Parsonage for implementing and fixing the problems caught by this. + +2. Some namespace changes have been made with respect to melt, dcast and xts. No change is expected but if you do have any trouble, please file an issue. + +3. `split.data.table` was exported in v1.11.6 in addition to being registered using `S3method(split, data.table)`. The export has been removed again. It had been added because a user said they found it difficult to find, [#2920](https://github.com/Rdatatable/data.table/issues/2920). But S3 methods are not normally exported explicitly by packages. 
The proper way to access the `split.data.table` method is to call `split(DT)` where `DT` is a `data.table`. The generic (`base::split` in this case) then dispatches to the `split.data.table` method. v1.11.6 was not on CRAN very long (1 week) so we think it's better to revert this change quickly. To know what methods exist, R provides the `methods()` function. + + ```R + methods(split) # all the methods for the split generic + methods(class="data.table") # all the generics that data.table has a method for (47 currently) + ``` + + +# data.table v1.11.6 (19 Sep 2018) + +## NEW FEATURES + +1. For convenience when some of the files in `fnams` are empty in `rbindlist(lapply(fnams,fread))`, `fread` now reads empty input as a null-data.table with warning rather than error, [#2898](https://github.com/Rdatatable/data.table/issues/2898). For consistency, `fwrite(data.table(NULL))` now creates an empty file and warns instead of error, too. + +2. `setcolorder(DT)` without further arguments now defaults to moving the key columns to be first, [#2895](https://github.com/Rdatatable/data.table/issues/2895). Thanks to @jsams for the PR. + +3. Attempting to subset on `col` when the column is actually called `Col` will still error, but the error message will helpfully suggest similarly-spelled columns, [#2887](https://github.com/Rdatatable/data.table/issues/2887). This is experimental, applies just to `i` currently, and we look forward to feedback. Thanks to Michael Chirico for the suggestion and PR. + +4. `fread()` has always accepted literal data; e.g. `fread("A,B\n1,2\n3,4")`. It now gains explicit `text=`; e.g. `fread(text="A,B\n1,2\n3,4")`. Unlike the first general purpose `input=` argument, the `text=` argument accepts multi-line input; e.g. `fread(text=c("A,B","1,2","3,4"))`, [#1423](https://github.com/Rdatatable/data.table/issues/1423). Thanks to Douglas Clark for the request and Hugh Parsonage for the PR. + +5. `fread()` has always accepted system commands; e.g. 
`fread("grep blah file.txt")`. It now gains explicit `cmd=`; e.g. `fread(cmd="grep blah file.txt")`. Further, if and only if `input=` is a system command and a variable was used to hold that command (`fread(someCommand)` not `fread("grep blah file.txt")`) or a variable is used to construct it (`fread(paste("grep",variable,"file.txt"))`), a message is now printed suggesting `cmd=`. This is to inform all users that there is a potential security concern if you are i) creating apps, and ii) your app takes input from a public user who could be malicious, and iii) input from the malicious user (such as a filename) is passed by your app to `fread()`, and iv) your app is not running in a protected environment. If all 4 conditions hold then the malicious user could provide a system command instead of a filename which `fread()` would run, and that would be a problem too. If the app is not running in a protected environment (e.g. app is running as root) then this could do damage or obtain data you did not intend. Public facing apps should be running with limited operating system permission so that any breach from any source is contained. We agree with [Linus Torvalds' advice](https://lkml.org/lkml/2017/11/21/356) on this which boils down to: "when addressing security concerns the first step is do no harm, just inform". If you aren't creating apps or apis that could have a malicious user then there is no risk but we can't distinguish you so we have to inform everyone. Please change to `fread(cmd=...)` at your leisure. The new message can be suppressed with `options(datatable.fread.input.cmd.message=FALSE)`. Passing system commands to `fread()` continues to be recommended and encouraged and is widely used; e.g. via the techniques gathered together in the book [Data Science at the Command Line](https://datascienceatthecommandline.com/). A `warning()` is too strong because best-practice for production systems is to set `options(warn=2)` to tolerate no warnings.
Such production systems have no user input and so there is no security risk; we don't want to do harm by breaking production systems via a `warning()` which gets turned into an error by `options(warn=2)`. Now that we have informed all users, we request feedback. There are 3 options for future releases: i) remove the message, ii) leave the message in place, iii) upgrade the message to warning and then eventually error. The default choice is the middle one: leave the message in place. + +6. New `options(datatable.CJ.names=TRUE)` changes `CJ()` to auto-name its inputs exactly as `data.table()` does, [#1596](https://github.com/Rdatatable/data.table/issues/1596). Thanks @franknarf1 for the suggestion. Current default is `FALSE`; i.e. no change. The option's default will be changed to `TRUE` in v1.12.0 and then eventually the option will be removed. Any code that depends on `CJ(x,y)$V1` will need to be changed to `CJ(x,y)$x` and is more akin to a bug fix due to the inconsistency with `data.table()`. + +7. If an appropriate index exists, `keyby=` will now use it. For example, given `setindex(DT,colA,colB)`, both `DT[,j,keyby=colA]` (a leading subset of the index columns) and `DT[,j,keyby=.(colA,colB)]` will use the index, but not `DT[,j,keyby=.(colB,colA)]`. The option `options(datatable.use.index=FALSE)` will turn this feature off. Please always use `keyby=` unless you wish to retain the order of groups by first-appearance order (in which case use `by=`). Also, both `keyby=` and `by=` already used the key where possible but are now faster when using just the first column of the key. As usual, setting `verbose=TRUE` either per-query or globally using `options(datatable.verbose=TRUE)` will report what's being done internally. + +## BUG FIXES + +1. `fread` now respects the order of columns passed to `select=` when column numbers are used, [#2986](https://github.com/Rdatatable/data.table/issues/2986). It already respected the order when column names are used. 
Thanks @privefl for raising the issue. + +2. `gmin` and `gmax` no longer fail on _ordered_ factors, [#1947](https://github.com/Rdatatable/data.table/issues/1947). Thanks to @mcieslik-mctp for identifying and @mbacou for the nudge. + +3. `as.ITime.character` now properly handles NA when attempting to detect the format of non-NA values in vector. Thanks @polyjian for reporting, closes [#2940](https://github.com/Rdatatable/data.table/issues/2940). + +4. `as.matrix(DT, rownames="id")` now works when `DT` has a single row, [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting and @sritchie73 for fixing. The root cause was the dual meaning of the `rownames=` argument: i) a single column name/number (most common), or ii) rowname values length 1 for the single row. For clarity and safety, `rownames.value=` has been added. Old usage (i.e. `length(rownames)>1`) continues to work for now but will issue a warning in a future release, and then error in a release after that. + +5. Fixed regression in v1.11.0 (May 2018) caused by PR [#2389](https://github.com/Rdatatable/data.table/pull/2389) which introduced partial key retainment on `:=` assigns. This broke the joining logic that assumed implicitly that assigning always drops keys completely. Consequently, join and subset results could be wrong when matching character to factor columns with existing keys, [#2881](https://github.com/Rdatatable/data.table/issues/2881). Thanks to @ddong63 for reporting and to @MarkusBonsch for fixing. Missing test added to ensure this doesn't arise again. + +6. `as.IDate.numeric` no longer ignores "origin", [#2880](https://github.com/Rdatatable/data.table/issues/2880). Thanks to David Arenburg for reporting and fixing. + +7. `as.ITime.times` was rounding fractional seconds while other methods were truncating, [#2870](https://github.com/Rdatatable/data.table/issues/2870). 
The `as.ITime` method gains `ms=` taking `"truncate"` (default), `"nearest"` and `"ceil"`. Thanks to @rossholmberg for reporting and Michael Chirico for fixing. + +8. `fwrite()` now writes POSIXct dates after 2038 correctly, [#2995](https://github.com/Rdatatable/data.table/issues/2995). Thanks to Manfred Zorn for reporting and Philippe Chataignon for the PR fixing it. + +9. `fsetequal` gains the `all` argument to make it consistent with the other set operator functions `funion`, `fsetdiff` and `fintersect` [#2968](https://github.com/Rdatatable/data.table/issues/2968). When `all = FALSE` `fsetequal` will treat rows as elements in a set when checking whether two `data.tables` are equal (i.e. duplicate rows will be ignored). For now the default value is `all = TRUE` for backwards compatibility, but this will be changed to `all = FALSE` in a future release to make it consistent with the other set operation functions. Thanks to @franknarf1 for reporting and @sritchie73 for fixing. + +10. `fintersect` failed on tables with a column called `y`, [#3034](https://github.com/Rdatatable/data.table/issues/3034). Thanks to Maxim Nazarov for reporting. + +11. Compilation fails in AIX because NAN and INFINITY macros definition in AIX make them not constant literals, [#3043](https://github.com/Rdatatable/data.table/pull/3043). Thanks to Ayappan for reporting and fixing. + +12. The introduction of altrep in R 3.5.0 caused some performance regressions of about 20% in some cases, [#2962](https://github.com/Rdatatable/data.table/issues/2962). Investigating this led to some improvements to grouping which are faster than before R 3.5.0 in some cases. Thanks to Nikolay S. for reporting. The work to accommodate altrep is not complete but it is better and it is highly recommended to upgrade to this update. + +13. Fixed 7 memory faults thanks to CRAN's [`rchk`](https://github.com/kalibera/rchk) tool by Tomas Kalibera, [#3033](https://github.com/Rdatatable/data.table/pull/3033).
+ +## NOTES + +1. The type coercion warning message has been improved, [#2989](https://github.com/Rdatatable/data.table/pull/2989). Thanks to @sarahbeeysian on Twitter for highlighting. For example, given the following statements: + + ```R + DT = data.table(id=1:3) + DT[2, id:="foo"] + ``` + + the warning message has changed from : + + ``` + Coerced character RHS to integer to match the column's type. Either change the target column + ['id'] to character first (by creating a new character vector length 3 (nrows of entire table) and + assign that; i.e. 'replace' column), or coerce RHS to integer (e.g. 1L, NA_[real|integer]_, as.*, + etc) to make your intent clear and for speed. Or, set the column type correctly up front when you + create the table and stick to it, please. + ``` + + to : + + ``` + Coerced character RHS to integer to match the type of the target column (column 1 named 'id'). If + the target column's type integer is correct, it's best for efficiency to avoid the coercion and + create the RHS as type integer. To achieve that consider the L postfix: typeof(0L) vs typeof(0), + and typeof(NA) vs typeof(NA_integer_) vs typeof(NA_real_). Wrapping the RHS with as.integer() will + avoid this warning but still perform the coercion. If the target column's type is not correct, it + is best to revisit where the DT was created and fix the column type there; e.g., by using + colClasses= in fread(). Otherwise, you can change the column type now by plonking a new column (of + the desired type) over the top of it; e.g. DT[, `id`:=as.character(`id`)]. If the RHS of := has + nrow(DT) elements then the assignment is called a column plonk and is the way to change a column's + type. Column types can be observed with sapply(DT,typeof). + ``` + + Further, if a coercion from double to integer is performed, fractional data such as 3.14 is now detected and the truncation to 3 is warned about if and only if truncation has occurred.
+ + ```R + DT = data.table(v=1:3) + DT[2, v:=3.14] + Warning message: + Coerced double RHS to integer to match the type of the target column (column 1 named 'v'). One + or more RHS values contain fractions which have been lost; e.g. item 1 with value 3.140000 has + been truncated to 3. + ``` + +2. `split.data.table` method is now properly exported, [#2920](https://github.com/Rdatatable/data.table/issues/2920). But we don't recommend it because `split` copies all the pieces into new memory. + +3. Setting indices on columns which are part of the key will now create those indices. + +4. `hour`, `minute`, and `second` utility functions use integer arithmetic when the input is already (explicitly) UTC-based `POSIXct` for 4-10x speedup vs. using `as.POSIXlt`. + +5. Error added for incorrect usage of `%between%`, with some helpful diagnostic hints, [#3014](https://github.com/Rdatatable/data.table/issues/3014). Thanks @peterlittlejohn for offering his user experience and providing the impetus. + + +# data.table v1.11.4 (27 May 2018) + +1. Empty RHS of `:=` is no longer an error when the `i` clause returns no rows to assign to anyway, [#2829](https://github.com/Rdatatable/data.table/issues/2829). Thanks to @cguill95 for reporting and to @MarkusBonsch for fixing. + +2. Fixed runaway memory usage with R-devel (R > 3.5.0), [#2882](https://github.com/Rdatatable/data.table/pull/2882). Thanks to many people but in particular to Trang Nguyen for making the breakthrough reproducible example, Paul Bailey for liaising, and Luke Tierney for then pinpointing the issue. It was caused by an interaction of two or more data.table threads operating on new compact vectors in the ALTREP framework, such as the sequence `1:n`. This interaction could result in R's garbage collector turning off, and hence the memory explosion. Problems may occur in R 3.5.0 too but we were only able to reproduce in R > 3.5.0. 
The R code in data.table's implementation benefits from ALTREP (`for` loops in R no longer allocate their range vector input, for example) but are not so appropriate as data.table columns. Sequences such as `1:n` are common in test data but not very common in real-world datasets. Therefore, there is no need for data.table to support columns which are ALTREP compact sequences. The `data.table()` function already expanded compact vectors (by happy accident) but `setDT()` did not (it now does). If, somehow, a compact vector still reaches the internal parallel regions, a helpful error will now be generated. If this happens, please report it as a bug. + +3. Tests 1590.3 & 1590.4 now pass when users run `test.data.table()` on Windows, [#2856](https://github.com/Rdatatable/data.table/pull/2856). Thanks to Avraham Adler for reporting. Those tests were passing on AppVeyor, win-builder and CRAN's Windows because `R CMD check` sets `LC_COLLATE=C` as documented in R-exts$1.3.1, whereas by default on Windows `LC_COLLATE` is usually a regional Windows-1252 dialect such as `English_United States.1252`. + +4. Around 1 billion very small groups (of size 1 or 2 rows) could result in `"Failed to realloc working memory"` even when plenty of memory is available, [#2777](https://github.com/Rdatatable/data.table/issues/2777). Thanks once again to @jsams for the detailed report as a follow up to bug fix 40 in v1.11.0. + + +# data.table v1.11.2 (08 May 2018) + +1. `test.data.table()` created/overwrote variable `x` in `.GlobalEnv`, [#2828](https://github.com/Rdatatable/data.table/issues/2828); i.e. a modification of user's workspace which is not allowed. Thanks to @etienne-s for reporting. + +2. `as.chron` methods for `IDate` and `ITime` have been removed, [#2825](https://github.com/Rdatatable/data.table/issues/2825). `as.chron` still works since `IDate` inherits from `Date`. We are not sure why we had specific methods in the first place. 
It may have been from a time when `IDate` did not inherit from `Date`, perhaps. Note that we don't use `chron` ourselves in our own work. + +3. Fixed `SETLENGTH() cannot be applied to an ALTVEC object` starting in R-devel (R 3.6.0) on 1 May 2018, a few hours after 1.11.0 was accepted on CRAN, [#2820](https://github.com/Rdatatable/data.table/issues/2820). Many thanks to Luke Tierney for pinpointing the problem. + +4. Fixed some rare memory faults in `fread()` and `rbindlist()` found with `gctorture2()` and [`rchk`](https://github.com/kalibera/rchk), [#2841](https://github.com/Rdatatable/data.table/issues/2841). + + +# data.table v1.11.0 (01 May 2018) + +## NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES + +1. `fread()`'s `na.strings=` argument : + + ```R + "NA" # old default + getOption("datatable.na.strings", "NA") # this release; i.e. the same; no change yet + getOption("datatable.na.strings", "") # future release + ``` + + This option controls how `,,` is read in character columns. It does not affect numeric columns which read `,,` as `NA` regardless. We would like `,,`=>`NA` for consistency with numeric types, and `,"",`=>empty string to be the standard default for `fwrite/fread` character columns so that `fread(fwrite(DT))==DT` without needing any change to any parameters. `fwrite` has never written `NA` as `"NA"` in case `"NA"` is a valid string in the data; e.g., 2 character id columns sometimes do. Instead, `fwrite` has always written `,,` by default for an `<NA>` in a character column. The use of R's `getOption()` allows users to move forward now, using `options(datatable.fread.na.strings="")`, or restore old behaviour when the default's default is changed in future, using `options(datatable.fread.na.strings="NA")`. + +2. `fread()` and `fwrite()`'s `logical01=` argument : + + ```R + logical01 = FALSE # old default + getOption("datatable.logical01", FALSE) # this release; i.e.
the same; no change yet + getOption("datatable.logical01", TRUE) # future release + ``` + + This option controls whether a column of all 0's and 1's is read as `integer`, or `logical` directly to avoid needing to change the type afterwards to `logical` or use `colClasses`. `0/1` is smaller and faster than `"TRUE"/"FALSE"`, which can make a significant difference to space and time the more `logical` columns there are. When the default's default changes to `TRUE` for `fread` we do not expect much impact since all arithmetic operators that are currently receiving 0's and 1's as type `integer` (think `sum()`) but instead could receive `logical`, would return exactly the same result on the 0's and 1's as `logical` type. However, code that is manipulating column types using `is.integer` or `is.logical` on `fread`'s result, could require change. It could be painful if `DT[(logical_column)]` (i.e. `DT[logical_column==TRUE]`) changed behaviour due to `logical_column` no longer being type `logical` but `integer`. But that is not the change proposed. The change is the other way around; i.e., a previously `integer` column holding only 0's and 1's would now be type `logical`. Since it's that way around, we believe the scope for breakage is limited. We think a lot of code is converting 0/1 integer columns to logical anyway, either using `colClasses=` or afterwards with an assign. For `fwrite`, the level of breakage depends on the consumer of the output file. We believe `0/1` is a better more standard default choice to move to. See notes below about improvements to `fread`'s sampling for type guessing, and automatic rereading in the rare cases of out-of-sample type surprises. + + +These options are meant for temporary use to aid your migration, [#2652](https://github.com/Rdatatable/data.table/pull/2652). You are not meant to set them to the old default and then not migrate your code that is dependent on the default. 
Either set the argument explicitly so your code is not dependent on the default, or change the code to cope with the new default. Over the next few years we will slowly start to remove these options, warning you if you are using them, and return to a simple default. See the history of NEWS and NEWS.0 for past migrations that have, generally speaking, been successfully managed in this way. For example, at the end of NOTES for this version (below in this file) is a note about the usage of `datatable.old.unique.by.key` now warning, as you were warned it would do over a year ago. When that change was introduced, the default was changed and that option provided an option to restore the old behaviour. These `fread`/`fwrite` changes are even more cautious and not even changing the default's default yet. Giving you extra warning by way of this notice to move forward. And giving you a chance to object. + +## NEW FEATURES + +1. `fread()`: + * Efficiency savings at C level including **parallelization** announced [here](https://github.com/Rdatatable/data.table/wiki/talks/BARUG_201704_ParallelFread.pdf); e.g. a 9GB 2 column integer csv input is **50s down to 12s** to cold load on a 4 core laptop with 16GB RAM and SSD. Run `echo 3 >/proc/sys/vm/drop_caches` first to measure cold load time. Subsequent load time (after file has been cached by OS on the first run) **40s down to 6s**. + * The [fread for small data](https://github.com/Rdatatable/data.table/wiki/Convenience-features-of-fread) page has been revised. + * Memory maps lazily; e.g. reading just the first 10 rows with `nrow=10` is **12s down to 0.01s** from cold for the 9GB file. Large files close to your RAM limit may work more reliably too. The progress meter will commence sooner and more consistently. + * `fread` has always jumped to the middle and to the end of the file for a much improved column type guess. 
The sample size is increased from 100 rows at 10 jump points (1,000 rows) to 100 rows at 100 jump points (10,000 row sample). In the rare case of there still being out-of-sample type exceptions, those columns are now *automatically reread* so you don't have to use `colClasses` yourself. + * Large number of columns support; e.g. **12,000 columns** tested. + * **Quoting rules** are more robust and flexible. See point 10 on the wiki page [here](https://github.com/Rdatatable/data.table/wiki/Convenience-features-of-fread#10-automatic-quote-escape-method-detection-including-no-escape). + * Numeric data that has been quoted is now detected and read as numeric. + * The ability to position `autostart` anywhere inside one of multiple tables in a single file is removed with warning. It used to search upwards from that line to find the start of the table based on a consistent number of columns. People appear to be using `skip="string"` or `skip=nrow` to find the header row exactly, which is retained and simpler. It was too difficult to retain search-upwards-autostart together with skipping/filling blank lines, filling incomplete rows and parallelization too. If there is any header info above the column names, it is still auto detected and auto skipped (particularly useful when loading a set of files where the column names start on different lines due to a varying height messy header). + * `dec=','` is now implemented directly so there is no dependency on locale. The options `datatable.fread.dec.experiment` and `datatable.fread.dec.locale` have been removed. + * `\\r\\r\\n` line endings are now handled such as produced by `base::download.file()` when it doubles up `\\r`. Other rare line endings (`\\r` and `\\n\\r`) are now more robust. + * Mixed line endings are now handled; e.g. a file formed by concatenating a Unix file and a Windows file so that some lines end with `\\n` while others end with `\\r\\n`.
+ * Improved automatic detection of whether the first row is column names by comparing the types of the fields on the first row against the column types ascertained by the 10,000 rows sample (or `colClasses` if provided). If a numeric column has a string value at the top, then column names are deemed present. + * Detects GB-18030 and UTF-16 encodings and in verbose mode prints a message about BOM detection. + * Detects and ignores trailing ^Z end-of-file control character sometimes created on MS DOS/Windows, [#1612](https://github.com/Rdatatable/data.table/issues/1612). Thanks to Gergely Daróczi for reporting and providing a file. + * Added ability to recognize and parse hexadecimal floating point numbers, as used for example in Java. Thanks to @scottstanfield [#2316](https://github.com/Rdatatable/data.table/issues/2316) for the report. + * Now handles floating-point NaN values in a wide variety of formats, including `NaN`, `sNaN`, `1.#QNAN`, `NaN1234`, `#NUM!` and others, [#1800](https://github.com/Rdatatable/data.table/issues/1800). Thanks to Jori Liesenborgs for highlighting and the PR. + * If negative numbers are passed to `select=` the out-of-range error now suggests `drop=` instead, [#2423](https://github.com/Rdatatable/data.table/issues/2423). Thanks to Michael Chirico for the suggestion. + * `sep=NULL` or `sep=""` (i.e., no column separator) can now be used to specify single column input reliably like `base::readLines`, [#1616](https://github.com/Rdatatable/data.table/issues/1616). `sep='\\n'` still works (even on Windows where line ending is actually `\\r\\n`) but `NULL` or `""` are now documented and recommended. Thanks to Dmitriy Selivanov for the pull request and many others for comments. As before, `sep=NA` is not valid; use the default `"auto"` for automatic separator detection. `sep='\\n'` is now deprecated and in future will start to warn when used.
+ * Single-column input with blank lines is now valid and the blank lines are significant (representing `NA`). The blank lines are significant even at the very end, which may be surprising on first glance. The change is so that `fread(fwrite(DT))==DT` for single-column inputs containing `NA` which are written as blank. There is no change when `ncol>1`; i.e., input stops with detailed warning at the first blank line, because a blank line when `ncol>1` is invalid input due to no separators being present. Thanks to @skanskan, Michael Chirico, @franknarf1 and Pasha for the testing and discussions, [#2106](https://github.com/Rdatatable/data.table/issues/2106). + * Too few column names are now auto filled with default column names, with warning, [#1625](https://github.com/Rdatatable/data.table/issues/1625). If there is just one missing column name it is guessed to be for the first column (row names or an index), otherwise the column names are filled at the end. Similarly, too many column names now automatically sets `fill=TRUE`, with warning. + * `skip=` and `nrow=` are more reliable and are no longer affected by invalid lines outside the range specified. Thanks to Ziyad Saeed and Kyle Chung for reporting, [#1267](https://github.com/Rdatatable/data.table/issues/1267). + * Ram disk (`/dev/shm`) is no longer used for the output of system command input. Although faster when it worked, it was causing too many device full errors; e.g., [#1139](https://github.com/Rdatatable/data.table/issues/1139) and [zUMIs/19](https://github.com/sdparekh/zUMIs/issues/19). Thanks to Kyle Chung for reporting. Standard `tempdir()` is now used. If you wish to use ram disk, set `TMPDIR` to `/dev/shm`; see `?tempdir`. + * Detecting whether a very long input string is a file name or data is now much faster, [#2531](https://github.com/Rdatatable/data.table/issues/2531). Many thanks to @javrucebo for the detailed report, benchmarks and suggestions.
+ * A column of `TRUE/FALSE`s is ok, as well as `True/False`s and `true/false`s, but mixing styles (e.g. `TRUE/false`) is not and will be read as type `character`. + * New argument `index` to complement the existing `key` argument for applying secondary orderings out of the box for convenience, [#2633](https://github.com/Rdatatable/data.table/issues/2633). + * A warning is now issued whenever incorrectly quoted fields have been detected and fixed using a non-standard quote rule. `fread` has always used these advanced rules but now it warns that it is using them. Most file writers correctly quote fields if the field contains the field separator, but a common error is not to also quote fields that contain a quote and then escape those quotes, particularly if that quote occurs at the start of the field. The ability to detect and fix such files is referred to as self-healing. Ambiguities are resolved using the knowledge that the number of columns is constant, and therefore this ability is not available when `fill=TRUE`. This feature can be improved in future by using column type consistency as well as the number of fields.
For example: + + ```R + txt = 'A,B\n1,hello\n2,"howdy" said Joe\n3,bonjour\n' + cat(txt) + # A,B + # 1,hello + # 2,"howdy" said Joe + # 3,bonjour + fread(txt) + A B + + 1: 1 hello + 2: 2 "howdy" said Joe + 3: 3 bonjour + Warning message: + In fread(txt) : Found and resolved improper quoting + ``` + + * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney, Ananda Mahto, @memoryfull, @brandenkmurray for testing dev and reporting these regressions before release to CRAN: #1464, #1671, #1888, #1895, #2070, #2073, #2087, #2091, #2092, #2107, #2118, #2123, #2167, #2194, #2196, #2201, #2222, #2228, #2238, #2246, #2251, #2265, #2267, #2285, #2287, #2299, #2322, #2347, #2352, #2370, #2371, #2395, #2404, #2446, #2453, #2457, #2464, #2481, #2499, #2512, #2515, #2516, #2518, #2520, #2523, #2526, #2535, #2542, #2548, #2561, #2600, #2625, #2666, #2697, #2735, #2744. + +2. `fwrite()`: + * empty strings are now always quoted (`,"",`) to distinguish them from `NA` which by default is still empty (`,,`) but can be changed using `na=` as before. If `na=` is provided and `quote=` is the default `'auto'` then `quote=` is set to `TRUE` so that if the `na=` value occurs in the data, it can be distinguished from `NA`. Thanks to Ethan Welty for the request [#2214](https://github.com/Rdatatable/data.table/issues/2214) and Pasha for the code change and tests, [#2215](https://github.com/Rdatatable/data.table/issues/2215). + * `logical01` has been added and the old name `logicalAsInt` retained. Please move to the new name when convenient for you. The old argument name (`logicalAsInt`) will slowly be deprecated over the next few years. The default is unchanged: `FALSE`, so `logical` is still written as `"TRUE"`/`"FALSE"` in full by default.
We intend to change the default's default in future to `TRUE`; see the notice at the top of these release notes. + +3. Added helpful message when subsetting by a logical column without wrapping it in parentheses, [#1844](https://github.com/Rdatatable/data.table/issues/1844). Thanks @dracodoc for the suggestion and @MichaelChirico for the PR. + +4. `tables` gains `index` argument for supplementary metadata about `data.table`s in memory (or any optionally specified environment), part of [#1648](https://github.com/Rdatatable/data.table/issues/1648). Thanks due variously to @jangorecki, @rsaporta, @MichaelChirico for ideas and work towards PR. + +5. Improved auto-detection of `character` inputs' formats to `as.ITime` to mirror the logic in `as.POSIXlt.character`, [#1383](https://github.com/Rdatatable/data.table/issues/1383). Thanks @franknarf1 for identifying a discrepancy and @MichaelChirico for investigating. + +6. `setcolorder()` now accepts fewer than `ncol(DT)` columns to be moved to the front, [#592](https://github.com/Rdatatable/data.table/issues/592). Thanks @MichaelChirico for the PR. This also incidentally fixed [#2007](https://github.com/Rdatatable/data.table/issues/2007) whereby explicitly setting `select = NULL` in `fread` errored; thanks to @rcapell for reporting that and @dselivanov and @MichaelChirico for investigating and providing a new test. + +7. Three new *Grouping Sets* functions: `rollup`, `cube` and `groupingsets`, [#1377](https://github.com/Rdatatable/data.table/issues/1377). Allows aggregation on various grouping levels at once producing sub-totals and a grand total. + +8. `as.data.table()` gains new method for `array`s to return a useful data.table, [#1418](https://github.com/Rdatatable/data.table/issues/1418). + +9. 
`print.data.table()` (all via master issue [#1523](https://github.com/Rdatatable/data.table/issues/1523)): + + * gains `print.keys` argument, `FALSE` by default, which displays the keys and/or indices (secondary keys) of a `data.table`. Thanks @MichaelChirico for the PR, Yike Lu for the suggestion and Arun for honing that idea to its present form. + + * gains `col.names` argument, `"auto"` by default, which toggles which registers of column names to include in printed output. `"top"` forces `data.frame`-like behavior where column names are only ever included at the top of the output, as opposed to the default behavior which appends the column names below the output as well for longer (>20 rows) tables. `"none"` shuts down column name printing altogether. Thanks @MichaelChirico for the PR, Oleg Bondar for the suggestion, and Arun for guiding commentary. + + * list columns would print the first 6 items in each cell followed by a comma if there are more than 6 in that cell. Now it ends ",..." to make it clearer, part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). Thanks to @franknarf1 for drawing attention to an issue raised on Stack Overflow by @TMOTTM [here](https://stackoverflow.com/q/47679701). + +10. `setkeyv` accelerated if key already exists [#2331](https://github.com/Rdatatable/data.table/issues/2331). Thanks to @MarkusBonsch for the PR. + +11. Keys and indexes are now partially retained up to the key column assigned to with ':=' [#2372](https://github.com/Rdatatable/data.table/issues/2372). They used to be dropped completely if any one of the columns was affected by `:=`. Thanks to @MarkusBonsch for the PR. + +12. Faster `as.IDate` and `as.ITime` methods for `POSIXct` and `numeric`, [#1392](https://github.com/Rdatatable/data.table/issues/1392). Thanks to Jan Gorecki for the PR. + +13. `unique(DT)` now returns `DT` early when there are no duplicates to save RAM, [#2013](https://github.com/Rdatatable/data.table/issues/2013). 
Thanks to Michael Chirico for the PR, and thanks to @mgahan for pointing out a reversion in `na.omit.data.table` before release, [#2660](https://github.com/Rdatatable/data.table/issues/2660#issuecomment-371027948). + +14. `uniqueN()` is now faster on logical vectors. Thanks to Hugh Parsonage for [PR#2648](https://github.com/Rdatatable/data.table/pull/2648). + + ```R + N = 1e9 + # was now + x = c(TRUE,FALSE,NA,rep(TRUE,N)) # + uniqueN(x) == 3 # 5.4s 0.00s + x = c(TRUE,rep(FALSE,N), NA) # + uniqueN(x,na.rm=TRUE) == 2 # 5.4s 0.00s + x = c(rep(TRUE,N),FALSE,NA) # + uniqueN(x) == 3 # 6.7s 0.38s + ``` + +15. Subsetting optimization with keys and indices is now possible for compound queries like `DT[a==1 & b==2]`, [#2472](https://github.com/Rdatatable/data.table/issues/2472). +Thanks to @MichaelChirico for reporting and to @MarkusBonsch for the implementation. + +16. `melt.data.table` now offers friendlier functionality for providing `value.name` for `list` input to `measure.vars`, [#1547](https://github.com/Rdatatable/data.table/issues/1547). Thanks @MichaelChirico and @franknarf1 for the suggestion and use cases, @jangorecki and @mrdwab for implementation feedback, and @MichaelChirico for ultimate implementation. + +17. `update.dev.pkg` is a new function to update a package from its development repository; it will download package sources only when a newer commit is available in the repository. `data.table::update.dev.pkg()` updates `data.table` by default, but any package can be used. + +18. Item 1 in NEWS for [v1.10.2](https://github.com/Rdatatable/data.table/blob/master/NEWS.md#changes-in-v1102--on-cran-31-jan-2017) on CRAN in Jan 2017 included: + + > When j is a symbol prefixed with `..` it will be looked up in calling scope and its value taken to be column names or numbers. + > When you see the `..` prefix think one-level-up, like the directory `..` in all operating systems means the parent directory. 
+ > In future the `..` prefix could be made to work on all symbols appearing anywhere inside `DT[...]`. + + The response has been positive ([this tweet](https://twitter.com/MattDowle/status/967290562725359617) and [FR#2655](https://github.com/Rdatatable/data.table/issues/2655)) and so this prefix is now expanded to all symbols appearing in `j=` as a first step; e.g. + + ```R + cols = "colB" + DT[, c(..cols, "colC")] # same as DT[, .(colB,colC)] + DT[, -..cols] # all columns other than colB + ``` + + Thus, `with=` should no longer be needed in any cases. Please change to using the `..` prefix and over the next few years we will start to formally deprecate and remove the `with=` parameter. If this is well received, the `..` prefix could be expanded to symbols appearing in `i=` and `by=`, too. Note that column names should not now start with `..`. If a symbol `..var` is used in `j=` but `..var` exists as a column name, the column still takes precedence, for backwards compatibility. Over the next few years, data.table will start issuing warnings/errors when it sees column names starting with `..`. This affects one CRAN package out of 475 using data.table, so we do not believe this restriction to be unreasonable. Our main focus here which we believe `..` achieves is to resolve the more common ambiguity when `var` is in calling scope and `var` is a column name too. Further, we have not forgotten that in the past we recommended prefixing the variable in calling scope with `..` yourself. If you did that and `..var` exists in calling scope, that still works, provided neither `var` exists in calling scope nor `..var` exists as a column name. Please now remove the `..` prefix on `..var` in calling scope to tidy this up. In future data.table will start to warn/error on such usage. + +19. `setindexv` can now assign multiple (separate) indices by accepting a `list` in the `cols` argument. + +20. 
`as.matrix.data.table` method now has an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix`. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases. + +## BUG FIXES + +1. The new quote rules handles this single field `"Our Stock Screen Delivers an Israeli Software Company (MNDO, CTCH)<\/a> SmallCapInvestor.com - Thu, May 19, 2011 10:02 AM EDT<\/cite><\/div>Yesterday in \""Google, But for Finding + Great Stocks\"", I discussed the value of stock screeners as a powerful tool"`, [#2051](https://github.com/Rdatatable/data.table/issues/2051). Thanks to @scarrascoso for reporting. Example file added to test suite. + +2. `fwrite()` creates a file with permissions that now play correctly with `Sys.umask()`, [#2049](https://github.com/Rdatatable/data.table/issues/2049). Thanks to @gnguy for reporting. + +3. `fread()` no longer holds an open lock on the file when a line outside the large sample has too many fields and generates an error, [#2044](https://github.com/Rdatatable/data.table/issues/2044). Thanks to Hugh Parsonage for reporting. + +4. Setting `j = {}` no longer results in an error, [#2142](https://github.com/Rdatatable/data.table/issues/2142). Thanks Michael Chirico for the pull request. + +5. Segfault in `rbindlist()` when one or more items are empty, [#2019](https://github.com/Rdatatable/data.table/issues/2019). Thanks Michael Lang for the pull request. Another segfault if the result would be more than 2bn rows, thanks to @jsams's comment in [#2340](https://github.com/Rdatatable/data.table/issues/2340#issuecomment-331505494). + +6. Error printing 0-length `ITime` and `NA` objects, [#2032](https://github.com/Rdatatable/data.table/issues/2032) and [#2171](https://github.com/Rdatatable/data.table/issues/2171). 
Thanks Michael Chirico for the pull requests and @franknarf1 for pointing out a shortcoming of the initial fix. + +7. `as.IDate.POSIXct` error with `NULL` timezone, [#1973](https://github.com/Rdatatable/data.table/issues/1973). Thanks @lbilli for reporting and Michael Chirico for the pull request. + +8. Printing a null `data.table` with `print` no longer visibly outputs `NULL`, [#1852](https://github.com/Rdatatable/data.table/issues/1852). Thanks @aaronmcdaid for spotting and @MichaelChirico for the PR. + +9. `data.table` now works with Shiny Reactivity / Flexdashboard. The error was typically something like `col not found` in `DT[col==val]`. Thanks to Dirk Eddelbuettel leading Matt through reproducible steps and @sergeganakou and Richard White for reporting. Closes [#2001](https://github.com/Rdatatable/data.table/issues/2001) and [shiny/#1696](https://github.com/rstudio/shiny/issues/1696). + +10. The `as.IDate.POSIXct` method passed `tzone` along but was not exported. So `tzone` is now taken into account by `as.IDate` too as well as `IDateTime`, [#977](https://github.com/Rdatatable/data.table/issues/977) and [#1498](https://github.com/Rdatatable/data.table/issues/1498). Tests added. + +11. Named logical vector now select rows as expected from single row data.table. Thanks to @skranz for reporting. Closes [#2152](https://github.com/Rdatatable/data.table/issues/2152). + +12. `fread()`'s rare `Internal error: Sampling jump point 10 is before the last jump ended` has been fixed, [#2157](https://github.com/Rdatatable/data.table/issues/2157). Thanks to Frank Erickson and Artem Klevtsov for reporting with example files which are now added to the test suite. + +13. `CJ()` no longer loses attribute information, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch and @royalts for the pull request. + +14. `split.data.table` respects `factor` ordering in `by` argument, [#2082](https://github.com/Rdatatable/data.table/issues/2082). 
Thanks to @MichaelChirico for identifying and fixing the issue. + +15. `.SD` would incorrectly include symbol on lhs of `:=` when `.SDcols` is specified and `get()` appears in `j`. Thanks @renkun-ken for reporting and the PR, and @ProfFancyPants for reporting a regression introduced in the PR. Closes [#2326](https://github.com/Rdatatable/data.table/issues/2326) and [#2338](https://github.com/Rdatatable/data.table/issues/2338). + +16. Integer values that are too large to fit in `int64` will now be read as strings [#2250](https://github.com/Rdatatable/data.table/issues/2250). + +17. Internal-only `.shallow` now retains keys correctly, [#2336](https://github.com/Rdatatable/data.table/issues/2336). Thanks to @MarkusBonsch for reporting, fixing ([PR #2337](https://github.com/Rdatatable/data.table/pull/2337)) and adding 37 tests. This much advances the journey towards exporting `shallow()`, [#2323](https://github.com/Rdatatable/data.table/issues/2323). + +18. `isoweek` calculation is correct regardless of local timezone setting (`Sys.timezone()`), [#2407](https://github.com/Rdatatable/data.table/issues/2407). Thanks to @MoebiusAV and @SimonCoulombe for reporting and @MichaelChirico for fixing. + +19. Fixed `as.xts.data.table` to support all xts supported time based index classes [#2408](https://github.com/Rdatatable/data.table/issues/2408). Thanks to @ebs238 for reporting and for the PR. + +20. A memory leak when a very small number such as `0.58E-2141` is bumped to type `character` is resolved, [#918](https://github.com/Rdatatable/data.table/issues/918). + +21. The edge case `setnames(data.table(), character(0))` now works rather than error, [#2452](https://github.com/Rdatatable/data.table/issues/2452). + +22. Order of rows returned in non-equi joins was incorrect in certain scenarios as reported under [#1991](https://github.com/Rdatatable/data.table/issues/1991). This is now fixed. Thanks to @Henrik-P for reporting. + +23. 
Non-equi joins work as expected when `x` in `x[i, on=...]` is a 0-row data.table. Closes [#1986](https://github.com/Rdatatable/data.table/issues/1986). + +24. Non-equi joins along with `by=.EACHI` returned incorrect result in some rare cases as reported under [#2360](https://github.com/Rdatatable/data.table/issues/2360). This is fixed now. This fix also takes care of [#2275](https://github.com/Rdatatable/data.table/issues/2275). Thanks to @ebs238 for the nice minimal reproducible report, @Mihael for asking on SO and to @Frank for following up on SO and filing an issue. + +25. `by=.EACHI` works now when `list` columns are being returned and some join values are missing, [#2300](https://github.com/Rdatatable/data.table/issues/2300). Thanks to @jangorecki and @franknarf1 for the reproducible examples which have been added to the test suite. + +26. Indices are now retrieved by exact name, [#2465](https://github.com/Rdatatable/data.table/issues/2465). This prevents usage of wrong indices as well as unexpected row reordering in join results. Thanks to @pannnda for reporting and providing a reproducible example and to @MarkusBonsch for fixing. + +27. `setnames` of whole table when original table had `NA` names skipped replacing those, [#2475](https://github.com/Rdatatable/data.table/issues/2475). Thanks to @franknarf1 and [BenoitLondon on StackOverflow](https://stackoverflow.com/questions/47228836/) for the report and @MichaelChirico for fixing. + +28. `CJ()` works with multiple empty vectors now [#2511](https://github.com/Rdatatable/data.table/issues/2511). Thanks to @MarkusBonsch for fixing. + +29. `:=` assignment of one vector to two or more columns, e.g. `DT[, c("x", "y") := 1:10]`, failed to copy the `1:10` data causing errors later if and when those columns were updated by reference, [#2540](https://github.com/Rdatatable/data.table/issues/2540). 
This is an old issue ([#185](https://github.com/Rdatatable/data.table/issues/185)) that had been fixed but reappeared when code was refactored. Thanks to @patrickhowerter for the detailed report with reproducible example and to @MarkusBonsch for fixing and strengthening tests so it doesn't reappear. + +30. "Negative length vectors not allowed" error when grouping `median` and `var` fixed, [#2046](https://github.com/Rdatatable/data.table/issues/2046) and [#2111](https://github.com/Rdatatable/data.table/issues/2111). Thanks to @caneff and @osofr for reporting and to @kmillar for debugging and explaining the cause. + +31. Fixed a bug on Windows where `data.table`s containing non-UTF8 strings in `key`s were not properly sorted, [#2462](https://github.com/Rdatatable/data.table/issues/2462), [#1826](https://github.com/Rdatatable/data.table/issues/1826) and [StackOverflow](https://stackoverflow.com/questions/47599934/why-doesnt-r-data-table-support-well-for-non-ascii-keys-on-windows). Thanks to @shrektan for reporting and fixing. + +32. `x.` prefixes during joins sometimes resulted in a "column not found" error. This is now fixed. Closes [#2313](https://github.com/Rdatatable/data.table/issues/2313). Thanks to @franknarf1 for the MRE. + +33. `setattr()` no longer segfaults when setting 'class' to empty character vector, [#2386](https://github.com/Rdatatable/data.table/issues/2386). Thanks to @hatal175 for reporting and to @MarkusBonsch for fixing. + +34. Fixed cases where the result of `merge.data.table()` would contain duplicate column names if `by.x` was also in `names(y)`. +`merge.data.table()` gains the `no.dups` argument (default TRUE) to match the corresponding patched behaviour in `base:::merge.data.frame()`. Now, when `by.x` is also in `names(y)` the column name from `y` has the corresponding `suffixes` added to it. `by.x` remains unchanged for backwards compatibility reasons. +In addition, where duplicate column names arise anyway (i.e. 
`suffixes = c("", "")`) `merge.data.table()` will now throw a warning to match the behaviour of `base:::merge.data.frame()`. +Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdatatable/data.table/pull/2631) and [PR#2653](https://github.com/Rdatatable/data.table/pull/2653). + +35. `CJ()` now fails with proper error message when results would exceed max integer, [#2636](https://github.com/Rdatatable/data.table/issues/2636). + +36. `NA` in character columns now displays as `<NA>` just like base R to distinguish from `""` and `"NA"`. + +37. `getDTthreads()` could return INT_MAX (2 billion) after an explicit call to `setDTthreads(0)`, [PR#2708](https://github.com/Rdatatable/data.table/pull/2708). + +38. Fixed a bug on Windows that `data.table` may break if the garbage collecting was triggered when sorting a large number of non-ASCII characters. Thanks to @shrektan for reporting and fixing [PR#2678](https://github.com/Rdatatable/data.table/pull/2678), [#2674](https://github.com/Rdatatable/data.table/issues/2674). + +39. Internal aliasing of `.` to `list` was over-aggressive in applying `list` even when `.` was intended within `bquote`, [#1912](https://github.com/Rdatatable/data.table/issues/1912). Thanks @MichaelChirico for reporting/filing and @ecoRoland for suggesting and testing a fix. + +40. Attempt to allocate a wildly large amount of RAM (16EB) when grouping by key and there are close to 2 billion 1-row groups, [#2777](https://github.com/Rdatatable/data.table/issues/2777). Thanks to @jsams for the detailed report. + +41. Fix a bug that `print(dt, class=TRUE)` shows only `topn - 1` rows. Thanks to @heavywatal for reporting [#2803](https://github.com/Rdatatable/data.table/issues/2803) and filing [PR#2804](https://github.com/Rdatatable/data.table/pull/2804). + +## NOTES + +0. The license has been changed from GPL to MPL (Mozilla Public License). All contributors were consulted and approved. 
[PR#2456](https://github.com/Rdatatable/data.table/pull/2456) details the reasons for the change. + +1. `?data.table` makes explicit the option of using a `logical` vector in `j` to select columns, [#1978](https://github.com/Rdatatable/data.table/issues/1978). Thanks @Henrik-P for the note and @MichaelChirico for filing. + +2. Test 1675.1 updated to cope with a change in R-devel in June 2017 related to `factor()` and `NA` levels. + +3. Package `ezknitr` has been added to the whitelist of packages that run user code and should be considered data.table-aware, [#2266](https://github.com/Rdatatable/data.table/issues/2266). Thanks to Matt Mills for testing and reporting. + +4. Printing with `quote = TRUE` now quotes column names as well, [#1319](https://github.com/Rdatatable/data.table/issues/1319). Thanks @jan-glx for the suggestion and @MichaelChirico for the PR. + +5. Added a blurb to `?melt.data.table` explicating the subtle difference in behavior of the `id.vars` argument vis-a-vis its analog in `reshape2::melt`, [#1699](https://github.com/Rdatatable/data.table/issues/1699). Thanks @MichaelChirico for uncovering and filing. + +6. Added some clarification about the usage of `on` to `?data.table`, [#2383](https://github.com/Rdatatable/data.table/issues/2383). Thanks to @peterlittlejohn for volunteering his confusion and @MichaelChirico for brushing things up. + +7. Clarified that "data.table always sorts in `C-locale`" means that upper-case letters are sorted before lower-case letters by ordering in data.table (e.g. `setorder`, `setkey`, `DT[order(...)]`). Thanks to @hughparsonage for the pull request editing the documentation. Note this makes no difference in most cases of data; e.g. ids where only uppercase or lowercase letters are used (`"AB123"<"AC234"` is always true, regardless), or country names and words which are consistently capitalized. 
For example, `"America" < "Brazil"` is not affected (it's always true), and neither is `"america" < "brazil"` (always true too); since the first letter is consistently capitalized. But, whether `"america" < "Brazil"` (the words are not consistently capitalized) is true or false in base R depends on the locale of your R session. In America it is true by default and false if you i) type `Sys.setlocale(locale="C")`, ii) the R session has been started in a C locale for you which can happen on servers/services (the locale comes from the environment the R session is started in). However, `"america" < "Brazil"` is always, consistently false in data.table which can be a surprise because it differs to base R by default in most regions. It is false because `"B"<"a"` is true because all upper-case letters come first, followed by all lower case letters (the ascii number of each letter determines the order, which is what is meant by `C-locale`). + +8. `data.table`'s dependency has been moved forward from R 3.0.0 (Apr 2013) to R 3.1.0 (Apr 2014; i.e. 3.5 years old). We keep this dependency as old as possible for as long as possible as requested by users in managed environments. Thanks to Jan Gorecki, the test suite from latest dev now runs on R 3.1.0 continuously, as well as R-release (currently 3.4.2) and latest R-devel snapshot. The primary motivation for the bump to R 3.1.0 was allowing one new test which relies on better non-copying behaviour in that version, [#2484](https://github.com/Rdatatable/data.table/issues/2484). It also allows further internal simplifications. Thanks to @MichaelChirico for fixing another test that failed on R 3.1.0 due to slightly different behaviour of `base::read.csv` in R 3.1.0-only which the test was comparing to, [#2489](https://github.com/Rdatatable/data.table/pull/2489). + +9. New vignette added: _Importing data.table_ - focused on using data.table as a dependency in R packages. Answers most commonly asked questions and promotes good practices. 
+ +10. As warned in v1.9.8 release notes below in this file (25 Nov 2016) it has been 1 year since then and so use of `options(datatable.old.unique.by.key=TRUE)` to restore the old default is now deprecated with warning. The new warning states that this option still works and repeats the request to pass `by=key(DT)` explicitly to `unique()`, `duplicated()`, `uniqueN()` and `anyDuplicated()` and to stop using this option. In another year, this warning will become error. Another year after that the option will be removed. + +11. As `set2key()` and `key2()` have been warning since v1.9.8 (Nov 2016), their warnings have now been upgraded to errors. Note that when they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' in NEWS item 4. They will be removed in one year. + + ``` + Was warning: set2key() will be deprecated in the next relase. Please use setindex() instead. + Now error: set2key() is now deprecated. Please use setindex() instead. + ``` + +12. The option `datatable.showProgress` is no longer set to a default value when the package is loaded. Instead, the `default=` argument of `getOption` is used by both `fwrite` and `fread`. The default is the result of `interactive()` at the time of the call. Using `getOption` in this way is intended to be more helpful to users looking at `args(fread)` and `?fread`. + +13. `print.data.table()` invisibly returns its first argument instead of `NULL`. This behavior is compatible with the standard `print.data.frame()` and tibble's `print.tbl_df()`. Thanks to @heavywatal for [PR#2807](https://github.com/Rdatatable/data.table/pull/2807) + + +# data.table v1.10.4-3 (20 Oct 2017) + +1. Fixed crash/hang on MacOS when `parallel::mclapply` is used and data.table is merely loaded, [#2418](https://github.com/Rdatatable/data.table/issues/2418). Oddly, all tests including test 1705 (which tests `mclapply` with data.table) passed fine on CRAN. 
It appears to be some versions of MacOS or some versions of libraries on MacOS, perhaps. Many thanks to Martin Morgan for reporting and confirming this fix works. Thanks also to @asenabouth, Joe Thorley and Danton Noriega for testing, debugging and confirming that automatic parallelism inside data.table (such as `fwrite`) works well even on these MacOS installations. See also news items below for 1.10.4-1 and 1.10.4-2. + + +# data.table v1.10.4-2 (12 Oct 2017) + +1. OpenMP on MacOS is now supported by CRAN and included in CRAN's package binaries for Mac. But installing v1.10.4-1 from source on MacOS failed when OpenMP was not enabled at compile time, [#2409](https://github.com/Rdatatable/data.table/issues/2409). Thanks to Liz Macfie and @fupangpangpang for reporting. The startup message when OpenMP is not enabled has been updated. + +2. Two rare potential memory faults fixed, thanks to CRAN's automated use of latest compiler tools; e.g. clang-5 and gcc-7 + + +# data.table v1.10.4-1 (09 Oct 2017) + +1. The `nanotime` v0.2.0 update (June 2017) changed from `integer64` to `S4` and broke `fwrite` of `nanotime` columns. Fixed to work with `nanotime` both before and after v0.2.0. + +2. Pass R-devel changes related to `deparse(,backtick=)` and `factor()`. + +3. Internal `NAMED()==2` now `MAYBE_SHARED()`, [#2330](https://github.com/Rdatatable/data.table/issues/2330). Back-ported to pass under the stated dependency, R 3.0.0. + +4. Attempted improvement on Mac-only when the `parallel` package is used too (which forks), [#2137](https://github.com/Rdatatable/data.table/issues/2137). Intel's OpenMP implementation appears to leave threads running after the OpenMP parallel region (inside data.table) has finished unlike GNU libgomp. So, if and when `parallel`'s `fork` is invoked by the user after data.table has run in parallel already, instability occurs. The problem only occurs with Mac package binaries from CRAN because they are built by CRAN with Intel's OpenMP library. 
No known problems on Windows or Linux and no known problems on any platform when `parallel` is not used. If this Mac-only fix still doesn't work, call `setDTthreads(1)` immediately after `library(data.table)` which has been reported to fix the problem by putting `data.table` into single threaded mode earlier. + +5. When `fread()` and `print()` see `integer64` columns are present but package `bit64` is not installed, the warning is now displayed as intended. Thanks to a question by Santosh on r-help and forwarded by Bill Dunlap. + + +# data.table v1.10.4 (01 Feb 2017) + +## BUG FIXES + +1. The new specialized `nanotime` writer in `fwrite()` type punned using `*(long long *)&REAL(column)[i]` which, strictly, is undefined behaviour under C standards. It passed a plethora of tests on linux (gcc 5.4 and clang 3.8), win-builder and 6 out of 10 CRAN flavours using gcc. But failed (wrong data written) with the newest version of clang (3.9.1) as used by CRAN on the failing flavors, and solaris-sparc. Replaced with the union method and added a grep to CRAN_Release.cmd. + + +# data.table v1.10.2 (31 Jan 2017) + +## NEW FEATURES + +1. When `j` is a symbol prefixed with `..` it will be looked up in calling scope and its value taken to be column names or numbers. + + ```R + myCols = c("colA","colB") + DT[, myCols, with=FALSE] + DT[, ..myCols] # same + ``` + + When you see the `..` prefix think _one-level-up_ like the directory `..` in all operating systems meaning the parent directory. In future the `..` prefix could be made to work on all symbols appearing anywhere inside `DT[...]`. It is intended to be a convenient way to protect your code from accidentally picking up a column name. Similar to how `x.` and `i.` prefixes (analogous to SQL table aliases) can already be used to disambiguate the same column name present in both `x` and `i`. 
A symbol prefix rather than a `..()` _function_ will be easier for us to optimize internally and more convenient if you have many variables in calling scope that you wish to use in your expressions safely. This feature was first raised in 2012 and long wished for, [#633](https://github.com/Rdatatable/data.table/issues/633). It is experimental. + +2. When `fread()` or `print()` see `integer64` columns are present, `bit64`'s namespace is now automatically loaded for convenience. + +3. `fwrite()` now supports the new [`nanotime`](https://cran.r-project.org/package=nanotime) type by Dirk Eddelbuettel, [#1982](https://github.com/Rdatatable/data.table/issues/1982). Aside: `data.table` already automatically supported `nanotime` in grouping and joining operations via longstanding support of its underlying `integer64` type. + +4. `indices()` gains a new argument `vectors`, default `FALSE`. This strsplits the index names by `__` for you, [#1589](https://github.com/Rdatatable/data.table/issues/1589). + + ```R + DT = data.table(A=1:3, B=6:4) + setindex(DT, B) + setindex(DT, B, A) + indices(DT) + [1] "B" "B__A" + indices(DT, vectors=TRUE) + [[1]] + [1] "B" + [[2]] + [1] "B" "A" + ``` + +## BUG FIXES + +1. Some long-standing potential instability has been discovered and resolved many thanks to a detailed report from Bill Dunlap and Michael Sannella. At C level any call of the form `setAttrib(x, install(), allocVector())` can be unstable in any R package. Despite `setAttrib()` PROTECTing its inputs, the 3rd argument (`allocVector`) can be executed first only for its result to be released by `install()`'s potential GC before reaching `setAttrib`'s PROTECTion of its inputs. Fixed by either PROTECTing or pre-`install()`ing. Added to CRAN_Release.cmd procedures: i) `grep`s to prevent usage of this idiom in future and ii) running data.table's test suite with `gctorture(TRUE)`. + +2. 
A new potential instability introduced in the last release (v1.10.0) in GForce optimized grouping has been fixed by reverting one change from malloc to R_alloc. Thanks again to Michael Sannella for the detailed report. + +3. `fwrite()` could write floating point values incorrectly, [#1968](https://github.com/Rdatatable/data.table/issues/1968). A thread-local variable was incorrectly thread-global. This variable's usage lifetime is only a few clock cycles so it needed large data and many threads for several threads to overlap their usage of it and cause the problem. Many thanks to @mgahan and @jmosser for finding and reporting. + +## NOTES + +1. `fwrite()`'s `..turbo` option has been removed as the warning message warned. If you've found a problem, please [report it](https://github.com/Rdatatable/data.table/issues). + +2. No known issues have arisen due to `DT[,1]` and `DT[,c("colA","colB")]` now returning columns as introduced in v1.9.8. However, as we've moved forward by setting `options('datatable.WhenJisSymbolThenCallingScope'=TRUE)` introduced then too, it has become clear a better solution is needed. All 340 CRAN and Bioconductor packages that use data.table have been checked with this option on. 331 lines would need to be changed in 59 packages. Their usage is elegant, correct and recommended, though. Examples are `DT[1, encoding]` in quanteda and `DT[winner=="first", freq]` in xgboost. These are looking up the columns `encoding` and `freq` respectively and returning them as vectors. But if, for some reason, those columns are removed from `DT` and `encoding` or `freq` are still variables in calling scope, their values in calling scope would be returned. Which cannot be what was intended and could lead to silent bugs. That was the risk we were trying to avoid.
+`options('datatable.WhenJisSymbolThenCallingScope')` is now removed. A migration timeline is no longer needed. The new strategy needs no code changes and has no breakage. It was proposed and discussed in point 2 [here](https://github.com/Rdatatable/data.table/issues/1188#issuecomment-127824969), as follows.
+When `j` is a symbol (as in the quanteda and xgboost examples above) it will continue to be looked up as a column name and returned as a vector, as has always been the case. If it's not a column name however, it is now a helpful error explaining that data.table is different to data.frame and what to do instead (use `..` prefix or `with=FALSE`). The old behaviour of returning the symbol's value in calling scope can never have been useful to anybody and therefore not depended on. Just as the `DT[,1]` change could be made in v1.9.8, this change can be made now. This change increases robustness with no downside. Rerunning all 340 CRAN and Bioconductor package checks reveals 2 packages throwing the new error: partools and simcausal. Their maintainers have been informed that there is a likely bug on those lines due to data.table's (now remedied) weakness. This is exactly what we wanted to reveal and improve. + +3. As before, and as we can see is in common use in CRAN and Bioconductor packages using data.table, `DT[,myCols,with=FALSE]` continues to look up `myCols` in calling scope and take its value as column names or numbers. You can move to the new experimental convenience feature `DT[, ..myCols]` if you wish at leisure. + + +# data.table v1.10.0 (03 Dec 2016) + +## BUG FIXES + +1. `fwrite(..., quote='auto')` already quoted a field if it contained a `sep` or `\n`, or `sep2[2]` when `list` columns are present. Now it also quotes a field if it contains a double quote (`"`) as documented, [#1925](https://github.com/Rdatatable/data.table/issues/1925). Thanks to Aki Matsuo for reporting. Tests added. The `qmethod` tests did test escaping embedded double quotes, but only when `sep` or `\n` was present in the field as well to trigger the quoting of the field. + +2. Fixed 3 test failures on Solaris only, [#1934](https://github.com/Rdatatable/data.table/issues/1934).
Two were on both sparc and x86 and related to a `tzone` attribute difference between `as.POSIXct` and `as.POSIXlt` even when passed the default `tz=""`. The third was on sparc only: a minor rounding issue in `fwrite()` of 1e-305. + +3. Regression crash fixed when 0's occur at the end of a non-empty subset of an empty table, [#1937](https://github.com/Rdatatable/data.table/issues/1937). Thanks Arun for tracking down. Tests added. For example, subsetting the empty `DT=data.table(a=character())` with `DT[c(1,0)]` should return a 1 row result with one `NA` since 1 is past the end of `nrow(DT)==0`, the same result as `DT[1]`. + +4. Fixed newly reported crash that also occurred in old v1.9.6 when `by=.EACHI`, `nomatch=0`, the first item in `i` has no match AND `j` has a function call that is passed a key column, [#1933](https://github.com/Rdatatable/data.table/issues/1933). Many thanks to Reino Bruner for finding and reporting with a reproducible example. Tests added. + +5. Fixed `fread()` error occurring for a subset of Windows users: `showProgress is not type integer but type 'logical'.`, [#1944](https://github.com/Rdatatable/data.table/issues/1944) and [#1111](https://github.com/Rdatatable/data.table/issues/1111). Our tests cover this usage (it is just default usage), pass on AppVeyor (Windows), win-builder (Windows) and CRAN's Windows so perhaps it only occurs on a specific and different version of Windows to all those. Thanks to @demydd for reporting. Fixed by using strictly `logical` type at R level and `Rboolean` at C level, consistently throughout. + +6. Combining `on=` (new in v1.9.6) with `by=` or `keyby=` gave incorrect results, [#1943](https://github.com/Rdatatable/data.table/issues/1943). Many thanks to Henrik-P for the detailed and reproducible report. Tests added. + +7. New function `rleidv` was ignoring its `cols` argument, [#1942](https://github.com/Rdatatable/data.table/issues/1942). Thanks Josh O'Brien for reporting. Tests added. + +## NOTES + +1. 
It seems OpenMP is not available on CRAN's Mac platform; NOTEs appeared in [CRAN checks](https://cran.r-project.org/web/checks/check_results_data.table.html) for v1.9.8. Moved `Rprintf` from `init.c` to `packageStartupMessage` to avoid the NOTE as requested urgently by Professor Ripley. Also fixed the bad grammar of the message: 'single threaded' now 'single-threaded'. If you have a Mac and run macOS or OS X on it (I run Ubuntu on mine) please contact CRAN maintainers and/or Apple if you'd like CRAN's Mac binary to support OpenMP. Otherwise, please follow [these instructions for OpenMP on Mac](https://github.com/Rdatatable/data.table/wiki/Installation) which people have reported success with. + +2. Just to state explicitly: data.table does not now depend on or require OpenMP. If you don't have it (as on CRAN's Mac it appears but not in general on Mac) then data.table should build, run and pass all tests just fine. + +3. There are now 5,910 raw tests as reported by `test.data.table()`. Tests cover 91% of the 4k lines of R and 89% of the 7k lines of C. These stats are now known thanks to Jim Hester's [Covr](https://CRAN.R-project.org/package=covr) package and [Codecov.io](https://about.codecov.io/). If anyone is looking for something to help with, creating tests to hit the missed lines shown by clicking the `R` and `src` folders at the bottom [here](https://app.codecov.io/github/Rdatatable/data.table?branch=master) would be very much appreciated. + +4. The FAQ vignette has been revised given the changes in v1.9.8. In particular, the very first FAQ. + +5. With hindsight, the last release v1.9.8 should have been named v1.10.0 to convey it wasn't just a patch release from .6 to .8 owing to the 'potentially breaking changes' items. Thanks to @neomantic for correctly pointing out. The best we can do now is now bump to 1.10.0. 
+ + +# data.table v1.9.8 (Nov 2016) back to v1.2 (Aug 2008) has been moved to [NEWS.0.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.0.md) diff --git a/NEWS.md b/NEWS.md index 513ac9bc5..48f7c529e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -610,1549 +610,4 @@ 15. Thanks to @ssh352, Václav Tlapák, Cole Miller, András Svraka and Toby Dylan Hocking for reporting and bisecting a significant performance regression in dev. This was fixed before release thanks to a PR by Jan Gorecki, [#5463](https://github.com/Rdatatable/data.table/pull/5463). -# data.table [v1.14.10](https://github.com/Rdatatable/data.table/milestone/20?closed=1) (8 Dec 2023) - -## NOTES - -1. Maintainer of the package for CRAN releases is from now on Tyson Barrett (@tysonstanley), [#5710](https://github.com/Rdatatable/data.table/issues/5710). - -2. Updated internal code for breaking change of `is.atomic(NULL)` in R-devel, [#5691](https://github.com/Rdatatable/data.table/pull/5691). Thanks to Martin Maechler for the patch. - -3. Fix multiple test concerning coercion to missing complex numbers, [#5695](https://github.com/Rdatatable/data.table/issues/5695) and [#5748](https://github.com/Rdatatable/data.table/issues/5748). Thanks to @MichaelChirico and @ben-schwen for the patches. - -4. Fix multiple format warnings (e.g., -Wformat) [#5712](https://github.com/Rdatatable/data.table/pull/5712), [#5781](https://github.com/Rdatatable/data.table/pull/5781), [#5880](https://github.com/Rdatatable/data.table/pull/5800), [#5786](https://github.com/Rdatatable/data.table/pull/5786). Thanks to @MichaelChirico and @jangorecki for the patches. - - -# data.table [v1.14.8](https://github.com/Rdatatable/data.table/milestone/28?closed=1) (17 Feb 2023) - -## NOTES - -1. Test 1613.605 now passes changes to `as.data.frame()` in R-devel, [#5597](https://github.com/Rdatatable/data.table/pull/5597). Thanks to Avraham Adler for reporting. - -2. 
An out of bounds read when combining non-equi join with `by=.EACHI` has been found and fixed thanks to clang ASAN, [#5598](https://github.com/Rdatatable/data.table/issues/5598). There was no bug or consequence because the read was followed (now preceded) by a bounds test. - -3. `.rbind.data.table` (note the leading `.`) is no longer exported when `data.table` is installed in R>=4.0.0 (Apr 2020), [#5600](https://github.com/Rdatatable/data.table/pull/5600). It was never documented which R-devel now detects and warns about. It is only needed by `data.table` internals to support R<4.0.0; see note 1 in v1.12.6 (Oct 2019) below in this file for more details. - - -# data.table [v1.14.6](https://github.com/Rdatatable/data.table/milestone/27?closed=1) (16 Nov 2022) - -## BUG FIXES - -1. `fread()` could leak memory, [#3292](https://github.com/Rdatatable/data.table/issues/3292). Thanks to @patrickhowerter for reporting, and Jim Hester for the fix. The fix requires R 3.4.0 or later. Loading `data.table` in earlier versions now highlights this issue on startup, asks users to upgrade R, and warns that we intend to upgrade `data.table`'s dependency from 8 year old R 3.1.0 (April 2014) to 5 year old R 3.4.0 (April 2017). - -## NOTES - -1. Test 1962.098 has been modified to pass latest changes to `POSIXt` in R-devel. - -2. `test.data.table()` no longer creates `DT` in `.GlobalEnv`, a CRAN policy violation, [#5514](https://github.com/Rdatatable/data.table/issues/5514). No other writes occurred to `.GlobalEnv` and release procedures have been improved to prevent this happening again. - -3. The memory usage of the test suite has been halved, [#5507](https://github.com/Rdatatable/data.table/issues/5507). - - -# data.table [v1.14.4](https://github.com/Rdatatable/data.table/milestone/26?closed=1) (17 Oct 2022) - -## NOTES - -1. 
gcc 12.1 (May 2022) now detects and warns about an always-false condition (`-Waddress`) in `fread` which caused a small efficiency saving never to be invoked, [#5476](https://github.com/Rdatatable/data.table/pull/5476). Thanks to CRAN for testing latest versions of compilers. - -2. `update.dev.pkg()` has been renamed `update_dev_pkg()` to get out of the way of the `stats::update` generic function, [#5421](https://github.com/Rdatatable/data.table/pull/5421). This is a utility function which upgrades the version of `data.table` to the latest commit in development which has passed all tests. As such we don't expect any backwards compatibility concerns. Its manual page was causing an intermittent hang/crash from `R CMD check` on Windows-only on CRAN which we hope will be worked around by changing its name. - -3. Internal C code now passes `-Wstrict-prototypes` to satisfy the warnings now displayed on CRAN, [#5477](https://github.com/Rdatatable/data.table/pull/5477). - -4. `write.csv` in R-devel no longer responds to `getOption("digits.secs")` for `POSIXct`, [#5478](https://github.com/Rdatatable/data.table/issues/5478). This caused our tests of `fwrite(, dateTimeAs="write.csv")` to fail on CRAN's daily checks using latest daily R-devel. While R-devel discussion continues, and currently it seems like the change is intended with further changes possible, this `data.table` release massages our tests to pass on latest R-devel. The idea is to try to get out of the way of R-devel changes in this regard until the new behavior of `write.csv` is released and confirmed. Package updates are not accepted on CRAN if they do not pass the latest daily version of R-devel, even if R-devel changes after the package update is submitted. If the change to `write.csv()` stands, then a future release of `data.table` will be needed to make `fwrite(, dateTimeAs="write.csv")` match `write.csv()` output again in that future version of R onwards. 
If you use an older version of `data.table` than said future one in the said future version of R, then `fwrite(, dateTimeAs="write.csv")` may not match `write.csv()` if you are using `getOption("digits.secs")` too. However, you can always check that your installation of `data.table` works in your version of R on your platform by simply running `test.data.table()` yourself. Doing so would detect such a situation for you: test 1741 would fail in this case. `test.data.table()` runs the entire suite of tests and is always available to you locally. This way you do not need to rely on our statements about which combinations of versions of R and `data.table` on which platforms we have tested and support; just run `test.data.table()` yourself. Having said that, because test 1741 has been relaxed in this release in order to be accepted on CRAN to pass latest R-devel, this won't be true for this particular release in regard to this particular test. - - ```R - $ R --vanilla - R version 4.2.1 (2022-06-23) -- "Funny-Looking Kid" - > DF = data.frame(A=as.POSIXct("2022-10-01 01:23:45.012")) - > options(digits.secs=0) - > write.csv(DF) - "","A" - "1",2022-10-01 01:23:45 - > options(digits.secs=3) - > write.csv(DF) - "","A" - "1",2022-10-01 01:23:45.012 - - $ Rdevel --vanilla - R Under development (unstable) (2022-10-06 r83040) -- "Unsuffered Consequences" - > DF = data.frame(A=as.POSIXct("2022-10-01 01:23:45.012")) - > options(digits.secs=0) - > write.csv(DF) - "","A" - "1",2022-10-01 01:23:45.012 - ``` - -5. Many thanks to Kurt Hornik for investigating potential impact of a possible future change to `base::intersect()` on empty input, providing a patch so that `data.table` won't break if the change is made to R, and giving us plenty of notice, [#5183](https://github.com/Rdatatable/data.table/pull/5183). - -6. `datatable.[dll|so]` has changed name to `data_table.[dll|so]`, [#4442](https://github.com/Rdatatable/data.table/pull/4442). Thanks to Jan Gorecki for the PR. 
We had previously removed the `.` since `.` is not allowed by the following paragraph in the Writing-R-Extensions manual. Replacing `.` with `_` instead now seems more consistent with the last sentence. - - > ... the basename of the DLL needs to be both a valid file name and valid as part of a C entry point (e.g. it cannot contain ‘.’): for portable code it is best to confine DLL names to be ASCII alphanumeric plus underscore. If entry point R_init_lib is not found it is also looked for with ‘.’ replaced by ‘_’. - - -# data.table [v1.14.2](https://github.com/Rdatatable/data.table/milestone/24?closed=1) (27 Sep 2021) - -## NOTES - -1. clang 13.0.0 (Sep 2021) requires the system header `omp.h` to be included before R's headers, [#5122](https://github.com/Rdatatable/data.table/issues/5122). Many thanks to Prof Ripley for testing and providing a patch file. - - -# data.table [v1.14.0](https://github.com/Rdatatable/data.table/milestone/23?closed=1) (21 Feb 2021) - -## POTENTIALLY BREAKING CHANGES - -1. In v1.13.0 (July 2020) native parsing of datetime was added to `fread` by Michael Chirico which dramatically improved performance. Before then datetime was read as type character by default which was slow. Since v1.13.0, UTC-marked datetime (e.g. `2020-07-24T10:11:12.134Z` where the final `Z` is present) has been read automatically as POSIXct and quickly. We provided the migration option `datatable.old.fread.datetime.character` to revert to the previous slow character behavior. We also added the `tz=` argument to control unmarked datetime; i.e. where the `Z` (or equivalent UTC postfix) is missing in the data. The default `tz=""` reads unmarked datetime as character as before, slowly. We gave you the ability to set `tz="UTC"` to turn on the new behavior and read unmarked datetime as UTC, quickly. 
R sessions that are running in UTC by setting the TZ environment variable, as is good practice and common in production, have also been reading unmarked datetime as UTC since v1.13.0, much faster. Note 1 of v1.13.0 (below in this file) ended `In addition to convenience, fread is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when tz="UTC" is provided.`. - - At `rstudio::global(2021)`, Neal Richardson, Director of Engineering at Ursa Labs, compared Arrow CSV performance to `data.table` CSV performance, [Bigger Data With Ease Using Apache Arrow](https://posit.co/resources/videos/bigger-data-with-ease-using-apache-arrow/). He opened by comparing to `data.table` as his main point. Arrow was presented as 3 times faster than `data.table`. He talked at length about this result. However, no reproducible code was provided and we were not contacted in advance in case we had any comments. He mentioned New York Taxi data in his talk which is a dataset known to us as containing unmarked datetime. [Rebuttal](https://twitter.com/MattDowle/status/1360073970498875394). - - `tz=`'s default is now changed from `""` to `"UTC"`. If you have been using `tz=` explicitly then there should be no change. The change to read UTC-marked datetime as POSIXct rather than character already happened in v1.13.0. The change now is that unmarked datetimes are now read as UTC too by default without needing to set `tz="UTC"`. None of the 1,017 CRAN packages directly using `data.table` are affected. As before, the migration option `datatable.old.fread.datetime.character` can still be set to TRUE to revert to the old character behavior. This migration option is temporary and will be removed in the near future. - - The community was consulted in [this tweet](https://twitter.com/MattDowle/status/1358011599336931328) before release. - -## BUG FIXES - -1. 
If `fread()` discards a single line footer, the warning message which includes the discarded text now displays any non-ASCII characters correctly on Windows, [#4747](https://github.com/Rdatatable/data.table/issues/4747). Thanks to @shrektan for reporting and the PR. - -2. `fintersect()` now retains the order of the first argument as reasonably expected, rather than retaining the order of the second argument, [#4716](https://github.com/Rdatatable/data.table/issues/4716). Thanks to Michel Lang for reporting, and Ben Schwen for the PR. - -## NOTES - -1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN. - -2. `r-datatable.com` continues to be the short, canonical and long-standing URL which forwards to the current homepage. The homepage domain has changed a few times over the years but those using `r-datatable.com` did not need to change their links. For example, we use `r-datatable.com` in messages (and translated messages) in preference to the word 'homepage' to save users time in searching for the current homepage. The web forwarding was provided by Domain Monster but they do not support `https://r-datatable.com`, only `http://r-datatable.com`, despite the homepage being forwarded to being `https:` for many years. Meanwhile, CRAN submission checks now require all URLs to be `https:`, rejecting `http:`. Therefore we have moved to [gandi.net](https://www.gandi.net) who do support `https:` web forwarding and so [https://r-datatable.com](https://r-datatable.com) now forwards correctly. 
Thanks to Dirk Eddelbuettel for suggesting Gandi. Further, Gandi allows the web-forward to be marked 301 (permanent) or 302 (temporary). Since the very point of `https://r-datatable.com` is to be a forward, 302 is appropriate in this case. This enables us to link to it in DESCRIPTION, README, and this NEWS item. Otherwise, CRAN submission checks would require the 301 forward to be followed; i.e. the forward replaced with where it points to and the package resubmitted. Thanks to Uwe Ligges for explaining this distinction. - - -# data.table [v1.13.6](https://github.com/Rdatatable/data.table/milestone/22?closed=1) (30 Dec 2020) - -## BUG FIXES - -1. Grouping could throw an error `Failed to allocate counts or TMP` with more than 1e9 rows even with sufficient RAM due to an integer overflow, [#4295](https://github.com/Rdatatable/data.table/issues/4295) [#4818](https://github.com/Rdatatable/data.table/issues/4818). Thanks to @renkun-ken and @jangorecki for reporting, and @shrektan for fixing. - -2. `fwrite()`'s mutithreaded `gzip` compression failed on Solaris with Z_STREAM_ERROR, [#4099](https://github.com/Rdatatable/data.table/issues/4099). Since this feature was released in Oct 2019 (see item 3 in v1.12.4 below in this news file) there have been no known problems with it on Linux, Windows or Mac. For Solaris, we have been successively adding more and more detailed tracing to the output in each release, culminating in tracing `zlib` internals at byte level by reading `zlib`'s source. The problem did not manifest itself on [R-hub](https://builder.r-hub.io/)'s Solaris instances, so we had to work via CRAN output. If `zlib`'s `z_stream` structure is declared inside a parallel region but before a parallel for, it appears that the particular OpenMP implementation used by CRAN's Solaris moves the structure to a new address on entering the parallel for. 
Ordinarily this memory move would not matter, however, `zlib` internals have a self reference pointer to the parent, and check that the pointers match. This mismatch caused the -2 (Z_STREAM_ERROR). Allocating an array of structures, one for each thread, before the parallel region avoids the memory move with no cost. - - It should be carefully noted that we cannot be sure it really is a problem unique to CRAN's Solaris. Even if it seems that way after one year of observations. For example, it could be compiler flags, or particular memory circumstances, either of which could occur on other operating systems too. However, we are unaware of why it would make sense for the OpenMP implementation to move the structure at that point. Any optimizations such as aligning the set of structures to cache line boundaries could be performed at the start of the parallel region, not after the parallel for. If anyone reading this knows more, please let us know. - -## NOTES - -1. The last release took place at the same time as several breaking changes were made to R-devel. The CRAN submissions process runs against latest daily R-devel so we had to keep up with those latest changes by making several resubmissions. Then each resubmission reruns against the new latest R-devel again. Overall it took 7 days. For example, we added the new `environments=FALSE` to our `all.equal` call. Then about 4 hours after 1.13.4 was accepted, the `s` was dropped and we now need to resubmit with `environment=FALSE`. In any case, we have suggested that the default should be FALSE first to give packages some notice, as opposed to generating errors in the CRAN submissions process within hours. Then the default for `environment=` could be TRUE in 6 months time after packages have had some time to update in advance of the default change. Readers of this NEWS file will be familiar with `data.table`'s approach to change control and know that we do this ourselves. 
- - -# data.table [v1.13.4](https://github.com/Rdatatable/data.table/milestone/21?closed=1) (08 Dec 2020) - -## BUG FIXES - -1. `as.matrix()` now retains the column type for the empty matrix result, [#4762](https://github.com/Rdatatable/data.table/issues/4762). Thus, for example, `min(DT[0])` where DT's columns are numeric, is now consistent with non-empty all-NA input and returns `Inf` with R's warning `no non-missing arguments to min; returning Inf` rather than R's error `only defined on a data frame with all numeric[-alike] variables`. Thanks to @mb706 for reporting. - -2. `fsort()` could crash when compiled using `clang-11` (Oct 2020), [#4786](https://github.com/Rdatatable/data.table/issues/4786). Multithreaded debugging revealed that threads are no longer assigned iterations monotonically by the dynamic schedule. Although never guaranteed by the OpenMP standard, in practice monotonicity could be relied on as far as we knew, until now. We rely on monotonicity in the `fsort` implementation. Happily, a schedule modifier `monotonic:dynamic` was added in OpenMP 4.5 (Nov 2015) which we now use if available (e.g. gcc 6+, clang 3.9+). If you have an old compiler which does not support OpenMP 4.5, it's probably the case that the unmodified dynamic schedule is monotonic anyway, so `fsort` now checks that threads are receiving iterations monotonically and emits a graceful error if not. It may be that `clang` prior to version 11, and `gcc` too, exhibit the same crash. It was just that `clang-11` was the first report. To know which version of OpenMP `data.table` is using, `getDTthreads(verbose=TRUE)` now reports the `YYYYMM` value `_OPENMP`; e.g. 201511 corresponds to v4.5, and 201811 corresponds to v5.0. Oddly, the `x.y` version number is not provided by the OpenMP API. OpenMP 4.5 may be enabled in some compilers using `-fopenmp-version=45`. Otherwise, if you need to upgrade compiler, https://www.openmp.org/resources/openmp-compilers-tools/ may be helpful. - -3. 
Columns containing functions that don't inherit the class `'function'` would fail to group, [#4814](https://github.com/Rdatatable/data.table/issues/4814). Thanks @mb706 for reporting, @ecoRoland2 for helping investigate, and @Coorsaa for a follow-up example involving environments. - -## NOTES - -1. Continuous daily testing by CRAN using latest daily R-devel revealed, within one day of the change to R-devel, that a future version of R would break one of our tests, [#4769](https://github.com/Rdatatable/data.table/issues/4769). The characters "-alike" were added into one of R's error messages, so our too-strict test which expected the error `only defined on a data frame with all numeric variables` will fail when it sees the new error message `only defined on a data frame with all numeric-alike variables`. We have relaxed the pattern the test looks for to `data.*frame.*numeric` well in advance of the future version of R being released. Readers are reminded that CRAN is not just a host for packages. It is also a giant test suite for R-devel. For more information, [behind the scenes of cran, 2016](https://h2o.ai/blog/2016/behind-the-scenes-of-cran/). - -2. `as.Date.IDate` is no longer exported as a function to solve a new error in R-devel `S3 method lookup found 'as.Date.IDate' on search path`, [#4777](https://github.com/Rdatatable/data.table/issues/4777). The S3 method is still exported; i.e. `as.Date(x)` will still invoke the `as.Date.IDate` method when `x` is class `IDate`. The function had been exported, in addition to exporting the method, to solve a compatibility issue with `zoo` (and `xts` which uses `zoo`) because `zoo` exports `as.Date` which masks `base::as.Date`. Happily, since zoo 1.8-1 (Jan 2018) made a change to its `as.IDate`, the workaround is no longer needed. - -3. Thanks to @fredguinog for testing `fcase` in development before 1.13.0 was released and finding a segfault, [#4378](https://github.com/Rdatatable/data.table/issues/4378). 
It was found separately by the `rchk` tool (which uses static code analysis) in release procedures and fixed before `fcase` was released, but the reproducible example has now been added to the test suite for completeness. Thanks also to @shrektan for investigating, proposing a very similar fix at C level, and a different reproducible example which has also been added to the test suite. - - -# data.table [v1.13.2](https://github.com/Rdatatable/data.table/milestone/19?closed=1) (19 Oct 2020) - -## BUG FIXES - -1. `test.data.table()` could fail the 2nd time it is run by a user in the same R session on Windows due to not resetting locale properly after testing Chinese translation, [#4630](https://github.com/Rdatatable/data.table/pull/4630). Thanks to Cole Miller for investigating and fixing. - -2. A regression in v1.13.0 resulted in installation on Mac often failing with `shared object 'datatable.so' not found`, and FreeBSD always failing with `expr: illegal option -- l`, [#4652](https://github.com/Rdatatable/data.table/issues/4652) [#4640](https://github.com/Rdatatable/data.table/issues/4640) [#4650](https://github.com/Rdatatable/data.table/issues/4650). Thanks to many for assistance including Simon Urbanek, Brian Ripley, Wes Morgan, and @ale07alvarez. There were no installation problems on Windows or Linux. - -3. Operating on columns of type `list`, e.g. `dt[, listCol[[1]], by=id]`, suffered a performance regression in v1.13.0, [#4646](https://github.com/Rdatatable/data.table/issues/4646) [#4658](https://github.com/Rdatatable/data.table/issues/4658). Thanks to @fabiocs8 and @sandoronodi for the detailed reports, and to Cole Miller for substantial debugging, investigation and proposals at C level which enabled the root cause to be fixed. Related, and also fixed, was a segfault revealed by package POUMM, [#4746](https://github.com/Rdatatable/data.table/issues/4746), when grouping a list column where each item has an attribute; e.g., `coda::mcmc.list`. 
Detected thanks to CRAN's ASAN checks, and thanks to Venelin Mitov for assistance in tracing the memory fault. Thanks also to Hongyuan Jia and @ben-schwen for assistance in debugging the fix in dev to pass reverse dependency testing which highlighted, before release, that package `eplusr` would fail. Its good usage has been added to `data.table`'s test suite. - -4. `fread("1.2\n", colClasses='integer')` (note no columns names in the data) would segfault when creating a warning message, [#4644](https://github.com/Rdatatable/data.table/issues/4644). It now warns with `Attempt to override column 1 of inherent type 'float64' down to 'int32' ignored.` When column names are present however, the warning message includes the name as before; i.e., `fread("A\n1.2\n", colClasses='integer')` produces `Attempt to override column 1 <<A>> of inherent type 'float64' down to 'int32' ignored.`. Thanks to Kun Ren for reporting. - -5. `dplyr::mutate(setDT(as.list(1:64)), V1=11)` threw error `can't set ALTREP truelength`, [#4734](https://github.com/Rdatatable/data.table/issues/4734). Thanks to @etryn for the reproducible example, and to Cole Miller for refinements. - -## NOTES - -1. `bit64` v4.0.2 and `bit` v4.0.3, both released on 30th July, correctly broke `data.table`'s tests. Like other packages on our `Suggest` list, we check `data.table` works with `bit64` in our tests. The first break was because `all.equal` always returned `TRUE` in previous versions of `bit64`. Now that `all.equal` works for `integer64`, the incorrect test comparison was revealed. If you use `bit64`, or `nanotime` which uses `bit64`, it is highly recommended to upgrade to the latest `bit64` version. Thanks to Cole Miller for the PR to accommodate `bit64`'s update. - - The second break caused by `bit` was the addition of a `copy` function. We did not ask, but the `bit` package kindly offered to change to a different name since `data.table::copy` is long standing. `bit` v4.0.4 released 4th August renamed `copy` to `copy_vector`. Otherwise, users of `data.table` would have needed to prefix every occurrence of `copy` with `data.table::copy` if they use `bit64` too, since `bit64` depends on (rather than importing) `bit`. Again, this impacted `data.table`'s tests which mimic a user's environment; not `data.table` itself per se. - - We have requested that CRAN policy be modified to require that reverse dependency testing include packages which `Suggest` the package. Had this been the case, reverse dependency testing of `bit64` would have caught the impact on `data.table` before release. - -2. `?.NGRP` now displays the help page as intended, [#4946](https://github.com/Rdatatable/data.table/issues/4649). Thanks to @KyleHaynes for posting the issue, and Cole Miller for the fix. `.NGRP` is a symbol new in v1.13.0; see below in this file.
- -3. `test.data.table()` failed in non-English locales such as `LC_TIME=fr_FR.UTF-8` due to `Jan` vs `janv.` in tests 168 and 2042, [#3450](https://github.com/Rdatatable/data.table/issues/3450). Thanks to @shrektan for reporting, and @tdhock for making the tests locale-aware. - -4. User-supplied `PKG_LIBS` and `PKG_CFLAGS` are now retained and the suggestion in https://mac.r-project.org/openmp/; i.e., - `PKG_CPPFLAGS='-Xclang -fopenmp' PKG_LIBS=-lomp R CMD INSTALL data.table_.tar.gz` -has a better chance of working on Mac. - - -# data.table [v1.13.0](https://github.com/Rdatatable/data.table/milestone/17?closed=1) (24 Jul 2020) - -## POTENTIALLY BREAKING CHANGES - -1. `fread` now supports native parsing of `%Y-%m-%d`, and [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `%Y-%m-%dT%H:%M:%OS%z`, [#4464](https://github.com/Rdatatable/data.table/pull/4464). Dates are returned as `data.table`'s `integer`-backed `IDate` class (see `?IDate`), and datetimes are returned as `POSIXct` provided either `Z` or the offset from `UTC` is present; e.g. `fwrite()` outputs UTC by default including the final `Z`. Reminder that `IDate` inherits from R's `Date` and is identical other than it uses the `integer` type where (oddly) R uses the `double` type for dates (8 bytes instead of 4). `fread()` gains a `tz` argument to control datetime values that are missing a Z or UTC-offset (now referred to as *unmarked* datetimes); e.g. as written by `write.csv`. By default `tz=""` means, as in R, read the unmarked datetime in local time. Unless the timezone of the R session is UTC (e.g. the TZ environment variable is set to `"UTC"`, or `""` on non-Windows), unmarked datetime will then be read by `fread` as character, as before. If you have been using `colClasses="POSIXct"` that will still work using R's `as.POSIXct()` which will interpret the unmarked datetime in local time, as before, and still slowly.
You can tell `fread` to read unmarked datetime as UTC, and quickly, by passing `tz="UTC"` which may be appropriate in many circumstances. Note that the default behaviour of R to read and write csv using unmarked datetime can lead to different research results when the csv file has been saved in one timezone and read in another due to observations being shifted to a different date. If you have been using `colClasses="POSIXct"` for UTC-marked datetime (e.g. as written by `fwrite` including the final `Z`) then it will automatically speed up with no changes needed. - - Since this is a potentially breaking change, i.e. existing code may depend on dates and datetimes being read as type character as before, a temporary option is provided to restore the old behaviour: `options(datatable.old.fread.datetime.character=TRUE)`. However, in most cases, we expect existing code to still work with no changes. - - The minor version number is bumped from 12 to 13, i.e. `v1.13.0`, where the `.0` conveys 'be-aware' as is common practice. As with any new feature, there may be bugs to fix and changes to defaults required in future. In addition to convenience, `fread` is now significantly faster in the presence of dates, UTC-marked datetimes, and unmarked datetime when tz="UTC" is provided. - -## NEW FEATURES - -1. `%chin%` and `chmatch(x, table)` are faster when `x` is length 1, `table` is long, and `x` occurs near the start of `table`. Thanks to Michael Chirico for the suggestion, [#4117](https://github.com/Rdatatable/data.table/pull/4117#discussion_r358378409). - -2. `CsubsetDT` C function is now exported for use by other packages, [#3751](https://github.com/Rdatatable/data.table/issues/3751). Thanks to Leonardo Silvestri for the request and the PR. 
This uses R's `R_RegisterCCallable` and `R_GetCCallable` mechanism, [R-exts§5.4.3](https://cran.r-project.org/doc/manuals/r-devel/R-exts.html#Linking-to-native-routines-in-other-packages) and [`?cdt`](https://rdatatable.gitlab.io/data.table/reference/cdt.html). Note that organization of our C interface will be changed in future. - -3. `print` method for `data.table` gains `trunc.cols` argument (and corresponding option `datatable.print.trunc.cols`, default `FALSE`), [#1497](https://github.com/Rdatatable/data.table/issues/1497), part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). This prints only as many columns as fit in the console without wrapping to new lines (e.g., the first 5 of 80 columns) and a message that states the count and names of the variables not shown. When `class=TRUE` the message also contains the classes of the variables. `data.table` has always automatically truncated _rows_ of a table for efficiency (e.g. printing 10 rows instead of 10 million); in the future, we may do the same for _columns_ (e.g., 10 columns instead of 20,000) by changing the default for this argument. Thanks to @nverno for the initial suggestion and to @TysonStanley for the PR. - -4. `setnames(DT, new=new_names)` (i.e. explicitly named `new=` argument) now works as expected rather than an error message requesting that `old=` be supplied too, [#4041](https://github.com/Rdatatable/data.table/issues/4041). Thanks @Kodiologist for the suggestion. - -5. `nafill` and `setnafill` gain `nan` argument to say whether `NaN` should be considered the same as `NA` for filling purposes, [#4020](https://github.com/Rdatatable/data.table/issues/4020). Prior versions had an implicit value of `nan=NaN`; the default is now `nan=NA`, i.e., `NaN` is treated as if it's missing. Thanks @AnonymousBoba for the suggestion. Also, while `nafill` still respects `getOption('datatable.verbose')`, the `verbose` argument has been removed. - -6. 
New function `fcase(...,default)` implemented in C by Morgan Jacob, [#3823](https://github.com/Rdatatable/data.table/issues/3823), is inspired by SQL `CASE WHEN` which is a common tool in SQL for e.g. building labels or cutting age groups based on conditions. `fcase` is comparable to R function `dplyr::case_when` however it evaluates its arguments in a lazy way (i.e. only when needed) as shown below. Please see `?fcase` for more details. - - ```R - # Lazy evaluation - x = 1:10 - data.table::fcase( - x < 5L, 1L, - x >= 5L, 3L, - x == 5L, stop("provided value is an unexpected one!") - ) - # [1] 1 1 1 1 3 3 3 3 3 3 - - dplyr::case_when( - x < 5L ~ 1L, - x >= 5L ~ 3L, - x == 5L ~ stop("provided value is an unexpected one!") - ) - # Error in eval_tidy(pair$rhs, env = default_env) : - # provided value is an unexpected one! - - # Benchmark - x = sample(1:100, 3e7, replace = TRUE) # 114 MB - microbenchmark::microbenchmark( - dplyr::case_when( - x < 10L ~ 0L, - x < 20L ~ 10L, - x < 30L ~ 20L, - x < 40L ~ 30L, - x < 50L ~ 40L, - x < 60L ~ 50L, - x > 60L ~ 60L - ), - data.table::fcase( - x < 10L, 0L, - x < 20L, 10L, - x < 30L, 20L, - x < 40L, 30L, - x < 50L, 40L, - x < 60L, 50L, - x > 60L, 60L - ), - times = 5L, - unit = "s") - # Unit: seconds - # expr min lq mean median uq max neval - # dplyr::case_when 11.57 11.71 12.22 11.82 12.00 14.02 5 - # data.table::fcase 1.49 1.55 1.67 1.71 1.73 1.86 5 - ``` - -7. `.SDcols=is.numeric` now works; i.e., `.SDcols=` accepts a function which is used to select the columns of `.SD`, [#3950](https://github.com/Rdatatable/data.table/issues/3950). Any function (even _ad hoc_) that returns scalar `TRUE`/`FALSE` for each column will do; e.g., `.SDcols=!is.character` will return _non_-character columns (_a la_ `Negate()`). Note that `.SDcols=patterns(...)` can still be used for filtering based on the column names. - -8.
Compiler support for OpenMP is now detected during installation, which allows `data.table` to compile from source (in single threaded mode) on macOS which, frustratingly, does not include OpenMP support by default, [#2161](https://github.com/Rdatatable/data.table/issues/2161), unlike Windows and Linux. A helpful message is emitted during installation from source, and on package startup as before. Many thanks to @jimhester for the PR. - -9. `rbindlist` now supports columns of type `expression`, [#546](https://github.com/Rdatatable/data.table/issues/546). Thanks @jangorecki for the report. - -10. The dimensions of objects in a `list` column are now displayed, [#3671](https://github.com/Rdatatable/data.table/issues/3671). Thanks to @randomgambit for the request, and Tyson Barrett for the PR. - -11. `frank` gains `ties.method='last'`, paralleling the same in `base::order` which has been available since R 3.3.0 (April 2016), [#1689](https://github.com/Rdatatable/data.table/issues/1689). Thanks @abudis for the encouragement to accommodate this. - -12. The `keep.rownames` argument in `as.data.table.xts` now accepts a string, which can be used for specifying the column name of the index of the xts input, [#4232](https://github.com/Rdatatable/data.table/issues/4232). Thanks to @shrektan for the request and the PR. - -13. New symbol `.NGRP` available in `j`, [#1206](https://github.com/Rdatatable/data.table/issues/1206). `.GRP` (the group number) was already available taking values from `1` to `.NGRP`. The number of groups, `.NGRP`, might be useful in `j` to calculate a percentage of groups processed so far, or to do something different for the last or penultimate group, for example. - -14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR. - -15. 
A new throttle feature has been introduced to speed up small data tasks that are repeated in a loop, [#3175](https://github.com/Rdatatable/data.table/issues/3175) [#3438](https://github.com/Rdatatable/data.table/issues/3438) [#3205](https://github.com/Rdatatable/data.table/issues/3205) [#3735](https://github.com/Rdatatable/data.table/issues/3735) [#3739](https://github.com/Rdatatable/data.table/issues/3739) [#4284](https://github.com/Rdatatable/data.table/issues/4284) [#4527](https://github.com/Rdatatable/data.table/issues/4527) [#4294](https://github.com/Rdatatable/data.table/issues/4294) [#1120](https://github.com/Rdatatable/data.table/issues/1120). The default throttle of 1024 means that a single thread will be used when nrow<=1024, two threads when nrow<=2048, etc. To change the default, use `setDTthreads(throttle=)`. Or use the new environment variable `R_DATATABLE_THROTTLE`. If you use `Sys.setenv()` in a running R session to change this environment variable, be sure to run an empty `setDTthreads()` call afterwards for the change to take effect; see `?setDTthreads`. The word *throttle* is used to convey that the number of threads is restricted (throttled) for small data tasks. Reducing throttle to 1 will turn off throttling and should revert behaviour to past versions (i.e. using many threads even for small data). Increasing throttle to, say, 65536 will utilize multi-threading only for larger datasets. The value 1024 is a guess. We welcome feedback and test results indicating what the best default should be. - -## BUG FIXES - -1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). - -2. `DT[i]` could segfault when `i` is a zero-column `data.table`, [#4060](https://github.com/Rdatatable/data.table/issues/4060). Thanks @shrektan for reporting and fixing. - -3. 
Dispatch of `first` and `last` functions now properly works again for `xts` objects, [#4053](https://github.com/Rdatatable/data.table/issues/4053). Thanks to @ethanbsmith for reporting. - -4. If `.SD` is returned as-is during grouping, it is now unlocked for downstream usage, part of [#4159](https://github.com/Rdatatable/data.table/issues/4159). Thanks also to @mllg for detecting a problem with the initial fix here during the dev release [#4173](https://github.com/Rdatatable/data.table/issues/4173). - -5. `GForce` is deactivated for `[[` on non-atomic input, part of [#4159](https://github.com/Rdatatable/data.table/issues/4159). Thanks @hongyuanjia and @ColeMiller1 for helping debug an issue in dev with the original fix before release, [#4612](https://github.com/Rdatatable/data.table/issues/4612). - -6. `all.equal(DT, y)` no longer errors when `y` is not a data.table, [#4042](https://github.com/Rdatatable/data.table/issues/4042). Thanks to @d-sci for reporting and the PR. - -7. A length 1 `colClasses=NA_character_` would cause `fread` to incorrectly coerce all columns to character, [#4237](https://github.com/Rdatatable/data.table/issues/4237). - -8. An `fwrite` error message could include a garbled number and cause test 1737.5 to fail, [#3492](https://github.com/Rdatatable/data.table/issues/3492). Thanks to @QuLogic for debugging the issue on ARMv7hl, and the PR fixing it. - -9. `fread` improves handling of very small (<1e-300) or very large (>1e+300) floating point numbers on non-x86 architectures (specifically ppc64le and armv7hl). Thanks to @QuLogic for reporting and fixing, [PR#4165](https://github.com/Rdatatable/data.table/pull/4165). - -10. When updating by reference, the use of `get` could result in columns being re-ordered silently, [#4089](https://github.com/Rdatatable/data.table/issues/4089). Thanks to @dmongin for reporting and Cole Miller for the fix. - -11. 
`copy()` now overallocates deeply nested lists of `data.table`s, [#4205](https://github.com/Rdatatable/data.table/issues/4205). Thanks to @d-sci for reporting and the PR. - -12. `rbindlist` no longer errors when coercing complex vectors to character vectors, [#4202](https://github.com/Rdatatable/data.table/issues/4202). Thanks to @sritchie73 for reporting and the PR. - -13. A relatively rare case of segfault when combining non-equi joins with `by=.EACHI` is now fixed, closes [#4388](https://github.com/Rdatatable/data.table/issues/4388). - -14. Selecting key columns could incur a large speed penalty, [#4498](https://github.com/Rdatatable/data.table/issues/4498). Thanks to @Jesper on Stack Overflow for the report. - -15. `all.equal(DT1, DT2, ignore.row.order=TRUE)` could return TRUE incorrectly in the presence of NAs, [#4422](https://github.com/Rdatatable/data.table/issues/4422). - -16. Non-equi joins now automatically set `allow.cartesian=TRUE`, [#4489](https://github.com/Rdatatable/data.table/issues/4489). Thanks to @Henrik-P for reporting. - -17. `X[Y, on=character(0)]` and `merge(X, Y, by.x=character(0), by.y=character(0))` no longer crash, [#4272](https://github.com/Rdatatable/data.table/pull/4272). Thanks to @tlapak for the PR. - -18. `by=col1:col4` gave an incorrect result if `key(DT)==c("col1","col4")`, [#4285](https://github.com/Rdatatable/data.table/issues/4285). Thanks to @cbilot for reporting, and Cole Miller for the PR. - -19. Matrices resulting from logical operators or comparisons on `data.table`s, e.g. in `dta == dtb`, can no longer have their colnames changed by reference later, [#4323](https://github.com/Rdatatable/data.table/issues/4323). Thanks to @eyherabh for reporting and @tlapak for the PR. - -20. The environment variable `R_DATATABLE_NUM_THREADS` was being limited by `R_DATATABLE_NUM_PROCS_PERCENT` (by default 50%), [#4514](https://github.com/Rdatatable/data.table/issues/4514).
It is now consistent with `setDTthreads()` and only limited by the full number of logical CPUs. For example, on a machine with 8 logical CPUs, `R_DATATABLE_NUM_THREADS=6` now results in 6 threads rather than 4 (50% of 8). - -## NOTES - -0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. - -1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). - -2. `set2key`, `set2keyv`, and `key2` have been removed, as they have been warning since v1.9.8 (Nov 2016) and halting with helpful message since v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' and quickly superseded by `setindex` and `indices`. - -3. `data.table` now supports messaging in simplified Chinese (locale `zh_CN`). 
This was the result of a monumental collaboration to translate `data.table`'s roughly 1400 warnings, errors, and verbose messages (about 16,000 words/100,000 characters) over the course of two months from volunteer translators in at least 4 time zones, most of whom are first-time `data.table` contributors and many of whom are first-time OSS contributors! - - A big thanks goes out to @fengqifang, @hongyuanjia, @biobai, @zhiiiyang, @Leo-Lee15, @soappp9527, @amy17519, @Zachary-Wu, @caiquanyou, @dracodoc, @JulianYlli12, @renkun-ken, @Xueliang24, @koohoko, @KingdaShi, @gaospecial, @shrektan, @sunshine1126, @shawnchen1996, @yc0802, @HesperusArcher, and @Emberwhirl, all of whom took time from their busy schedules to translate and review others' translations. Especial thanks goes to @zhiiiyang and @hongyuanjia who went above and beyond in helping to push the project over the finish line, and to @GuangchuangYu who helped to organize the volunteer pool. - - `data.table` joins `lubridate` and `nlme` as the only of the top 200 most-downloaded community packages on CRAN to offer non-English messaging, and is the only of the top 50 packages to offer complete support of all messaging. We hope this is a first step in broadening the reach and accessibility of the R ecosystem to more users globally and look forward to working with other maintainers looking to bolster the portability of their packages by offering advice on learnings from this undertaking. - - We would be remiss not to mention the laudable lengths to which the R core team goes to maintain the _much_ larger repository (about 6,000 messages in more than 10 languages) of translations for R itself. - - We will evaluate the feasibility (in terms of maintenance difficulty and CRAN package size limits) of offering support for other languages in later releases. - -4. `fifelse` and `fcase` now notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). 
Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. - -5. `frank(..., ties.method="random", na.last=NA)` now returns the same random ordering that `base::rank` does, [#4243](https://github.com/Rdatatable/data.table/pull/4243). - -6. The error message when mistakenly using `:=` in `i` instead of `j` has been much improved, [#4227](https://github.com/Rdatatable/data.table/issues/4227). Thanks to Hugh Parsonage for the detailed suggestion. - - ```R - > DT = data.table(A=1:2) - > DT[B:=3] - Error: Operator := detected in i, the first argument inside DT[...], but is only valid in - the second argument, j. Most often, this happens when forgetting the first comma - (e.g. DT[newvar:=5] instead of DT[, new_var:=5]). Please double-check the - syntax. Run traceback(), and debugger() to get a line number. - > DT[, B:=3] - > DT - A B - - 1: 1 3 - 2: 2 3 - ``` - -7. Added more explanation/examples to `?data.table` for how to use `.BY`, [#1363](https://github.com/Rdatatable/data.table/issues/1363). - -8. Changes upstream in R have been accommodated; e.g. `c.POSIXct` now raises `'origin' must be supplied` which impacted `foverlaps`, [#4428](https://github.com/Rdatatable/data.table/pull/4428). - -9. `data.table::update.dev.pkg()` now unloads the `data.table` namespace to alleviate a DLL lock issue on Windows, [#4403](https://github.com/Rdatatable/data.table/issues/4403). Thanks to @drag5 for reporting. - -10. `data.table` package binaries built by R version 3 (R3) should only be installed in R3, and similarly `data.table` package binaries built by R4 should only be installed in R4. Otherwise, `package ‘data.table’ was built under R version...` warning will occur which should not be ignored. This is due to a very welcome change to `rbind` and `cbind` in R 4.0.0 which enabled us to remove workarounds, see news item in v1.12.6 below in this file.
To continue to support both R3 and R4, `data.table`'s NAMESPACE file contains a condition on the R major version (3 or 4) and this is what gives rise to the requirement that the major version used to build `data.table` must match the major version used to install it. Thanks to @vinhdizzo for reporting, [#4528](https://github.com/Rdatatable/data.table/issues/4528). - -11. Internal function `shallow()` no longer makes a deep copy of secondary indices. This eliminates a relatively small time and memory overhead when indices are present that added up significantly when performing many operations, such as joins, in a loop or when joining in `j` by group, [#4311](https://github.com/Rdatatable/data.table/issues/4311). Many thanks to @renkun-ken for the report, and @tlapak for the investigation and PR. - -12. The `datatable.old.unique.by.key` option has been removed as per the 4 year schedule detailed in note 10 of v1.12.4 (Oct 2019), note 10 of v1.11.0 (May 2018), and note 1 of v1.9.8 (Nov 2016). It has been generating a helpful warning for 2 years, and helpful error for 1 year. - - -# data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) - -## NEW FEATURES - -1. `DT[, {...; .(A,B)}]` (i.e. when `.()` is the final item of a multi-statement `{...}`) now auto-names the columns `A` and `B` (just like `DT[, .(A,B)]`) rather than `V1` and `V2`, [#2478](https://github.com/Rdatatable/data.table/issues/2478) [#609](https://github.com/Rdatatable/data.table/issues/609). Similarly, `DT[, if (.N>1) .(B), by=A]` now auto-names the column `B` rather than `V1`. Explicit names are unaffected; e.g. `DT[, {... y= ...; .(A=C+y)}, by=...]` named the column `A` before, and still does. Thanks also to @renkun-ken for his go-first strong testing which caught an issue not caught by the test suite or by revdep testing, related to NULL being the last item, [#4061](https://github.com/Rdatatable/data.table/issues/4061). - -## BUG FIXES - -1. 
`frollapply` could segfault and exceed R's C protect limits, [#3993](https://github.com/Rdatatable/data.table/issues/3993). Thanks to @DavisVaughan for reporting and fixing. - -2. `DT[, sum(grp), by=grp]` (i.e. aggregating the same column being grouped) could error with `object 'grp' not found`, [#3103](https://github.com/Rdatatable/data.table/issues/3103). Thanks to @cbailiss for reporting. - -## NOTES - -1. Links in the manual were creating warnings when installing HTML, [#4000](https://github.com/Rdatatable/data.table/issues/4000). Thanks to Morgan Jacob. - -2. Adjustments for R-devel (R 4.0.0) which now has reference counting turned on, [#4058](https://github.com/Rdatatable/data.table/issues/4058) [#4093](https://github.com/Rdatatable/data.table/issues/4093). This motivated early release to CRAN because every day CRAN tests every package using the previous day's changes in R-devel; a much valued feature of the R ecosystem. It helps R-core if packages can pass changes in R-devel as soon as possible. Thanks to Luke Tierney for the notice, and for implementing reference counting which we look forward to very much. - -3. C internals have been standardized to use `PRI[u|d]64` to print `[u]int64_t`. This solves new warnings from `gcc-8` on Windows with `%lld`, [#4062](https://github.com/Rdatatable/data.table/issues/4062), in many cases already working around `snprintf` on Windows not supporting `%zu`. Release procedures have been augmented to prevent any internal use of `llu`, `lld`, `zu` or `zd`. - -4. `test.data.table()` gains `showProgress=interactive()` to suppress the thousands of `Running test id ...` lines displayed by CRAN checks when there are warnings or errors. - - -# data.table [v1.12.6](https://github.com/Rdatatable/data.table/milestone/18?closed=1) (18 Oct 2019) - -## BUG FIXES - -1. `shift()` on a `nanotime` with the default `fill=NA` now fills a `nanotime` missing value correctly, [#3945](https://github.com/Rdatatable/data.table/issues/3945). 
Thanks to @mschubmehl for reporting and fixing in PR [#3942](https://github.com/Rdatatable/data.table/pull/3942). - -2. Compilation failed on CRAN's MacOS due to an older version of `zlib.h/zconf.h` which did not have `z_const` defined, [#3939](https://github.com/Rdatatable/data.table/issues/3939). Other open-source projects unrelated to R have experienced this problem on MacOS too. We have followed the common practice of removing `z_const` to support the older `zlib` versions, and data.table's release procedures have gained a `grep` to ensure `z_const` isn't used again by accident in future. The library `zlib` is used for `fwrite`'s new feature of multithreaded compression on-the-fly; see item 3 of 1.12.4 below. - -3. A runtime error in `fwrite`'s compression, but only observed so far on Solaris 10 32bit with zlib 1.2.8 (Apr 2013), [#3931](https://github.com/Rdatatable/data.table/issues/3931): `Error -2: one or more threads failed to allocate buffers or there was a compression error.` In case it happens again, this area has been made more robust and the error more detailed. As is often the case, investigating the Solaris problem revealed secondary issues in the same area of the code. In this case, some `%d` in verbose output should have been `%lld`. This obliquity that CRAN's Solaris provides is greatly appreciated. - -4. A leak could occur in the event of an unsupported column type error, or if working memory could only partially be allocated; [#3940](https://github.com/Rdatatable/data.table/issues/3940). Found thanks to `clang`'s Leak Sanitizer (prompted by CRAN's diligent use of latest tools), and two tests in the test suite which tested the unsupported-type error. - -## NOTES - -1. Many thanks to Kurt Hornik for fixing R's S3 dispatch of `rbind` and `cbind` methods, [#3948](https://github.com/Rdatatable/data.table/issues/3948). 
With `R>=4.0.0` (current R-devel), `data.table` now registers the S3 methods `cbind.data.table` and `rbind.data.table`, and no longer applies the workaround documented in FAQ 2.24. - - -# data.table [v1.12.4](https://github.com/Rdatatable/data.table/milestone/16?closed=1) (03 Oct 2019) - -## NEW FEATURES - -1. `rleid()` functions now support long vectors (length > 2 billion). - -2. `fread()`: - * now skips embedded `NUL` (`\0`), [#3400](https://github.com/Rdatatable/data.table/issues/3400). Thanks to Marcus Davy for reporting with examples, Roy Storey for the initial PR, and Bingjie Qian for testing this feature on a very complicated real-world file. - * `colClasses` now supports `'complex'`, `'raw'`, `'Date'`, `'POSIXct'`, and user-defined classes (so long as an `as.` method exists), [#491](https://github.com/Rdatatable/data.table/issues/491) [#1634](https://github.com/Rdatatable/data.table/issues/1634) [#2610](https://github.com/Rdatatable/data.table/issues/2610). Any error during coercion results in a warning and the column is left as the default type (probably `"character"`). Thanks to @hughparsonage for the PR. - * `stringsAsFactors=0.10` will factorize any character column containing under `0.10*nrow` unique strings, [#2025](https://github.com/Rdatatable/data.table/issues/2025). Thanks to @hughparsonage for the PR. - * `colClasses=list(numeric=20:30, numeric="ID")` will apply the `numeric` type to column numbers `20:30` as before and now also column name `"ID"`; i.e. all duplicate class names are now respected rather than only the first. This need may arise when specifying some columns by name and others by number, as in this example. Thanks to @hughparsonage for the PR. - * gains `yaml` (default `FALSE`) and the ability to parse CSVY-formatted input files; i.e., csv files with metadata in a header formatted as YAML (https://csvy.org/), [#1701](https://github.com/Rdatatable/data.table/issues/1701). 
See `?fread` and files in `/inst/tests/csvy/` for sample formats. Please provide feedback if you find this feature useful and would like extended capabilities. For now, consider it experimental, meaning the API/arguments may change. Thanks to @leeper at [`rio`](https://github.com/gesistsa/rio) for the inspiration and @MichaelChirico for implementing. - * `select` can now be used to specify types for just the columns selected, [#1426](https://github.com/Rdatatable/data.table/issues/1426). Just like `colClasses` it can be a named vector of `colname=type` pairs, or a named `list` of `type=col(s)` pairs. For example: - - ```R - fread(file, select=c(colD="character", # returns 2 columns: colD,colA - colA="integer64")) - fread(file, select=list(character="colD", # returns 5 columns: colD,8,9,10,colA - integer= 8:10, - character="colA")) - ``` - * gains `tmpdir=` argument which is passed to `tempfile()` whenever a temporary file is needed. Thanks to @mschubmehl for the PR. As before, setting `TMPDIR` (to `/dev/shm` for example) before starting the R session still works too; see `?base::tempdir`. - -3. `fwrite()`: - * now writes compressed `.gz` files directly, [#2016](https://github.com/Rdatatable/data.table/issues/2016). Compression, like `fwrite()`, is multithreaded and compresses each chunk on-the-fly (a full size intermediate file is not created). Use a ".gz" extension, or the new `compress=` option. Many thanks to Philippe Chataignon for the significant PR. For example: - - ```R - DT = data.table(A=rep(1:2, 100e6), B=rep(1:4, 50e6)) - fwrite(DT, "data.csv") # 763MB; 1.3s - fwrite(DT, "data.csv.gz") # 2MB; 1.6s - identical(fread("data.csv.gz"), DT) - ``` - - Note that compression is handled using `zlib` library. In the unlikely event of missing `zlib.h`, on a machine that is compiling `data.table` from sources, one may get `fwrite.c` compilation error `zlib.h: No such file or directory`. 
As of now, the easiest solution is to install missing library using `sudo apt install zlib1g-dev` (Debian/Ubuntu). Installing R (`r-base-dev`) depends on `zlib1g-dev` so this should be rather uncommon. If it happens to you please upvote related issue [#3872](https://github.com/Rdatatable/data.table/issues/3872). - - * Gains `yaml` argument matching that of `fread`, [#3534](https://github.com/Rdatatable/data.table/issues/3534). See the item in `fread` for a bit more detail; here, we'd like to reiterate that feedback is appreciated in the initial phase of rollout for this feature. - - * Gains `bom` argument to add a *byte order mark* (BOM) at the beginning of the file to signal that the file is encoded in UTF-8, [#3488](https://github.com/Rdatatable/data.table/issues/3488). Thanks to Stefan Fleck for requesting and Philippe Chataignon for implementing. - - * Now supports type `complex`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). - - * Gains `scipen` [#2020](https://github.com/Rdatatable/data.table/issues/2020), the number 1 most-requested feature [#3189](https://github.com/Rdatatable/data.table/issues/3189). The default is `getOption("scipen")` so that `fwrite` will now respect R's option in the same way as `base::write.csv` and `base::format`, as expected. The parameter and option name have been kept the same as base R's `scipen` for consistency and to aid online search. It stands for 'scientific penalty'; i.e., the number of characters to add to the width within which non-scientific number format is used if it will fit. A high penalty essentially turns off scientific format. We believe that common practice is to use a value of 999, however, if you do use 999, because your data _might_ include very long numbers such as `10^300`, `fwrite` needs to account for the worst case field width in its buffer allocation per thread. This may impact space or time. 
If you experience slowdowns or unacceptable memory usage, please pass `verbose=TRUE` to `fwrite`, inspect the output, and report the issue. A workaround, until we can determine the best strategy, may be to pass a smaller value to `scipen`, such as 50. We have observed that `fwrite(DT, scipen=50)` appears to write `10^50` accurately, unlike base R. However, this may be a happy accident and not apply generally. Further work may be needed in this area. - - ```R - DT = data.table(a=0.0001, b=1000000) - fwrite(DT) - # a,b - # 1e-04,1e+06 - fwrite(DT,scipen=1) - # a,b - # 0.0001,1e+06 - fwrite(DT,scipen=2) - # a,b - # 0.0001,1000000 - - 10^50 - # [1] 1e+50 - options(scipen=50) - 10^50 - # [1] 100000000000000007629769841091887003294964970946560 - fwrite(data.table(A=10^50)) - # A - # 100000000000000000000000000000000000000000000000000 - ``` - -4. Assigning to one item of a list column no longer requires the RHS to be wrapped with `list` or `.()`, [#950](https://github.com/Rdatatable/data.table/issues/950). - - ```R - > DT = data.table(A=1:3, B=list(1:2,"foo",3:5)) - > DT - A B - - 1: 1 1,2 - 2: 2 foo - 3: 3 3,4,5 - > - # The following all accomplish the same assignment: - > DT[2, B:=letters[9:13]] # was error, now works - > DT[2, B:=.(letters[9:13])] # was error, now works - > DT[2, B:=.(list(letters[9:13]))] # .(list()) was needed, still works - > DT - A B - - 1: 1 1,2 - 2: 2 i,j,k,l,m - 3: 3 3,4,5 - ``` - -5. `print.data.table()` gains an option to display the timezone of `POSIXct` columns when available, [#2842](https://github.com/Rdatatable/data.table/issues/2842). Thanks to Michael Chirico for reporting and Felipe Parages for the PR. - -6. New functions `nafill` and `setnafill`, [#854](https://github.com/Rdatatable/data.table/issues/854). Thanks to Matthieu Gomez for the request and Jan Gorecki for implementing. 
- - ```R - DT = setDT(lapply(1:100, function(i) sample(c(rnorm(9e6), rep(NA_real_, 1e6))))) - format(object.size(DT), units="GB") ## 7.5 Gb - zoo::na.locf(DT, na.rm=FALSE) ## zoo 53.518s - setDTthreads(1L) - nafill(DT, "locf") ## DT 1 thread 7.562s - setDTthreads(0L) - nafill(DT, "locf") ## DT 40 threads 0.605s - setnafill(DT, "locf") ## DT in-place 0.367s - ``` - -7. New variable `.Last.updated` (similar to R's `.Last.value`) contains the number of rows affected by the most recent `:=` or `set()`, [#1885](https://github.com/Rdatatable/data.table/issues/1885). For details see `?.Last.updated`. - -8. `between()` and `%between%` are faster for `POSIXct`, [#3519](https://github.com/Rdatatable/data.table/issues/3519), and now support the `.()` alias, [#2315](https://github.com/Rdatatable/data.table/issues/2315). Thanks to @Henrik-P for the reports. There is now also support for `bit64`'s `integer64` class and more robust coercion of types, [#3517](https://github.com/Rdatatable/data.table/issues/3517). `between()` gains `check=` which checks `any(lower>upper)`; off by default for speed in particular for type character. - -9. New convenience functions `%ilike%` and `%flike%` which map to new `like()` arguments `ignore.case` and `fixed` respectively, [#3333](https://github.com/Rdatatable/data.table/issues/3333). `%ilike%` is for case-insensitive pattern matching. `%flike%` is for more efficient matching of fixed strings. Thanks to @andreasLD for providing most of the core code. - -10. `on=.NATURAL` (or alternatively `X[on=Y]` [#3621](https://github.com/Rdatatable/data.table/issues/3621)) joins two tables on their common column names, so called _natural join_, [#629](https://github.com/Rdatatable/data.table/issues/629). Thanks to David Kulp for request. As before, when `on=` is not provided, `X` must have a key and the key columns are used to join (like rownames, but multi-column and multi-type). - -11. 
`as.data.table` gains `key` argument mirroring its use in `setDT` and `data.table`, [#890](https://github.com/Rdatatable/data.table/issues/890). As a byproduct, the arguments of `as.data.table.array` have changed order, which could affect code relying on positional arguments to this method. Thanks @cooldome for the suggestion and @MichaelChirico for implementation. - -12. `merge.data.table` is now exported, [#2618](https://github.com/Rdatatable/data.table/pull/2618). We realize that S3 methods should not ordinarily be exported. Rather, the method should be invoked via S3 dispatch. But users continue to request its export, perhaps because of intricacies relating to the fact that data.table inherits from data.frame, there are two arguments to `merge()` but S3 dispatch applies just to the first, and a desire to explicitly call `data.table::merge.data.table` from package code. Thanks to @AndreMikulec for the most recent request. - -13. New rolling function to calculate rolling sum has been implemented and exported, see `?frollsum`, [#2778](https://github.com/Rdatatable/data.table/issues/2778). - -14. `setkey` to an existing index now uses the index, [#2889](https://github.com/Rdatatable/data.table/issues/2889). Thanks to @MichaelChirico for suggesting and @saraswatmks for the PR. - -15. `DT[order(col)[1:5], ...]` (i.e. where `i` is a compound expression involving `order()`) is now optimized to use `data.table`'s multithreaded `forder`, [#1921](https://github.com/Rdatatable/data.table/issues/1921). This example is not a fully optimal top-N query since the full ordering is still computed. The improvement is that the call to `order()` is computed faster for any `i` expression using `order`. - -16. `as.data.table` now unpacks columns in a `data.frame` which are themselves a `data.frame` or `matrix`. This need arises when parsing JSON, a corollary in [#3369](https://github.com/Rdatatable/data.table/issues/3369#issuecomment-462662752). 
Bug fix 19 in v1.12.2 (see below) added a helpful error (rather than segfault) to detect such invalid `data.table`, and promised that `as.data.table()` would unpack these columns in the next release (i.e. this release) so that the invalid `data.table` is not created in the first place. Further, `setDT` now warns if it observes such columns and suggests using `as.data.table` instead, [#3760](https://github.com/Rdatatable/data.table/issues/3760). - -17. `CJ` has been ported to C and parallelized, thanks to a PR by Michael Chirico, [#3596](https://github.com/Rdatatable/data.table/pull/3596). All types benefit, but, as in many `data.table` operations, factors benefit more than character. - - ```R - # default 4 threads on a laptop with 16GB RAM and 8 logical CPU - - ids = as.vector(outer(LETTERS, LETTERS, paste0)) - system.time( CJ(ids, 1:500000) ) # 3.9GB; 340m rows - # user system elapsed (seconds) - # 3.000 0.817 3.798 # was - # 1.800 0.832 2.190 # now - - # ids = as.factor(ids) - system.time( CJ(ids, 1:500000) ) # 2.6GB; 340m rows - # user system elapsed (seconds) - # 1.779 0.534 2.293 # was - # 0.357 0.763 0.292 # now - ``` - -18. New function `fcoalesce(...)` has been written in C, and is multithreaded for `numeric` and `factor`. It replaces missing values according to a prioritized list of candidates (as per SQL COALESCE, `dplyr::coalesce`, and `hutils::coalesce`), [#3424](https://github.com/Rdatatable/data.table/issues/3424). It accepts any number of vectors in several forms. For example, given three vectors `x`, `y`, and `z`, where each `NA` in `x` is to be replaced by the corresponding value in `y` if that is non-NA, else the corresponding value in `z`, the following equivalent forms are all accepted: `fcoalesce(x,y,z)`, `fcoalesce(x,list(y,z))`, and `fcoalesce(list(x,y,z))`. Being a new function, its behaviour is subject to change particularly for type `list`, [#3712](https://github.com/Rdatatable/data.table/issues/3712). 
- - ```R - # default 4 threads on a laptop with 16GB RAM and 8 logical CPU - N = 100e6 - x = replicate(5, {x=sample(N); x[sample(N, N/2)]=NA; x}, simplify=FALSE) # 2GB - y1 = do.call(dplyr::coalesce, x) - y2 = do.call(hutils::coalesce, x) - y3 = do.call(data.table::fcoalesce, x) - # user system elapsed (seconds) - # 4.935 1.876 6.810 # dplyr::coalesce - # 3.122 0.831 3.956 # hutils::coalesce - # 0.915 0.099 0.379 # data.table::fcoalesce - identical(y1,y2) && identical(y1,y3) - # TRUE - ``` - -19. Type `complex` is now supported by `setkey`, `setorder`, `:=`, `by=`, `keyby=`, `shift`, `dcast`, `frank`, `rowid`, `rleid`, `CJ`, `fcoalesce`, `unique`, and `uniqueN`, [#3690](https://github.com/Rdatatable/data.table/issues/3690). Thanks to Gareth Ward and Elio Campitelli for their reports and input. Sorting `complex` is achieved the same way as base R; i.e., first by the real part then by the imaginary part (as if the `complex` column were two separate columns of `double`). There is no plan to support joining/merging on `complex` columns until a user demonstrates a need for that. - -20. `setkey`, `[key]by=` and `on=` in verbose mode (`options(datatable.verbose=TRUE)`) now detect any columns inheriting from `Date` which are stored as 8 byte double, test if any fractions are present, and if not suggest using a 4 byte integer instead (such as `data.table::IDate`) to save space and time, [#1738](https://github.com/Rdatatable/data.table/issues/1738). In future this could be upgraded to `message` or `warning` depending on feedback. - -21. New function `fifelse(test, yes, no, na)` has been implemented in C by Morgan Jacob, [#3657](https://github.com/Rdatatable/data.table/issues/3657) and [#3753](https://github.com/Rdatatable/data.table/issues/3753). It is comparable to `base::ifelse`, `dplyr::if_else`, `hutils::if_else`, and (forthcoming) [`vctrs::if_else()`](https://vctrs.r-lib.org/articles/stability.html#ifelse).
It returns a vector of the same length as `test` but unlike `base::ifelse` the output type is consistent with those of `yes` and `no`. Please see `?data.table::fifelse` for more details. - - ```R - # default 4 threads on a laptop with 16GB RAM and 8 logical CPU - x = sample(c(TRUE,FALSE), 3e8, replace=TRUE) # 1GB - microbenchmark::microbenchmark( - base::ifelse(x, 7L, 11L), - dplyr::if_else(x, 7L, 11L), - hutils::if_else(x, 7L, 11L), - data.table::fifelse(x, 7L, 11L), - times = 5L, unit="s" - ) - # Unit: seconds - # expr min med max neval - # base::ifelse(x, 7L, 11L) 8.5 8.6 8.8 5 - # dplyr::if_else(x, 7L, 11L) 9.4 9.5 9.7 5 - # hutils::if_else(x, 7L, 11L) 2.6 2.6 2.7 5 - # data.table::fifelse(x, 7L, 11L) 1.5 1.5 1.6 5 # setDTthreads(1) - # data.table::fifelse(x, 7L, 11L) 0.8 0.8 0.9 5 # setDTthreads(2) - # data.table::fifelse(x, 7L, 11L) 0.4 0.4 0.5 5 # setDTthreads(4) - ``` - -22. `transpose` gains `keep.names=` and `make.names=` arguments, [#1886](https://github.com/Rdatatable/data.table/issues/1886). Previously, column names were dropped and there was no way to keep them. `keep.names="rn"` keeps the column names and puts them in the `"rn"` column of the result. Similarly, `make.names="rn"` uses column `"rn"` as the column names of the result. Both arguments are `NULL` by default for backwards compatibility. As these new arguments are new, they are subject to change in future according to community feedback. Thanks to @ghost for the request. - -23. Added a `data.table` method for `utils::edit` to ensure a `data.table` is returned, for convenience, [#593](https://github.com/Rdatatable/data.table/issues/593). - -24. More efficient optimization of many columns in `j` (e.g. from `.SD`), [#1470](https://github.com/Rdatatable/data.table/issues/1470). Thanks @Jorges1000 for the report. - -25. `setnames(DT, old, new)` now omits any `old==new` to save redundant key and index name updates, [#3783](https://github.com/Rdatatable/data.table/issues/3783). 
`setnames(DT, new)` (i.e. not providing `old`) already omitted any column name updates where `names(DT)==new`; e.g. `setnames(DT, gsub('^_', '', names(DT)))` exits early if no columns start with `_`. - -26. `[[` by group is now optimized for regular vectors (not type list), [#3209](https://github.com/Rdatatable/data.table/issues/3209). Thanks @renkun-ken for the suggestion. `[` by group was already optimized. Please file a feature request if you would like this optimization for list columns. - -27. New function `frollapply` for rolling computation of arbitrary R functions (caveat: input `x` is coerced to numeric beforehand, and the function must return a scalar numeric value). The API is consistent to extant rolling functions `frollmean` and `frollsum`; note that it will generally be slower than those functions because (1) the known functions use our optimized internal C implementation and (2) there is no thread-safe API to R's C `eval`. Nevertheless `frollapply` is faster than corresponding `base`-only and `zoo` versions: - - ```R - set.seed(108) - x = rnorm(1e6); n = 1e3 - base_rollapply = function(x, n, FUN) { - nx = length(x) - ans = rep(NA_real_, nx) - for (i in n:nx) ans[i] = FUN(x[(i-n+1):i]) - ans - } - system.time(base_rollapply(x, n, mean)) - system.time(zoo::rollapplyr(x, n, function(x) mean(x), fill=NA)) - system.time(zoo::rollmeanr(x, n, fill=NA)) - system.time(frollapply(x, n, mean)) - system.time(frollmean(x, n)) - - ### fun mean sum median - # base_rollapply 8.815 5.151 60.175 - # zoo::rollapply 34.373 27.837 88.552 - # zoo::roll[fun] 0.215 0.185 NA ## median not fully supported - # frollapply 5.404 1.419 56.475 - # froll[fun] 0.003 0.002 NA ## median not yet supported - ``` - -28. `setnames()` now accepts functions in `old=` and `new=`, [#3703](https://github.com/Rdatatable/data.table/issues/3703). Thanks @smingerson for the feature request and @shrektan for the PR. 
- - ```R - DT = data.table(a=1:3, b=4:6, c=7:9) - setnames(DT, toupper) - names(DT) - # [1] "A" "B" "C" - setnames(DT, c(1,3), tolower) - names(DT) - # [1] "a" "B" "c" - ``` - -29. `:=` and `set()` now use zero-copy type coercion. Accordingly, `DT[..., integerColumn:=0]` and `set(DT,i,j,0)` no longer warn about the `0` ('numeric') needing to be `0L` ('integer') because there is no longer any time or space used for this coercion. The old long warning was off-putting to new users ("what and why L?"), whereas advanced users appreciated the old warning so they could avoid the coercion. Although the time and space for one coercion in a single call is unmeasurably small, when placed in a loop the small overhead of any allocation on R's heap could start to become noticeable (more so for `set()` whose purpose is low-overhead looping). Further, when assigning a value across columns of varying types, it could be inconvenient to supply the correct type for every column. Hence, zero-copy coercion was introduced to satisfy all these requirements. A warning is still issued, as before, when fractional data is discarded; e.g. when 3.14 is assigned to an integer column. Zero-copy coercion applies to length>1 vectors as well as length-1 vectors. - -## BUG FIXES - -1. `first`, `last`, `head` and `tail` by group no longer error in some cases, [#2030](https://github.com/Rdatatable/data.table/issues/2030) [#3462](https://github.com/Rdatatable/data.table/issues/3462). Thanks to @franknarf1 for reporting. - -2. `keyby=colName` could use the wrong index and return incorrect results if both `colName` and `colNameExtra` (where `colName` is a leading subset of characters of `colNameExtra`) are column names and an index exists on `colNameExtra`, [#3498](https://github.com/Rdatatable/data.table/issues/3498). Thanks to Xianying Tan for the detailed report and pinpointing the source line at fault. - -3. 
A missing item in `j` such as `j=.(colA, )` now gives a helpful error (`Item 2 of the .() or list() passed to j is missing`) rather than the unhelpful error `argument "this_jsub" is missing, with no default` (v1.12.2) or `argument 2 is empty` (v1.12.0 and before), [#3507](https://github.com/Rdatatable/data.table/issues/3507). Thanks to @eddelbuettel for the report. - -4. `fwrite()` could crash when writing very long strings such as 30 million characters, [#2974](https://github.com/Rdatatable/data.table/issues/2974), and could be unstable in memory constrained environments, [#2612](https://github.com/Rdatatable/data.table/issues/2612). Thanks to @logworthy and @zachokeeffe for reporting and Philippe Chataignon for fixing in PR [#3288](https://github.com/Rdatatable/data.table/pull/3288). - -5. `fread()` could crash if `quote=""` (i.e. ignore quotes), the last line is too short, and `fill=TRUE`, [#3524](https://github.com/Rdatatable/data.table/pull/3524). Thanks to Jiucang Hao for the report and reproducible example. - -6. Printing could occur unexpectedly when code is run with `source`, [#2369](https://github.com/Rdatatable/data.table/issues/2369). Thanks to @jan-glx for the report and reproducible example. - -7. Grouping by `NULL` on zero rows `data.table` now behaves consistently to non-zero rows `data.table`, [#3530](https://github.com/Rdatatable/data.table/issues/3530). Thanks to @SymbolixAU for the report and reproducible example. - -8. GForce optimization of `median` did not retain the class; e.g. `median` of `Date` or `POSIXct` would return a raw number rather than retain the date class, [#3079](https://github.com/Rdatatable/data.table/issues/3079). Thanks to @Henrik-P for reporting. - -9. `DT[, format(mean(date),"%b-%Y"), by=group]` could fail with `invalid 'trim' argument`, [#1876](https://github.com/Rdatatable/data.table/issues/1876). Thanks to Ross Holmberg for reporting. - -10.
`externalVar=1:5; DT[, mean(externalVar), by=group]` could return incorrect results rather than a constant (`3` in this example) for each group, [#875](https://github.com/Rdatatable/data.table/issues/875). GForce optimization was being applied incorrectly to the `mean` without realizing `externalVar` was not a column. - -11. `test.data.table()` now passes in non-English R sessions, [#630](https://github.com/Rdatatable/data.table/issues/630) [#3039](https://github.com/Rdatatable/data.table/issues/3039). Each test still checks that the number of warnings and/or errors produced is correct. However, a message is displayed suggesting to restart R with `LANGUAGE=en` in order to test that the text of the warning and/or error messages are as expected, too. - -12. Joining a double column in `i` containing say 1.3, with an integer column in `x` containing say 1, would result in the 1.3 matching to 1, [#2592](https://github.com/Rdatatable/data.table/issues/2592), and joining a factor column to an integer column would match the factor's integers rather than error. The type coercion logic has been revised and strengthened. Many thanks to @MarkusBonsch for reporting and fixing. Joining a character column in `i` to a factor column in `x` is now faster and retains the character column in the result rather than coercing it to factor. Joining an integer column in `i` to a double column in `x` now retains the integer type in the result rather than coercing the integers into the double type. Logical columns may now only be joined to logical columns, other than all-NA columns which are coerced to the matching column's type. All coercions are reported in verbose mode: `options(datatable.verbose=TRUE)`. - -13. Attempting to recycle 2 or more items into an existing `list` column now gives the intended helpful error rather than `Internal error: recycle length error not caught earlier.`, [#3543](https://github.com/Rdatatable/data.table/issues/3543). 
Thanks to @MichaelChirico for finding and reporting. - -14. Subassigning using `$<-` to a `data.table` embedded in a list column of a single-row `data.table` could fail, [#3474](https://github.com/Rdatatable/data.table/issues/3474). Note that `$<-` is not recommended; please use `:=` instead which already worked in this case. Thanks to Jakob Richter for reporting. - -15. `rbind` and `rbindlist` of zero-row items now retain (again) the unused levels of any (zero-length) factor columns, [#3508](https://github.com/Rdatatable/data.table/issues/3508). This was a regression in v1.12.2 just for zero-row items. Unused factor levels were already retained for items having `nrow>=1`. Thanks to Gregory Demin for reporting. - -16. `rbind` and `rbindlist` of an item containing an ordered factor with levels containing an `NA` (as opposed to an NA integer) could segfault, [#3601](https://github.com/Rdatatable/data.table/issues/3601). This was a regression in v1.12.2. Thanks to Damian Betebenner for reporting. Also a related segfault when recycling a length-1 factor column, [#3662](https://github.com/Rdatatable/data.table/issues/3662). - -17. `example(":=", local=TRUE)` now works rather than error, [#2972](https://github.com/Rdatatable/data.table/issues/2972). Thanks @vlulla for the report. - -18. `rbind.data.frame` on `IDate` columns changed the column from `integer` to `double`, [#2008](https://github.com/Rdatatable/data.table/issues/2008). Thanks to @rmcgehee for reporting. - -19. `merge.data.table` now retains any custom classes of the first argument, [#1378](https://github.com/Rdatatable/data.table/issues/1378). Thanks to @michaelquinn32 for reopening. - -20. `c`, `seq` and `mean` of `ITime` objects now retain the `ITime` class via new `ITime` methods, [#3628](https://github.com/Rdatatable/data.table/issues/3628). Thanks @UweBlock for reporting.
The `cut` and `split` methods for `ITime` have been removed since the default methods work, [#3630](https://github.com/Rdatatable/data.table/pull/3630). - -21. `as.data.table.array` now handles the case when some of the array's dimension names are `NULL`, [#3636](https://github.com/Rdatatable/data.table/issues/3636). - -22. Adding a `list` column using `cbind`, `as.data.table`, or `data.table` now works rather than treating the `list` as if it were a set of columns and introducing an invalid NA column name, [#3471](https://github.com/Rdatatable/data.table/pull/3471). However, please note that using `:=` to add columns is preferred. - - ```R - cbind( data.table(1:2), list(c("a","b"),"a") ) - # V1 V2 NA # v1.12.2 and before - # - # 1: 1 a a - # 2: 2 b a - # - # V1 V2 # v1.12.4+ - # - # 1: 1 a,b - # 2: 2 a - ``` - -23. Incorrect sorting/grouping results due to a bug in Intel's `icc` compiler 2019 (Version 19.0.4.243 Build 20190416) has been worked around thanks to a report and fix by Sebastian Freundt, [#3647](https://github.com/Rdatatable/data.table/issues/3647). Please run `data.table::test.data.table()`. If that passes, your installation does not have the problem. - -24. `column not found` could incorrectly occur in rare non-equi-join cases, [#3635](https://github.com/Rdatatable/data.table/issues/3635). Thanks to @UweBlock for the report. - -25. Slight fix to the logic for auto-naming the `by` clause for using a custom function like `evaluate` to now be named `evaluate` instead of the name of the first symbolic argument, [#3758](https://github.com/Rdatatable/data.table/issues/3758). - -26. Column binding of zero column `data.table` will now work as expected, [#3334](https://github.com/Rdatatable/data.table/issues/3334). Thanks to @kzenstratus for the report. - -27. `integer64` sum-by-group is now properly optimized, [#1647](https://github.com/Rdatatable/data.table/issues/1647), [#3464](https://github.com/Rdatatable/data.table/issues/3464). 
Thanks to @mlandry22-h2o for the report. - -28. From v1.12.0 `between()` and `%between%` interpret missing values in `lower=` or `upper=` as unlimited bounds. A new parameter `NAbounds` has been added to achieve the old behaviour of returning `NA`, [#3522](https://github.com/Rdatatable/data.table/issues/3522). Thanks @cguill95 for reporting. This is now consistent for character input, [#3667](https://github.com/Rdatatable/data.table/issues/3667) (thanks @AnonymousBoba), and class `nanotime` is now supported too. - -29. `integer64` defined on a subset of a new column would leave "gibberish" on the remaining rows, [#3723](https://github.com/Rdatatable/data.table/issues/3723). A bug in `rbindlist` with the same root cause was also fixed, [#1459](https://github.com/Rdatatable/data.table/issues/1459). Thanks @shrektan and @jangorecki for the reports. - -30. `groupingsets` functions now properly handle alone special symbols when using an empty set to group by, [#3653](https://github.com/Rdatatable/data.table/issues/3653). Thanks to @Henrik-P for the report. - -31. A `data.table` created using `setDT()` on a `data.frame` containing identical columns referencing each other would cause `setkey()` to return incorrect results, [#3496](https://github.com/Rdatatable/data.table/issues/3496) and [#3766](https://github.com/Rdatatable/data.table/issues/3766). Thanks @kirillmayantsev and @alex46015 for reporting, and @jaapwalhout and @Atrebas for helping to debug and isolate the issue. - -32. `x[, round(.SD, 1)]` and similar operations on the whole of `.SD` could return a locked result, incorrectly preventing `:=` on the result, [#2245](https://github.com/Rdatatable/data.table/issues/2245). Thanks @grayskripko for raising. - -33. 
Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), and [#2036](https://github.com/Rdatatable/data.table/issues/2036). Thanks @franknarf1, @MichaelChirico, and @TonyBonen, for the reports. - -34. `DT[, i-1L, with=FALSE]` would misinterpret the minus sign and return an incorrect result, [#2019](https://github.com/Rdatatable/data.table/issues/2109). Thanks @cguill95 for the report. - -35. `DT[id==1, DT2[.SD, on="id"]]` (i.e. joining from `.SD` in `j`) could incorrectly fail in some cases due to `.SD` being locked, [#1926](https://github.com/Rdatatable/data.table/issues/1926), and when updating-on-join with factors [#3559](https://github.com/Rdatatable/data.table/issues/3559) [#2099](https://github.com/Rdatatable/data.table/issues/2099). Thanks @franknarf1 and @Henrik-P for the reports and for diligently tracking use cases for almost 3 years! - -36. `as.IDate.POSIXct` returned `NA` for UTC times before Dec 1901 and after Jan 2038, [#3780](https://github.com/Rdatatable/data.table/issues/3780). Thanks @gschett for the report. - -37. `rbindlist` now returns correct idcols for lists with different length vectors, [#3785](https://github.com/Rdatatable/data.table/issues/3785), [#3786](https://github.com/Rdatatable/data.table/pull/3786). Thanks to @shrektan for the report and fix. - -38. `DT[ , !rep(FALSE, ncol(DT)), with=FALSE]` correctly returns the full table, [#3013](https://github.com/Rdatatable/data.table/issues/3013) and [#2917](https://github.com/Rdatatable/data.table/issues/2917). Thanks @alexnss and @DavidArenburg for the reports. - -39. `shift(x, 0:1, type='lead', give.names=TRUE)` uses `lead` in all returned column names, [#3832](https://github.com/Rdatatable/data.table/issues/3832). Thanks @daynefiler for the report. - -40. 
Subtracting two `POSIXt` objects by group could lead to incorrect results because the `base` method internally calls `difftime` with `units='auto'`; `data.table` does not notice if the chosen units differ by group and only the last group's `units` attribute was retained, [#3694](https://github.com/Rdatatable/data.table/issues/3694) and [#761](https://github.com/Rdatatable/data.table/issues/761). To surmount this, we now internally force `units='secs'` on all `POSIXt-POSIXt` calls (reported when `verbose=TRUE`); generally we recommend calling `difftime` directly instead. Thanks @oliver-oliver and @boethian for the reports. - -41. Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), [#2036](https://github.com/Rdatatable/data.table/issues/2036), and [#2946](https://github.com/Rdatatable/data.table/issues/2946). Thanks @franknarf1, @MichaelChirico, @TonyBonen, and Steffen J. (StackOverflow) for the reports. - -42. `DT[...,by={...}]` now handles expressions in `{`, [#3156](https://github.com/Rdatatable/data.table/issues/3156). Thanks to @tdhock for the report. - -43. `:=` could change a `data.table` creation statement in the body of the function calling it, or a variable in calling scope, [#3890](https://github.com/Rdatatable/data.table/issues/3890). Many thanks to @kirillmayantsev for the detailed reports. - -44. Grouping could create a `malformed factor` and/or segfault when the factors returned by each group did not have identical levels, [#2199](https://github.com/Rdatatable/data.table/issues/2199) and [#2522](https://github.com/Rdatatable/data.table/issues/2522). Thanks to Václav Hausenblas, @franknarf1, @ben519, and @Henrik-P for reporting. - -45. 
`rbindlist` (and printing a `data.table` with over 100 rows because that uses `rbindlist(head, tail)`) could error with `malformed factor` for unordered factor columns containing a used `NA_character_` level, [#3915](https://github.com/Rdatatable/data.table/issues/3915). This is an unusual input for unordered factors because NA_integer_ is recommended by default in R. Thanks to @sindribaldur for reporting. - -46. Adding a `list` column containing an item of type `list` to a one row `data.table` could fail, [#3626](https://github.com/Rdatatable/data.table/issues/3626). Thanks to Jakob Richter for reporting. - -## NOTES - -1. `rbindlist`'s `use.names="check"` now emits its message for automatic column names (`"V[0-9]+"`) too, [#3484](https://github.com/Rdatatable/data.table/pull/3484). See news item 5 of v1.12.2 below. - -2. Adding a new column by reference using `set()` on a `data.table` loaded from a binary file now gives a more helpful error message, [#2996](https://github.com/Rdatatable/data.table/issues/2996). Thanks to Joseph Burling for reporting. - - ``` - This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed - manually (e.g. using structure()). Please run setDT() or alloc.col() on it first (to pre-allocate - space for new columns) before adding new columns by reference to it. - ``` - -3. `setorder` on a superset of a keyed `data.table`'s key now retains its key, [#3456](https://github.com/Rdatatable/data.table/issues/3456). For example, if `a` is the key of `DT`, `setorder(DT, a, -v)` will leave `DT` keyed by `a`. - -4. New option `options(datatable.quiet = TRUE)` turns off the package startup message, [#3489](https://github.com/Rdatatable/data.table/issues/3489). `suppressPackageStartupMessages()` continues to work too. Thanks to @leobarlach for the suggestion inspired by `options(tidyverse.quiet = TRUE)`.
We don't know of a way to make a package respect the `quietly=` option of `library()` and `require()` because the `quietly=` isn't passed through for use by the package's own `.onAttach`. If you can see how to do that, please submit a patch to R. - -5. When loading a `data.table` from disk (e.g. with `readRDS`), best practice is to run `setDT()` on the new object to assure it is correctly allocated memory for new column pointers. Barring this, unexpected behavior can follow; for example, if you assign a new column to `DT` from a function `f`, the new columns will only be assigned within `f` and `DT` will be unchanged. The `verbose` messaging in this situation is now more helpful, [#1729](https://github.com/Rdatatable/data.table/issues/1729). Thanks @vspinu for sharing his experience to spur this. - -6. New vignette _Using `.SD` for Data Analysis_, a deep dive into use cases for the `.SD` variable to help illuminate this topic which we've found to be a sticking point for beginning and intermediate `data.table` users, [#3412](https://github.com/Rdatatable/data.table/issues/3412). - -7. Added a note to `?frank` clarifying that ranking is being done according to C sorting (i.e., like `forder`), [#2328](https://github.com/Rdatatable/data.table/issues/2328). Thanks to @cguill95 for the request. - -8. Historically, `dcast` and `melt` were built as enhancements to `reshape2`'s own `dcast`/`melt`. We removed dependency on `reshape2` in v1.9.6 but maintained some backward compatibility. As that package has been superseded since December 2017, we will begin to formally complete the split from `reshape2` by removing some last vestiges. In particular we now warn when redirecting to `reshape2` methods and will later error before ultimately completing the split; see [#3549](https://github.com/Rdatatable/data.table/issues/3549) and [#3633](https://github.com/Rdatatable/data.table/issues/3633). 
We thank the `reshape2` authors for their original inspiration for these functions, and @ProfFancyPants for testing and reporting regressions in dev which have been fixed before release. - -9. `DT[col]` where `col` is a column containing row numbers of itself to select, now suggests the correct syntax (`DT[(col)]` or `DT[DT$col]`), [#697](https://github.com/Rdatatable/data.table/issues/697). This expands the message introduced in [#1884](https://github.com/Rdatatable/data.table/issues/1884) for the case where `col` is type `logical` and `DT[col==TRUE]` is suggested. - -10. The `datatable.old.unique.by.key` option has been warning for 1 year that it is deprecated: `... Please stop using it and pass by=key(DT) instead for clarity ...`. This warning is now upgraded to error as per the schedule in note 10 of v1.11.0 (May 2018), and note 1 of v1.9.8 (Nov 2016). In June 2020 the option will be removed. - -11. We intend to deprecate the `datatable.nomatch` option, [more info](https://github.com/Rdatatable/data.table/pull/3578/files). A message is now printed upon use of the option (once per session) as a first step. It asks you to please stop using the option and to pass `nomatch=NULL` explicitly if you require inner join. Outer join (`nomatch=NA`) has always been the default because it is safer; it does not drop missing data silently. The problem is that the option is global; i.e., if a user changes the default using this option for their own use, that can change the behavior of joins inside packages that use `data.table` too. This is the only `data.table` option with this concern. - -12. The test suite of 9k tests now runs with three R options on: `warnPartialMatchArgs`, `warnPartialMatchAttr`, and `warnPartialMatchDollar`. This ensures that we don't rely on partial argument matching in internal code, for robustness and efficiency, and so that users can turn these options on for their code in production, [#3664](https://github.com/Rdatatable/data.table/issues/3664). 
Thanks to Vijay Lulla for the suggestion, and Michael Chirico for fixing 48 internal calls to `attr()` which were missing `exact=TRUE`, for example. Thanks to R-core for adding these options to R 2.6.0 (Oct 2007). - -13. `test.data.table()` could fail if the `datatable.integer64` user option was set, [#3683](https://github.com/Rdatatable/data.table/issues/3683). Thanks @xiaguoxin for reporting. - -14. The warning message when using `keyby=` together with `:=` is clearer, [#2763](https://github.com/Rdatatable/data.table/issues/2763). Thanks to @eliocamp. - -15. `first` and `last` gain an explicit `n=1L` argument so that it's clear the default is 1, and their almost identical manual pages have been merged into one. - -16. Rolling functions (`?froll`) coerce `logical` input to `numeric` (instead of failing) to mimic the behavior of `integer` input. - -17. The warning message when using `strptime` in `j` has been improved, [#2068](https://github.com/Rdatatable/data.table/issues/2068). Thanks to @tdhock for the report. - -18. Added a note to `?setkey` clarifying that `setkey` always uses C-locale sorting (as has been noted in `?setorder`). Thanks @JBreidaks for the report in [#2114](https://github.com/Rdatatable/data.table/issues/2114). - -19. `hour()`/`minute()`/`second()` are much faster for `ITime` input, [#3518](https://github.com/Rdatatable/data.table/issues/3158). - -20. New alias `setalloccol` for `alloc.col`, [#3475](https://github.com/Rdatatable/data.table/issues/3475). For consistency with `set*` prefixes for functions that operate in-place (like `setkey`, `setorder`, etc.). `alloc.col` is not going to be deprecated but we recommend using `setalloccol`. - -21. `dcast` no longer emits a message when `value.var` is missing but `fun.aggregate` is explicitly set to `length` (since `value.var` is arbitrary in this case), [#2980](https://github.com/Rdatatable/data.table/issues/2980). - -22. 
Optimized `mean` of `integer` columns no longer warns about a coercion to numeric, [#986](https://github.com/Rdatatable/data.table/issues/986). Thanks @dgrtwo for his [YouTube tutorial at 3:01](https://youtu.be/AmE4LXPQErM?t=175) where the warning occurs. - -23. Using `first` and `last` function on `POSIXct` object no longer loads `xts` namespace, [#3857](https://github.com/Rdatatable/data.table/issues/3857). `first` on empty `data.table` returns empty `data.table` now [#3858](https://github.com/Rdatatable/data.table/issues/3858). - -24. Added some clarifying details about what happens when a shell command is used in `fread`, [#3877](https://github.com/Rdatatable/data.table/issues/3877). Thanks Brian for the StackOverflow question which highlighted the lack of explanation here. - -25. We continue to encourage packages to `Import` rather than `Depend` on `data.table`, [#3076](https://github.com/Rdatatable/data.table/issues/3076). To prevent the growth rate in new packages using `Depend`, we have requested that CRAN apply a small patch we provided to prevent new submissions using `Depend`. If this is accepted, the error under `--as-cran` will be as follows. The existing 73 packages using `Depend` will continue to pass OK until they next update, at which point they will be required to change from `Depend` to `Import`. - - ``` - R CMD check --as-cran - ... - * checking package dependencies ... ERROR - - data.table should be in Imports not Depends. Please contact its - maintainer for more information. - ``` - - -# data.table [v1.12.2](https://github.com/Rdatatable/data.table/milestone/14?closed=1) (07 Apr 2019) - -## NEW FEATURES - -1. `:=` no longer recycles length>1 RHS vectors. There was a warning when recycling left a remainder but no warning when the LHS length was an exact multiple of the RHS length (the same behaviour as base R). Consistent feedback for several years has been that recycling is more often a bug. 
In rare cases where you need to recycle a length>1 vector, please use `rep()` explicitly. Single values are still recycled silently as before. Early warning was given in [this tweet](https://twitter.com/MattDowle/status/1088544083499311104). The 774 CRAN and Bioconductor packages using `data.table` were tested and the maintainers of the 16 packages affected (2%) were consulted before going ahead, [#3310](https://github.com/Rdatatable/data.table/pull/3310). Upon agreement we went ahead. Many thanks to all those maintainers for already updating on CRAN, [#3347](https://github.com/Rdatatable/data.table/pull/3347). - -2. `foverlaps` now supports `type="equal"`, [#3416](https://github.com/Rdatatable/data.table/issues/3416) and part of [#3002](https://github.com/Rdatatable/data.table/issues/3002). - -3. The number of logical CPUs used by default has been reduced from 100% to 50%. The previous 100% default was reported to cause significant slow downs when other non-trivial processes were also running, [#3395](https://github.com/Rdatatable/data.table/issues/3395) [#3298](https://github.com/Rdatatable/data.table/issues/3298). Two new optional environment variables (`R_DATATABLE_NUM_PROCS_PERCENT` & `R_DATATABLE_NUM_THREADS`) control this default. `setDTthreads()` gains `percent=` and `?setDTthreads` has been significantly revised. The output of `getDTthreads(verbose=TRUE)` has been expanded. The environment variable `OMP_THREAD_LIMIT` is now respected ([#3300](https://github.com/Rdatatable/data.table/issues/3300)) in addition to `OMP_NUM_THREADS` as before. - -4. `rbind` and `rbindlist` now retain the position of duplicate column names rather than grouping them together [#3373](https://github.com/Rdatatable/data.table/issues/3373), fill length 0 columns (including NULL) with NA with warning [#1871](https://github.com/Rdatatable/data.table/issues/1871), and recycle length-1 columns [#524](https://github.com/Rdatatable/data.table/issues/524). 
Thanks to Kun Ren for the requests which arose when parsing JSON. - -5. `rbindlist`'s `use.names=` default has changed from `FALSE` to `"check"`. This emits a message if the column names of each item are not identical and then proceeds as if `use.names=FALSE` for backwards compatibility; i.e., bind by column position not by column name. The `rbind` method for `data.table` already sets `use.names=TRUE` so this change affects `rbindlist` only and not `rbind.data.table`. To stack differently named columns together silently (the previous default behavior of `rbindlist`), it is now necessary to specify `use.names=FALSE` for clarity to readers of your code. Thanks to Clayton Stanley who first raised the issue [here](https://lists.r-forge.r-project.org/pipermail/datatable-help/2014-April/002480.html). To aid pinpointing the calls to `rbindlist` that need attention, the message can be turned to error using `options(datatable.rbindlist.check="error")`. This option also accepts `"warning"`, `"message"` and `"none"`. In this release the message is suppressed for default column names (`"V[0-9]+"`); the next release will emit the message for those too. In 6 months the default will be upgraded from message to warning. There are two slightly different messages. They are helpful, include context and point to this news item : - - ``` - Column %d ['%s'] of item %d is missing in item %d. Use fill=TRUE to fill with - NA (NULL for list columns), or use.names=FALSE to ignore column names. - See news item 5 in v1.12.2 for options to control this message. - - Column %d ['%s'] of item %d appears in position %d in item %d. Set use.names=TRUE - to match by column name, or use.names=FALSE to ignore column names. - See news item 5 in v1.12.2 for options to control this message. - ``` - -6. `fread` gains `keepLeadingZeros`, [#2999](https://github.com/Rdatatable/data.table/issues/2999). 
By default `FALSE` so that, as before, a field containing `001` is interpreted as the integer 1, otherwise the character string `"001"`. The default may be changed using `options(datatable.keepLeadingZeros=TRUE)`. Many thanks to @marc-outins for the PR. - -## BUG FIXES - -1. `rbindlist()` of a malformed factor which is missing a levels attribute is now a helpful error rather than a cryptic error about `STRING_ELT`, [#3315](https://github.com/Rdatatable/data.table/issues/3315). Thanks to Michael Chirico for reporting. - -2. Forgetting `type=` in `shift(val, "lead")` would segfault, [#3354](https://github.com/Rdatatable/data.table/issues/3354). A helpful error is now produced to indicate `"lead"` is being passed to `n=` rather than the intended `type=` argument. Thanks to @SymbolixAU for reporting. - -3. The default print output (top 5 and bottom 5 rows) when ncol>255 could display the columns in the wrong order, [#3306](https://github.com/Rdatatable/data.table/issues/3306). Thanks to Kun Ren for reporting. - -4. Grouping by unusual column names such as `by='string_with_\\'` and `keyby="x y"` could fail, [#3319](https://github.com/Rdatatable/data.table/issues/3319) [#3378](https://github.com/Rdatatable/data.table/issues/3378). Thanks to @HughParsonage for reporting and @MichaelChirico for the fixes. - -5. `foverlaps()` could return incorrect results for `POSIXct <= 1970-01-01`, [#3349](https://github.com/Rdatatable/data.table/issues/3349). Thanks to @lux5 for reporting. - -6. `dcast.data.table` now handles functions passed to `fun.aggregate=` via a variable; e.g., `funs <- list(sum, mean); dcast(..., fun.aggregate=funs)`, [#1974](https://github.com/Rdatatable/data.table/issues/1974) [#1369](https://github.com/Rdatatable/data.table/issues/1369) [#2064](https://github.com/Rdatatable/data.table/issues/2064) [#2949](https://github.com/Rdatatable/data.table/issues/2949). Thanks to @sunbee, @Ping2016, @smidelius and @d0rg0ld for reporting. - -7.
Some non-equijoin cases could segfault, [#3401](https://github.com/Rdatatable/data.table/issues/3401). Thanks to @Gayyam for reporting. - -8. `dcast.data.table` could sort rows containing `NA` incorrectly, [#2202](https://github.com/Rdatatable/data.table/issues/2202). Thanks to @Galileo-Galilei for the report. - -9. Sorting, grouping and finding unique values of a numeric column containing at most one finite value (such as `c(Inf,0,-Inf)`) could return incorrect results, [#3372](https://github.com/Rdatatable/data.table/issues/3372) [#3381](https://github.com/Rdatatable/data.table/issues/3381); e.g., `data.table(A=c(Inf,0,-Inf), V=1:3)[,sum(V),by=A]` would treat the 3 rows as one group. This was a regression in 1.12.0. Thanks to Nicolas Ampuero for reporting. - -10. `:=` with quoted expression and dot alias now works as expected, [#3425](https://github.com/Rdatatable/data.table/pull/3425). Thanks to @franknarf1 for raising and @jangorecki for the PR. - -11. A join's result could be incorrectly keyed when a single nomatch occurred at the very beginning while all other values matched, [#3441](https://github.com/Rdatatable/data.table/issues/3441). The incorrect key would cause incorrect results in subsequent queries. Thanks to @symbalex for reporting and @franknarf1 for pinpointing the root cause. - -12. `rbind` and `rbindlist(..., use.names=TRUE)` with over 255 columns could return the columns in a random order, [#3373](https://github.com/Rdatatable/data.table/issues/3373). The contents and name of each column was correct but the order that the columns appeared in the result might not have matched the original input. - -13. `rbind` and `rbindlist` now combine `integer64` columns together with non-`integer64` columns correctly [#1349](https://github.com/Rdatatable/data.table/issues/1349), and support `raw` columns [#2819](https://github.com/Rdatatable/data.table/issues/2819). - -14. 
`NULL` columns are caught and error appropriately rather than segfault in some cases, [#2303](https://github.com/Rdatatable/data.table/issues/2303) [#2305](https://github.com/Rdatatable/data.table/issues/2305). Thanks to Hugh Parsonage and @franknarf1 for reporting. - -15. `melt` would error with 'factor malformed' or segfault in the presence of duplicate column names, [#1754](https://github.com/Rdatatable/data.table/issues/1754). Many thanks to @franknarf1, William Marble, wligtenberg and Toby Dylan Hocking for reproducible examples. All examples have been added to the test suite. - -16. Removing a column from a null (0-column) data.table is now a (standard and simpler) warning rather than error, [#2335](https://github.com/Rdatatable/data.table/issues/2335). It is no longer an error to add a column to a null (0-column) data.table. - -17. Non-UTF8 strings were not always sorted correctly on Windows (a regression in v1.12.0), [#3397](https://github.com/Rdatatable/data.table/issues/3397) [#3451](https://github.com/Rdatatable/data.table/issues/3451). Many thanks to @shrektan for reporting and fixing. - -18. `cbind` with a null (0-column) `data.table` now works as expected, [#3445](https://github.com/Rdatatable/data.table/issues/3445). Thanks to @mb706 for reporting. - -19. Subsetting does a better job of catching a malformed `data.table` with error rather than segfault. A column may not be NULL, nor may a column be an object which has columns (such as a `data.frame` or `matrix`). Thanks to a comment and reproducible example in [#3369](https://github.com/Rdatatable/data.table/issues/3369) from Drew Abbot which demonstrated the issue which arose from parsing JSON. The next release will enable `as.data.table` to unpack columns which are `data.frame` to support this use case. - -## NOTES - -1. When upgrading to 1.12.0 some Windows users might have seen `CdllVersion not found` in some circumstances. 
We found a way to catch that so the [helpful message](https://twitter.com/MattDowle/status/1084528873549705217) now occurs for those upgrading from versions prior to 1.12.0 too, as well as those upgrading from 1.12.0 to a later version. See item 1 in notes section of 1.12.0 below for more background. - -2. v1.12.0 checked itself on loading using `tools::checkMD5sums("data.table")` but this check failed under the `packrat` package manager on Windows because `packrat` appears to modify the DESCRIPTION file of packages it has snapshot, [#3329](https://github.com/Rdatatable/data.table/issues/3329). This check is now removed. The `CdllVersion` check was introduced after the `checkMD5sums()` attempt and is better; e.g., reliable on all platforms. - -3. As promised in new feature 6 of v1.11.6 Sep 2018 (see below in this news file), the `datatable.CJ.names` option's default is now `TRUE`. In v1.13.0 it will be removed. - -4. Travis CI gains OSX using homebrew llvm for OpenMP support, [#3326](https://github.com/Rdatatable/data.table/issues/3326). Thanks @marcusklik for the PR. - -5. Calling `data.table:::print.data.table()` directly (i.e. bypassing method dispatch by using 3 colons) and passing it a 0-column `data.frame` (not `data.table`) now works, [#3363](https://github.com/Rdatatable/data.table/pull/3363). Thanks @heavywatal for the PR. - -6. v1.12.0 did not compile on Solaris 10 using Oracle Developer Studio 12.6, [#3285](https://github.com/Rdatatable/data.table/issues/3285). Many thanks to Prof Ripley for providing and testing a patch. For future reference and other package developers, a `const` variable should not be passed to OpenMP's `num_threads()` directive otherwise `left operand must be modifiable lvalue` occurs. This appears to be a compiler bug which is why the specific versions are mentioned in this note. - -7. `foverlaps` provides clearer error messages w.r.t. 
factor and POSIXct interval columns, [#2645](https://github.com/Rdatatable/data.table/issues/2645) [#3007](https://github.com/Rdatatable/data.table/issues/3007) [#1143](https://github.com/Rdatatable/data.table/issues/1143). Thanks to @sritchie73, @msummersgill and @DavidArenburg for the reports. - -8. `unique(DT)` checks up-front the types of all the columns and will fail if any column is type `list` even though those `list` columns may not be needed to establish uniqueness. Use `unique(DT, by=...)` to specify columns that are not type `list`. v1.11.8 and before would also correctly fail with the same error, but not when uniqueness had been established in prior columns: it would stop early, not look at the `list` column and return the correct result. Checking up-front was necessary for some internal optimizations and it's probably best to be explicit anyway. Thanks to James Lamb for reporting, [#3332](https://github.com/Rdatatable/data.table/issues/3332). The error message has been embellished : - - ``` - Column 2 of by= (2) is type 'list', not yet supported. Please use the by= argument to specify - columns with types that are supported. - ``` - -9. Reminder that note 11 in v1.11.0 (May 2018) warned that `set2key()` and `key2()` will be removed in May 2019. They have been warning since v1.9.8 (Nov 2016) and their warnings were upgraded to errors in v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental'. - -10. The `key(DT)<-` form of `setkey()` has been warning since at least 2012 to use `setkey()`. The warning is now stronger: `key(x)<-value is deprecated and not supported. Please change to use setkey().`. This warning will be upgraded to error in one year. - - -# data.table v1.12.0 (13 Jan 2019) - -## NEW FEATURES - -1. `setDTthreads()` gains `restore_after_fork=`, [#2885](https://github.com/Rdatatable/data.table/issues/2885). The default `NULL` leaves the internal option unchanged which by default is `TRUE`. 
`data.table` has always switched to single-threaded mode on fork. It used to restore multithreading after a fork too but problems were reported on Mac and Intel OpenMP library (see 1.10.4 notes below). We are now trying again thanks to suggestions and success reported by Kun Ren and Mark Klik in package `fst`. If you experience problems with multithreading after a fork, please restart R and call `setDTthreads(restore_after_fork=FALSE)`. - -2. Subsetting, ordering and grouping now use more parallelism. See benchmarks [here](https://h2oai.github.io/db-benchmark/) and Matt Dowle's presentation in October 2018 on YouTube [here](https://youtu.be/Ddr8N9STSuI). These internal changes gave rise to 4 regressions which were found before release thanks to Kun Ren, [#3211](https://github.com/Rdatatable/data.table/issues/3211). He kindly volunteers to 'go-first' and runs data.table through his production systems before release. We are looking for a 'go-second' volunteer please. A request to test before release was tweeted on 17 Dec [here](https://twitter.com/MattDowle/status/1074746218645938176). As usual, all CRAN and Bioconductor packages using data.table (currently 750) have been tested against this release, [#3233](https://github.com/Rdatatable/data.table/issues/3233). There are now 8,000 tests in 13,000 lines of test code; more lines of test code than there is code. Overall coverage has increased to 94% thanks to Michael Chirico. - -3. New `frollmean` has been added by Jan Gorecki to calculate _rolling mean_, see `?froll` for documentation. Function name and arguments are experimental. Related to [#2778](https://github.com/Rdatatable/data.table/issues/2778) (and [#624](https://github.com/Rdatatable/data.table/issues/624), [#626](https://github.com/Rdatatable/data.table/issues/626), [#1855](https://github.com/Rdatatable/data.table/issues/1855)). Other rolling statistics will follow. - -4. 
`fread()` can now read a remote compressed file in one step; `fread("https://domain.org/file.csv.bz2")`. The `file=` argument now supports `.gz` and `.bz2` too; i.e. `fread(file="file.csv.gz")` works now where only `fread("file.csv.gz")` worked in 1.11.8. - -5. `nomatch=NULL` now does the same as `nomatch=0L` in both `DT[...]` and `foverlaps()`; i.e. discards missing values silently (inner join). The default is still `nomatch=NA` (outer join) for statistical safety so that missing values are retained by default. After several years have elapsed, we will start to deprecate `0L`; please start using `NULL`. In future `nomatch=.(0)` (note that `.()` creates a `list` type and is different to `nomatch=0`) will fill with `0` to save replacing `NA` with `0` afterwards, [#857](https://github.com/Rdatatable/data.table/issues/857). - -6. `setnames()` gains `skip_absent` to skip names in `old` that aren't present, [#3030](https://github.com/Rdatatable/data.table/issues/3030). By default `FALSE` so that it is still an error, as before, to attempt to change a column name that is not present. Thanks to @MusTheDataGuy for the suggestion and the PR. - -7. `NA` in `between()` and `%between%`'s `lower` and `upper` are now taken as missing bounds and return `TRUE` rather than `NA`. This is now documented. - -8. `shift()` now interprets negative values of `n` to mean the opposite `type=`, [#1708](https://github.com/Rdatatable/data.table/issues/1708). When `give.names=TRUE` the result is named using a positive `n` with the appropriate `type=`. Alternatively, a new `type="shift"` names the result using a signed `n` and constant type. - - ```R - shift(x, n=-5:5, give.names=TRUE) => "_lead_5" ... "_lag_5" - shift(x, n=-5:5, type="shift", give.names=TRUE) => "_shift_-5" ... "_shift_5" - ``` - -9. `fwrite()` now accepts `matrix`, [#2613](https://github.com/Rdatatable/data.table/issues/2613). Thanks to Michael Chirico for the suggestion and Felipe Parages for implementing. 
For now matrix input is converted to data.table (which can be costly) before writing. - -10. `fread()` and `fwrite()` can now handle file names in native and UTF-8 encoding, [#3078](https://github.com/Rdatatable/data.table/issues/3078). Thanks to Daniel Possenriede (@dpprdan) for reporting and fixing. - -11. `DT[i]` and `DT[i,cols]` now call internal parallel subsetting code, [#2951](https://github.com/Rdatatable/data.table/issues/2951). Subsetting is significantly faster (as are many other operations) with factor columns rather than character. - - ```R - N = 2e8 # 4GB data on 4-core CPU with 16GB RAM - DT = data.table(ID = sample(LETTERS,N,TRUE), - V1 = sample(5,N,TRUE), - V2 = runif(N)) - w = which(DT$V1 > 3) # select 40% of rows - # v1.12.0 v1.11.8 - system.time(DT[w]) # 0.8s 2.6s - DT[, ID := as.factor(ID)] - system.time(DT[w]) # 0.4s 2.3s - system.time(DT[w, c("ID","V2")]) # 0.3s 1.9s - ``` - -12. `DT[..., .SDcols=]` now accepts `patterns()`; e.g. `DT[..., .SDcols=patterns("^V")]`, for filtering columns according to a pattern (as in `melt.data.table`), [#1878](https://github.com/Rdatatable/data.table/issues/1878). Thanks to many people for pushing for this and @MichaelChirico for ultimately filing the PR. See `?data.table` for full details and examples. - -13. `split` data.table method will now preserve attributes, closes [#2047](https://github.com/Rdatatable/data.table/issues/2047). Thanks to @caneff for reporting. - -14. `DT[i,j]` now retains user-defined and inherited attributes, [#995](https://github.com/Rdatatable/data.table/issues/995); e.g. - - ```R - attr(datasets::BOD,"reference") # "A1.4, p. 270" - attr(as.data.table(datasets::BOD)[2],"reference") # was NULL now "A1.4, p. 270" - ``` - - If a superclass defines attributes that may not be valid after a `[` subset then the superclass should implement its own `[` method to manage those after calling `NextMethod()`. - -## BUG FIXES - -1. 
Providing an `i` subset expression when attempting to delete a column correctly failed with helpful error, but when the column was missing too created a new column full of `NULL` values, [#3089](https://github.com/Rdatatable/data.table/issues/3089). Thanks to Michael Chirico for reporting. - -2. Column names that look like expressions (e.g. `"a<=colB"`) caused an error when used in `on=` even when wrapped with backticks, [#3092](https://github.com/Rdatatable/data.table/issues/3092). Additionally, `on=` now supports white spaces around operators; e.g. `on = "colA == colB"`. Thanks to @mt1022 for reporting and to @MarkusBonsch for fixing. - -3. Unmatched `patterns` in `measure.vars` fail early and with feedback, [#3106](https://github.com/Rdatatable/data.table/issues/3106). - -4. `fread(..., skip=)` now skips non-standard `\r` and `\n\r` line endings properly again, [#3006](https://github.com/Rdatatable/data.table/issues/3006). Standard line endings (`\n` Linux/Mac and `\r\n` Windows) were skipped ok. Thanks to @brattono and @tbrycekelly for providing reproducible examples, and @st-pasha for fixing. - -5. `fread(..., colClasses=)` could return a corrupted result when a lower type was requested for one or more columns (e.g. reading "3.14" as integer), [#2922](https://github.com/Rdatatable/data.table/issues/2922) [#2863](https://github.com/Rdatatable/data.table/issues/2863) [#3143](https://github.com/Rdatatable/data.table/issues/3143). It now ignores the request as documented and the helpful message in verbose mode is upgraded to warning. In future, coercing to a lower type might be supported (with warning if any accuracy is lost). `"NULL"` is recognized again in both vector and list mode; e.g. `colClasses=c("integer","NULL","integer")` and `colClasses=list(NULL=2, integer=10:40)`. Thanks to Arun Srinivasan, Kun Ren, Henri Ståhl and @kszela24 for reporting. - -6. 
`cube()` will now produce expected order of results, [#3179](https://github.com/Rdatatable/data.table/issues/3179). Thanks to @Henrik-P for reporting. - -7. `groupingsets()` groups by empty column set and constant value in `j`, [#3173](https://github.com/Rdatatable/data.table/issues/3173). - -8. `split.data.table()` failed if `DT` had a factor column named `"x"`, [#3151](https://github.com/Rdatatable/data.table/issues/3151). Thanks to @tdeenes for reporting and fixing. - -9. `fsetequal` now properly handles datasets whose last column is of type character, closes [#2318](https://github.com/Rdatatable/data.table/issues/2318). Thanks to @pschil and @franknarf1 for reporting. - -10. `DT[..., .SDcols=integer(0L)]` could fail, [#3185](https://github.com/Rdatatable/data.table/issues/3185). An empty `data.table` is now returned correctly. - -11. `as.data.table.default` method will now always copy its input, closes [#3230](https://github.com/Rdatatable/data.table/issues/3230). Thanks to @NikdAK for reporting. - -12. `DT[..., .SDcols=integer()]` failed with `.SDcols is numeric but has both +ve and -ve indices`, [#1789](https://github.com/Rdatatable/data.table/issues/1789) and [#3185](https://github.com/Rdatatable/data.table/issues/3185). It now functions as `.SDcols=character()` has done and creates an empty `.SD`. Thanks to Gabor Grothendieck and Hugh Parsonage for reporting. A related issue with empty `.SDcols` was fixed in development before release thanks to Kun Ren's testing, [#3211](https://github.com/Rdatatable/data.table/issues/3211). - -13. Multithreaded stability should be much improved with R 3.5+. Many thanks to Luke Tierney for pinpointing a memory issue with package `constellation` caused by `data.table` and his advice, [#3165](https://github.com/Rdatatable/data.table/issues/3165). Luke also added an extra check to R-devel when compiled with `--enable-strict-barrier`.
The test suite is run through latest daily R-devel after every commit as usual, but now with `--enable-strict-barrier` on too via GitLab CI ("Extra" badge on the `data.table` homepage) thanks to Jan Gorecki. - -14. Fixed an edge-case bug of platform-dependent output of `strtoi("", base = 2L)` on which `groupingsets` had relied, [#3267](https://github.com/Rdatatable/data.table/issues/3267). - -## NOTES - -1. When data.table loads it now checks its DLL version against the version of its R level code. This is to detect installation issues on Windows when i) the DLL is in use by another R session and ii) the CRAN source version > CRAN binary version, which happens just after a new release (R prompts users to install from source until the CRAN binary is available). This situation can lead to a state where the package's new R code calls old C code in the old DLL; [R#17478](https://bugs.r-project.org/show_bug.cgi?id=17478), [#3056](https://github.com/Rdatatable/data.table/issues/3056). This broken state can persist until, hopefully, you experience a strange error caused by the mismatch. Otherwise, wrong results may occur silently. This situation applies to any R package with compiled code not just data.table, is Windows-only, and is long-standing. It has only recently been understood as it typically only occurs during the few days after each new release until binaries are available on CRAN. - -2. When `on=` is provided but not `i=`, a helpful error is now produced rather than silently ignoring `on=`. Thanks to Dirk Eddelbuettel for the idea. - -3. `.SDcols=` is more helpful when passed non-existent columns, [#3116](https://github.com/Rdatatable/data.table/issues/3116) and [#3118](https://github.com/Rdatatable/data.table/issues/3118). Thanks to Michael Chirico for the investigation and PR. - -4. `update.dev.pkg()` gains `type=` to specify if update should be made from binaries, sources or both, [#3148](https://github.com/Rdatatable/data.table/issues/3148).
Thanks to Reino Bruner for the detailed suggestions. - -5. `setDT()` improves feedback when passed a ragged list (i.e. where all columns in the list are not the same length), [#3121](https://github.com/Rdatatable/data.table/issues/3121). Thanks @chuk-yong for highlighting. - -6. The one and only usage of `UNPROTECT_PTR()` has been removed, [#3232](https://github.com/Rdatatable/data.table/issues/3232). Thanks to Tomas Kalibera's investigation and advice here: https://developer.r-project.org/Blog/public/2018/12/10/unprotecting-by-value/index.html - - -# data.table v1.11.8 (30 Sep 2018) - -## NEW FEATURES - -1. `fread()` can now read `.gz` and `.bz2` files directly: `fread("file.csv.gz")`, [#717](https://github.com/Rdatatable/data.table/issues/717) [#3058](https://github.com/Rdatatable/data.table/issues/3058). It uses `R.utils::decompressFile` to decompress to a `tempfile()` which is then read by `fread()` in the usual way. For greater speed on large-RAM servers, it is recommended to use ramdisk for temporary files by setting `TMPDIR` to `/dev/shm` before starting R; see `?tempdir`. The decompressed temporary file is removed as soon as `fread` completes even if there is an error reading the file. Reading a remote compressed file in one step will be supported in the next version; e.g. `fread("https://domain.org/file.csv.bz2")`. - -## BUG FIXES - -1. Joining two keyed tables using `on=` to columns not forming a leading subset of `key(i)` could result in an invalidly keyed result, [#3061](https://github.com/Rdatatable/data.table/issues/3061). Subsequent queries on the result could then return incorrect results. A warning `longer object length is not a multiple of shorter object length` could also occur. Thanks to @renkun-ken for reporting and the PR. - -2. 
`keyby=` on columns for which an index exists now uses the index (new feature 7 in v1.11.6 below) but if an `i` subset is present in the same query then it could segfault, [#3062](https://github.com/Rdatatable/data.table/issues/3062). Again thanks to @renkun-ken for reporting. - -3. Assigning an out-of-range integer to an item in a factor column (a rare operation) correctly created an `NA` in that spot with warning, but now no longer also corrupts the variable being assigned, [#2984](https://github.com/Rdatatable/data.table/issues/2984). Thanks to @radfordneal for reporting and @MarkusBonsch for fixing. Assigning a string which is missing from the factor levels continues to automatically append the string to the factor levels. - -4. Assigning a sequence to a column using base R methods (e.g. `DT[["foo"]] = 1:10`) could cause subsetting to fail with `Internal error in subset.c: column is an ALTREP vector`, [#3051](https://github.com/Rdatatable/data.table/issues/3051). Thanks to Michel Lang for reporting. - -5. `as.data.table` `matrix` method now properly handles rownames for 0 column data.table output. Thanks @mllg for reporting. Closes [#3149](https://github.com/Rdatatable/data.table/issues/3149). - -## NOTES - -1. The test suite now turns on R's new _R_CHECK_LENGTH_1_LOGIC2_ to catch when internal use of `&&` or `||` encounter arguments of length more than one. Thanks to Hugh Parsonage for implementing and fixing the problems caught by this. - -2. Some namespace changes have been made with respect to melt, dcast and xts. No change is expected but if you do have any trouble, please file an issue. - -3. `split.data.table` was exported in v1.11.6 in addition to being registered using `S3method(split, data.table)`. The export has been removed again. It had been added because a user said they found it difficult to find, [#2920](https://github.com/Rdatatable/data.table/issues/2920). But S3 methods are not normally exported explicitly by packages. 
The proper way to access the `split.data.table` method is to call `split(DT)` where `DT` is a `data.table`. The generic (`base::split` in this case) then dispatches to the `split.data.table` method. v1.11.6 was not on CRAN very long (1 week) so we think it's better to revert this change quickly. To know what methods exist, R provides the `methods()` function. - - ```R - methods(split) # all the methods for the split generic - methods(class="data.table") # all the generics that data.table has a method for (47 currently) - ``` - - -# data.table v1.11.6 (19 Sep 2018) - -## NEW FEATURES - -1. For convenience when some of the files in `fnams` are empty in `rbindlist(lapply(fnams,fread))`, `fread` now reads empty input as a null-data.table with warning rather than error, [#2898](https://github.com/Rdatatable/data.table/issues/2898). For consistency, `fwrite(data.table(NULL))` now creates an empty file and warns instead of error, too. - -2. `setcolorder(DT)` without further arguments now defaults to moving the key columns to be first, [#2895](https://github.com/Rdatatable/data.table/issues/2895). Thanks to @jsams for the PR. - -3. Attempting to subset on `col` when the column is actually called `Col` will still error, but the error message will helpfully suggest similarly-spelled columns, [#2887](https://github.com/Rdatatable/data.table/issues/2887). This is experimental, applies just to `i` currently, and we look forward to feedback. Thanks to Michael Chirico for the suggestion and PR. - -4. `fread()` has always accepted literal data; e.g. `fread("A,B\n1,2\n3,4")`. It now gains explicit `text=`; e.g. `fread(text="A,B\n1,2\n3,4")`. Unlike the first general purpose `input=` argument, the `text=` argument accepts multi-line input; e.g. `fread(text=c("A,B","1,2","3,4"))`, [#1423](https://github.com/Rdatatable/data.table/issues/1423). Thanks to Douglas Clark for the request and Hugh Parsonage for the PR. - -5. `fread()` has always accepted system commands; e.g. 
`fread("grep blah file.txt")`. It now gains explicit `cmd=`; e.g. `fread(cmd="grep blah file.txt")`. Further, if and only if `input=` is a system command and a variable was used to hold that command (`fread(someCommand)` not `fread("grep blah file.txt")`) or a variable is used to construct it (`fread(paste("grep",variable,"file.txt"))`), a message is now printed suggesting `cmd=`. This is to inform all users that there is a potential security concern if you are i) creating apps, and ii) your app takes input from a public user who could be malicious, and iii) input from the malicious user (such as a filename) is passed by your app to `fread()`, and iv) your app in not running in a protected environment. If all 4 conditions hold then the malicious user could provide a system command instead of a filename which `fread()` would run, and that would be a problem too. If the app is not running in a protected environment (e.g. app is running as root) then this could do damage or obtain data you did not intend. Public facing apps should be running with limited operating system permission so that any breach from any source is contained. We agree with [Linus Torvald's advice](https://lkml.org/lkml/2017/11/21/356) on this which boils down to: "when addressing security concerns the first step is do no harm, just inform". If you aren't creating apps or apis that could have a malicious user then there is no risk but we can't distinguish you so we have to inform everyone. Please change to `fread(cmd=...)` at your leisure. The new message can be suppressed with `options(datatable.fread.input.cmd.message=FALSE)`. Passing system commands to `fread()` continues to be recommended and encouraged and is widely used; e.g. via the techniques gathered together in the book [Data Science at the Command Line](https://datascienceatthecommandline.com/). A `warning()` is too strong because best-practice for production systems is to set `options(warn=2)` to tolerate no warnings. 
Such production systems have no user input and so there is no security risk; we don't want to do harm by breaking production systems via a `warning()` which gets turned into an error by `options(warn=2)`. Now that we have informed all users, we request feedback. There are 3 options for future releases: i) remove the message, ii) leave the message in place, iii) upgrade the message to warning and then eventually error. The default choice is the middle one: leave the message in place. - -6. New `options(datatable.CJ.names=TRUE)` changes `CJ()` to auto-name its inputs exactly as `data.table()` does, [#1596](https://github.com/Rdatatable/data.table/issues/1596). Thanks @franknarf1 for the suggestion. Current default is `FALSE`; i.e. no change. The option's default will be changed to `TRUE` in v1.12.0 and then eventually the option will be removed. Any code that depends on `CJ(x,y)$V1` will need to be changed to `CJ(x,y)$x` and is more akin to a bug fix due to the inconsistency with `data.table()`. - -7. If an appropriate index exists, `keyby=` will now use it. For example, given `setindex(DT,colA,colB)`, both `DT[,j,keyby=colA]` (a leading subset of the index columns) and `DT[,j,keyby=.(colA,colB)]` will use the index, but not `DT[,j,keyby=.(colB,colA)]`. The option `options(datatable.use.index=FALSE)` will turn this feature off. Please always use `keyby=` unless you wish to retain the order of groups by first-appearance order (in which case use `by=`). Also, both `keyby=` and `by=` already used the key where possible but are now faster when using just the first column of the key. As usual, setting `verbose=TRUE` either per-query or globally using `options(datatable.verbose=TRUE)` will report what's being done internally. - -## BUG FIXES - -1. `fread` now respects the order of columns passed to `select=` when column numbers are used, [#2986](https://github.com/Rdatatable/data.table/issues/2986). It already respected the order when column names are used. 
Thanks @privefl for raising the issue. - -2. `gmin` and `gmax` no longer fail on _ordered_ factors, [#1947](https://github.com/Rdatatable/data.table/issues/1947). Thanks to @mcieslik-mctp for identifying and @mbacou for the nudge. - -3. `as.ITime.character` now properly handles NA when attempting to detect the format of non-NA values in vector. Thanks @polyjian for reporting, closes [#2940](https://github.com/Rdatatable/data.table/issues/2940). - -4. `as.matrix(DT, rownames="id")` now works when `DT` has a single row, [#2930](https://github.com/Rdatatable/data.table/issues/2930). Thanks to @malcook for reporting and @sritchie73 for fixing. The root cause was the dual meaning of the `rownames=` argument: i) a single column name/number (most common), or ii) rowname values length 1 for the single row. For clarity and safety, `rownames.value=` has been added. Old usage (i.e. `length(rownames)>1`) continues to work for now but will issue a warning in a future release, and then error in a release after that. - -5. Fixed regression in v1.11.0 (May 2018) caused by PR [#2389](https://github.com/Rdatatable/data.table/pull/2389) which introduced partial key retainment on `:=` assigns. This broke the joining logic that assumed implicitly that assigning always drops keys completely. Consequently, join and subset results could be wrong when matching character to factor columns with existing keys, [#2881](https://github.com/Rdatatable/data.table/issues/2881). Thanks to @ddong63 for reporting and to @MarkusBonsch for fixing. Missing test added to ensure this doesn't arise again. - -6. `as.IDate.numeric` no longer ignores "origin", [#2880](https://github.com/Rdatatable/data.table/issues/2880). Thanks to David Arenburg for reporting and fixing. - -7. `as.ITime.times` was rounding fractional seconds while other methods were truncating, [#2870](https://github.com/Rdatatable/data.table/issues/2870). 
The `as.ITime` method gains `ms=` taking `"truncate"` (default), `"nearest"` and `"ceil"`. Thanks to @rossholmberg for reporting and Michael Chirico for fixing. - -8. `fwrite()` now writes POSIXct dates after 2038 correctly, [#2995](https://github.com/Rdatatable/data.table/issues/2995). Thanks to Manfred Zorn for reporting and Philippe Chataignon for the PR fixing it. - -9. `fsetequal` gains the `all` argument to make it consistent with the other set operator functions `funion`, `fsetdiff` and `fintersect` [#2968](https://github.com/Rdatatable/data.table/issues/2968). When `all = FALSE` `fsetequal` will treat rows as elements in a set when checking whether two `data.tables` are equal (i.e. duplicate rows will be ignored). For now the default value is `all = TRUE` for backwards compatibility, but this will be changed to `all = FALSE` in a future release to make it consistent with the other set operation functions. Thanks to @franknarf1 for reporting and @sritchie73 for fixing. - -10. `fintersect` failed on tables with a column called `y`, [#3034](https://github.com/Rdatatable/data.table/issues/3034). Thanks to Maxim Nazarov for reporting. - -11. Compilation fails in AIX because NAN and INFINITY macros definition in AIX make them not constant literals, [#3043](https://github.com/Rdatatable/data.table/pull/3043). Thanks to Ayappan for reporting and fixing. - -12. The introduction of altrep in R 3.5.0 caused some performance regressions of about 20% in some cases, [#2962](https://github.com/Rdatatable/data.table/issues/2962). Investigating this led to some improvements to grouping which are faster than before R 3.5.0 in some cases. Thanks to Nikolay S. for reporting. The work to accomodate altrep is not complete but it is better and it is highly recommended to upgrade to this update. - -13. Fixed 7 memory faults thanks to CRAN's [`rchk`](https://github.com/kalibera/rchk) tool by Tomas Kalibera, [#3033](https://github.com/Rdatatable/data.table/pull/3033). 
- -## NOTES - -1. The type coercion warning message has been improved, [#2989](https://github.com/Rdatatable/data.table/pull/2989). Thanks to @sarahbeeysian on Twitter for highlighting. For example, given the follow statements: - - ```R - DT = data.table(id=1:3) - DT[2, id:="foo"] - ``` - - the warning message has changed from : - - ``` - Coerced character RHS to integer to match the column's type. Either change the target column - ['id'] to character first (by creating a new character vector length 3 (nrows of entire table) and - assign that; i.e. 'replace' column), or coerce RHS to integer (e.g. 1L, NA_[real|integer]_, as.*, - etc) to make your intent clear and for speed. Or, set the column type correctly up front when you - create the table and stick to it, please. - ``` - - to : - - ``` - Coerced character RHS to integer to match the type of the target column (column 1 named 'id'). If - the target column's type integer is correct, it's best for efficiency to avoid the coercion and - create the RHS as type integer. To achieve that consider the L postfix: typeof(0L) vs typeof(0), - and typeof(NA) vs typeof(NA_integer_) vs typeof(NA_real_). Wrapping the RHS with as.integer() will - avoid this warning but still perform the coercion. If the target column's type is not correct, it - is best to revisit where the DT was created and fix the column type there; e.g., by using - colClasses= in fread(). Otherwise, you can change the column type now by plonking a new column (of - the desired type) over the top of it; e.g. DT[, `id`:=as.character(`id`)]. If the RHS of := has - nrow(DT) elements then the assignment is called a column plonk and is the way to change a column's - type. Column types can be observed with sapply(DT,typeof). - ``` - - Further, if a coercion from double to integer is performed, fractional data such as 3.14 is now detected and the truncation to 3 is warned about if and only if truncation has occurred. 
- - ```R - DT = data.table(v=1:3) - DT[2, v:=3.14] - Warning message: - Coerced double RHS to integer to match the type of the target column (column 1 named 'v'). One - or more RHS values contain fractions which have been lost; e.g. item 1 with value 3.140000 has - been truncated to 3. - ``` - -2. `split.data.table` method is now properly exported, [#2920](https://github.com/Rdatatable/data.table/issues/2920). But we don't recommend it because `split` copies all the pieces into new memory. - -3. Setting indices on columns which are part of the key will now create those indices. - -4. `hour`, `minute`, and `second` utility functions use integer arithmetic when the input is already (explicitly) UTC-based `POSIXct` for 4-10x speedup vs. using `as.POSIXlt`. - -5. Error added for incorrect usage of `%between%`, with some helpful diagnostic hints, [#3014](https://github.com/Rdatatable/data.table/issues/3014). Thanks @peterlittlejohn for offering his user experience and providing the impetus. - - -# data.table v1.11.4 (27 May 2018) - -1. Empty RHS of `:=` is no longer an error when the `i` clause returns no rows to assign to anyway, [#2829](https://github.com/Rdatatable/data.table/issues/2829). Thanks to @cguill95 for reporting and to @MarkusBonsch for fixing. - -2. Fixed runaway memory usage with R-devel (R > 3.5.0), [#2882](https://github.com/Rdatatable/data.table/pull/2882). Thanks to many people but in particular to Trang Nguyen for making the breakthrough reproducible example, Paul Bailey for liaising, and Luke Tierney for then pinpointing the issue. It was caused by an interaction of two or more data.table threads operating on new compact vectors in the ALTREP framework, such as the sequence `1:n`. This interaction could result in R's garbage collector turning off, and hence the memory explosion. Problems may occur in R 3.5.0 too but we were only able to reproduce in R > 3.5.0. 
The R code in data.table's implementation benefits from ALTREP (`for` loops in R no longer allocate their range vector input, for example) but ALTREP compact vectors are not so appropriate as data.table columns.
It may have been from a time when `IDate` did not inherit from `Date`, perhaps. Note that we don't use `chron` ourselves in our own work. - -3. Fixed `SETLENGTH() cannot be applied to an ALTVEC object` starting in R-devel (R 3.6.0) on 1 May 2018, a few hours after 1.11.0 was accepted on CRAN, [#2820](https://github.com/Rdatatable/data.table/issues/2820). Many thanks to Luke Tierney for pinpointing the problem. - -4. Fixed some rare memory faults in `fread()` and `rbindlist()` found with `gctorture2()` and [`rchk`](https://github.com/kalibera/rchk), [#2841](https://github.com/Rdatatable/data.table/issues/2841). - - -# data.table v1.11.0 (01 May 2018) - -## NOTICE OF INTENDED FUTURE POTENTIAL BREAKING CHANGES - -1. `fread()`'s `na.strings=` argument : - - ```R - "NA" # old default - getOption("datatable.na.strings", "NA") # this release; i.e. the same; no change yet - getOption("datatable.na.strings", "") # future release - ``` - - This option controls how `,,` is read in character columns. It does not affect numeric columns which read `,,` as `NA` regardless. We would like `,,`=>`NA` for consistency with numeric types, and `,"",`=>empty string to be the standard default for `fwrite/fread` character columns so that `fread(fwrite(DT))==DT` without needing any change to any parameters. `fwrite` has never written `NA` as `"NA"` in case `"NA"` is a valid string in the data; e.g., 2 character id columns sometimes do. Instead, `fwrite` has always written `,,` by default for an `` in a character columns. The use of R's `getOption()` allows users to move forward now, using `options(datatable.fread.na.strings="")`, or restore old behaviour when the default's default is changed in future, using `options(datatable.fread.na.strings="NA")`. - -2. `fread()` and `fwrite()`'s `logical01=` argument : - - ```R - logical01 = FALSE # old default - getOption("datatable.logical01", FALSE) # this release; i.e. 
the same; no change yet - getOption("datatable.logical01", TRUE) # future release - ``` - - This option controls whether a column of all 0's and 1's is read as `integer`, or `logical` directly to avoid needing to change the type afterwards to `logical` or use `colClasses`. `0/1` is smaller and faster than `"TRUE"/"FALSE"`, which can make a significant difference to space and time the more `logical` columns there are. When the default's default changes to `TRUE` for `fread` we do not expect much impact since all arithmetic operators that are currently receiving 0's and 1's as type `integer` (think `sum()`) but instead could receive `logical`, would return exactly the same result on the 0's and 1's as `logical` type. However, code that is manipulating column types using `is.integer` or `is.logical` on `fread`'s result, could require change. It could be painful if `DT[(logical_column)]` (i.e. `DT[logical_column==TRUE]`) changed behaviour due to `logical_column` no longer being type `logical` but `integer`. But that is not the change proposed. The change is the other way around; i.e., a previously `integer` column holding only 0's and 1's would now be type `logical`. Since it's that way around, we believe the scope for breakage is limited. We think a lot of code is converting 0/1 integer columns to logical anyway, either using `colClasses=` or afterwards with an assign. For `fwrite`, the level of breakage depends on the consumer of the output file. We believe `0/1` is a better more standard default choice to move to. See notes below about improvements to `fread`'s sampling for type guessing, and automatic rereading in the rare cases of out-of-sample type surprises. - - -These options are meant for temporary use to aid your migration, [#2652](https://github.com/Rdatatable/data.table/pull/2652). You are not meant to set them to the old default and then not migrate your code that is dependent on the default. 
Either set the argument explicitly so your code is not dependent on the default, or change the code to cope with the new default. Over the next few years we will slowly start to remove these options, warning you if you are using them, and return to a simple default. See the history of NEWS and NEWS.0 for past migrations that have, generally speaking, been successfully managed in this way. For example, at the end of NOTES for this version (below in this file) is a note about the usage of `datatable.old.unique.by.key` now warning, as you were warned it would do over a year ago. When that change was introduced, the default was changed and that option provided an option to restore the old behaviour. These `fread`/`fwrite` changes are even more cautious and not even changing the default's default yet. Giving you extra warning by way of this notice to move forward. And giving you a chance to object. - -## NEW FEATURES - -1. `fread()`: - * Efficiency savings at C level including **parallelization** announced [here](https://github.com/Rdatatable/data.table/wiki/talks/BARUG_201704_ParallelFread.pdf); e.g. a 9GB 2 column integer csv input is **50s down to 12s** to cold load on a 4 core laptop with 16GB RAM and SSD. Run `echo 3 >/proc/sys/vm/drop_caches` first to measure cold load time. Subsequent load time (after file has been cached by OS on the first run) **40s down to 6s**. - * The [fread for small data](https://github.com/Rdatatable/data.table/wiki/Convenience-features-of-fread) page has been revised. - * Memory maps lazily; e.g. reading just the first 10 rows with `nrow=10` is **12s down to 0.01s** from cold for the 9GB file. Large files close to your RAM limit may work more reliably too. The progress meter will commence sooner and more consistently. - * `fread` has always jumped to the middle and to the end of the file for a much improved column type guess. 
The sample size is increased from 100 rows at 10 jump points (1,000 rows) to 100 rows at 100 jump points (10,000 row sample).
- * Improved automatic detection of whether the first row is column names by comparing the types of the fields on the first row against the column types ascertained by the 10,000 rows sample (or `colClasses` if provided). If a numeric column has a string value at the top, then column names are deemed present. - * Detects GB-18030 and UTF-16 encodings and in verbose mode prints a message about BOM detection. - * Detects and ignores trailing ^Z end-of-file control character sometimes created on MS DOS/Windows, [#1612](https://github.com/Rdatatable/data.table/issues/1612). Thanks to Gergely Daróczi for reporting and providing a file. - * Added ability to recognize and parse hexadecimal floating point numbers, as used for example in Java. Thanks for @scottstanfield [#2316](https://github.com/Rdatatable/data.table/issues/2316) for the report. - * Now handles floating-point NaN values in a wide variety of formats, including `NaN`, `sNaN`, `1.#QNAN`, `NaN1234`, `#NUM!` and others, [#1800](https://github.com/Rdatatable/data.table/issues/1800). Thanks to Jori Liesenborgs for highlighting and the PR. - * If negative numbers are passed to `select=` the out-of-range error now suggests `drop=` instead, [#2423](https://github.com/Rdatatable/data.table/issues/2423). Thanks to Michael Chirico for the suggestion. - * `sep=NULL` or `sep=""` (i.e., no column separator) can now be used to specify single column input reliably like `base::readLines`, [#1616](https://github.com/Rdatatable/data.table/issues/1616). `sep='\\n'` still works (even on Windows where line ending is actually `\\r\\n`) but `NULL` or `""` are now documented and recommended. Thanks to Dmitriy Selivanov for the pull request and many others for comments. As before, `sep=NA` is not valid; use the default `"auto"` for automatic separator detection. `sep='\\n'` is now deprecated and in future will start to warn when used. 
- * Single-column input with blank lines is now valid and the blank lines are significant (representing `NA`). The blank lines are significant even at the very end, which may be surprising on first glance. The change is so that `fread(fwrite(DT))==DT` for single-column inputs containing `NA` which are written as blank. There is no change when `ncol>1`; i.e., input stops with detailed warning at the first blank line, because a blank line when `ncol>1` is invalid input due to no separators being present. Thanks to @skanskan, Michael Chirico, @franknarf1 and Pasha for the testing and discussions, [#2106](https://github.com/Rdatatable/data.table/issues/2106). - * Too few column names are now auto filled with default column names, with warning, [#1625](https://github.com/Rdatatable/data.table/issues/1625). If there is just one missing column name it is guessed to be for the first column (row names or an index), otherwise the column names are filled at the end. Similarly, too many column names now automatically sets `fill=TRUE`, with warning. - * `skip=` and `nrow=` are more reliable and are no longer affected by invalid lines outside the range specified. Thanks to Ziyad Saeed and Kyle Chung for reporting, [#1267](https://github.com/Rdatatable/data.table/issues/1267). - * Ram disk (`/dev/shm`) is no longer used for the output of system command input. Although faster when it worked, it was causing too many device full errors; e.g., [#1139](https://github.com/Rdatatable/data.table/issues/1139) and [zUMIs/19](https://github.com/sdparekh/zUMIs/issues/19). Thanks to Kyle Chung for reporting. Standard `tempdir()` is now used. If you wish to use ram disk, set TEMPDIR to `/dev/shm`; see `?tempdir`. - * Detecting whether a very long input string is a file name or data is now much faster, [#2531](https://github.com/Rdatatable/data.table/issues/2531). Many thanks to @javrucebo for the detailed report, benchmarks and suggestions. 
- * A column of `TRUE/FALSE`s is ok, as well as `True/False`s and `true/false`s, but mixing styles (e.g. `TRUE/false`) is not and will be read as type `character`. - * New argument `index` to compliment the existing `key` argument for applying secondary orderings out of the box for convenience, [#2633](https://github.com/Rdatatable/data.table/issues/2633). - * A warning is now issued whenever incorrectly quoted fields have been detected and fixed using a non-standard quote rule. `fread` has always used these advanced rules but now it warns that it is using them. Most file writers correctly quote fields if the field contains the field separator, but a common error is not to also quote fields that contain a quote and then escape those quotes, particularly if that quote occurs at the start of the field. The ability to detect and fix such files is referred to as self-healing. Ambiguities are resolved using the knowledge that the number of columns is constant, and therefore this ability is not available when `fill=TRUE`. This feature can be improved in future by using column type consistency as well as the number of fields. 
For example: - - ```R - txt = 'A,B\n1,hello\n2,"howdy" said Joe\n3,bonjour\n' - cat(txt) - # A,B - # 1,hello - # 2,"howdy" said Joe - # 3,bonjour - fread(txt) - A B - - 1: 1 hello - 2: 2 "howdy" said Joe - 3: 3 bonjour - Warning message: - In fread(txt) : Found and resolved improper quoting - ``` - - * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney, Ananda Mahto, @memoryfull, @brandenkmurray for testing dev and reporting these regressions before release to CRAN: #1464, #1671, #1888, #1895, #2070, #2073, #2087, #2091, #2092, #2107, #2118, #2123, #2167, #2194, #2196, #2201, #2222, #2228, #2238, #2246, #2251, #2265, #2267, #2285, #2287, #2299, #2322, #2347, #2352, #2370, #2371, #2395, #2404, #2446, #2453, #2457, #2464, #2481, #2499, #2512, #2515, #2516, #2518, #2520, #2523, #2526, #2535, #2542, #2548, #2561, #2600, #2625, #2666, #2697, #2735, #2744. - -2. `fwrite()`: - * empty strings are now always quoted (`,"",`) to distinguish them from `NA` which by default is still empty (`,,`) but can be changed using `na=` as before. If `na=` is provided and `quote=` is the default `'auto'` then `quote=` is set to `TRUE` so that if the `na=` value occurs in the data, it can be distinguished from `NA`. Thanks to Ethan Welty for the request [#2214](https://github.com/Rdatatable/data.table/issues/2214) and Pasha for the code change and tests, [#2215](https://github.com/Rdatatable/data.table/issues/2215). - * `logical01` has been added and the old name `logicalAsInt` retained. Pease move to the new name when convenient for you. The old argument name (`logicalAsInt`) will slowly be deprecated over the next few years. The default is unchanged: `FALSE`, so `logical` is still written as `"TRUE"`/`"FALSE"` in full by default. 
We intend to change the default's default in future to `TRUE`; see the notice at the top of these release notes. - -3. Added helpful message when subsetting by a logical column without wrapping it in parentheses, [#1844](https://github.com/Rdatatable/data.table/issues/1844). Thanks @dracodoc for the suggestion and @MichaelChirico for the PR. - -4. `tables` gains `index` argument for supplementary metadata about `data.table`s in memory (or any optionally specified environment), part of [#1648](https://github.com/Rdatatable/data.table/issues/1648). Thanks due variously to @jangorecki, @rsaporta, @MichaelChirico for ideas and work towards PR. - -5. Improved auto-detection of `character` inputs' formats to `as.ITime` to mirror the logic in `as.POSIXlt.character`, [#1383](https://github.com/Rdatatable/data.table/issues/1383). Thanks @franknarf1 for identifying a discrepancy and @MichaelChirico for investigating. - -6. `setcolorder()` now accepts less than `ncol(DT)` columns to be moved to the front, [#592](https://github.com/Rdatatable/data.table/issues/592). Thanks @MichaelChirico for the PR. This also incidentally fixed [#2007](https://github.com/Rdatatable/data.table/issues/2007) whereby explicitly setting `select = NULL` in `fread` errored; thanks to @rcapell for reporting that and @dselivanov and @MichaelChirico for investigating and providing a new test. - -7. Three new *Grouping Sets* functions: `rollup`, `cube` and `groupingsets`, [#1377](https://github.com/Rdatatable/data.table/issues/1377). Allows aggregation on various grouping levels at once producing sub-totals and grand total. - -8. `as.data.table()` gains new method for `array`s to return a useful data.table, [#1418](https://github.com/Rdatatable/data.table/issues/1418). - -9.
`print.data.table()` (all via master issue [#1523](https://github.com/Rdatatable/data.table/issues/1523)): - - * gains `print.keys` argument, `FALSE` by default, which displays the keys and/or indices (secondary keys) of a `data.table`. Thanks @MichaelChirico for the PR, Yike Lu for the suggestion and Arun for honing that idea to its present form. - - * gains `col.names` argument, `"auto"` by default, which toggles which registers of column names to include in printed output. `"top"` forces `data.frame`-like behavior where column names are only ever included at the top of the output, as opposed to the default behavior which appends the column names below the output as well for longer (>20 rows) tables. `"none"` shuts down column name printing altogether. Thanks @MichaelChirico for the PR, Oleg Bondar for the suggestion, and Arun for guiding commentary. - - * list columns would print the first 6 items in each cell followed by a comma if there are more than 6 in that cell. Now it ends ",..." to make it clearer, part of [#1523](https://github.com/Rdatatable/data.table/issues/1523). Thanks to @franknarf1 for drawing attention to an issue raised on Stack Overflow by @TMOTTM [here](https://stackoverflow.com/q/47679701). - -10. `setkeyv` accelerated if key already exists [#2331](https://github.com/Rdatatable/data.table/issues/2331). Thanks to @MarkusBonsch for the PR. - -11. Keys and indexes are now partially retained up to the key column assigned to with ':=' [#2372](https://github.com/Rdatatable/data.table/issues/2372). They used to be dropped completely if any one of the columns was affected by `:=`. Thanks to @MarkusBonsch for the PR. - -12. Faster `as.IDate` and `as.ITime` methods for `POSIXct` and `numeric`, [#1392](https://github.com/Rdatatable/data.table/issues/1392). Thanks to Jan Gorecki for the PR. - -13. `unique(DT)` now returns `DT` early when there are no duplicates to save RAM, [#2013](https://github.com/Rdatatable/data.table/issues/2013).
Thanks to Michael Chirico for the PR, and thanks to @mgahan for pointing out a reversion in `na.omit.data.table` before release, [#2660](https://github.com/Rdatatable/data.table/issues/2660#issuecomment-371027948). - -14. `uniqueN()` is now faster on logical vectors. Thanks to Hugh Parsonage for [PR#2648](https://github.com/Rdatatable/data.table/pull/2648). - - ```R - N = 1e9 - # was now - x = c(TRUE,FALSE,NA,rep(TRUE,N)) # - uniqueN(x) == 3 # 5.4s 0.00s - x = c(TRUE,rep(FALSE,N), NA) # - uniqueN(x,na.rm=TRUE) == 2 # 5.4s 0.00s - x = c(rep(TRUE,N),FALSE,NA) # - uniqueN(x) == 3 # 6.7s 0.38s - ``` - -15. Subsetting optimization with keys and indices is now possible for compound queries like `DT[a==1 & b==2]`, [#2472](https://github.com/Rdatatable/data.table/issues/2472). -Thanks to @MichaelChirico for reporting and to @MarkusBonsch for the implementation. - -16. `melt.data.table` now offers friendlier functionality for providing `value.name` for `list` input to `measure.vars`, [#1547](https://github.com/Rdatatable/data.table/issues/1547). Thanks @MichaelChirico and @franknarf1 for the suggestion and use cases, @jangorecki and @mrdwab for implementation feedback, and @MichaelChirico for ultimate implementation. - -17. `update.dev.pkg` is new function to update package from development repository, it will download package sources only when newer commit is available in repository. `data.table::update.dev.pkg()` defaults updates `data.table`, but any package can be used. - -18. Item 1 in NEWS for [v1.10.2](https://github.com/Rdatatable/data.table/blob/master/NEWS.md#changes-in-v1102--on-cran-31-jan-2017) on CRAN in Jan 2017 included : - - > When j is a symbol prefixed with `..` it will be looked up in calling scope and its value taken to be column names or numbers. - > When you see the `..` prefix think one-level-up, like the directory `..` in all operating systems means the parent directory. 
- > In future the `..` prefix could be made to work on all symbols apearing anywhere inside `DT[...]`. - - The response has been positive ([this tweet](https://twitter.com/MattDowle/status/967290562725359617) and [FR#2655](https://github.com/Rdatatable/data.table/issues/2655)) and so this prefix is now expanded to all symbols appearing in `j=` as a first step; e.g. - - ```R - cols = "colB" - DT[, c(..cols, "colC")] # same as DT[, .(colB,colC)] - DT[, -..cols] # all columns other than colB - ``` - - Thus, `with=` should no longer be needed in any cases. Please change to using the `..` prefix and over the next few years we will start to formally deprecate and remove the `with=` parameter. If this is well received, the `..` prefix could be expanded to symbols appearing in `i=` and `by=`, too. Note that column names should not now start with `..`. If a symbol `..var` is used in `j=` but `..var` exists as a column name, the column still takes precedence, for backwards compatibility. Over the next few years, data.table will start issuing warnings/errors when it sees column names starting with `..`. This affects one CRAN package out of 475 using data.table, so we do not believe this restriction to be unreasonable. Our main focus here which we believe `..` achieves is to resolve the more common ambiguity when `var` is in calling scope and `var` is a column name too. Further, we have not forgotten that in the past we recommended prefixing the variable in calling scope with `..` yourself. If you did that and `..var` exists in calling scope, that still works, provided neither `var` exists in calling scope nor `..var` exists as a column name. Please now remove the `..` prefix on `..var` in calling scope to tidy this up. In future data.table will start to warn/error on such usage. - -19. `setindexv` can now assign multiple (separate) indices by accepting a `list` in the `cols` argument. - -20. 
`as.matrix.data.table` method now has an additional `rownames` argument allowing for a single column to be used as the `rownames` after conversion to a `matrix`. Thanks to @sritchie73 for the suggestion, use cases, [#2692](https://github.com/Rdatatable/data.table/issues/2692) and implementation [PR#2702](https://github.com/Rdatatable/data.table/pull/2702) and @MichaelChirico for additional use cases. - -## BUG FIXES - -1. The new quote rules handles this single field `"Our Stock Screen Delivers an Israeli Software Company (MNDO, CTCH)<\/a> SmallCapInvestor.com - Thu, May 19, 2011 10:02 AM EDT<\/cite><\/div>Yesterday in \""Google, But for Finding - Great Stocks\"", I discussed the value of stock screeners as a powerful tool"`, [#2051](https://github.com/Rdatatable/data.table/issues/2051). Thanks to @scarrascoso for reporting. Example file added to test suite. - -2. `fwrite()` creates a file with permissions that now play correctly with `Sys.umask()`, [#2049](https://github.com/Rdatatable/data.table/issues/2049). Thanks to @gnguy for reporting. - -3. `fread()` no longer holds an open lock on the file when a line outside the large sample has too many fields and generates an error, [#2044](https://github.com/Rdatatable/data.table/issues/2044). Thanks to Hugh Parsonage for reporting. - -4. Setting `j = {}` no longer results in an error, [#2142](https://github.com/Rdatatable/data.table/issues/2142). Thanks Michael Chirico for the pull request. - -5. Segfault in `rbindlist()` when one or more items are empty, [#2019](https://github.com/Rdatatable/data.table/issues/2019). Thanks Michael Lang for the pull request. Another segfault if the result would be more than 2bn rows, thanks to @jsams's comment in [#2340](https://github.com/Rdatatable/data.table/issues/2340#issuecomment-331505494). - -6. Error printing 0-length `ITime` and `NA` objects, [#2032](https://github.com/Rdatatable/data.table/issues/2032) and [#2171](https://github.com/Rdatatable/data.table/issues/2171). 
Thanks Michael Chirico for the pull requests and @franknarf1 for pointing out a shortcoming of the initial fix. - -7. `as.IDate.POSIXct` error with `NULL` timezone, [#1973](https://github.com/Rdatatable/data.table/issues/1973). Thanks @lbilli for reporting and Michael Chirico for the pull request. - -8. Printing a null `data.table` with `print` no longer visibly outputs `NULL`, [#1852](https://github.com/Rdatatable/data.table/issues/1852). Thanks @aaronmcdaid for spotting and @MichaelChirico for the PR. - -9. `data.table` now works with Shiny Reactivity / Flexdashboard. The error was typically something like `col not found` in `DT[col==val]`. Thanks to Dirk Eddelbuettel leading Matt through reproducible steps and @sergeganakou and Richard White for reporting. Closes [#2001](https://github.com/Rdatatable/data.table/issues/2001) and [shiny/#1696](https://github.com/rstudio/shiny/issues/1696). - -10. The `as.IDate.POSIXct` method passed `tzone` along but was not exported. So `tzone` is now taken into account by `as.IDate` too as well as `IDateTime`, [#977](https://github.com/Rdatatable/data.table/issues/977) and [#1498](https://github.com/Rdatatable/data.table/issues/1498). Tests added. - -11. Named logical vector now select rows as expected from single row data.table. Thanks to @skranz for reporting. Closes [#2152](https://github.com/Rdatatable/data.table/issues/2152). - -12. `fread()`'s rare `Internal error: Sampling jump point 10 is before the last jump ended` has been fixed, [#2157](https://github.com/Rdatatable/data.table/issues/2157). Thanks to Frank Erickson and Artem Klevtsov for reporting with example files which are now added to the test suite. - -13. `CJ()` no longer loses attribute information, [#2029](https://github.com/Rdatatable/data.table/issues/2029). Thanks to @MarkusBonsch and @royalts for the pull request. - -14. `split.data.table` respects `factor` ordering in `by` argument, [#2082](https://github.com/Rdatatable/data.table/issues/2082). 
Thanks to @MichaelChirico for identifying and fixing the issue. - -15. `.SD` would incorrectly include symbol on lhs of `:=` when `.SDcols` is specified and `get()` appears in `j`. Thanks @renkun-ken for reporting and the PR, and @ProfFancyPants for reporing a regression introduced in the PR. Closes [#2326](https://github.com/Rdatatable/data.table/issues/2326) and [#2338](https://github.com/Rdatatable/data.table/issues/2338). - -16. Integer values that are too large to fit in `int64` will now be read as strings [#2250](https://github.com/Rdatatable/data.table/issues/2250). - -17. Internal-only `.shallow` now retains keys correctly, [#2336](https://github.com/Rdatatable/data.table/issues/2336). Thanks to @MarkusBonsch for reporting, fixing ([PR #2337](https://github.com/Rdatatable/data.table/pull/2337)) and adding 37 tests. This much advances the journey towards exporting `shallow()`, [#2323](https://github.com/Rdatatable/data.table/issues/2323). - -18. `isoweek` calculation is correct regardless of local timezone setting (`Sys.timezone()`), [#2407](https://github.com/Rdatatable/data.table/issues/2407). Thanks to @MoebiusAV and @SimonCoulombe for reporting and @MichaelChirico for fixing. - -19. Fixed `as.xts.data.table` to support all xts supported time based index clasess [#2408](https://github.com/Rdatatable/data.table/issues/2408). Thanks to @ebs238 for reporting and for the PR. - -20. A memory leak when a very small number such as `0.58E-2141` is bumped to type `character` is resolved, [#918](https://github.com/Rdatatable/data.table/issues/918). - -21. The edge case `setnames(data.table(), character(0))` now works rather than error, [#2452](https://github.com/Rdatatable/data.table/issues/2452). - -22. Order of rows returned in non-equi joins were incorrect in certain scenarios as reported under [#1991](https://github.com/Rdatatable/data.table/issues/1991). This is now fixed. Thanks to @Henrik-P for reporting. - -23. 
Non-equi joins work as expected when `x` in `x[i, on=...]` is a 0-row data.table. Closes [#1986](https://github.com/Rdatatable/data.table/issues/1986). - -24. Non-equi joins along with `by=.EACHI` returned incorrect result in some rare cases as reported under [#2360](https://github.com/Rdatatable/data.table/issues/2360). This is fixed now. This fix also takes care of [#2275](https://github.com/Rdatatable/data.table/issues/2275). Thanks to @ebs238 for the nice minimal reproducible report, @Mihael for asking on SO and to @Frank for following up on SO and filing an issue. - -25. `by=.EACHI` works now when `list` columns are being returned and some join values are missing, [#2300](https://github.com/Rdatatable/data.table/issues/2300). Thanks to @jangorecki and @franknarf1 for the reproducible examples which have been added to the test suite. - -26. Indices are now retrieved by exact name, [#2465](https://github.com/Rdatatable/data.table/issues/2465). This prevents usage of wrong indices as well as unexpected row reordering in join results. Thanks to @pannnda for reporting and providing a reproducible example and to @MarkusBonsch for fixing. - -27. `setnames` of whole table when original table had `NA` names skipped replacing those, [#2475](https://github.com/Rdatatable/data.table/issues/2475). Thanks to @franknarf1 and [BenoitLondon on StackOverflow](https://stackoverflow.com/questions/47228836/) for the report and @MichaelChirico for fixing. - -28. `CJ()` works with multiple empty vectors now [#2511](https://github.com/Rdatatable/data.table/issues/2511). Thanks to @MarkusBonsch for fixing. - -29. `:=` assignment of one vector to two or more columns, e.g. `DT[, c("x", "y") := 1:10]`, failed to copy the `1:10` data causing errors later if and when those columns were updated by reference, [#2540](https://github.com/Rdatatable/data.table/issues/2540). 
This is an old issue ([#185](https://github.com/Rdatatable/data.table/issues/185)) that had been fixed but reappeared when code was refactored. Thanks to @patrickhowerter for the detailed report with reproducible example and to @MarkusBonsch for fixing and strengthening tests so it doesn't reappear again. - -30. "Negative length vectors not allowed" error when grouping `median` and `var` fixed, [#2046](https://github.com/Rdatatable/data.table/issues/2046) and [#2111](https://github.com/Rdatatable/data.table/issues/2111). Thanks to @caneff and @osofr for reporting and to @kmillar for debugging and explaining the cause. - -31. Fixed a bug on Windows where `data.table`s containing non-UTF8 strings in `key`s were not properly sorted, [#2462](https://github.com/Rdatatable/data.table/issues/2462), [#1826](https://github.com/Rdatatable/data.table/issues/1826) and [StackOverflow](https://stackoverflow.com/questions/47599934/why-doesnt-r-data-table-support-well-for-non-ascii-keys-on-windows). Thanks to @shrektan for reporting and fixing. - -32. `x.` prefixes during joins sometimes resulted in a "column not found" error. This is now fixed. Closes [#2313](https://github.com/Rdatatable/data.table/issues/2313). Thanks to @franknarf1 for the MRE. - -33. `setattr()` no longer segfaults when setting 'class' to empty character vector, [#2386](https://github.com/Rdatatable/data.table/issues/2386). Thanks to @hatal175 for reporting and to @MarkusBonsch for fixing. - -34. Fixed cases where the result of `merge.data.table()` would contain duplicate column names if `by.x` was also in `names(y)`. -`merge.data.table()` gains the `no.dups` argument (default TRUE) to match the correpsonding patched behaviour in `base:::merge.data.frame()`. Now, when `by.x` is also in `names(y)` the column name from `y` has the corresponding `suffixes` added to it. `by.x` remains unchanged for backwards compatibility reasons. -In addition, where duplicate column names arise anyway (i.e. 
`suffixes = c("", "")`) `merge.data.table()` will now throw a warning to match the behaviour of `base:::merge.data.frame()`. -Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdatatable/data.table/pull/2631) and [PR#2653](https://github.com/Rdatatable/data.table/pull/2653) - -35. `CJ()` now fails with proper error message when results would exceed max integer, [#2636](https://github.com/Rdatatable/data.table/issues/2636). - -36. `NA` in character columns now displays as `<NA>` just like base R to distinguish from `""` and `"NA"`. - -37. `getDTthreads()` could return INT_MAX (2 billion) after an explicit call to `setDTthreads(0)`, [PR#2708](https://github.com/Rdatatable/data.table/pull/2708). - -38. Fixed a bug on Windows that `data.table` may break if the garbage collecting was triggered when sorting a large number of non-ASCII characters. Thanks to @shrektan for reporting and fixing [PR#2678](https://github.com/Rdatatable/data.table/pull/2678), [#2674](https://github.com/Rdatatable/data.table/issues/2674). - -39. Internal aliasing of `.` to `list` was over-aggressive in applying `list` even when `.` was intended within `bquote`, [#1912](https://github.com/Rdatatable/data.table/issues/1912). Thanks @MichaelChirico for reporting/filing and @ecoRoland for suggesting and testing a fix. - -40. Attempt to allocate a wildly large amount of RAM (16EB) when grouping by key and there are close to 2 billion 1-row groups, [#2777](https://github.com/Rdatatable/data.table/issues/2777). Thanks to @jsams for the detailed report. - -41. Fix a bug that `print(dt, class=TRUE)` shows only `topn - 1` rows. Thanks to @heavywatal for reporting [#2803](https://github.com/Rdatatable/data.table/issues/2803) and filing [PR#2804](https://github.com/Rdatatable/data.table/pull/2804). - -## NOTES - -0. The license has been changed from GPL to MPL (Mozilla Public License). All contributors were consulted and approved.
[PR#2456](https://github.com/Rdatatable/data.table/pull/2456) details the reasons for the change. - -1. `?data.table` makes explicit the option of using a `logical` vector in `j` to select columns, [#1978](https://github.com/Rdatatable/data.table/issues/1978). Thanks @Henrik-P for the note and @MichaelChirico for filing. - -2. Test 1675.1 updated to cope with a change in R-devel in June 2017 related to `factor()` and `NA` levels. - -3. Package `ezknitr` has been added to the whitelist of packages that run user code and should be consider data.table-aware, [#2266](https://github.com/Rdatatable/data.table/issues/2266). Thanks to Matt Mills for testing and reporting. - -4. Printing with `quote = TRUE` now quotes column names as well, [#1319](https://github.com/Rdatatable/data.table/issues/1319). Thanks @jan-glx for the suggestion and @MichaelChirico for the PR. - -5. Added a blurb to `?melt.data.table` explicating the subtle difference in behavior of the `id.vars` argument vis-a-vis its analog in `reshape2::melt`, [#1699](https://github.com/Rdatatable/data.table/issues/1699). Thanks @MichaelChirico for uncovering and filing. - -6. Added some clarification about the usage of `on` to `?data.table`, [#2383](https://github.com/Rdatatable/data.table/issues/2383). Thanks to @peterlittlejohn for volunteering his confusion and @MichaelChirico for brushing things up. - -7. Clarified that "data.table always sorts in `C-locale`" means that upper-case letters are sorted before lower-case letters by ordering in data.table (e.g. `setorder`, `setkey`, `DT[order(...)]`). Thanks to @hughparsonage for the pull request editing the documentation. Note this makes no difference in most cases of data; e.g. ids where only uppercase or lowercase letters are used (`"AB123"<"AC234"` is always true, regardless), or country names and words which are consistently capitalized. 
For example, `"America" < "Brazil"` is not affected (it's always true), and neither is `"america" < "brazil"` (always true too); since the first letter is consistently capitalized. But, whether `"america" < "Brazil"` (the words are not consistently capitalized) is true or false in base R depends on the locale of your R session. In America it is true by default and false if you i) type `Sys.setlocale(locale="C")`, ii) the R session has been started in a C locale for you which can happen on servers/services (the locale comes from the environment the R session is started in). However, `"america" < "Brazil"` is always, consistently false in data.table which can be a surprise because it differs to base R by default in most regions. It is false because `"B"<"a"` is true because all upper-case letters come first, followed by all lower case letters (the ascii number of each letter determines the order, which is what is meant by `C-locale`). - -8. `data.table`'s dependency has been moved forward from R 3.0.0 (Apr 2013) to R 3.1.0 (Apr 2014; i.e. 3.5 years old). We keep this dependency as old as possible for as long as possible as requested by users in managed environments. Thanks to Jan Gorecki, the test suite from latest dev now runs on R 3.1.0 continously, as well as R-release (currently 3.4.2) and latest R-devel snapshot. The primary motivation for the bump to R 3.1.0 was allowing one new test which relies on better non-copying behaviour in that version, [#2484](https://github.com/Rdatatable/data.table/issues/2484). It also allows further internal simplifications. Thanks to @MichaelChirico for fixing another test that failed on R 3.1.0 due to slightly different behaviour of `base::read.csv` in R 3.1.0-only which the test was comparing to, [#2489](https://github.com/Rdatatable/data.table/pull/2489). - -9. New vignette added: _Importing data.table_ - focused on using data.table as a dependency in R packages. Answers most commonly asked questions and promote good practices. 
- -10. As warned in v1.9.8 release notes below in this file (25 Nov 2016) it has been 1 year since then and so use of `options(datatable.old.unique.by.key=TRUE)` to restore the old default is now deprecated with warning. The new warning states that this option still works and repeats the request to pass `by=key(DT)` explicitly to `unique()`, `duplicated()`, `uniqueN()` and `anyDuplicated()` and to stop using this option. In another year, this warning will become error. Another year after that the option will be removed. - -11. As `set2key()` and `key2()` have been warning since v1.9.8 (Nov 2016), their warnings have now been upgraded to errors. Note that when they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' in NEWS item 4. They will be removed in one year. - - ``` - Was warning: set2key() will be deprecated in the next relase. Please use setindex() instead. - Now error: set2key() is now deprecated. Please use setindex() instead. - ``` - -12. The option `datatable.showProgress` is no longer set to a default value when the package is loaded. Instead, the `default=` argument of `getOption` is used by both `fwrite` and `fread`. The default is the result of `interactive()` at the time of the call. Using `getOption` in this way is intended to be more helpful to users looking at `args(fread)` and `?fread`. - -13. `print.data.table()` invisibly returns its first argument instead of `NULL`. This behavior is compatible with the standard `print.data.frame()` and tibble's `print.tbl_df()`. Thanks to @heavywatal for [PR#2807](https://github.com/Rdatatable/data.table/pull/2807) - - -# data.table v1.10.4-3 (20 Oct 2017) - -1. Fixed crash/hang on MacOS when `parallel::mclapply` is used and data.table is merely loaded, [#2418](https://github.com/Rdatatable/data.table/issues/2418). Oddly, all tests including test 1705 (which tests `mclapply` with data.table) passed fine on CRAN. 
It appears to be some versions of MacOS or some versions of libraries on MacOS, perhaps. Many thanks to Martin Morgan for reporting and confirming this fix works. Thanks also to @asenabouth, Joe Thorley and Danton Noriega for testing, debugging and confirming that automatic parallelism inside data.table (such as `fwrite`) works well even on these MacOS installations. See also news items below for 1.10.4-1 and 1.10.4-2. - - -# data.table v1.10.4-2 (12 Oct 2017) - -1. OpenMP on MacOS is now supported by CRAN and included in CRAN's package binaries for Mac. But installing v1.10.4-1 from source on MacOS failed when OpenMP was not enabled at compile time, [#2409](https://github.com/Rdatatable/data.table/issues/2409). Thanks to Liz Macfie and @fupangpangpang for reporting. The startup message when OpenMP is not enabled has been updated. - -2. Two rare potential memory faults fixed, thanks to CRAN's automated use of latest compiler tools; e.g. clang-5 and gcc-7 - - -# data.table v1.10.4-1 (09 Oct 2017) - -1. The `nanotime` v0.2.0 update (June 2017) changed from `integer64` to `S4` and broke `fwrite` of `nanotime` columns. Fixed to work with `nanotime` both before and after v0.2.0. - -2. Pass R-devel changes related to `deparse(,backtick=)` and `factor()`. - -3. Internal `NAMED()==2` now `MAYBE_SHARED()`, [#2330](https://github.com/Rdatatable/data.table/issues/2330). Back-ported to pass under the stated dependency, R 3.0.0. - -4. Attempted improvement on Mac-only when the `parallel` package is used too (which forks), [#2137](https://github.com/Rdatatable/data.table/issues/2137). Intel's OpenMP implementation appears to leave threads running after the OpenMP parallel region (inside data.table) has finished unlike GNU libgomp. So, if and when `parallel`'s `fork` is invoked by the user after data.table has run in parallel already, instability occurs. The problem only occurs with Mac package binaries from CRAN because they are built by CRAN with Intel's OpenMP library. 
No known problems on Windows or Linux and no known problems on any platform when `parallel` is not used. If this Mac-only fix still doesn't work, call `setDTthreads(1)` immediately after `library(data.table)` which has been reported to fix the problem by putting `data.table` into single threaded mode earlier. - -5. When `fread()` and `print()` see `integer64` columns are present but package `bit64` is not installed, the warning is now displayed as intended. Thanks to a question by Santosh on r-help and forwarded by Bill Dunlap. - - -# data.table v1.10.4 (01 Feb 2017) - -## BUG FIXES - -1. The new specialized `nanotime` writer in `fwrite()` type punned using `*(long long *)&REAL(column)[i]` which, strictly, is undefined behavour under C standards. It passed a plethora of tests on linux (gcc 5.4 and clang 3.8), win-builder and 6 out 10 CRAN flavours using gcc. But failed (wrong data written) with the newest version of clang (3.9.1) as used by CRAN on the failing flavors, and solaris-sparc. Replaced with the union method and added a grep to CRAN_Release.cmd. - - -# data.table v1.10.2 (31 Jan 2017) - -## NEW FEATURES - -1. When `j` is a symbol prefixed with `..` it will be looked up in calling scope and its value taken to be column names or numbers. - - ```R - myCols = c("colA","colB") - DT[, myCols, with=FALSE] - DT[, ..myCols] # same - ``` - - When you see the `..` prefix think _one-level-up_ like the directory `..` in all operating systems meaning the parent directory. In future the `..` prefix could be made to work on all symbols apearing anywhere inside `DT[...]`. It is intended to be a convenient way to protect your code from accidentally picking up a column name. Similar to how `x.` and `i.` prefixes (analogous to SQL table aliases) can already be used to disambiguate the same column name present in both `x` and `i`. 
A symbol prefix rather than a `..()` _function_ will be easier for us to optimize internally and more convenient if you have many variables in calling scope that you wish to use in your expressions safely. This feature was first raised in 2012 and long wished for, [#633](https://github.com/Rdatatable/data.table/issues/633). It is experimental. - -2. When `fread()` or `print()` see `integer64` columns are present, `bit64`'s namespace is now automatically loaded for convenience. - -3. `fwrite()` now supports the new [`nanotime`](https://cran.r-project.org/package=nanotime) type by Dirk Eddelbuettel, [#1982](https://github.com/Rdatatable/data.table/issues/1982). Aside: `data.table` already automatically supported `nanotime` in grouping and joining operations via longstanding support of its underlying `integer64` type. - -4. `indices()` gains a new argument `vectors`, default `FALSE`. This strsplits the index names by `__` for you, [#1589](https://github.com/Rdatatable/data.table/issues/1589). - - ```R - DT = data.table(A=1:3, B=6:4) - setindex(DT, B) - setindex(DT, B, A) - indices(DT) - [1] "B" "B__A" - indices(DT, vectors=TRUE) - [[1]] - [1] "B" - [[2]] - [1] "B" "A" - ``` - -## BUG FIXES - -1. Some long-standing potential instability has been discovered and resolved many thanks to a detailed report from Bill Dunlap and Michael Sannella. At C level any call of the form `setAttrib(x, install(), allocVector())` can be unstable in any R package. Despite `setAttrib()` PROTECTing its inputs, the 3rd argument (`allocVector`) can be executed first only for its result to be released by `install()`'s potential GC before reaching `setAttrib`'s PROTECTion of its inputs. Fixed by either PROTECTing or pre-`install()`ing. Added to CRAN_Release.cmd procedures: i) `grep`s to prevent usage of this idiom in future and ii) running data.table's test suite with `gctorture(TRUE)`. - -2.
A new potential instability introduced in the last release (v1.10.0) in GForce optimized grouping has been fixed by reverting one change from malloc to R_alloc. Thanks again to Michael Sannella for the detailed report. - -3. `fwrite()` could write floating point values incorrectly, [#1968](https://github.com/Rdatatable/data.table/issues/1968). A thread-local variable was incorrectly thread-global. This variable's usage lifetime is only a few clock cycles so it needed large data and many threads for several threads to overlap their usage of it and cause the problem. Many thanks to @mgahan and @jmosser for finding and reporting. - -## NOTES - -1. `fwrite()`'s `..turbo` option has been removed as the warning message warned. If you've found a problem, please [report it](https://github.com/Rdatatable/data.table/issues). - -2. No known issues have arisen due to `DT[,1]` and `DT[,c("colA","colB")]` now returning columns as introduced in v1.9.8. However, as we've moved forward by setting `options('datatable.WhenJisSymbolThenCallingScope'=TRUE)` introduced then too, it has become clear a better solution is needed. All 340 CRAN and Bioconductor packages that use data.table have been checked with this option on. 331 lines would need to be changed in 59 packages. Their usage is elegant, correct and recommended, though. Examples are `DT[1, encoding]` in quanteda and `DT[winner=="first", freq]` in xgboost. These are looking up the columns `encoding` and `freq` respectively and returning them as vectors. But if, for some reason, those columns are removed from `DT` and `encoding` or `freq` are still variables in calling scope, their values in calling scope would be returned. Which cannot be what was intended and could lead to silent bugs. That was the risk we were trying to avoid.
-`options('datatable.WhenJisSymbolThenCallingScope')` is now removed. A migration timeline is no longer needed. The new strategy needs no code changes and has no breakage. It was proposed and discussed in point 2 [here](https://github.com/Rdatatable/data.table/issues/1188#issuecomment-127824969), as follows.
-When `j` is a symbol (as in the quanteda and xgboost examples above) it will continue to be looked up as a column name and returned as a vector, as has always been the case. If it's not a column name however, it is now a helpful error explaining that data.table is different to data.frame and what to do instead (use `..` prefix or `with=FALSE`). The old behaviour of returning the symbol's value in calling scope can never have been useful to anybody and therefore not depended on. Just as the `DT[,1]` change could be made in v1.9.8, this change can be made now. This change increases robustness with no downside. Rerunning all 340 CRAN and Bioconductor package checks reveal 2 packages throwing the new error: partools and simcausal. Their maintainers have been informed that there is a likely bug on those lines due to data.table's (now remedied) weakness. This is exactly what we wanted to reveal and improve. - -3. As before, and as we can see is in common use in CRAN and Bioconductor packages using data.table, `DT[,myCols,with=FALSE]` continues to lookup `myCols` in calling scope and take its value as column names or numbers. You can move to the new experimental convenience feature `DT[, ..myCols]` if you wish at leisure. - - -# data.table v1.10.0 (03 Dec 2016) - -## BUG FIXES - -1. `fwrite(..., quote='auto')` already quoted a field if it contained a `sep` or `\n`, or `sep2[2]` when `list` columns are present. Now it also quotes a field if it contains a double quote (`"`) as documented, [#1925](https://github.com/Rdatatable/data.table/issues/1925). Thanks to Aki Matsuo for reporting. Tests added. The `qmethod` tests did test escaping embedded double quotes, but only when `sep` or `\n` was present in the field as well to trigger the quoting of the field. - -2. Fixed 3 test failures on Solaris only, [#1934](https://github.com/Rdatatable/data.table/issues/1934). 
Two were on both sparc and x86 and related to a `tzone` attribute difference between `as.POSIXct` and `as.POSIXlt` even when passed the default `tz=""`. The third was on sparc only: a minor rounding issue in `fwrite()` of 1e-305. - -3. Regression crash fixed when 0's occur at the end of a non-empty subset of an empty table, [#1937](https://github.com/Rdatatable/data.table/issues/1937). Thanks Arun for tracking down. Tests added. For example, subsetting the empty `DT=data.table(a=character())` with `DT[c(1,0)]` should return a 1 row result with one `NA` since 1 is past the end of `nrow(DT)==0`, the same result as `DT[1]`. - -4. Fixed newly reported crash that also occurred in old v1.9.6 when `by=.EACHI`, `nomatch=0`, the first item in `i` has no match AND `j` has a function call that is passed a key column, [#1933](https://github.com/Rdatatable/data.table/issues/1933). Many thanks to Reino Bruner for finding and reporting with a reproducible example. Tests added. - -5. Fixed `fread()` error occurring for a subset of Windows users: `showProgress is not type integer but type 'logical'.`, [#1944](https://github.com/Rdatatable/data.table/issues/1944) and [#1111](https://github.com/Rdatatable/data.table/issues/1111). Our tests cover this usage (it is just default usage), pass on AppVeyor (Windows), win-builder (Windows) and CRAN's Windows so perhaps it only occurs on a specific and different version of Windows to all those. Thanks to @demydd for reporting. Fixed by using strictly `logical` type at R level and `Rboolean` at C level, consistently throughout. - -6. Combining `on=` (new in v1.9.6) with `by=` or `keyby=` gave incorrect results, [#1943](https://github.com/Rdatatable/data.table/issues/1943). Many thanks to Henrik-P for the detailed and reproducible report. Tests added. - -7. New function `rleidv` was ignoring its `cols` argument, [#1942](https://github.com/Rdatatable/data.table/issues/1942). Thanks Josh O'Brien for reporting. Tests added. - -## NOTES - -1. 
It seems OpenMP is not available on CRAN's Mac platform; NOTEs appeared in [CRAN checks](https://cran.r-project.org/web/checks/check_results_data.table.html) for v1.9.8. Moved `Rprintf` from `init.c` to `packageStartupMessage` to avoid the NOTE as requested urgently by Professor Ripley. Also fixed the bad grammar of the message: 'single threaded' now 'single-threaded'. If you have a Mac and run macOS or OS X on it (I run Ubuntu on mine) please contact CRAN maintainers and/or Apple if you'd like CRAN's Mac binary to support OpenMP. Otherwise, please follow [these instructions for OpenMP on Mac](https://github.com/Rdatatable/data.table/wiki/Installation) which people have reported success with. - -2. Just to state explicitly: data.table does not now depend on or require OpenMP. If you don't have it (as on CRAN's Mac it appears but not in general on Mac) then data.table should build, run and pass all tests just fine. - -3. There are now 5,910 raw tests as reported by `test.data.table()`. Tests cover 91% of the 4k lines of R and 89% of the 7k lines of C. These stats are now known thanks to Jim Hester's [Covr](https://CRAN.R-project.org/package=covr) package and [Codecov.io](https://about.codecov.io/). If anyone is looking for something to help with, creating tests to hit the missed lines shown by clicking the `R` and `src` folders at the bottom [here](https://app.codecov.io/github/Rdatatable/data.table?branch=master) would be very much appreciated. - -4. The FAQ vignette has been revised given the changes in v1.9.8. In particular, the very first FAQ. - -5. With hindsight, the last release v1.9.8 should have been named v1.10.0 to convey it wasn't just a patch release from .6 to .8 owing to the 'potentially breaking changes' items. Thanks to @neomantic for correctly pointing out. The best we can do now is now bump to 1.10.0. 
- - -# data.table v1.9.8 (Nov 2016) back to v1.2 (Aug 2008) has been moved to [NEWS.0.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.0.md) +# data.table v1.14.10 (Dec 2023) back to v1.10.0 (Dec 2016) has been moved to [NEWS.1.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.1.md) From ac576061b8766efbe8e5995adfeaa62aef91bf77 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 9 Dec 2023 14:41:07 +0100 Subject: [PATCH 64/88] ignore newly added file (#5818) --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 22a3a807f..343b168b0 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -21,6 +21,7 @@ ^Makefile$ ^NEWS\.0\.md$ +^NEWS\.1\.md$ ^_pkgdown\.yml$ ^src/Makevars$ ^CODEOWNERS$ From f37f8e96b4d098f985df65c6394a53a9a7874c5a Mon Sep 17 00:00:00 2001 From: eitsupi <50911393+eitsupi@users.noreply.github.com> Date: Sun, 10 Dec 2023 00:28:03 +0900 Subject: [PATCH 65/88] Switch to pkgdown Bootstrap 5 template (#5505) * Switch to pkgdown bs5 templage * ignore docs dir used by pkgdown * Update _pkgdown.yml --- .Rbuildignore | 1 + .gitignore | 3 +++ _pkgdown.yml | 3 +++ 3 files changed, 7 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index 343b168b0..9b64f6267 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -40,6 +40,7 @@ ^bus$ ^pkgdown$ +^docs$ ^lib$ ^library$ ^devwd$ diff --git a/.gitignore b/.gitignore index 559df7b9d..e05f2b803 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,6 @@ dev.R *.RDS *.diff *.patch + +# pkgdown +docs diff --git a/_pkgdown.yml b/_pkgdown.yml index 66488b928..c69f920c0 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,5 +1,8 @@ url: https://rdatatable.gitlab.io/data.table +template: + bootstrap: 5 + development: version_tooltip: "Development version" From d47c4b63eaa7ba99191385a15792257625e9ffac Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sat, 9 Dec 2023 22:17:17 +0100 Subject: [PATCH 66/88] minor vignette correction (#5819) --- 
vignettes/datatable-programming.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vignettes/datatable-programming.Rmd b/vignettes/datatable-programming.Rmd index 89d129201..4e8a99879 100644 --- a/vignettes/datatable-programming.Rmd +++ b/vignettes/datatable-programming.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( ## Introduction -`data.table`, from its very first releases, enabled the usage of `subset` and `with` (or `within`) functions by defining the`[.data.table` method. `subset` and `with` are base R functions that are useful for reducing repetition in code, enhancing readability, and reducing number the total characters the user has to type. This functionality is possible in R because of a quite unique feature called *lazy evaluation*. This feature allows a function to catch its arguments, before they are evaluated, and to evaluate them in a different scope than the one in which they were called. Let's recap usage of the `subset` function. +`data.table`, from its very first releases, enabled the usage of `subset` and `with` (or `within`) functions by defining the `[.data.table` method. `subset` and `with` are base R functions that are useful for reducing repetition in code, enhancing readability, and reducing the total number of characters the user has to type. This functionality is possible in R because of a quite unique feature called *lazy evaluation*. This feature allows a function to catch its arguments, before they are evaluated, and to evaluate them in a different scope than the one in which they were called. Let's recap usage of the `subset` function. ```{r df_print, echo=FALSE} registerS3method("print", "data.frame", function(x, ...) { @@ -71,7 +71,7 @@ my_subset = function(data, col, val) { my_subset(iris, col = "Species", val = "setosa") ``` -Here, we compute a logical vector of length `nrow(iris)`, then this vector is supplied to the `i` argument of `[.data.frame` to perform ordinary logical vector subsetting.
It works well for this simple example, but it lacks flexibility, introduces variable repetition, and requires user to change the function interface to pass the column name as a character rather than unquoted symbol. The more complex the expression we need to parameterize, the less practical this approach becomes. +Here, we compute a logical vector of length `nrow(iris)`, then this vector is supplied to the `i` argument of `[.data.frame` to perform ordinary "logical vector"-based subsetting. It works well for this simple example, but it lacks flexibility, introduces variable repetition, and requires user to change the function interface to pass the column name as a character rather than unquoted symbol. The more complex the expression we need to parameterize, the less practical this approach becomes. #### Use of `parse` / `eval` @@ -110,7 +110,7 @@ my_subset = function(data, col, val) { my_subset(iris, Species, "setosa") ``` -Here, we used the base R `substitute` function to transform the call `subset(data, col == val)` into `subset(iris, Species == "setosa")` by substituting `data`, `col`, and `val` with their original names (or values) from their parent environment. The benefits of this approach to the previous ones should be clear. Note that because we operate at the level of language objects, and don't have to resort to string manipulation, we refer to this as *computing on the language*. There is a dedicated chapter on *Computing on the language* in [R language manual](https://cran.r-project.org/doc/manuals/r-release/R-lang.html). Although it is not necessary for *programming on data.table*, we encourage readers to read this chapter for the sake of better understanding this powerful and unique feature. +Here, we used the base R `substitute` function to transform the call `subset(data, col == val)` into `subset(iris, Species == "setosa")` by substituting `data`, `col`, and `val` with their original names (or values) from their parent environment. 
The benefits of this approach to the previous ones should be clear. Note that because we operate at the level of language objects, and don't have to resort to string manipulation, we refer to this as *computing on the language*. There is a dedicated chapter on *Computing on the language* in the [R language manual](https://cran.r-project.org/doc/manuals/r-release/R-lang.html). Although it is not necessary for *programming on data.table*, we encourage readers to read this chapter for the sake of better understanding this powerful and unique feature of the R language. #### Use third party packages From 3c17ead51917ba59049aaa838eba823e99d3ab7a Mon Sep 17 00:00:00 2001 From: Tyson Barrett Date: Sun, 10 Dec 2023 10:40:00 -0700 Subject: [PATCH 67/88] Emphasize the release procedure with tagging (#5817) * Emphasize the release procedure with tagging --- .dev/CRAN_Release.cmd | 88 +++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/.dev/CRAN_Release.cmd b/.dev/CRAN_Release.cmd index 94a4a17ec..8fc72bc77 100644 --- a/.dev/CRAN_Release.cmd +++ b/.dev/CRAN_Release.cmd @@ -583,11 +583,12 @@ ls -1 *.tar.gz | grep -E 'Chicago|dada2|flowWorkspace|LymphoSeq' | TZ='UTC' para # Release to CRAN ############################################### -Bump version to even release number in 3 places : - 1) DESCRIPTION - 2) NEWS; add ?closed=1 to the milestone link, don't add date yet as that published-on-CRAN date isn't yet known - 3) dllVersion() at the end of init.c -DO NOT push to GitHub. Prevents even a slim possibility of user getting premature version. Even release numbers must have been obtained from CRAN and only CRAN. There were too many support problems in the past before this procedure was brought in.
+# Bump version to even release number in 3 places : +# 1) DESCRIPTION +# 2) NEWS; add ?closed=1 to the milestone link, don't add date yet as that published-on-CRAN date isn't yet known +# 3) dllVersion() at the end of init.c +# DO NOT push to GitHub's master branch. Prevents even a slim possibility of user getting premature version. +# Even release numbers must have been obtained from CRAN and only CRAN. There were too many support problems in the past before this procedure was brought in. du -k inst/tests # 1.5MB before bzip2 inst/tests/*.Rraw # compress *.Rraw just for release to CRAN; do not commit compressed *.Rraw to git du -k inst/tests # 0.75MB after @@ -595,49 +596,48 @@ R CMD build . export GITHUB_PAT="f1c.. github personal access token ..7ad" Rdevel -q -e "packageVersion('xml2')" # ensure installed Rdevel CMD check data.table_1.16.0.tar.gz --as-cran # use latest Rdevel as it may have extra checks -# bunzip2 inst/tests/*.Rraw.bz2 # decompress *.Rraw again so as not to commit compressed *.Rraw to git -# -Resubmit to winbuilder (R-release, R-devel and R-oldrelease) -Submit to CRAN. Message template : ------------------------------------------------------------- -Hello, -1,016 CRAN revdeps checked. None are impacted. -Many thanks! -Best, Matt ------------------------------------------------------------- -DO NOT commit or push to GitHub. Leave 4 files (.dev/CRAN_Release.cmd, DESCRIPTION, NEWS and init.c) edited and not committed. Include these in a single and final bump commit below. -DO NOT even use a PR. Because PRs build binaries and we don't want any binary versions of even release numbers available from anywhere other than CRAN. -Leave milestone open with a 'release checks' issue open. Keep updating status there. -** If on EC2, shutdown instance. Otherwise get charged for potentially many days/weeks idle time with no alerts ** -If it's evening, SLEEP. -It can take a few days for CRAN's checks to run. If any issues arise, backport locally. 
Resubmit the same even version to CRAN. -CRAN's first check is automatic and usually received within an hour. WAIT FOR THAT EMAIL. -When CRAN's email contains "Pretest results OK pending a manual inspection" (or similar), or if not and it is known why not and ok, then bump dev. + +# Resubmit to winbuilder (R-release, R-devel and R-oldrelease) +# Submit to CRAN. Message template : +# ------------------------------------------------------------ +# Hello, +# 1,016 CRAN revdeps checked. None are impacted. +# Many thanks! +# Best, Matt +# ------------------------------------------------------------ +# DO NOT commit or push to GitHub. Leave 4 files (.dev/CRAN_Release.cmd, DESCRIPTION, NEWS and init.c) edited and not committed. Include these in a single and final bump commit below. +# DO NOT even use a PR. Because PRs build binaries and we don't want any binary versions of even release numbers available from anywhere other than CRAN. +# Leave milestone open with a 'release checks' issue open. Keep updating status there. +# ** If on EC2, shutdown instance. Otherwise get charged for potentially many days/weeks idle time with no alerts ** +# If it's evening, SLEEP. +# It can take a few days for CRAN's checks to run. If any issues arise, backport locally. Resubmit the same even version to CRAN. +# CRAN's first check is automatic and usually received within an hour. WAIT FOR THAT EMAIL. +# When CRAN's email contains "Pretest results OK pending a manual inspection" (or similar), or if not and it is known why not and ok, then bump dev. ###### Bump dev for NON-PATCH RELEASE -0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone. -1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd -2. Bump minor version in DESCRIPTION to next odd number. 
Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. -3. Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on )" on the released heading. -4. Bump minor version in dllVersion() in init.c -5. Bump 3 minor version numbers in Makefile -6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.99 to 1.15.99 inc below, 1.15.0 to 1.16.0 above, 1.14.0 to 1.15.0 below -7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) -8. Push to master with this consistent commit message: "1.15.0 on CRAN. Bump to 1.14.10" -9. Take sha from step 8 and run `git tag 1.15.0 96c..sha..d77` then `git push origin 1.15.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) +# 0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone. +# 1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd +# 2. Bump minor version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. +# 3. Add new heading in NEWS for the next dev version. Add "(submitted to CRAN on )" on the released heading. +# 4. Bump minor version in dllVersion() in init.c +# 5. Bump 3 minor version numbers in Makefile +# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.99 to 1.15.99 inc below, 1.15.0 to 1.16.0 above, 1.14.0 to 1.15.0 below +# 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) +# 8. Push to master with this consistent commit message: "1.15.0 on CRAN. Bump to 1.14.10" +# 9. 
Take sha from step 8 and run `git tag 1.15.0 96c..sha..d77` then `git push origin 1.15.0` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) ###### ###### Bump dev for PATCH RELEASE ## WARNING: review this process during the next first patch release (x.y.2) from a regular release (x,y,0), possibly during 1.15.2 release. -0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone. -1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd -2. Bump patch version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. -3. Add new heading in NEWS for the next dev PATCH version. Add "(submitted to CRAN on )" on the released heading. -4. Bump patch version in dllVersion() in init.c -5. Bump 3 patch version numbers in Makefile -6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.9 to 1.14.11 inc below, 1.14.10 to 1.14.12 above, 1.14.8 to 1.14.10 below -7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) -8. Push to master with this consistent commit message: "1.14.8 on CRAN. Bump to 1.14.10" -9. Take sha from step 8 and run `git tag 1.14.8 96c..sha..d77` then `git push origin 1.14.8` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) -###### \ No newline at end of file +# 0. Close milestone to prevent new issues being tagged with it. The final 'release checks' issue can be left open in a closed milestone. +# 1. Check that 'git status' shows 4 files in modified and uncommitted state: DESCRIPTION, NEWS.md, init.c and this .dev/CRAN_Release.cmd +# 2. Bump patch version in DESCRIPTION to next odd number. Note that DESCRIPTION was in edited and uncommitted state so even number never appears in git. 
+# 3. Add new heading in NEWS for the next dev PATCH version. Add "(submitted to CRAN on )" on the released heading. +# 4. Bump patch version in dllVersion() in init.c +# 5. Bump 3 patch version numbers in Makefile +# 6. Search and replace this .dev/CRAN_Release.cmd to update 1.14.9 to 1.14.11 inc below, 1.14.10 to 1.14.12 above, 1.14.8 to 1.14.10 below +# 7. Another final gd to view all diffs using meld. (I have `alias gd='git difftool &> /dev/null'` and difftool meld: http://meldmerge.org/) +# 8. Push to master with this consistent commit message: "1.14.8 on CRAN. Bump to 1.14.10" +# 9. Take sha from step 8 and run `git tag 1.14.8 96c..sha..d77` then `git push origin 1.14.8` (not `git push --tags` according to https://stackoverflow.com/a/5195913/403310) +###### From 1b768a869db1693e98c467582ca21fbde2862264 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 10 Dec 2023 18:41:15 +0100 Subject: [PATCH 68/88] CI reporting improvements (#5820) * CI reporting improvements * clang flags --- .ci/publish.R | 118 +++++++++++-------------------------------------- .gitlab-ci.yml | 27 +++++------ 2 files changed, 39 insertions(+), 106 deletions(-) diff --git a/.ci/publish.R b/.ci/publish.R index 0657790d2..7d43a44e6 100644 --- a/.ci/publish.R +++ b/.ci/publish.R @@ -3,16 +3,20 @@ format.deps <- function(file, which) { if (all(is.na(deps.raw))) return(character()) deps.raw = gsub("\n", " ", deps.raw, fixed=TRUE) deps.full = trimws(strsplit(deps.raw, ", ", fixed=TRUE)[[1L]]) - deps = trimws(sapply(strsplit(deps.full, "(", fixed=TRUE), `[[`, 1L)) - deps.full = gsub(">=", "≥", deps.full, fixed=TRUE) - deps.full = gsub("<=", "≤", deps.full, fixed=TRUE) - if (any(grepl(">", deps.full, fixed=TRUE), grepl("<", deps.full, fixed=TRUE), grepl("=", deps.full, fixed=TRUE))) + deps.full.split = strsplit(deps.full, "(", fixed=TRUE) + deps = trimws(sapply(deps.full.split, `[[`, 1L)) + vers = trimws(sapply(deps.full.split, function(x) if (length(x)>1L) paste0("(",x[[2L]]) else "")) + vers 
= gsub(">=", "≥", vers, fixed=TRUE) + vers = gsub("<=", "≤", vers, fixed=TRUE) + if (any(grepl(">", vers, fixed=TRUE), grepl("<", vers, fixed=TRUE), grepl("=", vers, fixed=TRUE))) stop("formatting dependencies version for CRAN-line package website failed because some dependencies have version defined using operators other than >= and <=") - names(deps.full) <- deps + names(vers) <- deps base.deps = c("R", unlist(tools:::.get_standard_package_names(), use.names = FALSE)) ans = sapply(deps, function(x) { - if (x %in% base.deps) deps.full[[x]] ## base R packages are not linked - else sprintf("
%s", x, deps.full[[x]]) + if (x %in% base.deps) { + if (nchar(vers[[x]])) paste(x, vers[[x]]) else x ## base R packages are not linked + } + else sprintf("%s%s", x, x, if (nchar(vers[[x]])) paste0(" ",vers[[x]]) else "") }) sprintf("%s:%s", which, paste(ans, collapse=", ")) } @@ -207,24 +211,11 @@ check.copy <- function(job, repodir="bus/integration/cran"){ dir.create(job.checks<-file.path(repodir, "web", "checks", pkg<-"data.table", job), recursive=TRUE); os = plat(job) from = file.path("bus", sprintf("%s/%s.Rcheck", job, pkg)) - current.rout = c("main.Rout","main.Rout.fail","knitr.Rout","knitr.Rout.fail","memtest.csv","memtest.png") - if (os=="Windows") { - dir.create(file.path(job.checks, "tests_i386"), showWarnings=FALSE) - dir.create(file.path(job.checks, "tests_x64"), showWarnings=FALSE) - rout32 = file.path("tests_i386", current.rout) - rout64 = file.path("tests_x64", current.rout) - file.copy(file.path(from, rout32)[file.exists(file.path(from, rout32))], file.path(job.checks, "tests_i386")) - file.copy(file.path(from, rout64)[file.exists(file.path(from, rout64))], file.path(job.checks, "tests_x64")) - routs = c(rout32, rout64) - } else if (os=="Mac OS X") { - dir.create(file.path(job.checks, "tests"), showWarnings=FALSE) - routs = file.path("tests", current.rout) - file.copy(file.path(from, routs)[file.exists(file.path(from, routs))], file.path(job.checks, "tests")) - } else { - dir.create(file.path(job.checks, "tests"), showWarnings=FALSE) - routs = file.path("tests", current.rout) - file.copy(file.path(from, routs)[file.exists(file.path(from, routs))], file.path(job.checks, "tests")) - } + tests = list.files("tests", pattern="\\.R$") + current.rout = c(paste0(tests, "out"), paste0(tests, "out.fail")) + dir.create(file.path(job.checks, "tests"), showWarnings=FALSE) + routs = file.path("tests", current.rout) + file.copy(file.path(from, routs)[file.exists(file.path(from, routs))], file.path(job.checks, "tests")) inst.check.files = file.path(from, 
inst.check<-c("00install.out","00check.log")) file.copy(inst.check.files[file.exists(inst.check.files)], job.checks) setNames(file.exists(file.path(job.checks, c(inst.check, routs))), c(inst.check, routs)) @@ -274,75 +265,46 @@ log.copy <- function(job, repodir="bus/integration/cran") { Sys.sleep(0.1) ## to not get ban from gitlab.com setNames(file.exists(to), "log") } - ci.status <- function(job) { if (!file.exists(status_file <- file.path("bus", job, "status"))) return(NA_character_) readLines(status_file, warn=FALSE)[1L] } - ci.log <- function(jobs, repodir="bus/integration/cran") { pkg = "data.table" ans = vector("character", length(jobs)) logs = sapply(jobs, log.copy, repodir=repodir) statuses = sapply(jobs, ci.status) + statuses[statuses=="success"] = paste0("",statuses[statuses=="success"],"") + statuses[statuses=="failed"] = paste0("",statuses[statuses=="failed"],"") ans[!logs] = statuses[!logs] ans[logs] = sprintf('%s', pkg[any(logs)], jobs[logs], statuses[logs]) ans } check.index <- function(pkg, jobs, repodir="bus/integration/cran") { - status = function(x) if (grepl("^.*ERROR", x)) "ERROR" else if (grepl("^.*WARNING", x)) "WARNING" else if (grepl("^.*NOTE", x)) "NOTE" else if (grepl("^.*OK", x)) "OK" else NA_character_ - test.files = function(job, files, trim.name=FALSE, trim.exts=0L, pkg="data.table") { - stopifnot(trim.name + as.logical(trim.exts) < 2L) # cannot use both + status = function(x) if (grepl("^.*ERROR", x)) "ERROR" else if (grepl("^.*WARNING", x)) "WARNING" else if (grepl("^.*NOTE", x)) "NOTE" else if (grepl("^.*OK", x)) "OK" else NA_character_ + test.files = function(job, trim=TRUE, pkg="data.table") { + files = paste0("tests/", list.files("tests", pattern="\\.R$"), "out.fail") links = sapply(files, function(file) { if (!file.exists(file.path(repodir, "web/checks", pkg, job, file))) return(NA_character_) dir = if (!identical(d<-dirname(file), ".")) d sprintf("%s", pkg, job, file, - if (trim.name) paste(c(dir, tools::file_ext(file)), 
collapse="/") else if (trim.exts) { for (i in 1:trim.exts) { file<-tools::file_path_sans_ext(file) }; file } else file) + if (trim) sub(".Rout.fail", "", basename(file), fixed=TRUE) else file) }) paste(na.omit(links), collapse=", ") } - routs = lapply(jobs, function(job) { - current.rout = c("main.Rout.fail","knitr.Rout.fail") - os = plat(job) - if (os=="Windows") { - rout32 = file.path("tests_i386", current.rout) - rout64 = file.path("tests_x64", current.rout) - routs = c(rout32, rout64) - } else if (os=="Mac OS X") { - routs = file.path("tests", current.rout) - } else { - routs = file.path("tests", current.rout) - } - routs - }) - memouts = lapply(jobs, function(job) { - current.memout = c("memtest.csv","memtest.png") - os = plat(job) - if (os=="Windows") { - mem32 = file.path("tests_i386", current.memout) - mem64 = file.path("tests_x64", current.memout) - memouts = c(mem32, mem64) - } else if (os=="Mac OS X") { - memouts = file.path("tests", current.memout) - } else { - memouts = file.path("tests", current.memout) - } - memouts - }) - th = "FlavorVersionRevisionInstallStatusFlagsRout.failLogMemtest" + th = "FlavorVersionRevisionInstallCheckFlagsRout.failLog" tbl = sprintf( - "%s%s%sout%s%s%s%s%s", + "%s%s%sout%s%s%s%s", sub("test-", "", jobs, fixed=TRUE), ## Flavor sapply(jobs, pkg.version, pkg), ## Version sapply(jobs, pkg.revision, pkg), ## Revision pkg, jobs, ## Install - pkg, jobs, sapply(sapply(jobs, check.test, pkg="data.table"), status), ## Status + pkg, jobs, sapply(sapply(jobs, check.test, pkg="data.table"), status), ## Check sapply(jobs, pkg.flags, pkg), ## Flags - mapply(test.files, jobs, routs, trim.exts=2L), ## Rout.fail: 1st fail, 2nd Rout, keep just: tests_x64/main - ci.log(jobs), ## CI job logs - mapply(test.files, jobs, memouts, trim.name=TRUE) ## Memtest // currently not used + sapply(jobs, test.files), ## Rout.fail + ci.log(jobs) ## CI job logs ) file = file.path(repodir, "web/checks", sprintf("check_results_%s.html", pkg)) writeLines(c( @@ 
-378,29 +340,3 @@ check.test <- function(job, pkg) { check[length(check)] } -move.bin <- function(job, bin.version, os.type, file="DESCRIPTION", silent=TRUE) { - ## currently not used, if not used for macos in future then can be removed - if (os.type=="unix") { - stop("publish of linux binaries not supported") - } else if (os.type=="windows") { - plat.path = "windows" - extension = "zip" - } else if (os.type=="macosx") { - plat.path = "macosx/el-capitan" - extension = "tgz" - } - dcf = read.dcf(file) - pkg = dcf[,"Package"][[1L]] - version = dcf[,"Version"][[1L]] - src.path = file.path("bus",job,"cran/bin",plat.path,"contrib",bin.version) - if (!silent && !dir.exists(src.path)) stop(sprintf("expected directory does not exists %s", src.path)) - bin.file = sprintf("%s_%s.%s", pkg, version, extension) - tgt.path = file.path("bus/integration/cran/bin",plat.path,"contrib",bin.version) - if (!file.exists(file.path(src.path, bin.file))) { - if (!silent) stop(sprintf("expected binaries does not exists %s", file.path(src.path, bin.file))) - } else { - if (!dir.exists(tgt.path)) dir.create(tgt.path, recursive=TRUE) - file.rename(file.path(src.path,bin.file), file.path(tgt.path,bin.file)) - } - setNames(file.exists(file.path(tgt.path,bin.file)), file.path(tgt.path,bin.file)) -} diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 099f39977..587885092 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -177,7 +177,7 @@ test-lin-dev-gcc-strict-cran: Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 2 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, installed package size) but ", shQuote(l)) else q("no")' ## R-devel on Linux clang -# R compiled with clang +# R compiled with clang, flags removed: -flto=auto -fopenmp # tests for compilation warnings # tests for new notes test-lin-dev-clang-cran: @@ -189,8 +189,8 @@ test-lin-dev-clang-cran: 
_R_S3_METHOD_LOOKUP_BASEENV_AFTER_GLOBALENV_: "FALSE" _R_S3_METHOD_LOOKUP_REPORT_SEARCH_PATH_USES_: "TRUE" script: - - echo 'CFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars - - echo 'CXXFLAGS=-g -O2 -flto=auto -fno-common -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars + - echo 'CFLAGS=-g -O2 -fno-common -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars + - echo 'CXXFLAGS=-g -O2 -fno-common -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars - *install-deps - R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) - (! grep "warning:" data.table.Rcheck/00install.out) @@ -332,20 +332,17 @@ integration: - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_REL_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_DEV_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="win.binary", ver=Sys.getenv("R_OLD_VERSION")), type="win.binary", fields="Revision", addFiles=TRUE)' - #- Rscript -e 'move.bin("test-mac-rel", Sys.getenv("R_REL_VERSION"), os.type="macosx")' - #- Rscript -e 'move.bin("test-mac-dev", Sys.getenv("R_DEV_VERSION"), os.type="macosx")' - #- Rscript -e 'move.bin("test-mac-old", Sys.getenv("R_OLD_VERSION"), os.type="macosx")' - #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_REL_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' - #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_DEV_VERSION")), type="mac.binary.el-capitan", fields="Revision", 
addFiles=TRUE)' - #- Rscript -e 'tools::write_PACKAGES(contrib.url("bus/integration/cran", type="mac.binary.el-capitan", ver=Sys.getenv("R_OLD_VERSION")), type="mac.binary.el-capitan", fields="Revision", addFiles=TRUE)' - ## install all pkgs to render html and double check successful installation of all devel packages - - mkdir -p /tmp/opencran/library /tmp/opencran/doc/html ## reset R_LIBS_USER to re-install all with html because pkgdown image has pre installed curl knitr - - R_LIBS_USER="" Rscript -e 'install.packages("data.table", dependencies=TRUE, lib="/tmp/opencran/library", repos=file.path("file:",normalizePath("bus/integration/cran")), INSTALL_opts="--html", quiet=TRUE)' + #### macos mkdir cran/bin/.../contrib/... + #### macos move binaries + #### macos write_PACKAGES + ## install pkg to render html + - mkdir -p /tmp/opencran/library /tmp/opencran/doc/html + - Rscript -e 'install.packages("data.table", lib="/tmp/opencran/library", repos=file.path("file:",normalizePath("bus/integration/cran")), INSTALL_opts="--html", quiet=TRUE)' - Rscript -e 'packageVersion("data.table", lib.loc="/tmp/opencran/library")' ## CRAN style web/CRAN_web.css - wget -q -P bus/integration/cran/web https://cran.r-project.org/web/CRAN_web.css ## web/packages/$pkg/index.html - - Rscript -e 'sapply(rownames(installed.packages(lib.loc="/tmp/opencran/library", priority="NA")), package.index, lib.loc="/tmp/opencran/library")' + - Rscript -e 'sapply(setNames(nm=rownames(installed.packages(lib.loc="/tmp/opencran/library", priority="NA"))), package.index, lib.loc="/tmp/opencran/library")' ## R docs, html, css, icons - Rscript -e 'doc.copy(repodir="/tmp/opencran")' ## Update packages.html, fix paths @@ -353,8 +350,8 @@ integration: - mv /tmp/opencran/doc bus/integration/cran/ ## library html manual, vignettes - Rscript -e 'lib.copy(lib.from="/tmp/opencran/library")' - ## web/checks/$pkg/$job 00install.out, 00check.log, *.Rout, memtest.csv, memtest.png ## memtest not available for now #5764 - - 
Rscript -e 'sapply(names(test.jobs), check.copy, simplify=FALSE)' + ## web/checks/$pkg/$job 00install.out, 00check.log, *.Rout + - Rscript -e 'sapply(names(test.jobs), check.copy)' ## web/packages/$pkg/$pkg.pdf - Rscript -e 'pdf.copy("data.table", "test-lin-rel")' ## web/checks/check_results_$pkg.html From 0c6c567b53389169b37c30c969270c5933a151b4 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 10 Dec 2023 18:49:06 +0100 Subject: [PATCH 69/88] clarify readme (#5821) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 562799db4..fd815f855 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ ```r install.packages("data.table") -# latest development version that has passed all tests: +# latest development version data.table::update_dev_pkg() ``` From f1be897218821ffd631bad4443e24fc56c85fd81 Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Sun, 10 Dec 2023 18:52:39 +0100 Subject: [PATCH 70/88] clarify install devel in readme even more (#5822) --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fd815f855..19a812df4 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,11 @@ ```r install.packages("data.table") -# latest development version +# latest development version (only if newer available) data.table::update_dev_pkg() + +# latest development version (force install) +install.packages("data.table", repos="https://rdatatable.gitlab.io/data.table") ``` See [the Installation wiki](https://github.com/Rdatatable/data.table/wiki/Installation) for more details. 
From a6009e87003c81be9c02f1212033663f8d0754a3 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 13 Dec 2023 07:47:06 +0100 Subject: [PATCH 71/88] add favicons to webpage (#5561) * add favicons * update CI * update files * update CI * adjust paths * tidy comment * Update .gitlab-ci.yml Co-authored-by: Jan Gorecki --------- Co-authored-by: Jan Gorecki --- .gitlab-ci.yml | 13 +++++++------ .graphics/favicon/apple-touch-icon-120x120.png | Bin 0 -> 12821 bytes .graphics/favicon/apple-touch-icon-152x152.png | Bin 0 -> 17797 bytes .graphics/favicon/apple-touch-icon-180x180.png | Bin 0 -> 23371 bytes .graphics/favicon/apple-touch-icon-60x60.png | Bin 0 -> 5062 bytes .graphics/favicon/apple-touch-icon-76x76.png | Bin 0 -> 6667 bytes .graphics/favicon/apple-touch-icon.png | Bin 0 -> 23371 bytes .graphics/favicon/favicon-16x16.png | Bin 0 -> 1222 bytes .graphics/favicon/favicon-32x32.png | Bin 0 -> 2365 bytes .graphics/favicon/favicon.ico | Bin 0 -> 15086 bytes 10 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 .graphics/favicon/apple-touch-icon-120x120.png create mode 100644 .graphics/favicon/apple-touch-icon-152x152.png create mode 100644 .graphics/favicon/apple-touch-icon-180x180.png create mode 100644 .graphics/favicon/apple-touch-icon-60x60.png create mode 100644 .graphics/favicon/apple-touch-icon-76x76.png create mode 100644 .graphics/favicon/apple-touch-icon.png create mode 100644 .graphics/favicon/favicon-16x16.png create mode 100644 .graphics/favicon/favicon-32x32.png create mode 100644 .graphics/favicon/favicon.ico diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 587885092..008f5c076 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -297,8 +297,9 @@ integration: script: - R --version - *install-deps ## markdown pkg not present in r-pkgdown image + - mkdir -p ./pkgdown/favicon/ && cp .graphics/favicon/* ./pkgdown/favicon/ ## copy favicons - rm -rf ./vignettes ## r-lib/pkgdown#2383 - - 
Rscript -e 'pkgdown::build_site(override=list(destination="./pkgdown"))' + - Rscript -e 'pkgdown::build_site(override=list(destination="./website"))' ## html manual, vignettes, repos, cran_web, cran_checks - echo 'source(".ci/ci.R"); source(".ci/publish.R")' >> .Rprofile ## list of available test-* jobs dynamically based on bus/test-* directories @@ -359,12 +360,12 @@ integration: ## web/checks/check_flavors.html - Rscript -e 'check.flavors(names(test.jobs))' ## pkgdown vignettes workaround r-lib/pkgdown#2383 - - mkdir -p pkgdown/articles - - cp bus/integration/cran/library/data.table/doc/*.html pkgdown/articles/. - - rm pkgdown/articles/index.html + - mkdir -p website/articles + - cp bus/integration/cran/library/data.table/doc/*.html website/articles/. + - rm website/articles/index.html ## pkgdown merge - - Rscript -e 'common_files<-function(path1, path2) intersect(list.files(path1, all.files=TRUE, no..=TRUE), list.files(path2, all.files=TRUE, no..=TRUE)); msg = if (length(f<-common_files("pkgdown","bus/integration/cran"))) paste(c("Following artifacts will be overwritten by pkgdown artifacts:", paste0(" ", f)), collapse="\n") else "No overlapping files from pkgdown artifacts"; message(msg); q("no")' - - mv pkgdown/* bus/integration/cran/ + - Rscript -e 'common_files<-function(path1, path2) intersect(list.files(path1, all.files=TRUE, no..=TRUE), list.files(path2, all.files=TRUE, no..=TRUE)); msg = if (length(f<-common_files("website","bus/integration/cran"))) paste(c("Following artifacts will be overwritten by pkgdown artifacts:", paste0(" ", f)), collapse="\n") else "No overlapping files from pkgdown artifacts"; message(msg); q("no")' + - mv website/* bus/integration/cran/ ## add plausible.io stats - find bus/integration/cran -type f -iname "*.html" | xargs sed -i 's!!!g' <<: *artifacts diff --git a/.graphics/favicon/apple-touch-icon-120x120.png b/.graphics/favicon/apple-touch-icon-120x120.png new file mode 100644 index 
0000000000000000000000000000000000000000..1b7b472b3502c6e527342c78c29951371568f45b GIT binary patch literal 12821 zcmZ{LbySpJ)b#)Z3|&J?=SX)+mw>c@lt_ahjg;h&(jna-C;}oOAze~G8U&;pk#3~k z%eTI@-oIWQo#o*3JU7nWXPZJ@Ao*Z7?>Zkz+nzpcuywAJ%2O6`4cEr&4bP{pBrI}RC7m`_J#p&3qflw`jpJl*K za4{!2`FT~qt#4~?gtdD1U1BEdA2^Q6AtioZfrMMb6I z?3Z!ZLhQkz-ul*-<#4vx8$Z9B$&VjDzDVUVw((z!6^Weq!@LBQCF~n3dfG2fAPN6( zczDrPILldGT|E}&mDk#pmYyb!)hp5v?AT3N7r4=&CNUk|0eQ|pT^VT4b_n2 zWDO~qzS|S3w(3v13k?lj${5m)1+wZa(uk_8K#KyqS~(Kbpz40-l2phK8?5nt>0mXnXm7^{q61TN3lvpp8ZKI zD`dU(m}JrDg6d4%+{4sh^wEYjHbPF|S@sSNS-ci)*zvvci#}U4)xKx;wr+06kNEib zdRUX+gS}oM8%e=SQ4O1!F>nL{r2gm6A8P`ML&z&bLlvlOx}ck8&*&)O)f(NsvZ|`8 zZ;aBKfVQ^wbDihUOHWTvU7~jel5_osBD1Ea+U}a~A zm~RfilNd=pK6b12+%pQax8KnS>2Rs&Fw+7H?-G4<+*MZDirr{}U!niVl9z&ZpufL# z;hTBL<>~ZNulB47;k>p zs@yDx5X7BdT(nW+Vl1IA!=R|`~t~TL*m6nq7{XFi;zH;-Qk}N`U z@{8CD0s?}y`@1@H>M5vYHuY;2C&Jf+xNG4WTq}W#e9P?&(gB`r_}J*NT~qF^6Pa`7 zyxVLQ@f~J_8yg#!6M1**_ctda(lxVhc*X@=A_$3bF+!5r4^(0?IZ{-iA+l%;>vbPK zeAses+(^y3+grL*Z*$vFwfp|teUU?jc-QlZ=fNT#i*fjsqSPT0|M8Kd%k?KTT`HJ$ zsD4_MT?ly%wtZsiWXyS-)1;tFUw=PlmzstbWq5FpMMo0LvzhMhZrrAYfY>3MXQ$V< zFWnmFRkh(6<(DTPPYnvRg4RUEMoyyfFuDW=(|7{y)_%sScrCmoCnYDxj1sSBWC+c# ztRz6vw;x={mf43fbfTv{i$l4FM>1nm{|#eCCn6CFdR&T)SZ7+Mz#M;`b9nXUvx6=Z z10OnduX>bO@4uG@29af7f2bBu+1s9 zTh8^uZH76KatwKWPI*Lv>Rnd+=0~eNY8o0Eg%f|8e|-AH_LMLbrDk^ilTImf{hZ`} z+mn50nJ)Y1Df`i1k;3C-Slmo7YAh3>H4F|=R^92*^Azi6C1FGXli z5;V=Y{qPW`z(+;R7J{J6PZjB+Q;X1e3rA9*Yx9Sc_%Bm%d~m~hN5HGBnu}7zXETrP z(`sf&MoCH8@NcKg8YHj+)<1_leb-2Lh{!MoCLhuEs14j>&3pV40MHK=BVMun36!_U;evNxH_5?x<?0BT${#!Ih?KRzCiw_NQO;t>NI{J6NjvdR9V zr8LXwMBi%3k)JUYv6j=#UDW981_8@_hvXwAMf)t3)4aN- zh6_K!(#P1?7(;zR^6Kj9mWYU`+y1-hWDQT|^7^{fTV9H0{ZA8;^}c62O}V~1r42$3 zzqA0Jl2^pFVI?ukCVeFAfGoR_G|-SnzTZVhCxYNnI$+9Q%VSgiSCk*AJrOuuZiOGXYAIJ_wC`g3wyve z)6bLhwDJ*HaF(z;S~Lduszu8-9jtv%Go=4?D7VH%rb6HHe0O%TP%)ZOG>!uh4%5S* zkDL>7b905$2H(my@eN0t$-%yqm6h)7>^ufhd!L=0d~!e_edBUe6fgB9UD)yWM{$DI zFEr))PfWV<82P@E&@cH($Ng+1 
zPAzBp;;gJTotv{bitf)@EY6+~7MwvummVXLM*U3ug#?%!^IESb3gSm|w^~+$O}L*r zIdPNmn3nTX;7(j$U#AHP3BCSzeeGb7dX_U(@2`(!Vq)qzX%=T^-&}lp3VQavDIxU`eVu;=^`InQ|6l&}`AT#Yq_9w+U@Y_%lRrtV5!dE_86ky* zj9pchy;1LceFvuO-$pMTNUvRt{2KsJ#)~VjsGxwb9nQXr<@@bIPk}4<^D19AlKibS zgN%uXw2f6@(1_G*6o6hYQVpnGrjous87CP|Z*}>u$=9Wm@zIMj@OSKixCtt-Rx&SYwwGEr0+mLu``a!uRzS}@C6g|JTNLJ zZx`9S*7kAhrAC8?v<1bAXA__3D0&pJrhP`VzzvK+i{0O#iW4QtSzs}+l31mGB7dq! zhVU^0&Yr%ynm_$&F?=S&MUlirPY)}iD;$R*wNPnmaY8nv(63LOP9~}p7k`>>hWbEu zwMgcf>Z#ZEx4i`exZ2C0;%DR`EuoLit6h7DP5f^}cm7I+@d;x?se{~&$KdEN#^s}d zc77;3X<(*hXaRJ4GL~LJF7zi>%P>kl8bnq9)`U zg}}2&1^=r+!tO_yM@7bOK0DviHD73O*$m%FMMZCg8N-(%!EGZfp>p|@V$Y4136vNp ztU7Y7ivdzB_+!7hGRAc>l#FC{r}0ClYmy?=~K*kCpsqsN%UZPoR;B;op5Cs z@!`}VH%iBrwf%ZYQ)1?S5s~=_WNrZEBsQg7CY^zz%C9wkYx^G^OJeU7Lvqxlr;JGb5?Tik2#aeG`5YIDoZwy{3I!2mDbnS^BWo{+_#1m0qQU^ zF?A7t#=_~47%6INqw9^Q>s@SlR~2|4ASfbY>Fs^#5L(TN1DFFLc4*`>I&Bg&1L4Q^ zYJOz}8DxL#+FPTK#iAYKxPAfn2mbK)4yiMQB#JIy-er`q=+aEnBb1e6E=)i8N|6>S zo7+1P`pmM=(2UtRJImYJvi)dkIszO;rgEA|Qj!LYr|0JCBrz|Kpk`L$_a~M1k3_p* z>!+r|qN1>Oe}BhGNl5`$M$GNui(U5i6(g|OQ?Rkah^b=9?1gaLvxy?U2QmuBYmtrr_!0h*S*5Rk zIe+R<3$bQ4dT!C36>1kk8XZNW#f}9-p$PAMPvR+YPuvmj^t_!;i=K5HCxbdcBjeTM zsLrQ({Y18KUT3mE4YyvmAFwO80$CJ@(B=uP!~JKB^i+||(cbD;B|JG)8HEUJkTE`& zmdZLhaxMCusHdy3!r}1z+FJO-I)8Zl{O8LQO;|@Lp`ehE`9z62h!0G0adAQzRPM{g z8wN4J%YNA>rlbVVPT=+RQv|b6j{db%p!6jo`2*?23t6L39{;SFQG&7si(V7reB+5S z()J%xnfuLUF*QALs|gmd_bvZqPGt8ux`e0`G)X!B2XApQ54NQJ`xC}x*D`t{GPev& z&L=w;|LJRgIQ*>gok(62M@tJZQ_n6K}!JG-st!^YS~nj3_U~}-bQ9y@*10O z4CBTnmb~7?HVpBeP>~AG?7~k)OmT?Xjzer0g;ao1YfuH=Hj$cJ-<<}Gg1>RH;tYM1%n1HB?+lCgtQKlNyu-(5q0x`g@m+F%$abh!r9}E zUDnvKRl}fih;QXVf39e}H(e?5^@wg>mr+%`B>L=#YnI36EU65@2|l7)tYQS|(7@44S+4KS!^l3S}mMG$ORR zx|)!f7+hN`s09x@Klj|7ZzOs7@+CM@WiKjeU(m9uDaOZYe42RQ6+uw+ zDMR&wlSpP(*4X@fOh+>LUiaslTP?aOr z`45qhyb(OvOSfv8Is)^HMdjv~6k&SLvH@7rhaycd($`QM0!YJpP84?~`&K3-spIss z3bY*(2?_qo-IsYI=u$34?3NLU{1kV9Pog~3aVG4aQhp!;wI56B z6i+8IZqh+UX~q@*O5Lkwl8 
z^%M0yMKCflCL|>x%gZ_8WMm*DiwX)D6wbr;k!~%|IyrBNWes7UfJGUo`v)eUwB}a&lr$4qkK*svPY1+#F!EafGF-DrvJ6rIuMc zt7~g~@6O+F8r8z0$+&*pV3oY4DlRTg2PKe^v9arFH`eZ4J%P2g_2&(0Vk#mvm27fq${a{@wJ3O(n^rPyO9bE*UQpHb^=G0z)A5M~ zuh$QPXPcPX36Ap5%cxrX-Ar0WF`Kx8;-!vO3GQivGbRv|Ld2ZhDMiJ4-F5NnGbdrF z6^`=XjIOn{dt}O{I_+XWEmr)^fRj*>C?_Z9a=R#gbYjAts?@R@wdlJlF$sybu5KvH zv()@=-w0WgvBt*6)`ojR#l->8nfjmUfn&gF@C_oe-?>{rv^_AP9(`=#!RV2lY)k6c_e5t!y~R&JQUt=Ee`wezewm zH2#do9BAGK^-c>Z0+{hwEv2PcK#rSf_7MP*C9m_M#D~<>RZv!jPT4E^_=ri}-?&fy z`eotmEvjGy>f6}efB$^9J;a$=Ss4L`5Lxgb@y>k;B9)4YYIJFdPG{}aOXpndT=HcX zQ5kLcyc1jbxz+AmWl#AZek<$K5KBEX3JltK%z!pJ&cVX6vPbscsg#O{Kt=sf0Wzz| z85=*Ub6$!AY|LAN7KU{B(G^}&pi`QVnMp-e3hMl2AKIsJ)EziH%|QbLsvcfmXcQG+ zo)87U$MVtxB8My?FE1|yY{8c=>~9vEDPO;SZBX}ylGb|}<>sADi9YhZ zl@PA3(08!2auB3qW?^CA;J}f->>!|rp=xA{27_6io}G0ZTttNWvwp&6fJ)^NfXu$V z4PlSl?x#+A;yfC5X8|Kmo{Af>r_>sQ3V-zenFzR zmwwyq*I22mm-Za$wM>(`nueDy)Xal8P56MT!iGi^Ze>+;@jKXf<7G}b1gW5Qw59Ny}a}WaJl;L zZzbl)iJgOkJeaNTyLa+ddHo@*FjTwDj!1V3PI>t<%Y=-K_w(~6fTVc0EGGZGy5JKP zh2@QqgV7hoS{s55{#aaG0LTkC&z+N{&&>s?G|QZ{^z_2^=IXm_YI4*j^m4~$-Ft{7 zSaHMTQ1YBTqB+zjK9v?%5%D_y7F5d;#*_Xx7C-vySNQGqMFc8bQc|)Ykp>3k6%>SK z4S`dDVfWn#X!(FOp@9q)P7kwlbX4^Bm-O}Z)o}l!1xHV{E|{`6uQEtTPDU#lgS9Qi zZ*JQ79rciZn|)KDE$lf`dSPv4Wp;OSW$?`m1;}1e>FMb;-0p}d)Gpq91U7#Lu3MwT zdTYxn7A?+oaY@PQ*i$;ihX5EDSaxe1zQO=S`1 zJ3F8@#!B3~xHhk!8yg*k#43e^hC;2Ztd`#`UDLuC3KAV_WT#70xSlHOugruZz7auLX6{oQo&VCO-#>~tN!qa-W z8afy-u5_6G!WO8Y4Y%iWe7w9tB_*usY9%K>UKME33kZ;8WMmlB*x-TndDsU4I&0B9 ztumlAGvOvz(3TjIaj>%s()bk8Z-rfY1X?YCxvX`+CqeM=5HzZx%LXy4Fa%bqvC8Y} z!rk26wT+CTu1>a?fSiR9vVtt5gp-q74yJPT4GbXl%XlL)5$_iGcrzrR0FykJ0<3IW?(R9wtrH5*sz3YZ`mJXk8To+wi9 z>%DqDetu+?W11zC9+(R&D+X8;g9aA{(CtY`NePRLjC{tpR%U(A<{ZwP__Lq``#y;oeU5^6;L7Pb#d*VcAsR*}N3od6}u?Au4R%jA=CB%K8P* zM{(3*$kfzS!hYPYb~E1P@JCKEMn*>4*Ot>ilm6mippB1@YnhnDgyE1gN=paIS=5qn zMLg##?Fg5l4_aI_1q!R$%(&%Pq2lFPWnk=!SN^0^Q&WfD63Wk>bvUY?=tQo&*zMjL%R?>-`lU}x{_48!5G z{28M)u2TGQfOaOmfwnq!F7-np0 z%(sSfW?J5ffaL`1@C`KhtZZ!~l=fG;B97Y82p;G@s6XI6T7(vAZ^s2CofJU2<}|Lu 
zeK_sf+C)GT&?qyJ)=rv*t+`v1ZhdgIfBlb~@A{>ay(y@~OQZ4uA8v!??sb_Cm z`SR#hHT^s6dCU`o%xES~KD|h$T zQ0zb+z!X#&E+RgiM-_||kp1Qp4G#~Gy#)J6CtY_rLHVdgf#lgR*v|p73dYe!kQWx)O`7LIx$!N zO$3^)dr60{yJjy*q7v}uVp@}+c4Eqgo=8XaHaT)+$1k8&wQbdoDE87TI%jJ0%8ub~e-<6hOx{T?gis|=>%(3` zDxy(oh_F;k!waoQx3|47rEi#qJp!Od?-=mUP(y>>3y0{a=f3#c3~Qa-#pvHdZcS|w ziPqtn@rvL7REqVKq6qri77$`~<{5-x=O{5*e}jAM^P=k{ZK<>Ez%dBr#`=K4#hV;a zh&*Q-L1rbPO*;o-wVn{lAb1m1w3el5VJIZOF;U+*stV8Z^W2$+PkB-%j}4>I+s zZ2g^ekTu`&-)n}rT2wfmusM6}#T!Ug5>`;qVV6tGsV03PftnRr?Zpj8Lv|?}UBKo` zq=oaN`TJW=r1~`qs*4KCs8j=OjM$$fD_^&=ArU3HVf&SDoH)5#Vsd>I01n;*a3;^O zA(?3-$-TejvU2~5wCFe^6wj39BmRKnj8QR(9Jiy-Y9*|mZ(Md5pcfR9*RDpbRYF(& z8st()Rb5W0#c7DfZ7bdoBRBd3rbUF&XTyb-CWRLq70)mmLC<4^A0^iNZmq9vtO_FB*gM=2l(6${D-2DZTkF~Sy;$;wmKFcu5Y{L{*@m;IDfuuT{5z65y zS9>liWlM=j4YS*mH@h%iuC|p)eIY&|Mi=TZF|qu&HR#{3Jo!{1v^z=XU}efD?>pSzUi-jk|4|oKyw=Sc6QsyN>DcEcti=4lN|yANf_Xcl{2TOx z9NPWxJxW$4=2TQutKVlt&@aVc=W_I;qkJJ&5b}g=A>mf}I-OXA#oq+|=Uv%HXwI2LsGo}kb<7$ts?`8Gc>c0-`Z@ySjv}K9eF|#oW)~z%{-;lv zkA&aIczT}atFgXoEGu)^%DX%L^bTi9b zwrVF6R0xSeU2=mbl@a`S_bS-$m!3<@KMp7det?g37wR&7Y)sIlKfSCpq8Pj52bFRF zd9Bwyj@7LDvfsb0gk7#z*di1`7kkqT9`&*~@b(Tr(&x678+q&@)$6twO4#E|mViD9k7y zenE`L@Kk$cZkK&|tKQQu5^1@S+Vxzai+Jbuc7$L)FiT9}scD&Zs=qfa(DGx1AxE6* z1J`c1HmtuHRZvh+$Y^N94gK-h2U-7j8|YZPRa8`r()g6j!pxlQ9#Ar61dZQS%C}%c zMhwttspeK5vu7LoW3N)bL!8w*(%OxrjS+`X_<>C7P4!-Dm8S zewk8IGm?q?^tPSB4Sa~vCrpw&R8>|?8X^?W0l~gwn~=c_{NHBKNLXJ=!UmbVX)Ufv zA^Hb7bR#+()iRW1Ui2>x2f-G1-x)&UbPydz<0u+fM$V8l5gZd=+J+dvZ_kfq`cr~* zP=vV%syi8Z6RT`fPJm~x?#(Rn)&0L|$tJIZfS$FZa&pBZlDIVq?zF#hh^D>?YR%x; zH^M4O9>x|Hs|%pcA8y!4HU3;xm}TqDgQ88vb$l~*m6f1vxv4g-(Q^0w>Xh?jcSf>( zwn9&Z%8_I!duhvZT0?`!cAxR8I^pcawbZY+n#hH}XR8yM(mlYisl?9CE_HZ)%LCkS z@=45CpaLTA8DV*XjE;_8z>iqVpZGJcXK48NLrRJvB1(sijV+JiK2rQi@TAz+9N$|% zX}Ts)Zux{lrFdJhrIC^A4BH=4=l@l{B2gp)B7pa1hApzP`B)VgP3G2cyP8GaH~%BS z#HUUGl-sq6qCS87K^*yRv{8si{XDf3mu#omIFVJ&G`Y+u{CW)?TI1)$Yy|F5tc3&j z;tr>J%egwo8DM!0eED+W)$7avAz|UVsxM#cO>k`04UFW?%^5aaL;?S8UH~3hyP-^> 
z()xKf9Bbh$p4*idyf*>TfN~%HcBpJ5pp&|(qKEM~2xg*%g@pw_sF?FZO}q~^5U@Ix z&%Vh`{qW)R6^DK~#qS?)Ib6ka*6HvH5lKu>;}Y_2*4aA1sBT%4|I~q2P~^M)Mz(pu zm*NipU81TC0CM6x;0S^eMV0RgUW$5SK^57nxo=LzC~X{EO?rS;z)dod`BAu06@3 zu?9{qE_GLyG|L}moG*XCfiG1kL#;-Qbt12_<=kivY?-_X_lt(BMmyj;29DPL2fv4{ ztu0+lTwE&$AKw_LRH>a@BM2Tpe%v=atl#tVr>R4gfoJ*4reE~{34wXBYiJt1pt3T) z`LGjbyxg$H2>7JiSecnW&b#)~+*9;VZ{gI;Qa|`e5eS56op~2V0U$)mNpPqA;rEG& zM9063ofAnp^k=}R9Dq?t0Ei7g|4Zj%nqO^}Fz3%e6pf|Rl9HZf0dLOm3savkTzvfS ztlmlnLRiB8x3D^cQ})!iCBWLvt=bLPPgclQHsX;;)c-PtqN8JCvdMu<`V7Ru+c$G{ zvECB7E+?wYDvUr4vAX?tCa16eiwO8%o#o^@yC;qQ)rFW-00rjDTk!<~p#OUTo2maD z=pK>C5_Zb%1MR`*Y9p-}3<4_lfJv$)8HK(ShBaC%lNW~}>X%~&%01>MG9Q2&w zz^TRN7~t8ounz9S@(^EfQBl7Or|h?Y1XpcoOXnZq+FluO#!+tClKJcJnW1>jei$fA zTh+6KiL!=-&AX3*JDFNk2J|7fG?kSv)hhH~rDbPR=T`i?M~vEkPv^JB{-n#k^;A)@ z=u20Q+3`QPjO>5czFyfsmS%-9fM?nOU#7B? zlhcHAz{#W1va-L(_h-H>R@diy>(@*7t%GK~H+tM@X+V^9BhiaFePRLuhE86ueG?oldOPo0fxY+^5JMx>Ym+U{f4>+0*1Kjr$W4h#)Be(1NlFVtmU3+jJzy!pfgRgfw+ z*F3!mEF*isc}*S`@CX)s``EQYpC#hV+SAh`&z`a)O&Q)lG&oq4oW$)YKjW3I04l)M z(oealy{`S>$L}cxY-(FBSEI)x=mL~~l^d=JQ^g**QP&%Mn&@vGByBD>hkl6gBPzD2QRNUFK>WS-otk&{`Ue$XRFsXKL7Ux1drGn!38gTbqzdp%)RJc z+?;J*zpL73BIk7y>^TXbJa&61~do%Q`yjgpfTZaTjG!~kvvO=qmj@n y1POLop)u9y(htr|_I3Z)P2G(-7^Ix9$_?2eC-OL!6G{P(fT$>HD14POL;eqCXX+RL literal 0 HcmV?d00001 diff --git a/.graphics/favicon/apple-touch-icon-152x152.png b/.graphics/favicon/apple-touch-icon-152x152.png new file mode 100644 index 0000000000000000000000000000000000000000..f2363e268aa40208f3a33679dba2f6c688e8cd1e GIT binary patch literal 17797 zcmZ`>Wk6J2wB?fy>F)0CM!E;-?nXiZ>5xzwB&0(c0qIWZE|D&2knV zs9(djBy|iQ{Uet)2~%21xhCV#IXRR!U;**uP{`(y$F_;BqX{E z$=ju?5A7{at8P0tVSXG9RAwYg*xzps3g zPh8yjnIhO6xVOA0D=OZ_l$QGY-W-4Xl(`oS(Q(Hy{-hYK*WgG*pZLSf(eco&%j`6x zOy_55>qaC?g0P6l!P3%_Eh?30T3TAx{{H?{P|$x|Q}Edq*p~GMT!d&yRB)6w6WK@O zBO|SR92_nqeFUbY-iOr#TYGyYMWv;V4i3LNuInp@h34t?n|!-Ro{ho3z^Ki3BDGV$lnpJ=huU$8Z0Wy*;IlXYi-a@1JO_S5v< z7h?kNnwy*1goK3FviY41n7;41w=liqmwokLcxfrKzrOy$;qvnGOP095&l*9;Dg46Z 
zM&C`?bNxqs4jyPQpK1_LD0sKc8*~B$D~~_?|N}GduAK2uyX1jh!}AKQY`(77NCu zs?h%z{`>dut-QRv8E}b}#$M~6*X7B*Wzb*0wjc2PTpUtb>Uy%;zG&>ZQmC!1eYLx_ z^;lk4=MI5DVzDp>jz#(q@u))I;>xPzh_4uFXw--@Fzm)n7pvVYd#?G{)Yof^;HbBg z<-S$ucsw6jd@Zp2-Nx3If}NGMt+=kP&TM=5o966cNcfP|E6mTIKevF@ERBwirudeU z;`uTy@s#Md86&XOdZ%wYe0o~tcR3}$ElXgGJ2W=7l8};eGc-IrY!w^BKW4*&A7!0a zW*d1EUh?xNo~*2_K3MLT?39$bCWjg3tIEKqvdf33T^T)w>8UBI&WDwgVf|X0J8mAH z7b87AFLP(-io%W#@*>v7hm6BRM_mt(>#K!@g@a^PeeAoti9mKlM8pY*sTXmt1y}RC zIrgKAoR>CM4vv$N;o*Bl6O&by;%Ong!vF~lJw3bOi3!up#KemSa641J$IbHOLatlo z;3kwf36nTFZu8o}V$6WegasFA%Ff6*XzfsU*YO64${j4iw!XeTMp9Ce`)a_$k?Y0I z7&mr=N=X&O#?0)^^FQ6rgpp*nC@X90_GGXEXW(K9C^&Qd-;&F0ZEsT*s!a198T_pU zd(Ly*w28h_s$EvKbcAEv(I~5=`0gD`ocKMXn&?@mrT20BxS@QsqGxG6ZcIdsJ=oGOK z%jMP85F&-&+wmL;iOlTmXC)=2VLqGMg#gxUfxc#1KE9c(^mHFX9i7>Rz9@o$=hH9= zg}4EpsdT{uyL5UCGGW@z%R-K!goK1C5UujRr>76d(8F>|OC9nu)L4C8 zSiP^K>yNfacd!keukS>P%1<8QXhjO?5}cZe|DK;8BJq3{Cwe_^*+nBCTb`&P&RA%) zd*&r2yt0*no9a&Yd*6NDk>8e+uuTr>tmogqe})}k@e2dCSxm8bY({^Xab}*^^Qvyf zI4ybIt6{j!(D}&u>2#|LxRvo!yUXf1ZPPX%>aj74T&H^7bHo%h!|NKkhj(Y{kUwmi zw~3=_Yk$KrWvMPjoFW3$N#@6DC{;>+-3|C^OHB-<`1 zkFlV*xYnW%#lgX$+|tt0t);b<10C*_M;2s`I7Y6W;^lT=q0wbchT!7{{EkO!^@3w$ z9wQ!zpfYfBZcIMTXn$OLdA^C6GYu32j}-fl<>pfP`1lM?OfdYtp{f@@oJCTAMU11x zK}x}noUbzIp$zH$!|9<+19>1^x1-0mM3h3(NEFl3{<2WV7tUeEz#))v#z&P1+is$! 
zsEEv>TY(EI7n7Knn6i(rFTua7`@yMv`K3!Af3Lis3!%uY5)wI&kA8mQrZ$wAO)GZ1 z;#KbZzgar&#$1HOZ<;66-@bis0P)K-<5W;n!vHTOlds-p!XYg^M)=x1cpa&-GdDgy z7b}7=T;^l+y~dCt#T#|fUM;*bq3%6BWF1WsDMmp@X{(|z@56|ED;w>E>}-!kYvH-n zF&nUEdd;qd7L7}8tt~A&=olEfs8k3bmH4pYE03Pss3|Pa%b0;xQDAZc*RLx>9si)L zq~!br@?!eO39*~-g<4xn-t08ZpZ3Jy8=6*}ab&9d&mx$Db=2LY`AfMnwnC#s3!dJ+8>&xze_}D^CWBLBXl|v`3YurJ(Fmwz!c)`Z)YoE62;?wjXucJ{ z;M=SR`;f*C33;P$KI`k~;IrtIW6sRXT(7UMdmkMgu^=EKs;<9pTG%|h?Y#QwlGS*g zDG&rIK)`uo4lbX7z)NeF+3hvooKfv9^a8D~{>@m~*kt3a$!Tk6LKvp(lHO2$>RJD_ zdl8?N^?4?_|9S<%2f+1Itr=k43cEF zEiQamPteG=l#7b(i=?7Q5ms2?>#{q}TswH1Y4*!VjKC7&R%;3r=5;%6jrpo{bQmAB zFN8~;WvNFqqb2D9;-yFBX7vU?e*Uz+JKy#xn=zQ5ooy)1&(H66?PvbI7wmo--gf;z7`+t*Llnpj=m2V?Fauhp} zTyUE`@t=jKFSOe1k7?=X=vX{HK7LV9Rek#RZ>H>B$4&dmer3}J%KCf3@jF3Nq#(>! zoohMsZq%_eJ6osm zy1MYi#l^=EM2rAPnz=)(;()3gS5!`BPrqC0do5gk-O= zFP#|JtZ_re!yw31SbIsFr91srd#~L0KDHxKMc8tQka4w%Gx%Z%{;14(&G}=)qBwHO zuAsD3$;j`lkD7;v2W+6Fx3917&*}Y309M;;i;H*5;U~tw{MzufqgxAnd748E>3Xmo zOTRh;Xz}2O=gP;FlJas~oSCd%|Ek0ua`7A`(}{rdT0_Zx_`L6oxt1G|FA&e=PWVAW z_&XbqW4^>}p%^XC^H9;bg>L7@C?U6JGCOA)@gX>gK>z=10`FB&sAqq^_QNrRVkLl&kD5N;7ztB0-xRP&PO;J;a&x; z*;rYLy563y&))owwYBI9O-jfT@ni;FRo2w+-{*X-E6*_p6|K(4o}CZI(=)oJYyc$D ze{TAF)LIp<&LNhS!Nle$7mp>FRihLk(wx(kOX-WQ_oIYSzBt1#Xl`1zcp4N1N}UQL zMNmOqb#>?Rc6N69Zq@=j4UCMI0Z=74JUSA4I>q7#DE(MgF5TGvzq z0#*w$rGzy}(Zfp0%5XsJ@!6&8d;kloZES3O_eKpg0B`j5)4*k)Rm=$$E`DrXIb3aj z2!1d4*xuIG7T9_*E;7;q((ZmTyD>8>e$=O%2|FdF`BqPsW}=J_t)iLUJJoM0qtw5O z^shciyI%=0vJSqyiBLh3^zmr{-ELZ{m~X?(!PRU-Lxaiu%8HY-v-2uLC{pJVA4Jg% zG^6=bw@(1sEnjX@yv(~F)z6_cE{WMcmQPb*$5bE1bn@go3gAz-&Q7#`|E?>AyAwNE zHPG{wAtE@q?PhG`+PODJx^hm_$cUA_?Nlm0J3G4uoL%_R(u(>8$D`c0d&UlXxLj!} z$<1!N8is}}5QZ}M8Z4C0oFroN44AL^3Q-0n(nn`Z^*>&Xq@`gz;!wxAXaR*Dw8?kKU%| zU1thv;K~EOg!xP|<<@o-C`G~R8&z7m^f;5>9a*BG$I`MsXUx7lt-xlopX4Ncs_i|U zL<$KJW0aEqc77RLeiAIxXMyZG#&_{S&*S`ItzO-v8T7}ltoSUY#L|@l(Un#;2I81v z-K}T2IP;Tl1*XEo+%}a^LQgfck1aj65fHv)k~%E(1tAdlNaKg&wpj*8W(kAD1*2%&)Rh7*Dab`bh!%>AKu#^2IkmXvJ_FiLHt 
z5?@idm!t=Ebor6LB*yv*h)?W_znrM#nA`=`+fWb(#YFpfJy!;r^gwXk8TRK#NSp2|ouy8O21Tc&O$S^Mh=+ah|^r+nPM z5xo56v8o%51<#!AwXi31OA(B&#VhoigS3?~&Jqz{!e(bLTFVcOE^;yTXtDd8U#q8* z{Ic7l<{OHeEB2j}K?$-NwcV1f{(R6-dpjeIA{nAEyLr)6kb7!Ovnh*j4epQ#z?ZE)#>Sk#8YR^MEsEuYu$Vt zSqf`&V_ZIuLky|BIZ5+XMh)Le#V+hg8orn~<5?v$N0Gq05jPxTC)*f|Xbca(ozkQP z0QS-|G8URX&@45c^qz=xcN23&a{jIHwL9_P`l;nVz#QI>%in2kv(QaBRDoGqL>rx* zBz#_L0JDvz831ALM9;7e&8Spf?=qE;bvkkc{KqOk&COHqgMrzAZby+l8XDSrP+j<) z7JCmWJI7bWdI3s_z z-v}kwR*FfErLFlbrN@S~rJqVL$@|%!?k8&n$!4*{lZ12Ula8yKo1&7E@cOp51oE*z zTa^N>l#2_8i082tNU-Imo!J!?>;P-3C;HsihWK>IwFniPAS2m-mK}$#CKgE92r{8@0RgiT16tK9ow9xFvK*H~6w$$KM zS6`2X1P1`2>hxC)Y?RN~U2m!2(ug3U`TuFDN)}Hmcw$UXS<^7^ybl({g*Om!f4pzB z(J_^f(~pQvM2wVDkdqs`ydvQw)*j!Hi^t|9#uIlNK_bgKolEi2tF?K}o)XpFEd~1O z!N&FcRgLVC5f+0NB* zhn_<)6V;8_C&$n05|hjte;5VU#~D8urLqYFJhf2&j^Op{*N^8Zodr!zsrJ)FAEsmp z)C~=lKYZZwX!ZYX{Rg5HB%7}ec1P&_UlmG@&TB<-qhffmMfEY~WF8t6mUqDI66x~N!P&=UHtw{KTi7|4kp3Oe`&eF*L4oP9# z$uZIDPyQwc2R%<h`G zD$stf={?oe)fH$5zGh~MGBpUK-eQx|kfKkotf)#$!&Y|OGmCf}{{0mM3-{{3?sZOt zkRUzQFG&fv6`WE??!sjFQAGKYYLOy%l4y-fpOd&A|Jys-m#Y*=NJx@W>_DWV)j+|8 zAC44lYhX)xT(%jNQ)Dc+yg(;NG5!;?!mM`Q+8-gaKG&(%eR6YA9hcdf$-xAV?i+0# zg@@i4JiTxBo>PhyUs_u`6cah+JLn`E8u(|+^+RSH3R_yzK{c##T;MJ(Egc*k72On@ zTV=svu4D&olw*kiq)vlp}dyT)eZabd=-9@Ulu==?6LDqsuz z{ry2~0eZlKki1dHB@FXbRvrxlLrQSBM#5o_QCqi)jt^nR&!jGMvi)O8jYN6QG%~A# z{`d0(yJ(v!xKiAlR-1e4_~_)W)i&+1ZKM(OVVp!_E*bcYpJT^F%C~k({>;b;)gRT< zMuwBNW}m^Wqj0fPGts60p$ezMg)azB+BA_2}}Q}rDJ8m?;?)`PsJOGDPxHlgF=g) zFPS$N<5}JDDK($Z?#b!>QLLcC~^e{C;rin9my+2 zOy?$~VfAkkZEMwRD zP9H8c#<5hkVP}l>|M~?3QgT6aa~cS5>i7>5n-uz0XDHK&nO3`+b2qL%+VXGRgYD`F zw6*)QK8nd$d`jA6sVGbywEBu9Cl5c-vA0pEiznA^a{u59VBgk`l?A5=@@H72b)oS^ z`)HqK$Hio67d45V5Lm|*b|#kOTUh!A0XtG>)$K`j8uW~B!J?8rN)laFY5nx5_C)wf zC`WLQRQHI}RjuV-a-s#r{e#0uJnfF5Nwlw3efAFdFf~^c_F-}1xI}5fBpnpI9em^W zuI_t_ICRRs0gf9eHmc_4RNxE$hwHOHnrd8u&yN_%Anjk?+|+*9l$)(ER+f`P;ISPq z=;+8+=4emK&L+P;S{lfe4kIKbr6rERj**+5wE9a^o*(Zw>{nZi-TS&RYWA}(47pD 
zvX!snr5;kJdPtIfBM2!Y->S6e=;)BB1YNg~Cd3~A7sD1E6GPYWu6AMEuLc=@2HJ}qU+c61o;UwZrC14Kgrv9JT|X>)^cyQjQ4uEl%C<^&NQFBnT1y( zJav<9hx2b=a$Vj`XihBLZt@8GSxQeC(}Eaa{FP87+%A`KH2KSBmO9G+vs8le7H6wIs?!uL7dv0#y3546tsrwyuvNu?#jPObbe>R|HH_x*uNX~d-cfD z-7DPemogPfyenlxWA_n=^*#JdxeLWsv+)`)4qnM9V`b>qhAYn^KMKv`E zcq}?qW?iq8l$9sEGjd#$CbwP1?kZpID=AuUR(;ppUI4cZD#77eXO3zHf3@?9aC&=HRD=i%z*m*^8&Vf7~9UJ?5a`FubYUo#rFHppU6KH3$)(4qi8q~#>hoRMmGHoMjad?T1yFB>+pwO zD@a6s;uPscbfF*pyf-JvBzVj6@`9t-TfzZv!RfTf3-aW6(`!RR!{+;cWivo_i~6Z^ zzyhNF>FLSu(2`xmxoc1do%|vA=)K@>pU}X`T1A zwCM=Ze$UMf03PhUASqDHHFb27PZxmDHW*7L0xbu)D1i0BdHsR8n%_$^HTP|3M+fB9 ze;8e62);LZUVrxgCPX&~3+$(i) zkB`J^qDO!GQhMBP6ob3Ce0*XtCFB{IgwoDyTpixoYQBB@2IS%D>UaRWCa0%+=I05) z5+I123C-WCwUGd#Xf&NK3amgCkc@z$`BwYe)5FaLkO<%J7p8RVYHCBiU9eYq6$AjO z&u>>`t?S5x$7NMir(CZK5Ore=5_gKg^xzE`A20AR-|~?~T3Q-8z84<*h9_@|vCHi1 zoQZD?I4JQ+NxgOU(;(2y_J0=wwE+-<<{pRhT>vWpG{j~3z9#20il!%Y2Gz>k)t%MWM}=ezWVg_q!A1jHu|0vz;9q&_mw=5UXTk zL;vyPM^=-z)De!(%$YLX>4k-%ov{q>yX_QcI|L4UNoizJjQsNEHwsYx_uWA$kdu@1 zdb*lZR94O}DGC4g&jDQPr?nH?iEOdMmDUt+?r`X1;%;Eq6z!^0BQc4mQis<8h5)K- z7dk`S(^PL}^AN7p$Mq`OsSQ;6N1#qi(PCTLy+>-7jO*hb26qi~-6Eh(tgo8`APOi2 z`MskfI$RVi^e}a8KJ^N(XcqDF*?sqjs$M{{LHi$2#zK3{L2d2{eBKSL0j?3y&k+(6 zD~%?Tn%+>@1@$3^^?>h?%Ebg}5f~><4{Ik9zI~H5HYS78n>kO1w$0nghIY=*HUqI_ z<;JbaE3KYDR8Odq0d0phz=w>H;3{)FJ9C_t8sd|aGvgY+KB+!cfaDCJU(uLzf&1X;?9|I&1La3->o4htH4vvnj!Jb0%AP7Py zCpKWEpuvu)@;osCD=jTAKY*M@a>m_GcE(OjNsNjWb4~O0@-yOFz8HWvLGn!#a3LQW z8Vc=Mx3o5q*VQGc){6#Y2QRT|xKi=7IVT}@z64NI0|LasLR$Al;z4gvPwy6=;vZ;M zMITRlfMEeEMlM2z8uV7ZBi!oVkBM?-r#qwuZR?{p7fU+PeHOXp<BQL2lXB-`~z-H&6z>`2+&!`gkR3%qAb4FJMr5KS1V*iG~1e z=RuSFy|Gc?!>YOw%z#>DuYc|)?03M%t7P!U0ISQoN9))AmLb~O)bR>= zXIDOAV8N!g4S(v?)YL#%SUH(B7rYJE_J#8m0f%K9`)0?#Faxu*enmAkdF>e)vokH` zJ}Yi;rwRa&Q``;8gVK`^x(qIACwu#@%`;31KbF;|^dc-YgtI$8Gz^Sd-y7#c4-so? 
zYb_les09HV2@aad0Y#%6K4wvk86$`D{B^;Ro{1?UJ)Ia3JCXp&hlGRxWcGW0K78&# zH5z-5rI-b~z0Pp;DAWM{5j$RtEIV8sW?y`-|z76?A0nL>%ckagp};R0YC$YAA0 z&Dd0Mo~z!r7S*FNoJ?@|o@T}sXLJV8ljZK7)ZZ^h9Y06}4-d8rxRR`FZEOARTtOnU zb8xT#`kv!rU4)qL_1`luY}nx2zW_)v^zw6aOM&JcSX#h+0_fhu*0>zpvknDWQ&W-v z`KyWF(E?QrxhwlnQ=b)|SGC8C(~LVUmEVaNyzO>3Dn|ieILgb*Z$zCmwX}KzUv2^! zDr|X)7pg7gEeb@{RZxQZgv1?k5)!&M`l2Qx?dJ}z3JMEBcIof_j0(aQ^d!|E4|Ght zxAC9|Wq-fRjFYxpl@?o3MeObginyU!$GI_MBH+5V+^{j0&9H$sxZ~`-;$r}Q6djkW zW$gkeR|>aUz|I6-Icui*7l6ZHAy*s384?ufsssl*KiEDTP4Bw{rJ)vpCLp;sA2lpN zkC$Pivo*l`P!SWK(2G`_ATn66$k;K$Kw(ZK=%$Hcxc-KvX><| zm7o~Y&%TzS#wthx2Z__NKU(HSS6y8a7@Q1QmHe*PpXv@@969N#ERm=H@P}F^+fKVt z>lz#L?TNujh90Pw=aZKrkl&%#b$;$zkYog$B&_;1p)AXzT{#IzVaWE{2&H-ghj;-ba=;&Kbvztz{wu452u_@h{!W}@ zK}Sc2mgm`@hEQw`Y%CP(?^9j;Hc4=vQg^e!OKlD@|4 z=K`MyUQS+~Br+kp$ZOv;xOsVdyFw7XuV!=?+Pryy6lme@&KqyD<=RaDlO}*Y4V#b? z)Rbv8iI{=Su7j%%YcGnmW?Lxk2Ye2o-Ck|#xa^K+IW9N-FS1%(OrIBK?id!%-Ue8P z{+^2=lqra-<8#;iyq<;g=~4KuWlU@=eZUQ%4$i>662uK8D!_>bTxn){N&O9rhkmop zFNgZr_{FaSNKg!B{M>>9-J+d)bQ`2-?RDUmJ1}#ozCMgsU{Vr72Z+gEjBk|AM zRs)%JH74`q62v{!5d_WPz`jBX@>P5e{zNx43Qcdj3EsD^xa>az^9!|eGbW=s`2Ldvr80IQX}s^>_o=;ED{e|XpV36?JqM;CC*medPp2hSI1sHZNFE43p9Mx7wHvF83Fm1cxZ1^)k z77)yWjM}($C&k6YEOO_`VuLUSsq@W3a%o}kQb4*axGKFnoJe74eb0<=GNPELIXSJ_o) z&wkjZ6NGAnkMh=gCnvU0*aYYS$F+{EqM{-hxp$&*tR1+=Mgk6P6@XL%yu+VAQ2&a4 zo7Y+B;TW-Pw|qRHTfq)MVM4WqEOp#7Xg{o1T0TZtoA1Ih(E;2yFfOnNVZ_Y?m=CTl}QMMZDy^e(Zsw)a^dLG#P=1Bj3}QoI2F0WDdl(qydjaikL{2AQ|9YbOk# z;QcOBTP^9cDB#-rXyX{WXJ2vJHPuM1qGpZ7rSm<@C0v7O#{b= zbF#A`KKEN9W&yuZ4*jsv!=Ue!D0s2A^{W8$RsPhVOW=cHMaw^CW)>ElS!VK~u(S9N ze$7jOZP%-_!vZZ2aI1dQhz*`V)O=+rdbD$^zBbgW`{lt;90W0Nov1vjs^bE4G7h3P z8^AdL@`4I3QAdP3z(dCg@$dY-shgq9#Kg?JF)ngq1&Wpy5ZjWI`RPg%Q?UvEx(hRb z#tSrz5qMGqI49IA&Yl*Hl2mZIrv7}u&ASrT@F0R$WFpXi)7S*6<>1gz*S3#!!u5B0AI20N9rxHRl}E~v@{+*vuNIUw_)hru>3se%pya1t-WWS+}hOtFK$ z5?tDb1DI0pLGQ)$Zk81q(!f>-LK=!cbae+nV1qi6X;v-u@F$8joR^+G{}-9^tH1k! 
z6cMzacuT&w`G1<@^#AWV@ELiQqyrsaWG^`LUr<3}gd%O#+0QO@+~+Bx*-d;U-veab zfwvDL$WDI2dKasKw?t+}y(jOpJf)+)L^v8MyYI?vDcVNlxO#6_=QMp2+pkjh4}hOl zmWcJ9+D2-JpFE3wMIsS$C)UulV5{LCKP9aiDj*RiJR4@ADu)`EZPs4*DQ7a>HlqGS z3Kj7!s|VWl1t(h*Vc(9-(E)YcGrUDs3{n}nlOy3vPT*(GbjcZW123vC-Q0Qd7+3}` zw!CTtcGz@6B$_Kq(bK!6IM*UHmRwSmPg!~g;=F5|diijs-jRbHKfjmm)sAp-v=g!s zQr_MatBVj%QcL2N!Ag6R?ZC5OAyX&?OVYI1imlGBU?Q6{cEVibDPztGG%}{7fAK;z zAA0}w^m;D*aN!f0&txIKrHMQI{YFiMv(8^$cn@}N70aJtX92s)y$GH(<)4^=j!&BM zo3@8aibD)i?jq1_^#%bp)Wiq7{(|vZZ8b)CQuQy!4b54|YAC<3!l{}B46Tggo*OC` zZgQ>%Io<6tNB;@V^yH&t_rde0i(^x>^0b%Z&odUGCXXajUu=KnBEokckR!Yf|B65X5=+ujTB<%EK= z?iij`*ALs@Txi^vf@-lY>zAg4R7!lFIg``=CAPSE7X6k!bKy^2v7&Rzy{4MI~8Alt>=r z-4t?|_WyDPFjt$Ce^CS|c&(u7u^Yy0DE&wEm8;4qfMjQT4tu2$>qQd-+u#LJWrV%w z?c3*y83Lze$Z6VI?#N*O^^eN@kwLoKxf~q7b!4eZ(a+DbzL=4J9b-^{^V<8kc^8lk zk)ZzXNeF-6@Pa7P(I-YI(4J3&u0YSF^XXb!VEKKB6Y!%d#RCqGP;zRD|HQdxmlF33F>~}7*R}D0*_DIDfLVMDvw_F9*7U|8H)aGyU^c)Fo z*=4F*Ax(uAo_X^|C1P2Fdbq|quLpN0zlg+7A$8mN?DoHPCmb$Q=XOPYJ~Ds%nsE3X zm(EP~DOWC|OR}dUVPAUC`=%*TOS@c3rQg+ZaVVFCwOo)g*&Jhq+xnF7kGy#v{P4R;g2P7-1G>%vfXwdlfq^f0FAtqB4{89~WwrWTIrPX&+^a*WQx(f8 zM(#b?RKBDF%P8uCYkX7wmOui_7f+08nlYqWA`7{Kp{Tc+-`lU)5>D|44Em|zvhPfT z)g)psz22O_ePLUH-ypNLfU|QOUk7V{6i;4>9M? 
zr`3-?GPnBmFn&YuiCCKK=vA$h)e=J9JAsg*Ph4n9ghIq}r6*2eAw~9}g+GVN%_$~6 z_I6<8gh*CB0JK`3tt-d#fG}FUA}(H@I9jMhG=sY}217S30FBJ7-S!JO2Lk$2N#Y`H zqRMV6Z+ZfLZXXkYg}Xrdf>gxeHuTw;Qv2Jls5nfq7T6K(Mhla3b*o=tC%G?g4sNn8 z?=0(I_(Oi^>5>E$lmA!Sd+l;#!B|dPV=K~Ox{5liI73PI?+pj{o_xx;IKhDZOi>*< zEQ)>dz`%&;P~?tN&Z$`h*16>%@2MGu9K<#0wKpUYZ8O+p4>8elQ-0=fa-8kW_wtM= z-#>}kTx}4%bw^6X>E?>QNHH_{u);`adMKWModn#pzExHp z5zZL9PBi0eT6L%g{VxI`A)#R5L1qLET`Tydp`j75_g>Isxyro977#rT)xbPg6!1?o z#{y`0#mVjrZ&KOzP?aLus=u7h?sDl&$bXSm_z^rAqZ?bAcPED_93);zEUoxD-4n^Y z<`bdFSvox`63=2uirxU0`$C@`jo6+>ZG0{zH?C}oRo`2K5b;jZ`amSCX`0>$CV{CP z*yxo&fqx1T4Qh|5vt_yz@8&l8#|inQVQJ7aP0h3R3^389Wj1Wz*Up_Xzb z&3=!$44Oit_UpdLPaOP9m_8@%b>LRA5{4P8b?NOw{*iUmBQ!nw@*|GZYFJg%q#w;* zEy%;VI2u1ovK}O-IkiUrqy1+lavx0{x_&vA!TxqFB;9gBUo&ks1atq)Z`~Zfl^MBl=P@~;x&+o@2 z+riVtT749t7BR88ocwQ+WXg~1dsHwzmK@EO8gCi?_D?VI4@f_~;cykpHyPoKzLPaF z=*o$=Bw1M)%b)o@KtjiZ(9r(gc4KhOj>_j`sn)rPWV@_Hv$MgD2}EnjeO<>1sd3On ztfrBV?}wQRY5n>y1}RJ2Tr9LNlFAohD(2m(#5GK=#0xsxVV|HS`FQ0soU}bzJR5lY zQdNpW6i!O^$U!z#^Xu997mM%kNTO#sq0f(;rImPEuMf5x^Cx_HdcV^anx%?|Ozz0} zVFbWSqI4&y{D$rP#TzITUr2xxu&LiBamyPigb^wei@tsvmt)ak*2g4Z2YL!%$%6WU zl0dR>B9MC2K|0F-<8?d0-_b=R=e_3EkU6odDMmg49f;zKS15q?>iQY{PQ#P??n+=r zrnO>Zy09Rhqmt<46)vU@PfMm!G@ml(STkndj+E)!FIBB?N(c3jatZq-G~0oMZ;@pA zfu?WgO^W^}G+D65-S*O^qGX$gg?ksuQz;N>C2PM=Sg2Hv&S-sZoa617(hH+!#|lVb zv35>m=Z*9@3}ZUJZ;e&#%ySy&=!Pe~y0mB;P4gyXCNNLsB9Z<;*ZogaXO&XQRw~6X zDAxw7$U6!XpM@LP)MJn`U|HoZ@4o3(w&9EvmZ^`rWS7#r`&`O`Izi};AmhsYrPM&B z7XQ0oBA3=&u3It96+)=GBrTme+^B70JCzf=k^JN7v7@;EX0{Pd8+ zn=xjJ`dKRO=JFEl>iRmny6NxXxI+Y(zFc<~Mg>ea8$!ssCMp3t>+Ptmcn>^*5#_2A zDhyE!wR8X&1cmw06+XF-{hEdEH4(a$AY3H(Gg13+5h`22=NzO!cA0|CZx7Gc?O{83ZtNHdk&;Av|pV#q7+@yq+B#H330&VY3n>ss9 zpTR^UK9~fwYbq?<=Br(J)JzqcCq|ND2*RXqqb1tqRhzUPffHKRm2&& z4$(0+u6SgWmy}>?Mp4sezoDcw^*fU#_uj$+6RTCGN_Wa2w^=DGE1y*J<=Q&^zQ{yU zsho?_W2FbP9Y;2`-k%3H?~D``hZ5BMHgI-;KhdP9xVSU!{6BHQl2RzcdOK4Y&N{Wzh9g=^wwQldnVe@rCr-| zh%QB;bw!-6?&4EYLUJTpt+jc>I z{v9w;gXhs|4494&VEV$%_2t4|5)P_(MNiqeXHV=t0BhSnycA=Pw1b8v=lp^K8I?k& 
z95LWNf*RU7fd~E%;3=fji^QxNmnZ}X(TrO?Prh6v8F`o(Ue2$cfr&wG-)qMKKqZ!% zao#QfX&b(A)u-&Pw5%clpaD&0UktEj9sUnn(imdBQ5+rfBErJ=Rt5%(m7j_LUp!+| zyMFU9VYss)!(Rm0q;wF``X-e(K7X%)wluut(rMfbC`4c++VFF+eGUMkjHdYcAkE{x zTHrYb8d^DU{k-gYvvoAH4UOKZI}aiZ8O?Y=i2Wi$6~i9?smfb^5!pvWrWhTvaN z!U%zTterbB2L3ehjD&=me=P?UZmWJ5V>REun+;3&k^sXWNJj}04+|4p4Ngnx`BQr? zfXxg%xp@GN|HVn(qoLZ6c~hQT4c zIa(@S0por(fYRm=6MK3AJhFrBXlgF#8!8FdQx3!g`VooF7A698MpL3r|%-cWK*9f_EOSyC{YRtK`f%O6mb{DHh42$ai?x{8W%pJ`QruC4uedI|mv zRTztf8tWDyg{OfH!+=ifE`OXeHjtJM!H?>rZ)u-kA&-;y1#&?>IP$!i-@aYR0R%)= zUtjN>I7F*Lp9oZZibWo#H5Mr)MSuVH!m6tG+CDz_(}2LUgpLT-kB^UII5z&U-;4to zar?__lDZ7%7Dd2uIgJkww^Rb%?Gn&&v6UwoH3qiGU0!xQS@H27-hrc;3{H&TV=s=H z9k>RO4s~&)l2T?cB3t19bYMCMY>WJe|Bb5lOvLOU5f?u{eUA4I(GgItfz(pr50HQs00@hT ziHV)209l#n=@AmlBsI~edKtVeT{`OEp`){Y82-jW6L>$J!&^BgGIf6!29_a?R4C6M z%x@)uDW)E9E*b$o$tu7m4 z2IjOMK!U9R!(xb8kX_-%CI6K+?~9EOUzp5QZ9tzx&fc;|Ep217;|CIE1_}(Q)tejxIctLzSh+_>(|C@6Trmem2>F0m z^ZNO+SmGP7M(i3}SL>-buXPwdfJfMBYinEC@JHf+XS;aKeOh%G=*{#krA1yG3gFpH ziXg@20BrRdP}z!y9D#RHI~RB3%~iari9dh-Tx{%oa?)2V_!$9d85tr|+FX6jP$$}GmdG5S@Y)8gxW#mgj6~04T>PZ@1q4QF@gz_YLe5VD zUs|E!u6FVCf>pgiCm8upVJUtr#~=W+>FBL10U>=B`lhC)6wjb;&>+o!ZXxYh4 zz(%o+5pW8tZ(?%HlcqAUcyt{FhD%3xFJP{j=$QcNn_p1@*>!C$Ri&Q}0o1lXGzYMH zo~*L1v9S$qo(&F=`>L>;c8EGOE;SmmCNF{Y*QCEsD@qM5C@Rv>(z2>qa3mm#>6b|P z_U*3YBI^V?T;7Ppf*mmkj4UJXDw^e#ArNPNLbMxtIy$_cChZ1Ljtu9CQB%bRxTQd< zVNV=#kLp>UZvi8@dr+EDNLZK|Fs;B>odSO3LL(~}L*-9X!B{xDZU$F8oi&`St*H|K zHC8#!4+1KN-nkrbuz?%aQ zPS(hFY2$+91UP6OU`lLeYikSRFO`yQ&I`pK{=6pmSx7JVAtH2OVB6iw+}&E(($yM# zg5d_*H5(WB!>h&3B@F&>ak6l73UhKYxGyUFpC>rESlQcn|Nl?WVY0XcPtf(z(sS1^ z_oRI9>SAN>Xie$v{ob0={=K^;42)Oi<~bVD2OUPnVYQJ_#cd53SQ=DzOH>>h9OXn@ zSR6{3PrN-=ur#$=lq2(h275<)NqZ4TJ}G=xVu$%hfaQKJ#rF-o1&pFBM5aQ@Eckx_ D54kTx literal 0 HcmV?d00001 diff --git a/.graphics/favicon/apple-touch-icon-180x180.png b/.graphics/favicon/apple-touch-icon-180x180.png new file mode 100644 index 0000000000000000000000000000000000000000..c1f7cebd8e99255f428bf1bf04f1304734e49131 GIT binary patch literal 
23371 zcmW(-1z40>7e=HMq)WO(I;6Y1Te?BI8|m(plva?gB?Rg2mJ)^#DTi+UJHPv=YwR#{ zzjM!d>uijgitL-$M6coC;NHl~Nojzen_%Bjkid_^6G)ukCj=XDWpOyT`Xtl`Geq!f z3JW<6WjHuLIyksb;c#$w;I}^gfrIm8hlBfT3I`{U4F^ZylGm;-1pWZYTv1jE?iu#a zmmd{r;CGNc*lJh9(6^?sV!=xl@CD*NTIjmu67M2pw@|_y7X-ayYI6$^xN7-EQKp(^$** zjWJulw(EzdWSiuK7pu#eXnx(ser7Tyg8O;*pZ7)f`KNpnM+phw5iucy33IWrv3vG8 zS6o?RmyPl9@#(X(vypN%oP_965$SHFtG6MNqz6a=$#Mz`*yVQbbgHzKa(SxxNYFRu zR#tA0V{qsvJB}L0QUwGBZQk?pF752>gexj2DJ4v0@hM|6jn})bwXMJ-hOG=`&FzWfHll z(}wqEptZELG&2)ZRZT;~8Tr`g=$THfF^h+hE}_3)0ggqO*AOg`xi$b{u3HR|R!_^uZ8l>#(>=b-`Z>tdz5o>q%_9DA(2G~a1+XcAr z_VY(%ct-1jncjTyPPxcuW{V}mr+)j6D>9&QCBO(?8N1%tn_TNh2CA$}<> z?f>h-w}+079;7qZl+kECq3|?rBhnX&h+4OLOy%S|73uRw1w~v|HY(t1#b!6>{YP6z zM@J3qOV^6I+m0Xp*JOJvio({_Lwvlcsi_EG(lavbz%dtS>FM31f$G|mj>G@l{dnv= zv9Yny$TeW|$&B!9IE8wz`(dm*TU%RuW`2I&5f2aV8vJYtA3I>KuF~`|evX}oy+Stc zrl_-1NLRr(ZS(Z`{^;l^X2~S@$Jv2ua87YCm21o5jZ*Z7TREEET<0#vU}&64^h7Q% zvSiV8l~e2e5?PW>{?g&)eSiod>UjcG1%+~&`%TtZ8M|dh8WR(f^YG}XpYOvBG{pb( zhpy9R*Lu86$^I%sH&a+x*q+JL8NvClkF^;qD=S6=0|OW;+3aSSW4jISVmYm=Rm~jO zHzf3`b->+WX3hF2!+_g9#2#M-zG;e{f1Vo7VA3sUEDWeYd;UH-sd9S*NjeXHz7FQ* z;oNjZ zssW)%K^G;K*4Bx!uAItgFOkuQhsgYugl>1zunMPIHMJLzLu`nNi96o57h3c_VU`&B z?q*Q&^74KJpUxYU$O-eicd>!{f}1IqmzU*cgaive_%wUB>I}Pse%#h41uB(SRNR9y zDIno>TEhdM*emk(nOsKXJ1GGiKR-Wf?nDbWcFfwx)>Q&l46LR2ae+nBeu-Bh4OW?3-&qnXcf(NTG>@FW&Xh*}h=68pUm{sHiYdPwyyA zC$FxoROr6m@YnAO;0GsIy6Uso)YLRA`mlwuT|Y#Y2C6K|(9S>SZ|`PR z1h@C84n<%?wXCkrg~TP-jIeNgY>fCB9FfGD+ZYQcJ^g-Y_!cXEq*-0?bCB=z<8_>_ zN#Of!s87qXLyO7%og{Tm)cw;=t&oAC!w=TOExw7jWVqI4gKTe;|^m}Q_o3x zd3o<5c{~G5=@ZHORR-=$!Nbyd;hj@{J7YIjzPskN>iFHDITp0o6eozZxPrW-D$V&c)j&+o2CK8^0J=30>_n<7x=VmKvrOn+Yn2#h#M|;7< z)6>&2OG9Sc=U#FOYZ|j2n59^b)w#li$6Zi2#QS#gguf-+Q zDC(wWPtzQ_6B>dMc}9jxq*)r=*Rz|Os=B%u`9{Kg`Ce{r4I^U{gU4P*O;*rrNmW%0 zRgqg7SjEXElGwc>czx3EC9@lp>co_Vj)zR#v)g37Md>#Ws^^>V+ z<)U=MSVZ5+_7t1)?^^wrL?tQqoTqt_3rMj zpSQQSzQ6yytkN9Gz6+m$WRV(1mfFT8Ev0SMLTt?;KI1O4^4ycO_ChU-L6hw)Z}+Y8 z<9Xq;5NNpjLI1X#ibV_>vRG%+Y6P*Jf 
z#N*il2I;vB$k$zoYR{UI+3Bp;cCe~0c{8+8Qk9KB@FL);xPJ9;9r*V zeRLJ{7h~+NAs;A4<|spKZA{|l(z+s#ot?JSFf{a$*Za%o)SAohzE@{ELsevFU?3^k z7XaG&w0*OThmo^}_Iup80h#vCr^8?IJiNSkR4Ta6yY5f!OViIbIXO9bl%tUd3>5zS`79)QlM9A9!KvC74D35Y*m=kl!Qk!Ru#U40%_EbxY09|4=qtEjjd7NH0etiz$JJQ zQnW4zUYOaSDf(cpf{84t;!9cC?wKvOz>0nIRmb{C3+PtMF^;AsvFh}Hc6KZ|xwssA zO#i(uoVpHdU43lRuF}DK`WGdtCGrGKiaSq{qA4qFZpMr16Aua!0bL&fKFH1OweLxr zpPL)5fgrbrrlV3J{$TN&>5a6Py^(4Z?-`|n2AR;1_ON68M&ypaG&+;4>uGYDW!Fl6XC)?5f%ei-)z{ zT}w#4LJYEjN#4iosp-HotSUK9*w}KY8WjSD{Pgq;bmdcw0^vcP| zII=Q5&0f{f;SWurmOT*VEM}mfimYkoTP|N@hf9vvtuSH|w z{KY#5eA2kZgt^5SHg2qnKra01{1?w8M~gDw{dl+ZkY;ROYv{S^T~URLDa6zqeh*}k z3)=FitfVt`3%dB$zglBQWMyRqs}w3WwyDL%MV_{@GAmzIRaI*mY`ZRiT<*P3H(1(+ zlS<-1o*mL&1X>y342ykk-=Hexl=bq2j8&N}=a<0GlGs4>^48uU*Qo_r+ZdHkBpCS7>|@|O+{4z7S( zuL!7B4d_-pvQ$_mG;UC#lsGzY6~RaIee>o`@F~;!RorT`_Zx9WlLhf zx-sATdiw{!_0sD9{C=QNy0jV6cPU9815UT~yNKueo!MHV2m;-cDZ0)N^0-Ch-hP6d zkhv}l=}*$9v~*0<>G)b!n3xRq2t}pPi_P|I(Py{WSq(N{CQ=(8ascl8*V^0%I(^U6 zlr8z=OFZzxlAoTYGwNaBHTZ zFj^rBn%w>atliagKl!C(W_qc|wdN&)YOXn4KE6srmeodyVDV__QZPQDkc(NLjVbrT zk`UMQgCf!weKYoGCcyloz*IS#fQcl*AorGj6w>}nia&hNE{0f% zpr9_j7KhKU0{%5Or@1&iUDwds+A5{6^22f@^{SzPv$3n|Db({x;*|kIELAPw-iiPc z<^fzCj3p5Ojb*C&+IiY_KQDOlpJdEZWQX3#KnE5YQj8Q87%8pI(hNjV7{?Zd#$1&s z#g!bmSTD73v83m~58|5laHP@*m=f5vpP0&GCx~cho0Dw{P}yxJ9Sm8AqOWfyFi+D4 z%!?WtGz^1ocX9FX^gkFFR06`1)zsWv7Aa9w?i_TUTaab!_losXa9|fAi&G~Q%s$;U zC-2>jAO79gC66N>HI z+uG5XAji+Hwy1K<~g6jvQprEf+%%(OacyPtx3p$l^}n6giZhr$=NtMG*yi4P{&1 zxZ>~2iv^clsYx`RLD1?@!xpt6HuOFkT zJQP$_DyXZcfKqykL$5L~C?N0-om}K0rJ$fdLV502Cd-G#=3vmFN%#CkL-9Wu)|*F2 z&o$`}C6PY|N16qA&J;_hbzlEDJ@8)l%wk|=)xnj1yC4z7Mm59mp~a!N(}!)OMr6{r z6EVA-LU69{r|GB5<`egjb6RE>GjF_zM=J&Ap#2=w2%BL863(iwQOQQ4+I+E-5dTYuKTLFmcI*t<2V>d zX|IV>)E{hLx1_#FM*E6MlL|hHox>5s$^DGM7O=&-%jWZ;e8shdIp?CrwN-Qg8*lcVRR-B zo1iywdLrB7w4ObmJ~1F==3BiHurL0WkdQ&h@8XXvpc<8c-rQ+wYOd~&f5s7Z+Zldq za{v8gvbuF+6hKkd>-)#&Jr}-{_LKI**wcz)B*#D7Sn~gu^6& z$r+2eZ-E>eOVyi>i)iO?)nMTre%8={k@0nl#jL9G-CzPSeI}cUioX8mvc^Vt34li3 
zl7`aUfBy`5`DtIN)>kL=%120n0`ms+=G_GtUvKt5X_HOQ%zQ`S-gal+=s1Laq`vc)bbogl%HDA;ovo%nSf)039e{IP>&TOvE#%vv*Y3fnQZd&Ycz?zJ z)2w5?OPB<`aHe^3bPb>?l#X>LfQ{*xbZfK>S(l<~&y3`hG|i>_M@A9EcZBr!`LM%7 z%P;Exp8c@hLn;Vq zVW8Ronj}`DEsca*Y^2@bVN{_i%@^~ToHoDPpsQ2g)XT$7!ULxyhErWl8`?h@#YqoN z*^wdZ1@VIXFEx$7isXJuppoU$4elQTWG?dV4(WHbB>xbfJ8OdQ6}1!tuC?hGWTQ9m zgNP3_F?K(9V(DI^nma06PN;e91C`tOYS_L>iSi1?$ll zqupAACMI>z;hcM0$BwKFU1l1t^R-cwU^POV3%jpXt79R7wP&AK*M%NrQD?G|qb^D2 znQ2huKV&5=CSp@z5qv&ak7qiP(p7D_l8|G^(=cLT=#8^`hcb?1LPa`_wM^L1iy4=r zw;;LJsZiGbE=iY1q$erJV5kAnU9T(PtxlC^X_TuJ_+pIND+6OL_ zI`p1}L1IfO5S>y(t82lSS9~i7w|MC?a`z(C{TdM-sJ++GU8SzpJ- zFll${cs3pm3wTNziR|o18S9+Fcy6YxIy}nBrPrkhMc8rt*}~00x8u|Y4 zO^u;Qbmd@jj8JkBB5oY|E`-OTF+1J*y#Ms@}Z^9^H2wX<%PMkf5i&d`iBpSKnw)PdD(#nt?y^?$EGIzgFlI!U3{VCpLZ-H z`8JNsB~~ajVsRoYRUVnAJ;Is~^GSr5Z+{Ww)axswN#@bmN{6(ljC6%T^!X|f!k{hNo0$+}9g{%E-n>PfZOeHR8;=Q-IL2{n_d&!i1mWeoI19+SOG6%n)PWT|DsU zW!>GmL<%t~=H!|26>l`551n4mU*9$S^LL$nE&5RT4%1 z`P1aHQ2O`?-lows|rz?TYtg_F(WNYrRE1=C`2adCfpb>NtHX)#~ZQvcC>{9(#2XJbR( z;eBci-s$%bt%-|^Iso)yN|-hq;Pk#nqgLni{u-O)RxE>*-Ph{{HH4_~MNSliVTvfZ zGw8MzHx*_rRX_@n68oWB*pymNHy-%LCeaIpgkqZMbM2TKp_N2OqB-MFVO~#LC*rtR z1MC^+|3R-V2$8h3!x@-n?QA`NdkDNj3WJLgjt5$`1^6xm?Z*MEnalI9RwlE)gtGD* z;2F`-)0Y4e(7IM)FkNAQ*uAL=9`|aivrS**o1>M+C;%ydP8zAZM~NNt`}|hr4jYYh znc9^vPp=xETX}%nQ;93G5&;TWjtn5`nEwXq4vy)2J>ciGhA01}7UT?s+-X^B`?H(l zYYlv<;!RK%Bw6!mj=JCu)4RBU*TzjdG0wOrS1JDbbEtSlL}>((;C!0zb)HI^RUCro zy*7coyc0OL6t_R9B;=xT0}|XsWkp4xf72EC{bdA_L$-kDyM=`XW~27>(NP8P8`weD zwz+zV@5Bp<47iljLPB07lJNJ!TG->|(n`B06Oeh)(9w%4?R)Lz7-I&^(Q5J8!o}2a zzw0o`%d;}$qvFTCtU_e=qThvhcuY`Wj(+*pmUeQ2z9x^TR9VOFoat&`u%P z7}i~rFK2e*)WB7OBJFk)9a~<(X=DAeyr_T&9v(9h<>|Ur|7czG=hU;Xl3POz77l^* zv}Av_|L0zuDeKwwV{l|xuOn1`Jz~Fp)20WVYuC~)>+UWjFbPO${`7=$)vs+L@7O)P(RjDBkH9$TJv+}(ujc_DGFPbVOo$Z8(G>bNMoLW^ zPg?!Y;h}}8=_eq+4Sf{T)Ycv-mX6B?ACZ=pmWGx#;gtaHYjqQosgw1Nl|~y3VjhPt zU0q~_Q?h&{GE1JI4N(bTksoYL$geM+U zK25LGFNx2i)dz@UYNrrZF#k8~)789#%f$AHA(R0dudZuCmH-%xoSeJ$Uuj>4-onYk(6muXh2)OT0mQ_~9 z0SlgS))WwIWQHgn- 
zESGAG)&kBaO3TaL{{0sJ`}Z%O%Wt@#ZgL9Bqw7j$T=MQ68f2c1-U_F`yT6zcf4k*~ zp-IP3hNCx8l!Y=bP>zjG7Y}_KRHuOY;62?e30;w%{dOsJ`!`Jqt~3r^da*rLsM#CF zY7{HE2=L-a!Y>?n=tJs8tmcF)t!H?wI!jlvJxZf3j`=SP~dh$JOqHSVGX23!{EnDL;vIErq4fud(;KVxS%_ev@?CR~i+wMuKK0)jSazf&s?I)-`cJ3SVG z6x(oHt&&DrNK){^O8a`)(A-2wn9-;H!^;L=cZ>|2@|k%vGZ#$G*&TLoF?f8#D~ML2 z_xZ{yE|1g6R)^>XSII90d$;7S9;HPm6l}C#{}3fTyqh>{N|Sd=weobthuc4U9Ov%NC)3H7|nJ;*|%N7iCazQ{r z>N{6_-4_T}g3o^wr`L@ZL+GG_fUl&K%^vcW%i3i@TNh}CgCQ@Gmw)*4!e$|`9e{X< z8S26pFzc|+O+oTzX5M;NP?WW(VocGwyYw5a1a4TWsnMwBOC{?^Iinb~x z^+ZHE(v})Z(X!n!ibX$sQ!9B7=UV=SKY@YR$y=f6W((1yFM%eVF+An4^Pi}-*amqVkrShoQl_Ccq(pY zQ}k!#lJ}O%9Y(Cb1Dh|a&Y~F+pWAEIXBH(@UDSe}v}*S??3W;QQko8+P#PGnK(`zoA4iD9 zHx0UKESyP!v*AIO2wQ#uqnCyV`zOQ(B+2=`@nhS3Lh4MD_F#6PZnsJ#!E zo?E@HInLC;P6)+1cl)nrFso~KCo{>K)5cHqmfu(l5AD2qs0N|*Rc!Lke$I~7DqusU zG#?!{tIZqB8tgubn%qmI`!)*j%_jGAv8jC7v{2&_k;^Q}w@5%&h<$1McU8J1?<*nk6dacXdYGM6^T+=w;3Z%&+2v1M1NF#MIOK;Z8TZ$vf5y>ej%m+!|%7 zE$!&YO(5pgFfmaDxHm=aLTXoSW5FaqRA&GJt@EBzdHq`9X64yH-p!^Vplh-cFKASu zS8^uSjKeXlXrb*d<9+?FHKAt%^r*yU4GmN2xi>tHZ_fWl;@;hnuDMAP=rSLdKYh)} z;@NAs;Y@oy;oYlC-oBTNh~Lz@nvz{wE~SPiZ_;f#=f#^OTr{(ATH4wjadkya7@4B? 
zwn>i}273Vm+nXv>b#h__#SQ{M|4g1QO+ZV9T>8Zn{8Qh)w&v@r_BR}-*53E)N33Re z4f1QitH@t6_D{IF@_Gmo%_+~P+1qNvXHO5qOpxLC`9~3jPTK3#s%dCAp3bNVR8_>* z4L>HJb8f*n?Mg!g2yBJhMmcx*btet^0FD%2cK*ZC z25E*n#YJ>ujh8Hm_h&JGwNkD}c-w;4(}m$Dh%%Ajb@|M%qb=g+WtjuMjQyqE=}l*s zxrO{cIhLp5ym)2e-!lngss5a3-;BA-3*C0cMT?GykK$^~IUe~m#_hE*h<9W3{VD3` z5aci!86O9eHu9Th1;8Vrz#rV7%ohS-Hxgk#-0sanc7}@F0H)Z~8xCb0qa}F-1#{2} zV&os?^!_&9^$)}mRDpO(?V1lHAr2N7qtSRXg$%~}-k$Z<(MsR1=8r4&7Ra(mWCQ!> zwBOHN%rNMKLdArf99vgMMn_?$x$3+IkV-vTeWp{xW_ zFI4%FgVOTddZ=>g!5je*OLj6r4zSQ{t}925j>cpl|*8eQ|7VgLCK9&Hw29I0t(I1rZ_5`MREfN=_HYHEU;45~$)?d z#Gi63D()X8{?K>}o$}n9(!5>XL|ksHji{+%FF)wpEfBl@6u}nArQjb-w|QHv5LA#+ zQ80Hi$MrfY>7XACueLS6Cj{sa{L4}4zcJ9kKcGMWMv&hYM?IT)Uft1ZxrIn}x4 zWjS_2CB5G+d=D)S0@CVg+V7zetp?Tqtvi96HkHTkuGe7sDkCEU=qvCcaI9H}fpNEw z?Dhzxsx8kmNkKs6u!PC<1;T=qCxB?yJFN@YfBeV~(4Vld@T)ki?pLlHgJ}-MGYqlC zE=S88e-95=K+ULBq(z_-aQdEEg7zy9`V9b)J0K8AA`(EzXtj-m9{etk1xo+(3$GZ% znpAKwn-My4)O+L)Egkxud>}faU2`v&oLeJ2+3!ZIT1z_B_%Y7*<7UK^X!PciD!ZPn znHfh|7#$)aQFrjWm`xG0E!(iz;vA{sKyF_0o>L4OI$xmMYiViKnfAZ~d*ILCzuN%8 zJ%CJQaa9#lN*`9zK1OT5>kAC;DWy5spa5}O*q{JLC{SI7?3-s776yQ?F%C#u{ffiG zoaiGCFo>zkOVp|V@Bt&p1{lmR56T9(HcEve`FKn^u;?E+)Fg0Uf$lNCxYz&=>*{nf zX$V3|f^Jc-hy6;$bl={!N9WwvI@)|STb~f0}d5N>47zOcYhDP ze`JUaEw_vLSTKS8fSGE`9XSNCGW{7=S^1xlPRnWDct{B%H}?T^_RM5>tVq|*=a)cm z;^bK@S2wh^+I%twv5hDUaw&CO_z+A|Qu%Pn7qhcmc8-pKz>)*8$k>?de1FR3Y-a>C zU&!_S3Q)K}XDs^m&5mK}zMmk_=UtDBuEf%1|IXThN9yO#Bfulh77Z3T-bL8IKUG$~16^VemZlK)=K-YnYo!7xb5+qt z?&juZhhI%GlarHDubl(l25_^#LnOW44-he-iubF_8ffq$XDLb3fSd!;0n;MDa8L@b z-sa^lD101wb1F*>afBY}DaZ4@ev6e-&fu5;g4S*$gg z+MCEdI6ATf!5J7(tAScps4)OnO+l|7MDM_j=mi}}354N5FN4i8AV=V!N8yNg%gQ1L z1_s8kDH@8MW|A_{+UDeBM~D18O#+sg_1b@~uye`DAp$}fkb=LPbQb_I&cMK+PLDa} zs85b2bmlqTG3SMpZlK@z9fJ_%j zzH03|@Sr0*jWnZSZk^ii1HE-|QRc%3Vo>LyUWMvL08= zAVq~mf-Y-rE)WoxT66fdGRG$zxb{|7v_MJ()I0@rBS4mXV2U9?GXO8T@Wo?>f|UI4 z-=^kh+f;G;ru{(Q5i$qG3i6C$R%4m!Mn=jiD(LuQ(^%s+4OZ`--nQ3R0I|2KojSh} zsuFK6KJ?fHDxFR_N6Oz{5R7)4%WK!tbPyT_L5Xi)zy2O1b_OZF44}M&W;6sIpPiFa 
zd2Q{fW2PW>3?>>9P;+f_GV8X%)9Hi`5(5y&2_%Hds;bXiHs~|9>EJ&EZi2$ck4&J^ zgJqNA8Qb2_NC`>+^M&tF@DKo6W7bV^BArb07a-s5 zx~Xev_16YnGJ!gW$#pF)nZJJhY8_D9Owe>LSa+)U{v9@qbgB)c!G{<0I)eR;>JP1S zUTIN%X22OKx1*cYf@~+&D#%b_0k0lrhyVjN26v1t@Idk6`kEel10v!L9ub%IxGBgd zfcN3DGmK^IvxNxwt(1w0N$==jAvSc#UntxboO`Chk|&7ifd>i}XOa+6@!4IsrNFdS z!QO5QpWSv!Td?1aBz7|0==5ba{15XJY(IdHBrzP7&w#KwRHQjOer&h6xHuG)-~YEI zz;S`vQ3Q)H`X=}D#8roWR1Nj^3-8+7+hKwo41!!Pne6|WDFONoEuEz6A!&@%zx&6S zH5_ei5CQV=_HGQk_X52T>~>g>rZcX&PYKgoeFLg46c8viU0qoK@cKs|gMuZ-U9o;S zw4`O`m&`DLHWE{}4x5c290jt*uXG^+^HXw)ija>c_e3D(QQFx_k~J1l%mCnz&wPQr zh&|$?O8?_FtfOdZx^L9HqItOJ?Ck7Mj0aW;fDDB`?~a!1!+->`c}fo$AO~aIhku~J ziy1)Cz@8tklak&Eh^oJ-S{Q{r(@tY&^IrTUvi?FI=ouP*9HO*3=)izP=c&-c)?NCy z#>U3vm6bt^rO}Ii(ozVZ#uTbPDz(U4@`8BoUu`=G!g;8|KPwhJ6;)M%*A=GU zXbs!D0OLOI>voglDvuBuOp$DA;-*SLnX%kYhR@r%9~vOr4CT?nrH=tEANcn0 z)>8$_c6N+Tt%A_&zEzLapxnU!+j0H3523SLR>bJAfrYL z1B*al%~P>4zl6@mxC)_@CQ&Cry#^5;MKC2`5IX4je)Al3n(2)V6+rF5kOpgA^=`Wg zC!OaiFa~#bdH9*z4jW9vRp;(}*uAkBL<1mF-uxbUEkvkA-js1_`g9|+X zV6ANXVM!(jAek^b6BaE5uas6-Ty6)}M!r^6L7JD0AuX#;5LhrB;DA8${fXn&&4Ost zJo^|3qUZKX5U`x~L1b09UX-&Jm;s*@D~!(vtd*ws<$xcxc{}M1*5N3CDxNgtK6?r= zYkmK5F*!ZGe_!@QO< z7-?07FL40~1l?bVY-%+eKSl~*cPfyK-F!Uph9yveQ%f6E%b-wfdk-^Z8>~=4C<#)8Bf8dY^%(9c!wj5&2T5PcZ=3no z0%UWfWP7pQB1GsFt{v+EyG$lM){W3*i=-kuA^<94NT>fFIPVpGJ{N5SI?7zj(v}NN zY%847XHMGs4oi^hwzRVPah$%C@_QaiR)fh*aKMCx9Vyu;6DL)htFaF3#h&YTU)EY2&XDq0F24o^HENi zqundKYb9u$f=;1E^<7+Cz+4<-zXMtr)ZX=^VWQ0+g`rPWFqy%@Z#~_gs^S)Y1_TC` zfED%*fmfIG=hLraFBlgFybFhyFG#CT@YR%gz03IoZyFFA=D%smtEvX;Dv|^nTwYz# zH9%RBBF(1f=hK&0|8s@N0Hr@Cx?@O@FCK}2eex^z#RvKyl`+prrx}d~)d=t}K=lm* zkL-5BISAGQuQ!85!J-DyjMvau)SIb9f7m|Zf5$DWDx-EHMqek{(E#uJR$IFduA>Ej z+5w>QEWt?;bc#ZSGCEG$nbeC!gUCTag;PSV9c~Wvs7JnHb9Z_^nF=xEi{=V;DO&nO z%=qBlrtHlV`_kpAs7w)kM+u|vprgVLh=`aNl-uBeM}gKgL;`hq|9oa?DXQks zN2oyR?JH1%m6c$Vtl!QGgaXw=y=mUQ4^2EJe%L1H9tP{4ut#&c@gwGrkX z^?m*7@qgGtfq!u@_Zfh6vZScT`)m4!31AXS08@oE zi^-L6-`30r{^{GyXYcklK%#+y4r`3yu=P5;SU^extdP*XqZ>e_zY7ov$0RVChKlG{ 
z>u6|d0x}PJ3-}iR=S)G4GbU7Izl84M)>P{ik(?!1@+gDBDp);$?xzCmi`uTccL1rH z0eb5!*l$w`C=b910Ef#12u)2*tuB3_r4F_)$%1ao3j9y_y^eonU^1fxeuMaulu%|- zcSVdJd%3SzWpTLgDZxY|Fr>gBEa`P7oBWD%ae%}C_A)^G06x<{A0ZMUpJYJ(L5tkQ za;!>EZE4{t2)xpVJu|P<4P`*4=T|;7+v5SOKD6bMcj0YZn`KiNn3*r=P4`;1z`)3k z4=SA#mp=xs>0G(81z=b(@C){V^1Z@NY|ZCi-DyQ4pW{hWg0I@#l2I@U_bNKeLzHfnB~K0I_ezOrClxL@vKcVg1{3k~hpFOWJRW`q%FrCzwAe z^<7tcq5mW^uKxZ9z>m*Qr_VvFAr}N6!%o`R|00s^d!&Sec7^p(6##zGn;wj>)zu)? z9NJ^rYYt$mRM&YJhKL*h<)A^G%6GIt&|2Mc%C}tj)NpCBh9<|xEPy%z3($4cj6#h#c^J=v?7bI#@K#fXVHE>I1nQ zqRZBc2v%f?L-N-l(i&8vkpSqlA!yicD=3dhU+ zp{Ez`-$T}FD{`V~NZ0-*Uj`yyFe z$&up!$~JL~pZ&kb)rjS!##t4Xg%8)fL3ldtR*Nf#{C7}r9jvTLrL=L$Xp!FzcQ}tY zEQeBZ%rKZ@V7lYoXNV%x`lT#pW;1MM6}=sAPSB?pJ6|J!mZ*p1&%nh6MW2}(-uw<) z`uJKuwKfsQcu$Ze141St)xZnR$d#oCG=XjUkrKMH&|<1S%J?Uz9aC~nB1C0rzBeJ(9lMuZP zzzad3ndOzcxDiK*Qk!*Go8uH$hd3)VS^TlNiK`ewyWS;FpB(0uW$6|omheH4R1#V5eVVg55(c#eR(MSbpL_kdLCE=@ zzgp@X`+fV{hwEB1G;XRsBb2y&y}lPhn~%5}>J$#e8RIkuOq=4a6_^`MFE(?6rAJjP z`w832t1wp^y8B#@?ZY3(*KzHnppRFSR38eYE6M6mluW3@M7xlS^u$u4ZL#H_Peo@G zMx&cahVJ{E&C!6@YZhKfnYW@>&eC^VKUGw2gVE6*f*W>2b|YZ@1-Ue4o4MC@V$-7g z49RCT>-0yVY)Y6Q%EM-uRFbnyBr;Ye&Bet5pU63e9-XqFYs?IPiqPTPk%(F*Z}VPv zTi&kct|YzH7tu&q_)N2N=#lWztsoSK5{GUNH;@sV`60fJpS?W^Gr;;yNH<7alH^1W zv?mwwlw!RsqDWw{{*UyngJh&ZN=isL6Lp?|Ek}B~!+ai9xOLATzUXJ@Db}xx-ny0o zac{VTb1Gzi%D?3H-Ha()tmA4-)?abvY-LA_w6uy2z93>IS~lgGhxa9{tcv}K*u?fZ zqqJo9J42RZN6Nv%N6p4EKSuudc-oD%QUCN^R@BcybmRE}d+T^AB>syv?x(jPN~_)Z ze5-8kI7C=&9@*G54W#lc(9Unb|6~9P)`?xf7VoxO+oW?`sTdJQFLA*@bW3r$4aD$~ z<>D1rbY~mcv-9(tw}Nde22Xsc&zoT=QN$jkjkP$NeVa557ECF>mN<0R6H_x|?wPv9 zkl=5~pRrPm4(pV1Le8)5F&B#(6>)RlY!UuVGJYSHe)>q>n}HS^JEM_w9o(&OG#y0c z&ujL<=W-hI{Ntn=uE?Ix`Pu&{*qgd&setC@s8)?@aI1ZPQ*EvxvP&vfY>WwBY+{u9 zfcQC~j=#se=i*}PdA%ERWb=;{O*gqW~mO&eQzq)w@7BzW+$if%w$%!zm z_G!7k4>QHKY(a0|FvPr-8o46W9+6SSue&3J8Jo6RJ@eh z+Q^@QjA2`ZEhYFlvE@8l2z~K#zVpYy%fV z`Gjob;6M0qL1D80)p6e8RKNcpR~pEStn3wqY-Jrgl2ukx#}3)a3P+N?XEukZXo#{M zWoJ7!$N6N0)Hz1T{=UwizdwH0<+}Royw5rB-siqw_j5ex1`x+ebV_SvVA>Nlc7sq! 
zx0^lX?#4h@0p#J66@C5vvu@cxX|6V|eanO6>4*9s5f`fyfgA9tvC*oLcyO9hisKtK zZ77#k_Y?l}qu1hVZpaI0ZW9I88n_v8kC`Y`Cr*A5R`&~)iLgrWJNSsD&Tyum+B^9b zaTH;3Xk5Y&@sVc6#?yj_Juoslon!A)9zo@jkiAE6eo$on#i706f2bXPTSYp)SKMr0 zcq%Fxe`sy~MX~s$o6rbeq46PSW7mv$PMq1f)1W-~oNYyW;$1j0B)bdw;KXAm*$$r_ z^^u54Tw+{%#HrwXiWll|j?y~X8^YT1LSCc1*(%BD)bvWMN@J;BmO%o!}ju-zYQ=(!PL3R0jojFPCTO z((0if6^*U3=&Q~vjzzGop)H5iOlRvEE*;tVe6gnHP@DLD&Ex3&w$^wUn*Lu-&T@Wy z;mh0#0ZFF*6e=m#{92p!d>Y)0#j01Y8scV3#O9X|ws{J4;Vt`7dCM19)q)$IIs__* zk159vjfOew*;dF_%~Ivi5OdtgZ9*9$EEd{U`PN^_1}a2taj0APaE#+)gWKOxmE77ebePkOt!x0NUYrZ zljDDm=>#i_wR0`*ni}+S&K49-PfstBKrQRj_@gmb7Z(LuCA9O@d|zK*=Pdg>>8vLO zz54;~Z3xlxvF&oYB0ErozK|pD(Y(_qkI$_EaccFxA5QRLf38UX+n-r!6y=;f)ohwy z-V2V^r?w!E&aO5-isnSf2%OOptTcJOs-YqxG*rAl*kPzvsVH%FUoCn+p!E#W*eI8c zM~szIKU_{Z(#+(=BtC(f#`@mcP*HkUVT_MfG#Bo9_Fc02(`>)aHeCN5c*(Y29ac)J z=Hq|N!)KDG%#DuSXefUEmSF6R+4=1b;`DzIR7G=nUxKok}zbYSmIVr=*d-}F& z^$)T@tTB`M`3`Hm5ytRB3ZEJcqoOaFtt6jf0e?O50{zM0?;L3*CUeRBMJulpIHH8? zWVrbLc3@whPL)t(AG$PF=(t#9KES&@{#;W|IUZSK)AXA?ye|Ph^bj^Gc)gFJr*3>= zbN_H7>tX&6u%Z|meEp@_1_IbNgB8_5IdMcH3kILy#>d9C z?eE^56cSnN1F33+Q_+u*>jPRrIrs6uP6scq%4@|&_~;m=F~RZklN)@=-a!%$xv}7OOSffK|S)7tuAQz1JyMuW*`LmFP zEjdc&N-2t-jZ#SE@$wLA&i_kFv~b4BxM!`WP{;gXi3i@o@5KeD2FeOEP0)rG>P~;y zzoDKcbzMp1{hh&!yk^a#;la5tWXIEVVi5f=WINH?LT#Pbbg=xVEpj(HmE~D@i_}A| zqAkqVWkSae^7xb*4IjHMVprDkfn-9CidKaKT_yiL3Y9~$B7_oEOhR0Qk=1pJQc-PP z1LalW8#3$;;GJ#!h~#D-P~Nd>k%-&U)zv+oU-uvLd-7y=7>mVcCe1ezy_45Bw}9Pq z3S!8cG_=oCp9Zs@g zOwNrNi0AV@B;W*w_Ud`J=eAiDWC&y$XBiz0|sa07wtv*Pp!LL;)rB$2s zTC@Hz&=wI@Es$eeb%-^_elkkAJy_0CDDWz@JwZobty@P+EGx?4murRWV`HmsqR3DG z-7_MBAcSlnj^+_N5j+-rX-^lCFg2O4x*n zF~|1No_X$fCFYYiNC(|;D*uox@4@BUaKG{C5TU5EBu+b74I#BmuBA1ROM#OTFj^Fo z*VDH1XriMxQ?#hiAS&zo>$9Jc`cLDGK_i}^KH++4H&W_67gg$ugF<17a&n(@-)sY( zPTahC3&X-jRVYx_^i;<%`{h@jVFZP?>uW=~{b&^${cDkXjI4o|b)r8`wAgA`{4Kir zf!;d%Vmc7*%#7N9olT@n3UQz~M-2Gb3@mS-PpWA*7A% z@4fHXs~VUxMbG*(`5RN$MRVJT#RE&JBqJ0im{16|?x>0~?>~Zu=5vrh??Jc z4y0i;!#3O7En1mk&{5#pf|c{khP4LxXrl&YRR{fwL=%tt6`{M(^qfm9tHcdua;@(( 
z$7_RXn|Lt#Aw&=aqRFIEXOHFjqboqWyM6zDUSddDB~87b3evH zFn5ZB7EIRv!lj>XCu~6-dIFbrU`h=I~isb$P1E-I>I0cude@B7ubQ~YRFE#vT>#q;pVpb2Xan^!s9 z8p16DgM2TnzWhoJ&NoN&>Q(buEQV_yEnV@iWTx6V)vC%d-X9_$XGNKN^x&a3htAl& zgAi1g%jdbxUGY8fuw=6sGR?bp*_dM;tbkoqGO*OpR8nGp0GncOr^>b@I9=^)&p7Pt zqY_b~%%q0f#PKw}&PtyX&8<#z1g`;8SNMb@eJ;PM?r=D;u6$F5)6dzN>TWV-LUw?{ouJivtWJ z1UOyC)30`UrC^HxRosv4Hl+tn2I9BN&E?h#qK>-H{B2Xrcz#rhnqS0k# zON^z?A-3TDwfoI@orP3WJfwFy9&7aTM{_WUKsD^+E=)3JB(PgDP*L(AboLc4|In8( zGufFmlG^6Q*rEVZ>x0w4g>eroulsl!j7_v)g#7-qr?#HpZTL-HYSk<`a z1px?53rc30rOpHJp_P``{eQuSvxU$LZRCw$$wcFaMfhmOs)s{ZEaUD-naYzwK*PLq z&gO*k$v`k1e?RT-?X|(yWe+RCgw!6JL{MEIm{33ILe|LFN%G&w%*=tei_0%R0KkN# zr=^Y5`Vk+9yh@9(iL)$M%GKLQxFjeT4L1cn^cvy?CEwvD8a{2B%>fCSicmXG>jm!; z19phy@t#auC$tn{z3T;*UK|P2F_e*!frEkO5fsf+!D%BcbpSwF@lkL@Ss6OeP9(39~JGkbuQ4M>n7n2U^~e7C(4Y{oo&U;;BdH&?n?z}i7I=-Fwnbt zU*o{&>FWmzu*Y*j=1Ubl*R4~t+Zto7GhquCcmqKe?#B(d56l-XoId)&ee@NaM`=Eo zS@iE@mx>_b+V%-sfqjs5>bI!#eg&<9p}geebx%{%(ME8F??%SYvlgE^5k<-yfU3*{ zT4u1UvU0Eoe2>2*Cn_G|LDC_12+?l|JJRg&-@$vhSg=2{)C-O)%F=R54<2EZJh-Kq z9G_-deYq1Qr|3g#7c=fvL+HyH)Xd}b{QTKQn3?B?Zg9)OLZq?=m+F(C4uBxT=05(o z*%;Ookp5@W3kr^3knnZEVAmQdO=|B&#CthXhzNp_QJtBQ5h5TfyEOp5GdXDms|z^{ z{|{s5t zie0{3uYCRb?iN@@Yv!0eQ#*Wfur}!36hCeY&4ys`SCNpgm5gen;y0)c$<_APrR+)uGGY`{l|1npH79QFd}vdJEo# zs>Q=`G_?Zf|nuFM?SY2LZd!r6nZ?)wQ+$Q*-UH zrtqzb_`O;IMTxC~%D!FAbPa4D;myF{%z?$X*>o z!yO>KTmtQ199i%4#sR{u>&@}&yjo;)KXW|HAm^h8RMzHIAB99nQAuyZ4|3Sa!-L!F zSm9`L$jM}EbTkWj_{%&Vmb$_|$EW;oi&7!MveLRQHB7G1v8+n+TOio+2n>SE>v{c= z;PmeRq|{kuYvfMEz|uvAN1QB=@7>GU1vz~L&=B6eE}ywzyF1c`yFRqGo%ok5K~jN% z4D=n(#f1f{N1rW`UH$#r_<*InpU^++g-(odI49<)r6u$Lu3;avz|Sp-bN6d#{ZWgl zt*!mJxB6i`qQ);yvOfwEjjn(r3y5w1R1T2QjXxf!X<%(QeZw>xsl%k_4XE6o(2j6% zw736PVYVs_+lKi-mUJgDIaP*Rr8-{qjyG@1LUhQ4;Cbx`2RP3MQ2p%yLTWF8K-d(w zZ?<;0~^e9gGSMkQH^4SA+}xnz z_p7T=PYZ)Qd}19`iAX@$)FfD*gCJDdyV#SNV0fx2%)PK1)Q#)&~9dx1& z&cZcM!*{B&ada2U&EDBUwj9#pj_EKPWsSPH6(CT}q-`E3y8qoj9SjkU-FT0nR#6d= z=wJ2CJwH5yngNCTsAONJgB5znMkXfqh6V-?T&gCIfG6^^4%VNA@(;}}XHGs#4WGec 
z`Nu2n+V$ti4^&V8>Ext^KqeM@<))tA)hwDGyC5A%4FDyz6y`S6clR2P%kVa&fI`~- z4lz@uz_9t>T&(x#TT>|jB^xW9DNcB%2_LPuRLz;r3h9v^a> z!fP73(%%7;)DEe(R+yx6eC{8ox7!9ZH#AHu4!d#M#Yx)&R+o%VP|)E$d}#<~xxq4i z{L8oD9OAd);8lTRM5xPBvEQKxfTGC+eY7lm>}`A;6zsem;2$z62`MQtN%)g7k&;w^ zKS{|e5)uj$63@to4*t&z+&t`^9RvRV7py&SR)rUs2bx&;+_v%K@$~j^bar*%@d@yB z;Bof!u_Gh%&z;|(rtmTo7XEJ7)2q99n~a?QoVeXNMt(-UR3>sp9?ejhR(o>(DifZb oiIJ|h-ZqZ5vpu0Yg*U~?ez7q4Y^cko!&{K)Y8hz0SGNxPA8zPqJ^%m! literal 0 HcmV?d00001 diff --git a/.graphics/favicon/apple-touch-icon-60x60.png b/.graphics/favicon/apple-touch-icon-60x60.png new file mode 100644 index 0000000000000000000000000000000000000000..254fd66fdc27a99cf6519e8266de2ef5d7e7cca6 GIT binary patch literal 5062 zcmZ`-XH*l<*ItScdQ}jk1cDlh5Rhi1g$~jQ9h4RXMSAZgC{3DxfV6-J{162(5JftI z5Sd_TNr%k0iMb7%J6dG7PvjWf{GV4~-t2LOOcOH`!s!BJTNz~zzq z+3=qs8vA=1s=)cbqp+d$1=vIDqou1xyU56V@gi-`=qMimus_sNMPr;NyR9B$FvH}q zo;%)7vbWm+s@sfQk!3L1Nr-zdwJat-k zi)4kPvhD1L`Q*!-oSZ64`vl!e3zsJ zc8P~fDy0Jy5u$MT+=5NLU#N|l+0D2jS+@`HzP>(m*zwp@?b5=+scMOlTH{75Lvk`g z%@AnTD^QTHD=S;}u&~Jfxw^}IWz{cO)1fV}Kd>_AW1UIb_EJ(>ZLQ_(?oo%TC8wH^ zu`#@1cgV^_U*T{*2(V0=>u}!>I2V(Zl@$~db0>lREqL%Hz*AFG^V>$U0&nuRN%*tn zTN*?Pz%eBBdqVbR!$wy{YTDV=GR#^QYA2{cHuUJ@{&Y+9`Oy|hEc{nrpCO$Rv|{Fm zIZtOtN9&5u)1~=&*U2w|+x1qpuHQ*rU9;+8o=sCM^%HdU1h(8QdI5IE5(#d)fe;7_ zA;LiqPk?jIu-!%v6rWFukY?j<=G)eM_`p6lJDYz4r|RSDOE^3Y8^dwDapA$S>iYQX z)WpQZG>z6i8A|}Y%=l|{x9;g4GDLwFl8QIrfgN38V>4yQNTV(N*esS)IJ}E5A<+JA zR)a7OCjCMiRQ$@4dYp;6b?X*fLc*(@v|x|Ad)KP1wN>s~@-xXFid0u*_llU|{ZdEn4 zzU=Jm>bST#VT_PT(O3Hw9w_u9KrD5vPw=p|hSJc|wp|jnKInS`BKU;0>uJCM~H(wPB^nWie zJNG(EM!_I;9L#idk)3mW|4^Rn{$*3WW3b%#^3%%${9#-Hye!Fc#yINxEfX!QF)&@l zwIL_QfNlsKU3fr;jG~{jGa@51(@6m5XoA6neD}=#`p1t7UP&ysaEWdyWMpK_P3Z}2 z;+6bUpS+J~&l1I#|MhpSAEh%CNO?`S!XrZBmy^piDaXX8uJ*u97Ud@%;iHR%Kx473%*(`0eSQn77j+dMS-oRnDuCw!c+!f`Gq~hPj8bJRVos_`hsx25J*|Cu)R(HK;8dDDjUL%&Hw!08$U^reIdPw6K6ZSkGXBFcrZdoax$>O}xu%rnaUVW+o6&&`Mr7*X57({U(Id&u_o_D00V)2F|DIoDruugW|K4SD_}&{H#y 
z)cnUpshrcmxKuSg?in8@$|0g7=AO1lP*?a*f09HD(fQ`+sK3L2FfaUuG{v1eWKAut zmUKbmLX6POXH^1P%d4wxUlX_t@83@X?Mq5ZHoN&pU@dUJ30cy=An(nTHCbbnZY5_{ zXa_V%$|+nlT4ah~o$kk8inOF)q#8Gh;`FVr7X-2eubphV+Z@J=aFWC8ee9Uz-<5T!&UR8!Ga zdL5~A6o`t~_(j>*KT43dm<~ev$|xK59x# zi8Y{CGFraLSS6ESp0e^aCqu#dElYb`4bRSAO`!{m>SfJXPd?3?&5H)j>n}I<;gV-Z zbjc9_b9J>iGF!*i;*wwe)$ZO>8=c6 z5aQrklwDn+;KaWksp;#pK$$zAsDJx_bHEN8*c7+59-;%>2t13bQA-7z)9V_T0}@&> zzsnJJWJ2uMo@?g2Rwx^AsDz*z)Vych>{eG?UQQ?diiv`PA~rd>^Y^cc)Pe$*8nRq9 z&L!t5*|npSH#~8E-f@0usqGPAwl#*Ktg=$g+xzz5BPlS|i+eBXAQ`D%b<-q#rL3P| zMzg~1@xL{_qRo8Q^NV%#4u&efX9*TBCnQwA;rpEO*<9jA0;&uHP1nApP(W2#$pl8! zpd6DW?m#p2)*?D9i#tG0#e39I!tP_=c$TS)R+`65>jjL73Htr8!%S<;NS!-xuHMbA zt}X*h%lMoeo{t1VZ%gTMYw|Cn>SJ&pr$>(ru7u>X3&1!6lbWEZfzqr_Z0rCPkB7|AqaYSOacv&o5;2E0=88iR|dum|YEd z+6R@ImKJsWUMB4B-76rkLztO6PP>F9b>S#fPyibGc=L%7y^MAVyRWY=LQt?}dK$L1 zy}dc^{ga7h<%*13VNns~@&0<}>iedoa^q4339B11RRb&S$LmeWU_t{__?!z@)pqLx^?7IplV6A6nDsz81u~a z>&igs%w_;G{49i+2ezQc>MJ+Z9)a>;%c*58XAHUs$jMl_P|O zSEIvQ@>(N@=*xtI(=*yy4(iC0ol?c?sJtW(sK(Iy2l}9(CV%!7#v|fEHIxbpkkp3Y z{WRJ-Ap z%2d^ZimsL(9h$4JpY?h$pOGFG<}^{pe|u(T>;Jrz)@_+OCC_xjiSZD#i= z@rUfO!&E3qN_(qYuiw4F{|&vSIbnwW!qQ_l#jXjz!xVh5=m)bBxin+CVpcTM-;9asY zoClO((7S^|8>NjA9t7s!3sV%SryJK!g-^J;@S!WuY!g+`^lWNGmcKEB!xSB?47%t( z8@P)=L4N+VzSZ4$_*cf;A0h>`!s;g`u>{LvUzAOHi*4b$PS~+nKFubxvP%!J8I0}| znGX|wz31eSP!PSgtfF+pPW|Hn4gL)3cnoV#x`8m}iZu0tr9rgIDZdJ#$FEVlo$$?5 z2)8c=eG6?U^ufcZ{5(kaBRn>#N7^xvTT-^I#b`moM#vEbXGZ|*%ZpuPi$V-V9gC7O z7clO;u&YargZKtAidfM#`5Gi!5oQ-JaD@#%uKr^5K?=pzsz#?RG@6=Y=ti0V2a zZW|_@d}BdMt4yq8uar(in}4cgq|%nxGbuYTHUBsL9!1~F-@Fa^)^+dm>P2W~K8{GcaQ8M- z*8y2&_th&kFId&No$icOJGj-2 z$-DiL>tm&k#F&{e-1S>4)6~_qVNjCK6l#~@mzCuPK7z7pYVwo!(h5kHhOD?wbxbC< ze`x5&I=N=43sk3d-q43XNTlF^B7b`jryxPGf#eZhOe7L_!P?gG?_iiAT#+X3UGleL>8ow4ml+wm z{qJn*dxnRG;yFA+%rmiGe@3N@tgXj03kuHRNpt-9;b(`P8asNqa)Ip7PBk&B+O_V` z<2{1EpI_3ex7|+gf{?8^0!Vh?4~U-D@P3NT;0S4+=idL;dk*r%UbQvFMo`1+FJH#3 zFc_bJqn(umip2Ln6v@O-S!H15wq5sRZ0|itNs~**i_tShh_uh2Q7eDyrp{1OQoe9t 
z!a0rN+>>~Do2Im+q~WON?z;7V@837A^5~nHaUD)av!J%Nw*0C@uGm;w9ud3qxe*y% zBUn=A=}8-lBDAs0#K>qCfj}H+z8}8nkS6*_chA0ARaMn-VQFa{3_*uOXVU4H>gr8~ zoHwB_d@rt_1OLUWkuichhu~kR>pp7cK8|)iSd@bo7Hj|sq=dvRaqyJHNQk4r6DcE% zM52&L(ZG50|1;o;yQ7O!;Qv41fq?ZqIKV6jWA0;Q=MVGna(8lZ#ln07J+Lqr4<82r z2*{n=VuW~_A`o5rq#s%fMgSF@LDYeP1I}^p1t%2;Of^EX&5;UTgMpDo2EVocXot4b fk|HzpD%2FBN6o8hRp6WYg+o=Bmv0Zyn literal 0 HcmV?d00001 diff --git a/.graphics/favicon/apple-touch-icon-76x76.png b/.graphics/favicon/apple-touch-icon-76x76.png new file mode 100644 index 0000000000000000000000000000000000000000..0b24a0bfca2e2275f2ff70963dcfa9f9c1f3b5a9 GIT binary patch literal 6667 zcmZ`;Wn5Iv+g=0-QA$drTe`b@rIDqiyGxoSl0i{#vMq0W-1RkWjmahN!@%`|g zJ$vSN=IorAx$C;_dn43T41meyCf$SPXAVTR72$6Gkv$_a)fNH8JF9Uh} z_xaLRlmvRv+@Q*`Xq(Tm2nf)hS@c&!AjIWR8A(mA-y7N9UYh+)MJrQ9p6(6Pwsksn z)df!-bWD@X>(_tdnB5v;<8aq|ha*G#7TsG{pGV}nh^%4Md>)Ru?}`0zLT8B1_nec{ zBRWC1hyDjT#;cw}0_akro`z!Er66yB!hrFbPM}14#)|kP&3Vbgt|-jN4|Wb4gtvTk z7y2CX!)ok7+R$+Moqxy5;=+RWs5MV$!Nj)O;>AJYj4jzaHMQd*t6abOhn|R+`Ato_ zcPDWH`})ot9asUYoe`65lL>?ubqxu{Q9BAwzxz0Fjiv6WTPl~9NjmAal@ zsR&|RI>pAuMm*JGfzt=EombiSBB{@@VQciW@MT=b?b4AD2?7%nH0VQnZD(MzR`49UD}i zKDcCO4Vpg*C*ybgbDF}eO}<=X7KVq8Qu%DUR|8*GQ%fr{_wjD)+}qoGZ2SCXJ+E@l zSmDS`Xlu#;_Tc>rxCHjEHCd_|X>kza)6voWDutEBM!)8Fe2P;xwG-OBtj#iL&u2B9 z$-9?AWayRGR#<2`F6pOB9w*;=HY(gD8%5|BA0K~ACczF3690l_8v%~SrNdRQ^-DJW zM(3HyZ;Jh!>+8-gcV8Kqc(+OuzT(g)z3S+AT{Wkuq{Koar_yQp!!YTEySuyna%2Q{irZKJ^lUS z;$qCf!GYC*hp#FJ@tqcADsyD>75@tS>^2LolC{$b+o}_Cu_XDy}boVNl8Rk@b=+Ca9H#F zMk(e;(J{LGK+C}JXuR;IZc8BMI&QP4NbUQb3_9S$7xTl zYEO=#fd-1Vy6wC_t5>QbSKUWo+F;~2*Q#UrBO^PTbSe53_aqrs|D_1|SaoV@D#@2M zA7t*{{(g3yM(5S2IPr%oZ0$;YLHoO_lSzK%$T~gN?<@tSOq`sS_AVJbywa`b+&r0# zi6}k}ucdnO=IYiC4u9^kQ_@)se}M!o6CF4Uq;s0dDLFbGx+M)F3>p?IcJ}v0%I5a& zBlDj#aV(f~DYLS$+-)FZi&WoC$WwgI&dp_i{X7)ub@_y~8vaw=)QAYqJVy0!`=6$^ z(loLXi5$DHm+r36u#16A5EJ3{Bo}{`in7LAcsSgLuO#tJC#CS9Nvu-GKxeB9x%62*-w7I4z#d z>QOLN+S9_R9W~gDn8*J7W3$)E-!$@oyOWl~_S?hx)$XtltX*H^3U;u$TbCX0mOJpA 
z?2u?_QH+8^@J?JDruiEaYQ5*}gVu)F7@~Tm3=MPgzI>r%W@B@d^z`(E+u7O0l$L&8 zK`+NnUjOIdK6@4HVt2>g;<|OVGsVy(dcT6)W{tY$vAlOHU6v;4~sn_x!?D(cEAGo_i1)b6l_({`Cf7TRG;J)6IL>I+rUVn^anwYom zq^QQVJ1|-25XUoa$d)cTUkkT;T8c|!o?ZW$RM?HQ|JZk6WJJE8>EV_p=rsYBgns}| z1tSgnk}_@n>R(3L0^DS>n7k5M#b4We&CM}aPlRYCiq4TJgHfq!DQOesuHNJ{ztFya zT%WHg;O}{Ojmm--p=M;%U++LMzTomKsw9DYh3L!(`XoR?+8`(vf#J_$8Nvw_bJLq3 zlJ&1UFP4&GW*+_}JX%sZ=hCuROgHt|4JG1-&G;S);fL@W zdYY1g?(E8ljnpOwYo4~HGo6&!;{5-V!a3s6ZmSP8-mMm9&XT!cQG zo13d?YRZ_HP-SFf2$04qYJGA-=SUlpwXu04?0IPZ_;8;s_yM(Q&KJ6G`lB}?BjHzV z2^X{gAwXuL$va&BL|o{zL`3NIO29gdZ_31@W?S_y)- zCACDKN5TkOHCVhwwVvkbhdL1AXpjQ16BL3A3+Xd6Gv{l}(D<@qFkaGg_}#c|j^(NX zd3dzk-ymb6JSjKo$eEnf06(*~x9>T+G~HpA(3p+Mef6f}j~XN!Sj_abkuOhLjo zYA#s2bK_u;)y&W*Mx35F&HNfv9Zrw&HQ9jW-654+j8ZCdiJqHig!du?%LQqU>G zIm>j+sIBYh6XohTMQYGs_qGx&!IZ?$|GG8_xUQqe#KfRHK}u!Ob-n)x=ne>GlHT5e zZ{EC7$`vDnCR#f>7PhzN)YtQA!p4`#OuGv?whTT=sP$UMSL>c{yW^1=sCf)1+m)eJ zsPtRO(^s1(r0Tp6#*Gkb53q_~V=xz6CQ#f#VqeJ6inJ>JlT2ow&W&EY?Zy<_$f#EV zfdgsQy=Yc!t{ya}=HyHOMo5Z<)@2;u*C$t6TAD86eHw#A55-HQUG->jY0|f@(*r>Z zkOh<9wKnKf);2bIyXG`4m+m+iXt+{OX=$M!)bUq@>mq`s;m7<$dI#2oOL&FWdVvV2 ziAGXg&jJj8m6Bw}jLZAj>s%(}V@ZYq@pGn1Ewa_>Q1aaWmM-h=Y|PL~OXD*$-@bkO z3@a>1f>OfFj3y-|1vrH!tk;^ya6YBup_7FWv*W6lePC?NYNg$eRNSAmVNqF85mV?h zR#>+r7E|L>#IF^tZ)AsJ}ZWxdE zsrj>K&(gWAu+BC|S~xq+b?L>$bItl=Ih~e8QrQgNf_0-|lYeb(y<VmF0#z?3?5cUo&!B2UoMhT%GIw@eLMdfU zEXb7-jDnV@=HyvcEFfoP#ZX#m%9Wv@s%q62{c3%4)3ioM!1It4fHDZ_yM2HRIRE{x zE69Gn`r|}_d{SCk|K0VO_i-1Rp!@DWW=bmRlMD!m;(v4u41m=K$`y2NxSrp(s%=LJ zTJVtMf9vhN(8AtGIr*y0m0$WvZ9tWyc=B(oq!n>r@|zlzAj#2Oaa9=1Li}rbIx!bF z_Zt=#b3nj|pQB=tiwvZ)zAr8>@7X#_$cDQ&G0)+LK@(*d@&E2G{Kyvb%LF_p-F1^?UoYYi8gpZ$i>)jpPT(@t-Rb5+K9w2f+d>JFI zJ{!MSZVI5ZTz)q)re;IBR3Yf({A9v&X&tK-!NuPS!d!2En_ zfcDZVDp-KGr=fh4LO0j71b(8FEs#+0z5x$i|6XdH7deSGB0BoHq4x%Xo}M0COEkP| zK_y2d!(u3PVt!r$Oq=U}^XB&QsHm_oGzbmbaMFQ@O3(C+=M3f&VlBEP;sP;YjhNpl`VEAzYS+_hanJd z!5Le^^{ zq>YS>thDvEaR8H=PP?w7#>5X{F z02IK$%v=_rIXpN>B_{R|d7ud+0Ai=bYbs>zv3ufSa&$PQDsFM|w|6*4H0$=TB@T$) 
zP+gGQYSjGx=A4J0KPf&waB88&fe)-%+|+af}==??JE$R@Q^QCsWoue|(SH z{&*}pXu@DMc5`fR6&0H@Nxfy&iz_P&f%Lw9r3D`Ismu5axY^1L$oaRl@$qqryfKh_ zbQ(PVjQ;Op(aGxJ?i%E|Q7&tJCVW{=EF9bJ`CxbVdmH3&Tcx2lKnbVA++*Xw;Nfr) z@#{fVuhT(>e@-}*%IfCfkw0Vm3M_wTX9u5%2z!1qjs4pK#LAe3<=;`f!(HLT&U_rw2!kcuq zg~`Q6V=x4VhLtsjf7vUjwzih}lba~-dMCg>@rRSRf7Ga{=~c_pD46tLW?VF7Fhjj^ zJ{^yYtn=L*0|;??cJ|`7#Ay$yOcJ@!qn7F?(z*#$`>vd2=zd5X`B5sx`kg3A3$k)F zK``ZSFVwmgt=UPJfTK1Tm*Kb2rd@xO*u;4-l-M# zbYNW0oIe!tXAn~rr}B^DWmy@PR#w*4&CN|G7EMNVxYIqwlf{SNzok#1QZh6$d!-m;Z}PTg ztkg31B67+kP?|Bc52-YK@o@>jwU*(@~hQ|@Ww zFAaaXbI0$0a~ChGNKom$lN@{BL9g?@q^i(J6Y7#%bUR~(PO+#w)*&u`)w7D!b?VbL zJvS#$oe+My@wq}V&|8EY@(5rh*Y9ShNZk9V?eZs4YA8EqdB5V<3xUmEY~z8BZY;osUx?eD5h4 z(rp4fsdvo(;z#L77Jn7e=mi3yf107#Y`#KY7A`}*xZ-!Y)V;B>@n`{j9-GCmRlpBG zM`kz8X?iEhH`0s zwin(qc-HR4BU=~;_so%H7zT1k6PcyUt}!me&o=Y3#g!Z3{8AC7hHm1naBAz?U@S)l zpF)zg;Q%Q{6KWWm+L9wecWPq(FI!33(8k}ffQJubPX1?EBVao|2!K*fOp&f5CDGrz zSJOTADjM3)!^H2p7XSRY>ou{1;YKz{Gm=$Ey|`Sq&A(|5nOvq$Ca=ON{e9aIsLFW2 zmWK*A5}7*0D_vm-Rtr&s<+*uc;Ksn!-2`ay<61gMXSVM1^<{CvWBj=H#Sb}A`|7?! 
z7ajj@n!7)~HD<<(2nW=lG*0wle(=q^!n?9AV};?75i$`qqgfF9CU)FLLD3;B z;ASc}N5J{NK#~ZV$~O8rT~LAr2ozptH`g2)6=n2YuOg{Q{qhQN%`s?Nqr}3(0$)=4JH&=Dkv=%GOM@LUjh}=xo^r^6FY!9e7ijY;W_5{SxZI_lM z>5(EM{|+NKVnRLv5RrftkaTrm>#!0I#h{&AoLexy*Le3Xtz4(Z^eBwNpBNA~X;dbX zm`ED=*kVA{#6SV=?5kE8S(C$JeO?~|MnFVFGjw{qzrWv~>-9HcDh!KE0~Dj{UN%0N znVpR!GJ@AQjAje6zh_BR3jMEkh+yYa0C(2N4k+e5Ui`#luU#%KGhg<DHLaE?gWe76iGIX@MfL7l_NH#wMHiRUHr} zao^pNB3s}Ly)!orAZIvPTzRN3Xl%T4PZ~__Zdg(^DB$sF6SD(!v8oL*shD$fbAuGL zyT6YQs>RQjUHZu=77i}n2X=}3owKHb!rA?lL&MU=YUqpC?I)811}z?Mn6)cK_&_b3 z;>kssC^I5g&ktGpPqBKz;bRxJzgqi;tx?5FIvBnz{NZ}bbR?5^i<^rp2U#QR_oh>l z#^lu0V}B;E9f{cE-3LGfY5gB>S3H5l9|w&&*3aVHIM*rP@?|K6fY;nF=F3LKGSJhr zAy=|t{1w`HM9Tg3Q7A^AO9lLuLFXo`<7RH+W+7zeY5^Ju7Y7#?D;On;GUynwN@6_{c9QWdeD+&5>z-Xi^T%PY?7A j_mK9W4FxKEdCLabCBk((mEuhSQ$V1yDl(N)#-IKN;)oBu literal 0 HcmV?d00001 diff --git a/.graphics/favicon/apple-touch-icon.png b/.graphics/favicon/apple-touch-icon.png new file mode 100644 index 0000000000000000000000000000000000000000..ff3edd4a48c40c1c556636624c5a0b4083cbf608 GIT binary patch literal 23371 zcmW(-1z40>7e=HMq)WO(I;6Y1Te?BI8|m(plva?gB?Rg2mJ)^#DTi+UJHPv=YwR#{ zzjM!d>uijgitL-$M6coC;NHl~Nojzen_%Bjkid_^6G)ukCj=XDWpOyT`Xtl`Geq!f z3JW<6WjHuLIyksb;c#$w;I}^gfrIm8hlBfT3I`{U4F^ZylGm;-1pWZYTv1jE?iu#a zmmd{r;CGNc-dLvfZ@XD=Vy^tCGtySma3J^ zxSde{0giIQVGT_Rj@;?gqjIMP`K}cQIWNtijuATI%^_~BuNjD0+Qtv6tK(f-sx0nE9LT3^O2x$ z&aJH69>?I&Pj(zNjHL<)3fjEqszmhl+;POTqaq{qg-Uh0p*(cRhIedHGv-E8!` zw0D2JyOd!kA<|wrj1Uvj}yC3_b>@(W};sj*riK8tLY# z>}RLEg>08pR)P=8LGbqN+hfHn*8jhNyQ%5Vbb5B>^U`OcNXjH~ zQKt>>%|L5uX=!FArmC8ThBNZ9(a|%VT4NRuBV9s&zXBYKFs~~Vv9Fb*J0odvNXW>} zu~JlplM@rwH~sV?MQbj7Z;HQsq2Q>I=nRZ5OGlxMmMLkdsZkNR`%|)gyI+VA*|(+a z)$yo{9%XS{W7L5JaTbMg4HE9-QHHBEG&D%r+1V-h`rcL}A|lrA?(Ic(-3+jewzms# z;m74$#!TTBPT7cDuetAK=o&L}adG*_#l{9{s;hH2KRw(ENSaVsl?n!bW4A7vLPPvg zTH62Dg>MfX9X&{At|_C@dP3o8+(x7?6cM#<^_a@ZcPi57j|z&otZY=k)r!q-&ijwH zj*gBR+Lx{sbGIEo{IALOSQLe=tB3e_Q&UqBzNBYl*nwj%(9+YpNdwijCmo0Xx%=_h 
zd17N@qmgUC=93xW*>DQ=UiZUTceb{+_RReJydxeS-Zl8y5}sVk*~`#T%vQ54UnOy}8a^jKR=2ljw?u&_Oor!#`{Umt5TR#sMw1_lN&RI=I4GRJls-on*y0S}bHhmXG{S#23<=`#x=mJYQClgLeo;s!+U5@bYmjE#*S zVpIb{lY%ZvEUm2*V_i9w(_SK@4-b*~D+%51q+u0KwQ6cFAcxox6BBp5Z7;OweZnj; z^xe&%;^pQ22tJ)RD3KH9ckf~Y_XRgoE-x?3%?Jq=e(-7bZq*rf2mQFMPYP5juc){O zWl})G>$HXkKCxHi?K8QI$ahi#IDUS9*4&8}ZtR$~kFBc&tj3+o_29zs`(HT}9p1^c zLWK_w57XGXZm2MXFEGLDuSc3a%-A>AfHGadiIGAR317VHqqBX%ZZ(SATv1VBpq}1Q znoeF_S*g%{z2UFl6~GTpuyoaDv8kzPSoC2FW4nHcEDcmyl&4WJAAa1U%F<=a-rnBL zst9iHQyq%HhH6<|oePOet{GwB_}CcnGdLoNHMcPqPI~(N(C{r*{7AF9;O8LU=f~?f zU6a7~+fbjDWrr4%`^TsK_KuEw)z0IWY=0H6(F=^orOnLDV$BGbkl{lF-QC@7jNK>R z|0VTa6ZiGyue|G#N5zPc=zO|6{I1^^`G%Os=S`oF_*~Qdo4)1QnHekrXbVPXooFNYIlm)=olamzHd5^c!VZ^&9%f$HyJUrly{g z^78WDMe=wCn9?Vb_p1!tmx70-^TIo){C37}u6%dRYt`|)L31okR4Ny(Q< zqECFm4-oQ%#Kc6&*kU1uvIh$r8yi9LUU>-ZP^o;HoV+~a_{7AcfuG-9k$f86ThZGQ zRZB(1F@2k!K(*)rbGQ4ee>M`ybXPYi*NLfhX?cg`(uS6JCo; zs8Q5S&7P(?bSE?fBl3(4l}NKRxUXk7H&u0YG4hRs`SQKo+!{v4CI*kajGC;V*OIEL z7^)(-G_Z=3O(d~^6~8KEHQuitN%!>-$l|>M@eZ#g(`y1VSA9Gy1IJLy7%S; z7)-;%!+aH}Nburh!4KCY!kr;2jd@b$v~nr&6MyNkQ#`KkX7DihKdYnJNvk_h^!Yp> zrpMP7wfp_0#Tw&LPAid3I{O`laW>91)$ix$=fxCzW=ey78#e?w5P`NRsw`(!eu-E; zp48I5gv{7pB!Zqe!n^K~_TS^xDkCeahz--q;^H86(gRU<>t}6UzB)aGoC3*qf9fYw z(aJ^XhOvmglkF)s<=?ji2K5~cxAV&AbQ#|DUKadONoDT)Hlp%gUXyZ3GjFkCq?Wq^ z1-qXvYr$pL4So6YO)Gw(e;1qD5w7ec%)&Y;ApD-C0%CAG=7?}^!!-UM5c{7HvyofQ^!n;CXQq%fTuW(dzeYucUFzN4 zT|aMcZ+(CNds(G9l6@CG1IZ#aj4ZW{OIk|Xs)g8^Lwv?vX63mjY3+qt7K0|+S>Enj z<;U~FXCcsV_k|8nBP2*fH-5GpclS_z%8~- zWxkV^&KYxizVF_*us)t`7)23Ud7&m?Tz9pg0xwRGMyD*L&6aTd2|q3Z+=qIH6;2He zjlAmW>N$UZf3_vhb;o7{V(OIWEXALQZ!beLHO|rsA4*AXLetOgap;uv{;57+s3tlG zK#0e)IUs@hJz^9;izK65_(&`*Ejgz*C3GS~i^J8DeFEALN;0^;(@PlM@x)>Dp}@Z^ z<@@L==r6|DUqe1njLcDn*xH!H&!u%m9y>d2sbOg7Bd_Da0;rzr`KW~?fF2?f%qA915+C?8s)RvKHQzG>2zFn~+& zBBW?t5WFz6K~wa>Tm=(ZQpK0DvfVRVZh;m1=BtkNlNQjemSY@EOJddO|Lp8oa&mDw z_L%;AT{v|e*t+`Ird_3j_w+AHR7>OuniO}QB1KbH+T4s6)h8YlBm%lV0(_90+iTyG zHa|BvTmwOF4NXU-Lj1wvH`5zwFMA`^DBd$l1r0KxA?;zu_>IUNe`$0k;~7)ue*bzl 
zw&pS=s|31?Gc+>#t@J;J=TcIJEcl#qbqx(Fo$q?$($W!S)zyv~pd|5nOxabn7Zwj| zyStd)zt1QyFHdo@4^^iIlXyzx{`(%(r&5?~iZ`TI7kt|UnD6ht$Th#Zu`%o9(suL^ z#LUGd8T9!85pqyR>FMdFgukJ`?40auCgWwwRRhA}(_U}tw5z<_%fWM#E` z#ufyPt2qrk^&c+~{-9UAcy7Id*7v2lI^pQZ;mAvng@wh^#>RpWNu)5VvhtOO*O5i< zjWC~MH9Zkwuf5$MIkWhvc&gFFh2LKC(Q@qAM)Z(wn6@*$eR?tDquzx`sn!fP#^VO` z;qo>pll8NrMry9&dyRMD6#V@9UstG$9~24*O+vIuEYIA3wUO?s*l@2?x-rWc~izSrH6p~;@jLQ0sn$mX5L*U!@?@jWI1fE`AL7<4(3)m5&4s4J|o8KcScb9RAkj2VaZE z!ugAL4)~;TiwSd!F>Kse6@gs%)%h=;Nsbm}zWecR=^@S7zShul)w`k!7gLC-Is6{T zA{VsfQCUf6>=tzKt$($~j>yW&3RWppY;04Di;FyMWo1^rs;a8iG}v}s0J+?IpKh?U z4JVbxG1^j=S+2KxD)5mMtZi&OHOtYQvAOS!e};kwF?5$v8z8Nvt^G<{n=Lw7btp!F z1R+e^yq<7mWQ1NsH@UD-QpOLP^9)-XQux!1{}ZN>PZ*cWvO%4_?5m+lj#`E|!nN7O#0OT(n92{H$ zw_XuYs~XU)cx0)tOlaJoLMd@{;3|TT=KJQ&o8VKZPf&7huKz+-_k-1SYxkpLy~~!w zfOTWO_x1J%08r0-IaJO-R@>vs{)_dBz-L=gnKCsTBtALMb1$i4jp zIU#dh7}B4lPig6xrql7YtS~Vd>=BAep%j|JAOG2Wnun@m7&Y`wj&5l=57aGT%&KY#~wOMdfVkGJ;p;NaFw zL1DB)6g0X02UxqS>3;G{$;|Xpk890K1l3$~xO{w-hAgX%62aop(xqU0LLnEkJ{wc+ zhb1Ad=?6umFZyQe(M*8(M}etwHUSe!fwaGF&^9OA6ri%(Ogb2{4n<$zN?@L* z4VV`-G-wzG-R|Pz;pu-cFsKBCC9A2qxhztmsN6Z|Jhvdr*zXnVr{KUYL>8w`D42b^ zYfj#~8$bNJu~87Z1U>)G-yMe!PWiU8FY1v-2~SUNI_3m;JurGfPmBK;o-fw zxOh)9U@*>S-re^`^_C;p16wYJ9-_BO)1IWc6OqN8!YOhnJx`Cwa*84f_8Q2}yzl<{ zqnBX5i~Yw{Gdy|NES{Q*Ix;zV_w#13Xl}mT}XKOdE7;X{i68cUe|T_0H!z^=g#+eT;7j9H;d`tSrN=-sI_(f@L}y5eryhTla(9J8!+SW?xS;2!;NJh@580O#G|pL~qS zLojmm$8+8*0-PgPug81;h}NCm2d7Vx>6?-4t9LSq0S z3!WMsZ8y98yKu01zj2*D#@1~JXjI9ps)%lTyT6^#8N&NCV)YC-iB1zXM+O1TAj9ZP z9yUR5;`Bte$7wx#K7C?9%FMTVBVb?rEg>O;kl)20SwJ-^0lm4?)YM$vAODOa?6x!f z*5v;C$z*lw#wdWItk?IC&wDO>C+#Qghq0#}ry{$HADp#;TqN_OFo2arcu{Wonh1wU z0FyHobKe3vHkPV49T(Bg;i|#HJN&Gn0VCt<7K>R`<-5TIV){%r6BT{^&t;8`?h*iv zx+M*zx&Qtd^77NZQmwB}=#`I<0tMy`=*_zeFuvaGf6^wKo|*ZMz`gCxywPz8{YZgj z7h;f7?nm~~`6~kg1L^+mGL*gJS~^=zf3Qq#@HznJy4H~=H(SWJL9g9|QKe$8Iq?39 z|EF2UdY3Q>df`m-LzG3nN58L}=#*Pa>4DQTKZ`Hzeui0=sL@AF}Y zhn8Q||2_L*yN6T|{A49_epQY-;u5B(rbgk^b!$y4DENUdUqw+vWA?4d{`b@5bd6y% 
zo5Mi00W?XhL|YmOx7bL#!NaIRRhlp6GdXR3w?S8@zNwdon}i2WNerjDnl`k5Fp85N zoU$WB)(heV`Cn=pe-+97lt3fPr5oHo1jt23%{?FUUr3 z;0F;OXkzSs?!?l)Mm2w!dVat#J~i!aC4r2;#kuJC4fV0JwH4QWDXADq5MN|TCw)=x zA&$DfYD`5>#nPj@x;dOa8Z;~2!x7-#(t;8I_7V8$SDnE%Kw ze?5xW5(!TcseR4SCc*ykub)A=RsFRZS1#|vtM#0yfmG@7I>@E(42wPuJwc;sk5)s2 z$W4rj&1m+Q%@;{SeI?zGq=7GuOO=GxANYf6cH7t=PF3CCeEY;*zw*H6$Pw+CTKHZp zC^6r5T%N%VfzlAgj2<^}W>LQ%N7ZSkep9%AUnCd|P?0zFdhxJw3Wu+z zDZ>wm4#C||XQifPh?Qc%N`{XS%c&stdS(079JA^d-~o096!?;51X&AW*oZVLaSGO> zFGjny22D)rpu;)$wvHWH8M@3gT<2?}D8XukI2U$bt5(NC0&CAcudWL{$fC|8f%%bp%*hQ zM{hxLty7__{aunSk4R5akik#`qPt#Kz+0Uv&(bJYDe%P@v&%;m*?tUK%@vs&5~q#P zPj7B2Db(;)iA7;z7n1WhDV8;_%XnofSVt`%nc_f_L8|rqNuFcGjttmnVy^@e^Ihm#}r@LCW-hb~U~{ zt(FYr8|>druQ-~w`McasDVm$vpuye2@V-=?*JkEQ(-ptpUZ`g~r)C|$T#+J`!51g*^(d#%n<4^_fDm(^AO($7M583CGKtua1c8CZ#r0f@O1QlggtNYm zjbYO6)bVUQ92W4DG!og_kuugfh4I`>TXlGplS{8l5sI+m__KwZ6RTKx|9gIVE~@Cb ze>Azn-j-zgvjv~20VJfUtK(JQtq1};hc#uI`LKSOh7R&zI0P;*U*D`Uig!IlK5m%Rlc} zM)GYOnM!hUXLDl>W2?!uD|8dEqmMn)pb`vPo<6dvYNcJoRk z;#P)-tplx-A0=rGsVL>&@@cFY#Ti0>+#?42Xt4XY|FmyY^HqI~GBWh+l-w24w|4!5 zyx1P8tPD@7(73NR0+o@Cjh>nsQfkDRb*BKKW&5+$RfGvY$NiRsq_nH60GJ`hzPotf z(aXBKbBPpURLsdU<15}|0A-)vNVQd;kx`+ns*0HLeR6W`OkmaCnI5 zbv9I%N)S$q3 z&+hf>*9;5{rhw)cdaYO~6$pI>(r*7qM@8acSuZW)A5HJ^esd0>J@W_Lupcf~TL1*( zu-TR0)g@wT%eZRaTx>@a)GeA4a)B=u{t73X$C0SfW(uao;^N}|_Ugbf@6uwvrltO)`S`<>UCzda zzQg;}8obl*A6gR^7j*#W#gs5@Ho)n9k4CM|>HRe}$*ouhDZ8)N3u*{a`ix3DR-o^CwwjZLB#3JJwD)92bTH9{+ijzn|DpTfMJwob%x zu?E;P&i{j6T@WH^X@@f~&Dz;|{`L@fg%k!CBODL3Y76jP2-=SWSTmRBU#(1LeFh6+G_MR%e^O$TvqTjZpwn0G%{acaIV~=J)xn${jWu z=`yt|U!GnyKDY7!x2FQOD)J52)Wa;*7j#N z$JZM8QpKB~EJ(8E(;Rid9j1420k4glc4C}yPp(q@_2*FWiipw(BEk7I-|IY;G^;oS z(R*zId3h&rZYgemP)W!|;|3(SiOPzKK>wyI@cYXMB!_GP&vy$83(Q9C>7%0x;5V>? 
zu5EMm65ojz5*ctQr-g*PNF?F!g|)E9%cYfePbMJqqM@S~SK9a5%Q40bn4{I=vxSSP z<9^p+l9y*?#z)1Eds&6Z>_xu|@$i_Sz#RSZtu5{31bs~&QK_1oOSZ2!-_I8)ZM>&M{8uwF-~{CdQG{iaP1I@hkHUDn-QNMI6>()@AhY+4c{B`a%a zwoLK!`;YkV-@o74-^UTWE&ZIU7jAT>6Q2LmQ>dYFIH1mSMl>h$FELeVoOrf`t!50quzjic6r;% z(2VKrx0lNfwGgirImNeUj1!vph-F%~_U_x{4(3Z6N;=Okl?vcq8jC9K`KM4JXb4X{ zsC=4Usb3PGN2?DI$J9mnAi8?u(2)+`(M@hUpv9d6)5IFTo7>IpDe4a zj0IYIT0ue3Of&a3kjrP+*Czlq^!Za1wG5tPks3wnLQE)*B)0hHFX(GgzU*Is7WS8@eEJU-}T>Enu1gRuvsai2LyGN+K%jf@`j7FWQ3_En_A4U!<10yZOKgslOpblVGVh^D%5?pB+5GbP^x;5n7HF%;xrFln%-|=w)<}zP^^ScsD_+p7_4Z$uv6#^w*wDxxt)3AO5KVB&AV z%o~pmkqrV9eaVaS&FkeG4lbMo>3#Vg!ax!1=c9$&}=H!Ba zg4B1e__{9;tOTF`CQh##Er!rR1p!}4DVsgyEtj>+g0?Qu3&CI;@uAnGuQN@^|b9d=CS_#~+rg<%cRl(_M0W*{A(NU^+TNG_o zO6rM-bfhgcl%i$3V-$;i_@-9!9>`m`jo3k{66`540{<&PzJPIM9U0#G>|GjiK3aYm z1AB5!ItIf>tez)HBDT(){r z?M4h}b7mL9e=xORT`)1SY)Y;&**m1i}!w)*hao^}grUK*Ul4k~kHw#qd4W2EF1mG`2}w$G#L^xx;H zAor+Yc8f|;VDcTD(F74N<=92Y-*O}n$m;3K7L{Ai&5KBoN5 z1Vxlk-JG=8hCqxWN{pF(=RddCs?RJ+s=BBJJ!#eMYuGPA>ZCLsK%q1+T!C&mJU)&P ziEkQo)mS*Qwl@CrC!B@?uC%=2$5Ksg*@!QYMbG~R661)!fXUp`BMzWM)Z9U7Q&ali zzxFych9fZjXsa)3rPY}n06vfdp{H~xbWRrg8JAA?1QG7b>>7fIqlkZ8{ZM-! 
zFg>?=U2~kNft?VFb?)|G&tO*9?oMWsHK&cA=q8se}o&B60t5v{; zN@+eiY*w2$mNnRY6g9b*NcU|N;G0eE=VDX&vT32lBO;esl5dfKt`Pgu_V226N#0jd zs)-`yzp4VYiW2?x}n^@*vc_rslTc9VCk7u2nRTe&sL zR9o87k()rwt6^fI3UF_V+=bMx+Qx!OfT+#@1X|}krSkffWbB`Cb>;OCB$`v6PqVkxhR>cJhM6G4@AHo$3Z1mqsa4a^a6Fw+6R4_) zts8z!KQ*oyVIM_ zE^`a{e{w8O#d-0{#J^_}#!~$`(Y_gTmlwM2jEfc>4Mf9ElgI}4 z&uPD(yO?3n2Zf3WIXSkjj*O1NUP~#DF9j&lcXxNob!IPz5=kM;^%l+@k-i09azj}O zq+Y1+SmctL0{wR*jy32kF$8-bjOd(Z}~OlCmB-27gl`Ll^d_h|srH zR455azI1dDYcGIqSK82!3ep3>9UB9e9~pzp60kN<8aOBj*zi2c=QO(JH^mBuUnNk0 z-O0$rWHAtn2Rzy=z&KLV(r_RyH6;9Q;Q-?l)YQ}jHyKomI@{Utmvj@mqLL0-H>w*L zC|Fz5=^DC9nG*IyotEnSQ}AO!(M*Sw_6}~{V9SikW0Zom~Qj7SRtq& zqoQE$W{&H1RMJ5|8eVN{eoqL{A^4Z0(tl&1gMrZNxnwj2DxBfnJ998R9ameR7jvp} z%gb`?gi3n9UHBea90a7*)wJJ3BU%lr|66wgH*G49-(9c4@>NDg2GCdFL*Q7m4g=$E zAKC2@NL5>&XOe<|%3%qU=?jDfDNg{=tan-$u>bgxAD};BVc}PCSlzE&IR?`lif0&N zi(QVEIsP6Vu7H|RsYr`JCE)Zuvjpu|9`qXkB6mO_l0+nckkM)z2|f5-9t)KI=NDcv zhBc|+U^XLkjM4*Z?(Y5` zdjH4}8(MA`^RZw8`vEi6mOF9?VrBX>tg`YyBb}Dhyz!6{L~iZ_=Ioit@K}+qo6j$S z;Ka$ZSgvkpYqj}g3}PEm801pwxbPvEq@?oUk}qawx$GPr0f8k4Vv(^i+4=sI&DqWf zXugo^`xT&YfzDX;?VBCL)O|lepwGJ=7hQ>^%l@6U1CP|tpGSa4oGltGa=tfF($oY3 zI}#Qa)E}2>c6YN(tN}#;{V5%g+E_fMNI#*_+!r$WUtRzz*xM}#>ID{M0~u&gO1U5{ z%V9G~g3qk?6Bai?CleZGs_m3;bYuaI%frKihRhfG*_)e4xxZzr8m%ER5(yxi@x|&7 zt^)bYdUc;(;n2BUF6hpIX9Pz5N{2Ta)ctH*-zK&gGoiqjwcn*xP9f}tYI30e;qb)R zu4=y1(*wasQ;w_B*FU`y)bGA|cwOhY1V)lww%35!a&USoi4u`G=`=bziv8g$u%K*h zZGVGm6}nos0BMHbmwq$cEL+4Fh4g^zxzumh2Y;%pdHpD;q*QG6h+Win5!N_6z(IZMhX(T{(dJ&TQP7 zU8a}?G90ka4lKtaFa&#gdioJA95~@1foU4pF@v^FPSPkZtEBsg|3OEq)Tw%2xVhzr z_hM;jOU^Y(VRx+_)3LE70rJijc;{irO3-7P47yl8WGD%AjciPp05r&Ptg~Bm)bh4^ zae6u~5N`CEY_Y(zw*Z6(_}d^M0`>a=Q_d<1{9<`G1JZz43K~I9Igq1JqWWSI2208k zH@@OtxKO1G%P%fCwL;Es`TQ;zU?r0xOJ)W}O=(#fEm=a9ZtWfzF|cC;dA%^$Yk>@6 zapk*6GtBd71<)GwZegTw&*ulS2fAG9U$iH|Z__Vw{12L7g6R z%u$~lP3Y1gqKXP#YzHnt1>e6jgXF3{=s%zrQ~=bmwzk$!h}OJsxyk$Vj2;yRWYJG) z#bhNZiW1YYoayl(ii00Vi)Ad3pB=S{Nr?breE_I%0?86)MCRn?!W6vP?k54TSR5x zm_dpPiv(TP+*}|aF16qfLkRXcTl 
zBUB~cUVP}W3sgFta*mY0zaSXxHka3~rRg9v41yBhzJC2ZNbC$!d>KG_2hC^*JU%-o zr}Em`Q^!m}>=;ZmB%tQn=494wgQwF89V7-IjuS`-l~q-rxops9Y}3Ji2;2mPj~|&p zqX)|-#WS|Op^*}l0Ok!(P1y{_ztz&!Ejbj(&(HsE)RB32=Lb8IE|5A_x%mO@8hE_9 zA>brR0Ei~-Yim82ZRIMY0%1VM>YTvt&Z@$E4h(Q(DJumC<>4U!w8pHP;zT-`<}X0L z+jUdd((11bx?}=%4wLIzS~7qA`qet1wwa*mT(Is`@%=k&80l0SNP`bA=ye498`U3L z>AcdS`pkecQf@~#s|DFktW}Vq!UA4B%n$(vYz*!gTi}7>#q~8k_69`68$2Q|>v2<% zPXO=3WoH=6*k=n7@LMSp6O-Q2!9r~4kiSs4Ejaf~gC$Q8(*q9_EY2h$qT;i=ZcBk_ zt%AMX7CyV}l(t~M8%gYBy3y&&Z1^AMC)j=fA4y_3DxU#ibErsjcKq0GadB}dD8K)2 zOMv47wWA0YVf0Pz=ZUKh{iqu1?HAs)x3|LtI~WAHTr%1JGgAWe8(KO^*F(}6sekv6 zF>5&5+8_et;qBcRc<%*zA=vG(9!+OlbDt8XxB3QDT__+>YP!0z0O0kHJ_ZF#jJsm} za%f4*%rBW?0Bt0uZXGroK{yIzjbG_P0_LaW6cr&KP40<6%%ilklO$^_qL=}|AD{UG zc@cZWN0t7^ZCFRq)O6pdc}4SZ(b?JApBN9U5&#(recl}{*M|WKWb>3BFhCB*xDWq8 zffqA?qJcd>UMD5J6A)E@Q?)P(d#0Vn&gQ-NNo4(nJkT>V{5V8ub^7)zrU{iLN3K*_gYQ76U6k^64E!cG45ow?D6Id0IfV2T!Wd*I^o5~lBh zA?I`4yabc`K(l-G%x7#sNhXPmo%UXN&eqk(EtFbn2ae@eN<|Zx8w!!+`rm(5QOtkg@0BodMc`_0sg2DayOL5Bf#I{ujpWPEMh?~hwS)W#K{*p*IS zF0jgRwA#v6Uu@mFs;clQiOox@L>-8dTQ1okL<(-MKETHyJZJ{mD$u=O?2CN6XXm8* z<8J!jE)tNizgCPypy6W9_ z6;3+ORbUM6?(*<6w;eW^hO5rq`LKIqF^C30q{=T^QUbTNA7IrAFe7&$)EfIAzXun3 z0Ki(=_QR4)4nQ(tb|x%Z2wo|zuDIL|tc`rFs)95x8ADoDoglDaI=}&e=KB-Jt(yhW zrg`=;5Jb=Il^|d_?SsgwaJ?vJFE9f>DOMPt4_GTr@5=!{YV&r|8?3`o098C`$bI${ zV%GZp<6?4pdjG!Y^R3h(dEN$gYg1F5^QMS#(5=-*z_~J@3OKhEDi!CYopBs0Q`ujp zUNF+C3SZ&^5D2=z5ZTmfIDU*2!0uEa8N2y-;tfln0;iTXsFp#Y*!CV~$~IV`f>06? zi2BB8%Q4cz+(o>vhj-c9K^TJwf#~6VX5epmX{1gcM6>0$f&di^+j#Ji>?QDS{645C zv<6cOU;;YMhe&#vgvI!lnQ`z&LVS$=`UGA2V_nBVe0~^%n6XKnSzly-su!#QRH?Q) zkg7@%x$|%2bBGtd)Tzn<1qu>+rHuuNH#Dm1xS*e_+!=L}I0gql+nrN+7^yz!G2@5! 
zP=M|bL-W9~1i6TRbw>gQ@^{0wRNy6H87&>0oQOE`Ge`A>^lg~}$N|_(CkrQ5>oD|s zD65+k9XUmgM!m%gpa`G`)tL{xwgzKucAb2|&@GgVWvdbZLbtv*y#K&?e+D~7{U<5^ z?Jjtd0-O)@6cFHJ0}Fo@$BF?ff(6q%=eHm`3buHn0K&)vw?Y$)Mr-T6Xw}zjh~75y zuLa2FNXhnMyG4l5D_lF)19q8AdaN6v%N9vRc0>SF#E?$^KXBeF`g|_h2y~RWmZdEh zn%Gu2rO%wS^&OTV*KKKK_2W2wDdqP(l&oNd_ZR5dNZ2D6q@<(|5Bs7v2eV~L`2rbW zGp8RtJ_Q$~jCr0xc4!<=*RTh$u5Z=VR=|G(@D88F02WYv3%0gae-Tbqv;Y{Bx96jr zFh{#rc-KnMI0cCdNM$6hcl40sm~FJF*WpWv%0^Lm%_3EngyHq3w1lvh;^)>R}4Hn_aH zqHBP%B1M`_&(EhXum0x>kpW77PISkRB40cb0sG`v?28ZdKPqFMlTI@l4XP2~Ux4Zx z1RmM#gmVzA1zv9ki-JWBq8YEDv8XpwiT<#C!vBt2R#isrM2x;pvZDds_pP>eA6!QZ z0JQ@^bc#uv>M>{7J! ziJ0-hyG_}fC-$YwRaI?$Nz+B?1&$I%-$6%(9S{*QF(|je1CIi&YlsBu@c#MC(o$5- zp^s33)Z15}1S>1SCRx9o6$k~YhkDbzeIJ^5O8l@*&^-*+Jze)#LxxPtwpKo#}+Ea((Q8y_A87g08V|#PMotqvzpbKV8U;^;VowK7*4d#Gf8d zy64yn0vvfW1*4_UAo~VukGrE8^rSwb*zxb)0|Wo!VD2*j>10V!kN4N~4HLj5mH?&- zYZj9$;l8bz5B$@&na|$sZGc1r1s&EH!C~ulc(H(#23R4Xdq+2bN`Dt15{^k=Gz}He zuh!Af)C6Q6^cL_h0M40$9A`|Z$bJdk#jUBACGW!9xHikCFfcP;(3|eHY=ME1 z9UoLWCoX>sT+_L7WedQtVBi<*1Lb>#o!FYszq-?kL_WuprUYNL&3&H}EI5L#Ug2UZ zL+|5X1J_;G!r*I_^?znBw*$L;LjYpmjF~+3R)}1FlfwG9|_Co(jW?cRK4}c$^pH82HRzofbK8BsNvHwLR-S$qbdOWqBlJlU#qJ@ zsyVdBwAUQKR;jM@Fbokn0LnpwI+gEefuOaz<&jadM70v4hNGaTsO zJ~MZONfm{xL|g`KmI#6GsPpIk9W_pAfS3?a3$&hu@cP(cxT`3NY#SgpA;un2l}_7| zvE5#7Wq>q66M*?GzUO-!-Y14&LmX`XQZMe$F#pzUvqTh51=9k+0#N^_4AvC&V;w*- z2OBeK2&4d5DMs(^?{fi(2lfm>FaywVkZkY2mo4cQAOeX)exNtMLV*8Tc}I3xxcPb? 
zF{~%ec+N9FJ{}DOoyTD4P}twA7rQV~aYP}J1|$KI0n9*;qE?4KuVX)iNG&rZ&K>N4 z!{!9bsJH|KNjc~8A}9=}&)DQ5fXOlGe5U|NP(9^bYa7uF_>`IzDrKN^fSr8Hc2zNK1 zA2-461#|-+a8}<*Xg;m4uW!JCqRDpqVGZ$l2phIVXb?Hz`_Q?ONl3ZQEDft352}>B_7f(5; zG)zd&z{-f~cqb&~b=q$$7;ZJ{a4~xBJ~&zyJF_5eIpsCp#ri!?r`T#g|0*!$aW(pA z^)-C5*ti%IVUp+M>+teEWZPapI}`&2O!$&r!VU6ZJT8y;Qnu%d$5#e9a(!Z7e+_gO zX;38)N@$DcjY(?2hYr_NqiuK-#0nAz{&Mi47e5Vnu_^f+JFOMI5P^nV&-x$w*tnHs z=o6f)9m*X7_D@GDE!4cjJ(M|$CRfrFquQdONsY5AEDIm5d4uqD+N~B>4*Bn(;5t}YlS*mhlF=f+9qw=* zaaazehCE6dU?L@eQhAgLs>-upCXb0iFE3#{;(5-IrjVZw-48~W@y}0eMTs8`FedXgf<^>HPk5_iZjM(4wyE@T`Mp*nqF+?1WS*q zSoRaPmseq~Hgxy79@~dMj<4g|NkJd4D5*XaNLP~8p(vS9hlzF}7wL(mMB8G^Kc9-u zD2zrolMLPWIh&&auh%TRk}_{aubidtwtlLp+y%^u- z_ZgDUYS!tGLfMosL6nEhFsUSGnMh=;Oqz>}13r;+3_Ut!LD!fW{uH6Zw<8g?Oy1_b z?zX&L&s|A+t1qIFu<)5?=g=eJqgz2J4kZrV9&R8bHuFP#9Y1?}5@vw)n~-jhxFpGm z9%xT4;wi;?SwxY*VErHITL;NVgOrqza3<8&{M2m7rk{Y z1>)Xt2j^7C{*-^o?YkLMwphp2n5@6z%-PD07HMe}9ehE=OtfsuGY{`eSXmYO6S0Zy zb4F>&?01GN$BvYPg^!wzWqyqO@A0%7Yoq?@yR4|6gXqTd1NPSOR7m_6Yurz7L6laz z^Z8cU+;NDo+B~wcX&Ol7S)iTYfd9z=6s!}wfGyr_x3)>=xKc49j$Yz|f#{avavO-@ zBg@4ruISD-vS;V#H*W>oRt%o_QlB@&P@;%ENE>T$Hv2Yd8Z4Mnel2n6t|z8u#@sV? 
ziy^_^kUwLk7#-Fr<%FDH-D55mH7erfzS$!Dn`HbxEdBJ6yf*_aHg-lM={mSu;b=OD z%AeQlgU{tO%eq}__Ax@-Kt*s1odJa0eGK1e;nv9 z{*upBOiI_U@Z(t0DE+HV!v5$Wm8%wU&$TJdQc2_I!qpYkCHdVKdCRf3Xg7;)trS&U zQ4zHp;6$L?(&5KRM-s#ZvrQ@hnEg3LZby`uh-gQ*>nwvd_ZUOs68U1ieGo#FEy?P4!;f#K|is^qJ&z?26xo*&8T=O zwY8By0~y1%2wO_8|3doqRlYBp zE`zP_07&B?Oa>_-GBodNOT)0jA(~2HTD$}%g;Y8`<@Rrj_I(iX0BIJ-^ z5%USz$iaW`;e!9G!vR4$cm38b$R#}OT9g>x-a3tA#W^;&&hA7)n zcD7^ZIG>ErsdJ2w{e7K3e}DY0%XRhFd7pFMz0ZBW?&o-@aMKSWj+E(@S1G`>C$8@V zqmpm6c*)<1g{}g~!^bQ82L@&z=KQ3+(zNn76v$urWN zUuo#VxV3v83!EFfnoxUPK~QUpB)Hnh!-RXpOr>Imb@H6Q85{uyC3t3suzVFJwpnDqZ%#@?}q$CZ41~d(euCL zVgJlqS;h22d*d&P)jz|OR_HRVFC{yNR^&6%%=Ya@m7!{>JBaopv-N<{#9=XVM z`fh8CMo!_9;yWTwgcMLc*MM`B_TlaXh9y`2;X0ripc=MKIqc%wJh*f=um97)YcmiA zZD8XM0bg`)5fpzTmDN_tT9u7H8Ex`2p#aCkAg~G8egUc(e+pYdF z+*9FJ(#Lw!Geib_(NS(O#P5EF#CtVW-1tGvQ^r+vmPLuti^QRPtt}7!hWCHk?Ymbh zBW*=Ex9E^=f#06Yvq-G{%Y0Fw8cZKr*iE!Bj_a*5Y-Z`mH~77r$VZSE;O%2e)6`%t z-|>mLedwJ^;8oL=q)lIY`6|1^Hb?V)h59OWjh>957*#Wh;GQE4D*Xa9;b}2@I1-I* zRGBM1j7}C9Jf5(zGq9^UpgnYsT!5mu9!6qiWkuj|xRC9TZJhrYG!D`~gGN*j1$S?^ zrx`LDVIP!Ctg{)a&nb;Zvah18M%2w_8yGJhI{1FJrQuYc{C(B)@Z6U6L^zt^UvBPl zK|;}syh=eS=7CgdY4?IUyR`yZ+>9l`yH6c)BQQbPP_J%a@DicxwNEQ4@$c*#z@PBb^`y}OSvG$=uJ)yOJB|jd|XJ!TPic5 z8sZuLMg^KP#F}|Obf}()TICMg3yxTs2i?uFC2xwRzl^3ZI=mva6}-CPb}BZ!`}@0QIo`@- zKQ8Rs3-oA5h@Fe;kk=F4h9dO&Tm{dT?S2J(UM+}I1RH;x(EWit(SbKVv(u?6xO!{Y zwLZTS8gEEzMIN3eG(CvnLdXi9(iW;ReMQhz6%`&X*&FIKQm;~yJiVtLvlrNQ3Ta}T z$IdIx#$^y8uM%Z$`g{tXNJDFTcXhZpqq`{9S38Cq_blfQMZ-zEVe zOReSyMG)46+2UNME#4SobUu||ot8<-kHTJxU#U=_fpnhXc*r--^itEg6oKNESBad_ z!Va?B{C_*KuTG{(Dsc>794~TSEVdZr+nRW$C9jfzthH|$Vw~|c~>k`fdH)srNqHN_KgrF4Ny*;k;uZK$GC~{ z@h!(Yccz3z7yCh~8s$>*C+7KrR#3rn^smdw+q>#&i7`GVR(V`#;@s3ae+uPYs{KXG znZr=pWxG-jwEzl3Q_SdkXEC&Pn%1@>4%Z)j9YjQYSn2knV3#e%YjuXWh%&y3efcJG zNZ%xWmL(W*z`@2s6=YoDZ2S6NSoP8JFlsK~b83u8=JEKaZ6{Di0^v#fJ|gcVgr*0}i?U47MwS{b ze>lFPo+Ni)PU8EW$%njZ%d6?hwI}Sr+k9*Y{V#MY$<|VRjn8bT;-@`wCnk;cX+*2^ zeeU8-%$H?i=Qi@_ggPxhhaO@_&g!02Vy>!or4xOXz+EcU1BzmVGIeZXe5A4UHOn$F 
z9X&%8g2;7Q4kz%=)_+9tunekfTenKaZ|dpk9nG%=jQc-+yfcEu;Ya5%u z?l}Q5<0S_+=C%0$ray!1sEL4Sb*97%=x7P>BVxI#j6*?j12bh=>r zU$N1=8)9Y~9|r2r$*IuWRiVucwOulcx2N7xRh&&Rk2hDgTV6wiY$cn;uxj;5=+uoQ z+cBl&#iwRGwX#Zd`W7s9+qtC737ug?`8rJejNK>R-yT{zXQu1iDyuW?vafEGibT!% zIRsg$QMv1TbjbU!eJ|m?g%*5S#KOXNa4bUUjF14=rvT-kDNcAnQQ3X8#m!?C4&7GY z#WfG~#5FGGgxzaqAXEm5$SaM+^R@A5-4f=%b}}U;HU}B`@XO;m)itx4{nT3gYNc{| zjcK1P+Ydt>Q8Bebc_xBWoC)@$aq6w13f3aQmth@=x(4b!y4vE|(N4eIE9D-USoe@b ze+KNF5)}d=WF2uhk7zeS?cRn=bN*wxsQK%y7zJ6`CIH)%gt60{8r1jcc^;d5IV?}o zE?k@?uAlDIGylu6AALYN=z&xD`#c3tZofvGrYFP1;;zzoofLJ1^fINk_GBIvPFm1- zQCvY^$HB9ip21wnvQm?zVh~`!aZ36>%~OU=c%sIn`^B9o>2uuFY0vkIM5rn#eb0Wi z4}3CtVObeIDJbE|r)}O+t&jD0@xuv9weW z{E>=aRXh$(+F3T>%x8{OSoDJSQ2gdsqeTg-{CckP%T3b1mg>NR(YO{S(prm5b6;a< zJNw_e->_FSG382L4W|m$r>}|SwUbH)m(s{aC`>S65NzF66JyzX01eG&Ac5Y6a25LE z4P{+O!)S$XbaYs@F~_2#!LLqbfCP+n^?KCK>FESY^f0<80c>LYk;vSNHn1J&* z!a^{2nv)Jp*8jq(uMOymTtBu@@X4SF>j;}y z1>73qO+&*1Z>)jBN-fSWSM16ai&-p&dmb%Q`LA@Q#x>2l+BqQrA|R*5SbFv0p|*g| z*xmh5RJYrwxs4r(U5W4%^H>V4J9pSw;+(93T~s=_)Yx2F>bMV^Vqcf)mJ~Q$9ji|{ z9UP;RP+~0PhTHnn)7%UYO-Kl8o}C7~3-UtbnnGO19Aqp=C%4n^2`PxrRHd#t==%D= zR_E$5XVABYr>1%u9us0AxIy%-x0b$gg+8}mSf`C0&m$jsfh^zEJNI)0;zPE30IS6b zh7kgsuA>QXi6(Xr4x=Xs87aDsS_+pFshw`-$i{$$Xb0 z`r34;K#z;*a|R8nim@9&`N+hRFcNGup`Fgv1_uil5wB6e0fGpWY172DFo!%P?`P0= zSpf5R5%4N!tH66w7hP-sFBb{oXmbk?y8>=)Mc1>IJB+?oCs#erK2=lG6$S7Ic(G`7 zdHE7knQN#$xPKk~Gv22m)sz6~UCu|E0|GIejG|Bt`>+F(j5!(XmI73id|b zBhE~9B@d_gFpr<+rd9o`2c1ya-SL8Y7uictFbV<}v093m`r*o-KV#r2KSx6xdNSXz z9H=myXE&OSB9m_0JWs;6;z!6D?Fi?|&kMoTub@G$vtuIkC z0AvxIoon86?WI8NnpxVwIeoHPLj5h_`Gop)yD;^3*-gfrGFtHyNoV%r**t7RR12+Z zJ@SJA1f~rov#c`LLHN+h%WDC@;KSKN=!GuodWck#$^Bw{3{&;}VJwzuXS7`PaS@xC7V@3Ia&7WAvxnFf%wDZoTJXVS1A4t_TeD z?mkyJG5Q7uA%YwU+>rTFL(lc-*6y^$+Uic)!v$VLkcH<_Bkn!R`ST|afAAcB0q0SM zA7&PV+c{;Ti1_wB;$~1kWSs^q>wR8At6(@kC1uUa%xtU)oZ&lB3G-|vCyqsv^9GD5IcYwh3nD`Qht4apI>2vW#U*??cC|ZGfW0Rzqdqq4s+%Y#$LVz~Ib<$hR~A40Gi;UHSAyokW;Z!Nhs91_;PL zT_dAyAiZ1!?Oy`f;QQJM!mVp932S`X6mvgwJs=7IPJ92R80^cCweUfoLiOOYt0 
z$VdVDj@RPCg7t$>R>ccXhuD zwSO9}c@nmpUisXIN5GKeN9+zK^+DnhbAROazJH3YmdDh`j;4(PrG5y1VBo*^8vKYG zfR*I$>bj!q?;THnp~C#FJ!H!vE$*BFvr+b#+rvTxs)fAG14Z{go0B0B;n+#=3~mz> z6^;4T(9-+EE4T$vxDQJAWINfQhiq(W>S$zWc+ahR>JWG$KkH%rS*Up5;&$rz)3k^g zELLEm@{YqmuEJo=_di`+bP&kIVlUs&*T0fY+v^al3#kF1q?Wv;*@h8$2x z-`gf-sTLZw{F{sO8GB;bDw!7n7_^bWo>1hXE( zGJf>SukkG6x6;rR!6QVN+fs@Dfhd5YDTI8rEqxvBe4P{>e4OAP3Ta7cX>lp|lQor= zQiMM#>C2LmijtBa2)%ay=LHWv9bKIR|Nj?!`TPh6FR%zQwe-DZ=g;fqgDS|K@pHQzfMEtZ7w46&8W9eZ}AodrNCJUhqFuqO!{feluW!@VX|$GlmgYJ qyuFj7-R*ttob9K3!*q*oNKpJ@W%OOwkjsF#pwQDc)Ox338~#79>S(C| literal 0 HcmV?d00001 diff --git a/.graphics/favicon/favicon-16x16.png b/.graphics/favicon/favicon-16x16.png new file mode 100644 index 0000000000000000000000000000000000000000..feecd920540a6732a1f9c3bf574cee90c498fd31 GIT binary patch literal 1222 zcmZ`%drZ?;6uzZEnV^$F2RIffWjt&oZH1UI&3J07yhT?*h0=mzaMDtvZNfk*CM4VB zQDuo}00lzEYA7(>hSu>Ago4#tilZ%3`)f;iwBy;*((dx7f9&R-dvZ?hch32~d#lq^ zkM7*DX9oaaCzXpmRMRml6j+*DX&w-3`V;3`%AS0K8BDDsBL< z2~!mY0Im=LFy;Xe^9uleC5?Y(90I@vlb)QF1TSP~=j7(*;&3E`F>7Zw&&D%G-dXjLJdc|R)>iQqi2AS|oXAvT+>zrVk{ zyu7xyc64-fadB~JY3a_LJ6J5XrKLqClgZ`swzf7%&0<+|I2T3YFTzVoe*oc zJI!;qmNf+PyvX5jZf!XiwLfg=zA(+kE*aW3)(tBwD-wz1{=NE_&kpOS@C!=3)qv9| z&O8@C*(PW<7Z(>tgoh+k0+{Fg?$+-8>ruqZzKCZc95HfFe0%~NX>Dx{a%8hv+1aNf zA_yV)fFp^)smWw|ZYqz*rO{{;6BAHFD9iBhFcg`d�-bgS_GVR;#tHt`6$c+}sRF zwYIi4G&D?2O)Z&BP?@TmH-(QLy^>0eM&s7jmR+l~y?bSweq){e$2hCDsweGAnN>Dy zQ^_1lM$3!G)`@ z>Ka_FpRWn^zNm0avwU4B&o=FYX|7m_JkamW^j>JwVw-&^&Goa3lMWUZSFJaOqQ4an z9dRXo=<0;!Q2ED?CZ6D(|I!@>7!EZh38VAcg~fV*zw2@dOgX8EaasN{MjmiEf{Me@ zy)#1HJ@N{7`!twW8U%HPc_{YjcT6UtMZ54T6MTJdPkrq#IGH7{Oc?0L2lM&z=%`-* zo-37n{(}}q{s+xebwXK)0k^N zRX*_XJ19MP$ZH}}c)+fDs0_H)>_Aq`tIO;EBA6~z)iJ7iF=m(srTB`IMB^~&oPrnz zy8t>sCX&hfNpMGHkx4OdlMWJy#26w`Bh$Y42yp2#v#5~!|A6|R&+s7NJ6;xzlS#iC zRKmVoSX5jP#Nn0{1QnHV7yw*r{IKlh!8#QlKAJWrp(2?8jrWXXc>3Udj@SF5eS#7z vq6U~~e0NsR*wovRLCIj?AZDzB(sV2m82!9C%kj~-VGBS_N=+2T=T-g(0!3RR literal 0 HcmV?d00001 diff --git a/.graphics/favicon/favicon-32x32.png 
b/.graphics/favicon/favicon-32x32.png new file mode 100644 index 0000000000000000000000000000000000000000..044f4cb0e89468ba53f460451c62e21c10d30d45 GIT binary patch literal 2365 zcmZ`*dpy+J7T+GzObn)!$9ObeB{2;lCdp$A6C-JII5{CE8e(K_8hMO&-pV7cX`(tc z9#fs-ILR2V8lpqu5SPKAP!Xb-xWE3qf84#--s|)EuFu+g?X}lhduMpKJ1fYc_~ecEd>vcA^s{k59B?yRA) z-rEtbYrAPzpXS2-m^s{?7DR=~6*_sf@7{qz*=x5^^8+|*V^QluIbPS|ds->5S&F#(#VXJ+pE3gY=yckWD9x1}pZY%9N(n;ZX@A)ik50nIOSG!p2u zeZL>8udCxPZv3F2WQX6q>-6Kte2pa%i8g>(Nr@Ls!j-h!`+j#-fWcsJG#WkZi&d=< zwTQp;&dkhko(A`DeCudM&nhctw^aXp7dCF?&YbGF{`&HBJ}oFne|+s~<>36C@HKsd zpfn}CrR83))2A&sHcYdw5c9Me-)Bp&Q%@W129MR%)iVqX3~n=`qffZpAP8R)LzV@$ z_jzlZE@krxTH?r7V2XON ztQJxjc`ZA;zx~Avo(t6co&;^5r&~2#ii+ab)zyhC`)GBb`J+g1Z2BMGnbnpSd%wqZ zx6buh5WbI@8)XyTG(Rpu9DY{sA3>vU7G;O@oIgie(YH$?p$NEq)8~c*VzQp#A&L!o=2@!AA%3?>$>4o4(H< z4L{7WjmvCkz*Bx19N+w*A~8GD+*fanROqTuQXan)*-JB2OOxt)eA|5Wn$I2r9cIOKFLmuu(fC}YF9IkX;iX!TQ3;EN;3hegV2deh!p+ARfG z#T1W_emTQkyX35$-1-r7wBskeuk0{2^{V;pxr&dE&(Pz5w(N^){h(*r5Q`=H`R(ba zDXhdOoJA_HNRoxY^@iHEOXH)_t^TI7D0J(1M1oS=&I8F72W(KO0RP+?;fXLVM&Xdj zb91zmloX7KiNd94vGFo0S+J2mFD8$Mr(+P~Ozq!B3(6_-0+EsfEY_z>TqeZUoET_B z!+cHT%fEzL#B5O1B2iZwX#^*Al*;||UwJQ~%L%^LLt}}ptTYvE5;ir{6}22_G$UGQ z=*d&3l2=zRySck3p`;%TruPU?s2w^EH>{9~Sva)b^`*1Ao~KS-&(CK?z5jb+_0!1W z_t=QlojV=!i;7a&Y|Rf0JQRNc3uF8%s~^?T8X5uhBX0%3R#G>^q=^7;IH{rz0UEbHD7^WIs;+cmjVY0EN&+1XiWb&pDl6@467MvP^5 zc9Oh}RZ3pZ%nm0~eFy}C|HEafmrSAOqD`J124zk}v`oIpYI$P(@$wNU75gjK(PFW< zgw2kPlF7)(2%*s&NTkkeZ>SPreg9(R*{&lXQ-=6g4?_Xbv;3%T`FpHY+`4(;af9;* z2G8Fr>+W{Dckf>F!dlO@FE)9B!d&|b5C4tTPliTD4zaP4&@LD-D@8&;a%a%Lca#l+ zHXN?(wSC|n!`4}bLxl&%)>fNJr9#-a)#tBMX6j*<8Z!>7q9jqOTeegoOCbyFhS}X{ zHav|hb^Gh`C$w6ceaWxei4SGHRocJqL^*kMD$SL$y3t3f7YjMrC~3ercTN|RIk}?Y z`jUA?U^TN6u_GucMS3ow7XJ^^7 z^?*?7hOC#WV#idu)E=%7X7sJu(9td~ykjggaY|E4uONAc^x|AjNN7SIbM5o!GUQr= z($dnq``p=xCd)$4nlts-st=Oy$7*e3`WKcrR4L&wljE?&Kcg9VtY4Gww=CY7i_Hn~ z^qHyCw>mx#*B$r+D$oT`ZQXuym#VSN>0;4ek>a-!iNpc~sbm>&Ra8GtRKLxWOTsax zUlsLi*IAP)S&4}&B;K7!`c$%??f#&DXlRJy<+VDJl$0a@SB6JMTz#>F6203w5=m^k 
zP^jU43;(scp`pQ9Sy}l|)rtBM^1|rz!MK_3Qjz|?ePvZuRh!W*{%$#V6BCpBT<(Vx zN_%YCo7QnlQ=K7^k&$nTP0maLTual2NTZ^n0yI;DgSTpGYip|-8zt3^jg4e5)nZFr z{u1#`3i77iLLwr) z3=LnM=;|g-djc6fd2FD(vYxVQ!7dqP41s0d79^whl!ECR>hEapZr5ywcd=Z`k7L1{ W>JG+|gGCXv1t2-O6COJFC;b=6EIr=< literal 0 HcmV?d00001 diff --git a/.graphics/favicon/favicon.ico b/.graphics/favicon/favicon.ico new file mode 100644 index 0000000000000000000000000000000000000000..55fb4ebffecac868588d3911308f35dfc08f6f51 GIT binary patch literal 15086 zcmdU02Y6N0vOW-y`#`z~0x`W&1Jb1lSdeDpQA7kRAgEka6i^AER7L4XRnZ7sAvCFm zBm@GXB_R;fdjh2Qlb-+mvyzj0lAJUk_`UUgC;RMu_F8M!teIIe^G`IIdKx2*y}gE4 zQ_Vgrji!%AqcJrtxo@G-gz;GmUgh^$4K$h+e8HCQu#09IKk{5P%i_x;_-oXqOP4`k zfBm)diWMuoo_+S&S2<^+M=h-=haY}8bNlw~+=7CFa+Lq~wy3BG5fKrEJ9qB9I%dq6DV(tB{Z9H2`qih5 z+b>$QXqShFN2*QbLLEkW5 zl4-cayBqYGe$th&U zj2ZLWv}x0}#*D1_i;oQ*I&{R^wQK*nb?a7+?Ek}Vg@uI(3kxgQzJ2?}v17+hAZbl% zPF|&N)}xGfS+HQi*7N7jCu_A?51Z@?tzNx)k8}Sf@7S^9 za(H-nfyg+w7LF&h_+n!k4!dXLd&dlXxg{L|Vfoniw-)R7X5z40HWvMqj*a^>5ua3u z?f=O{CTaBz%EPj4={V()gKvM%z|tM*IKeR%ZA!yWPFYCHEL43Ca-6-Vv{=3~9k+t> zO84d7y?aIb_wNswGG)rgEn2jAqN-Iq^UO2-xNnENy}h-vkGdxObJ&`Wtp_u)<5(89 zkhYoMq~OB!TzvaW25xY^OTM{Syg418u1iyN-;L+Ski7@bYVpTOEn<29{jV9Ay*w4$ z|IEY|u5c<(>twYb{@jc2xG#ISPa-qYaw$WL zxhJ#Nq{7QT5Btw(v3gGi4!dMy2G_skK&D!k!|rsXkw;E^{uTM;;gefDK)Js;IXU1O zyhwX11N1li@y8#JS7uv;rW2HbWBll3IiK?DdMyXB2?eF{FZ!V0ZieLHMqr-$U1VHj z{xbJ!ANNG~a+duUav!p&8*(r2=+>`4j_&*3y?bwwc0+R1kUqe1eYw%~(OpG1W&G-z zR;xu!OpNl~481G+l11H+mY7zExa7hrY1SX(k3asnZ~FDesqA}dYAR;UngwfXYs{NB z4`N5E@`)-Nr?TgDO+S4&bm-6l++$c+SkRVxW66>w;JJi=fB=XLKS;Y@`o#u_On7>F zQZEhbm(V4$^UXKkfPNWPuU-w>KMWr}9Q3b2w+Xc8>K#LCkbSy#?HVE@BP-pr`=LKD zFc2Sq{4u(A?TT^Z#wnRFG{5edtTihui!v=bSqMKrKWyH-8KXvx!mF>o3Xw5GP5AQG zTW_Ir=gvw7>7!P7?SAOz`GKZQo8p~!-ob?n7Zgnu1}=FZG+(}a85=fifR~q-TI;rL z+i?2yX@rM|t54-Nt_WAIT)_t)e1IlRnkbnoKhOQrPk#dR^UDuc;a+I8x3^dF-@ku< zh&~J+JQ$4|H%5;hJ@DOk-&LyvvIfSXpbt=C{JN%JWVd(k-b&sF4H_gqCC;5YhlL9l zg6BJ4eDTFf9i>FnRK1r7NPZUwrWejvhSkcBC;=bM9u;`TU+$&(+53!_Ef*id-B`# 
z>C-`ZF4bw-7tvjjzsDbc{GrMJfddC%W@ZM)U|=kx#ufPxzP4=HQt?`Jwr}6QU>J)h zo_GQc8Z=NkT)$pDv@|w`@!;oRY+{Pe{QcSf{m_%|KmGL6U@k?C^X8jx;^&`##u``xI~V~YOANr&~n zKZn&hZ&*a8!QRmc{U=XB?-grdfBJ%YX5keAi@!Z#wRAO1Z0*z@h}_8dGLP`18|PrW zUCMj;Qd{~(&zM&$!)K9+*5;Pzvf>BWZQc#5-wwjc=`<{P-=;%HSdAVBOVVu}9FMM! zhhewF5#1Izpy%(0(9`WYx?Z>eyBp!K4NHPmR2n*rd9_&9>eq+x$=uvrtwZEU&JX=o zTlz(QZr;2J`qp^)<(JXEeS0M*;?J1&?u#z7=fdvGWw6|L4CWE3uwA#Ft_I529~2^pTrVZkQl|@z~d3dpkzaW91VLtMfiEVO(QHUfW!}1yg=E z=%PgC(j^=esxXrj}MD}{k5PV9~(Tcp}$uU%R!OVIKxsZFaB`bmH;gx4^6-r?;z~H;)SZ@q_Q>D75%yN_vd>1!?=u$Q)alHrztB#yaKWJ z(q#n4Con&h#4WqO9Ap*odjsmc!Td>OyeRXW^ehdGfU(Gr-Wp^-Y-uAm`uHKY7CH7Z zW0%~Bj%?brsj~cuhUb6Z<;#~JWZaQe-fkHhugtTeB5Vr`gDGueDgC6~lKQ*SFG$5> zW!|sFjo5I;fO(9GTt|!;@iNbML-{K{R;>x`{Dhx=`stG7gKLr_SNR3fzb`Bb=CN6@ zh%4R$6UzM@FF&MKVGpZY#@)MjizxHACr+IBA!DG{IwGsPHgDd%Eo0Ho>AQtWe!j9M z>&v1I$&t7{I0`+fr~Ucqb61N_w4Z0ZE5@*ujivuQJw07w&bT>q=6pwg!lF8QEBU=1 z{mxFz<7{ztbxoF-ynGX1I-W7X0bf6i_Y24P(0dpYk%SS^X&C8#4R>N=%Xg{rUVP}Q zSFdI{I5_-={^0=9WmE}mwRzl-KHKn>D_8#I>+4$vzsO}i=N9u5bI)GD9M2n=?-QWb z(aS#uzxmumVGZq!$T)q)f-PINT%?TuFNteh8v<+iZZqPB)99Cb)2}PVkCK@W+2!tm zmDg`#>80!V{LnG{&ixRXs!0?16gg#VG@t%kUEoJ$miOe7Pj-Coz4w+7zl+x4ce3wd z-?B(^24$*{F=cg4x?PhrVrk%+*tqE zE#vK0lO|32K>U+CckUD?$MpnA4)hk!y}dVWT6in801AWMLueGY+u!;MWscI1(RsVBJM9=ajjnw&L`~94zItHLQ8~mNg51 zUC4&xX)Sgf%))YxkxV+Jv0f$r%$YOiDNkd$C;I+-V`2oZqD#7#n8NxE)Bst9OiF}>ky>Y;Np#3IA6(8H5D87W#R(w_q#Bs7MhPEtRK7wlGY=Cn5m4h~lMZ*BBL=C`r2sd9eNbIO9s<&~#H>W=7B z;qABI#*rgO6sAlYEOAbmS7kq0pHd;8RFnDl?c1mDY1-8?W9n|`%PGuBctPJz;dp|{ z>o@Ve83QOj>0`b%nP0Fq!BoV?mv4eaNnTZWK%G&0A@52~y?hh;1X~oGPSOo_F#7WEOI^b2ufKj@^DkYxw9MWKFJwP|`|UTicEKfNZ7;m= zg5tBRQDjDROj?N@R=le1`gwkUIwE;3g%t=^F1c~RYAL%47cpktLOWt^R>ZEPuEdsj z(IDbJQd1z9qv*!mxpNinz`EyB)>OxT(Iv^53r-=}n$#zl5&ttKF7_1hN|l=@Ce^rY zJG7rP6?UJ01&e`$(PjF3=ra6ybn4a})}*CxzkZ58ue|b#*qj>AFLETiGERwvHG&h;9enZF3VHts~Hs{ybiCSXH6#<)jM7!#uu z%*BMX3;w9f{Z+Sq@g-P`s$@*+1OA8DjoHMvU`?FL^0Wsm*KCB{&--B!o&wvy++cY% z5H`1>Va8xyKeu zR>4~ECGRkp2i%3#^vhPt2d3qWUL43a-K 
zkIsZeR606GreZ>1G!_L%<5E;iwK)<4^Eaj)DRcfBnp>U)C8wZwR4Pp31h-)ho;mHA z{t<}dnNZc5b>o$szrMtn_0QjWl+Piw5hrLra&&T{>!W|YmywC#%p(fcB6E)q;Te@; zxb8>4pM0m?%xC=U&f1AF<@^uj3Q*sgG1othbs^rYB`P*zdV}N>+X_xj#fzNZo_W~r z#88~WBK6)d{FQQC#QLm2){uTo?6%rmF>ACs(*7)AT}OF0FyEdo^T*TYS?_rr-(K^@ zXBV&IV(dM-ivRVAbn}+jeJx`%C;AAr_P_P~G7Yi+MzrCh8GpD){E0cuZI(lUcmb9c4az0#G8>L zN1E|0T*ZAYjX!zLCDGPx;eI`;T&EFh-iG}7=btV-XRmzp(MM8y{Mh|0Q>AHgOaMtE zl%y0S)gWny2#uzZ8{>or`N$YDR%!Y|=8(B$PNpg(Pa$auQ+`VG6g7+%S?^;+z7Jt8 zz?JyD2mR0&SpQR_-ps3-(GIWX|1aGYeGyrv?8Z}u*73ZvCOHNh>(yGhy1LGey>sU_ z{c9zMqJla#EFfl08yd|2?V9J{;Gl0~>$ASqwj1mjA!yEB#Yal1pnn#g|>NVmbeZ%ZYVt#s0g* z4dP=9U+6yvGw)%-m{IkYb6EUc(jr5tnmzGV#7A&+bX3peovmB9s=8c`U#7qKHtW`{ zQ~e}9pgt=<=bahTv0?o>B$i-u;yXy4CS#q_{){<%rN7iM&!c}aY4T)MH}#Z-CCo>> zfS#krV(i<~v5>wiaY5Bze}3sNHL6nUX35%~r(PP3wo~7Q$@(3zJM=f~oG-)ds0-S6 zv&ZmZ!&J?NuD?G2q;^{BaX0STjX^G#VL?C0A|z4OC`|E>!YNlbg|CTB>G-F=e~L-+ zhx}a}lY|cRUxr4e;E#LBxTh`q|6aQNqcAG@2?+^tA>Z=U7~&_#rJ^%^kfRAagE^nB z@!Tp13k$nIzhWG7SEV|^c&sJ=e`GfCKmUZ3)S@5#L$N$02I)D)ahB}8$S-Al9%bE7 zu0rVr_t5;3r|0UhnAp2njB#YGjK>p1e;6}Vsz3TlWnpG!w14m3p|qv0#5g=y&-)_l ktV(Isf47J^;2aa?f0_GrW9DKB5v8g=@{YVKeWdUI0oQ;__y7O^ literal 0 HcmV?d00001 From b010c4d45f2723bd9bb9f5a1c570cf5bc39e6b20 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 Dec 2023 19:23:54 +0800 Subject: [PATCH 72/88] checkPoTools found bad translations (#5681) * checkPoTools found bad translations * real translator advice :) --- po/R-zh_CN.po | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/po/R-zh_CN.po b/po/R-zh_CN.po index 105b94145..52b308af3 100644 --- a/po/R-zh_CN.po +++ b/po/R-zh_CN.po @@ -206,7 +206,7 @@ msgstr "" "列才可以联结" msgid "%s has same type (%s) as %s. No coercion needed." -msgstr "%s 有 %s 的类型。不需要强制转换。" +msgstr "%1$s 与 %3$s 为相同类型 (%2$s)。不需要强制转换。" msgid "Coercing all-NA %s (%s) to type %s to match type of %s." 
msgstr "强制转换 all-NA %s (%s) 为 %s 类型用来匹配 %s 类型。" @@ -769,7 +769,7 @@ msgid "" "Assigning to 0 row subset of %d rows" msgstr "" "没有找到匹配 i 的行。无法增加新的列所以无法运算 RHS of :=\n" -"指定一个 0 行的子集" +"从 %d 行中分配 0 行" msgid "" "Invalid .internal.selfref detected and fixed by taking a (shallow) copy of " From ae6a3c28aec78b3ac59e7e130422ef3550333d6a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 Dec 2023 19:33:20 +0800 Subject: [PATCH 73/88] call flush.console() (#5592) --- R/between.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/between.R b/R/between.R index 42925637a..82703ed7b 100644 --- a/R/between.R +++ b/R/between.R @@ -84,7 +84,7 @@ inrange = function(x,lower,upper,incbounds=TRUE) { options(datatable.verbose=verbose) if (verbose) {last.started.at=proc.time();catf("Generating final logical vector ... ");flush.console()} .Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]]) - if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console} + if (verbose) {catf("done in %s\n",timetaken(last.started.at)); flush.console()} idx } From 3bd4fd16395b015fcb8f72db54948f7cc5562710 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 14 Dec 2023 19:35:12 +0800 Subject: [PATCH 74/88] namespace-qualify methods::as() as needed (#5644) * namespace-qualify methods::as() as needed As identified in https://bugs.r-project.org/show_bug.cgi?id=18540 * Update tests.Rraw * revert * minify diff --- inst/tests/tests.Rraw | 4 ++-- man/IDateTime.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 8eeb8f7ee..9d194346b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12911,8 +12911,8 @@ test(1914.04, dt.s4.list, as.list(dt)) # Underlying data not identical # simple S4 conversion-isms work df = data.frame(a=sample(letters, 10), b=1:10) dt = as.data.table(df) -test(1914.05, identical(as(df, 'data.table'), dt)) -test(1914.06, 
identical(as(dt, 'data.frame'), df)) +test(1914.05, identical(methods::as(df, 'data.table'), dt)) +test(1914.06, identical(methods::as(dt, 'data.frame'), df)) # data.table can be used in an S4 slot dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=rnorm(10)) dt.comp <- new("S4Composition", data=dt) diff --git a/man/IDateTime.Rd b/man/IDateTime.Rd index 6854f59ae..928e732bc 100644 --- a/man/IDateTime.Rd +++ b/man/IDateTime.Rd @@ -226,13 +226,13 @@ See 'Details' in \code{\link{round}} for more information. (d <- as.IDate("2001-01-01")) # S4 coercion also works -identical(as.IDate("2001-01-01"), as("2001-01-01", "IDate")) +identical(as.IDate("2001-01-01"), methods::as("2001-01-01", "IDate")) # create ITime: (t <- as.ITime("10:45")) # S4 coercion also works -identical(as.ITime("10:45"), as("10:45", "ITime")) +identical(as.ITime("10:45"), methods::as("10:45", "ITime")) (t <- as.ITime("10:45:04")) From 7f5498a7ffb185accc2da2bf4ae0a5ebb8076539 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 15 Dec 2023 00:03:00 +0800 Subject: [PATCH 75/88] avoid with=FALSE in doc example (#5594) * avoid with=FALSE in doc example * Use Tyson suggestion --- man/setcolorder.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/setcolorder.Rd b/man/setcolorder.Rd index 71c6cd87f..d401a5a38 100644 --- a/man/setcolorder.Rd +++ b/man/setcolorder.Rd @@ -17,7 +17,7 @@ setcolorder(x, neworder=key(x), before=NULL, after=NULL) \item{before, after}{ If one of them (not both) was provided with a column name or number, \code{neworder} will be inserted before or after that column. } } \details{ - To reorder \code{data.table} columns, the idiomatic way is to use \code{setcolorder(x, neworder)}, instead of doing \code{x <- x[, neworder, with=FALSE]}. This is because the latter makes an entire copy of the \code{data.table}, which maybe unnecessary in most situations. 
\code{setcolorder} also allows column numbers instead of names for \code{neworder} argument, although we recommend using names as a good programming practice. + To reorder \code{data.table} columns, the idiomatic way is to use \code{setcolorder(x, neworder)}, instead of doing \code{x <- x[, ..neworder]} (or \code{x <- x[, neworder, with=FALSE]}). This is because the latter makes an entire copy of the \code{data.table}, which maybe unnecessary in most situations. \code{setcolorder} also allows column numbers instead of names for \code{neworder} argument, although we recommend using names as a good programming practice. } \value{ The input is modified by reference, and returned (invisibly) so it can be used in compound statements. If you require a copy, take a copy first (using \code{DT2 = copy(DT)}). See \code{?copy}. From e5774f912aed8186e739bd134e0a50929dede77f Mon Sep 17 00:00:00 2001 From: Toby Dylan Hocking Date: Thu, 14 Dec 2023 13:40:43 -0700 Subject: [PATCH 76/88] add GOVERNANCE.md document (#5772) * first draft GOVERNANCE.md * more contributions Co-authored-by: Michael Chirico * run-time signals are accessible Co-authored-by: Michael Chirico * Update GOVERNANCE.md Co-authored-by: Michael Chirico * CRAN maintainer responsible for responding Co-authored-by: Michael Chirico * ok -> OK Co-authored-by: Michael Chirico * remove comma Co-authored-by: Michael Chirico * various updates based on comments from Jan and Michael * process for changing scope * user-friendly * rbindlist * add history, other updates as suggested by reviewers * sorting -> common functions Co-authored-by: Jan Gorecki * summarize by and grouping sets Co-authored-by: Jan Gorecki * add set ops Co-authored-by: Jan Gorecki * subset -> filtering Co-authored-by: Jan Gorecki * code -> substantial * backquotes Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> * z version includes devel Co-authored-by: Benjamin Schwendinger 
<52290390+ben-schwen@users.noreply.github.com> * PR 3 bullets * ben reviewed * rm modeling Co-authored-by: Jan Gorecki * efficiency first Co-authored-by: Michael Chirico * Data->data Co-authored-by: Michael Chirico * remove other roles * different person -> someone other Co-authored-by: Michael Chirico * same as -> often will be Co-authored-by: Michael Chirico * hotfix/patch should not include new features Co-authored-by: Michael Chirico * rm repetition --------- Co-authored-by: Michael Chirico Co-authored-by: Jan Gorecki Co-authored-by: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> --- GOVERNANCE.md | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 GOVERNANCE.md diff --git a/GOVERNANCE.md b/GOVERNANCE.md new file mode 100644 index 000000000..7b74bbbbc --- /dev/null +++ b/GOVERNANCE.md @@ -0,0 +1,128 @@ +Governance for the R data.table project + +# Purpose and scope + +## This document + +The purpose of this document is to define how people related to the project work together, so that the project can expand to handle a larger and more diverse group of contributors. + +## The R package + +The purpose of the project is to maintain the R data.table package, which is guided by the following principles: + +* Time & memory efficiency +* Concise syntax (minimal redundancy in code) +* No external Imports/LinkingTo/Depends dependencies (external meaning those not maintained by the project) +* Few (if any) Suggests/Enhances dependencies +* Stable code base (strong preference for user-friendly back-compatibility with data.table itself and with old versions of R) +* Comprehensive and accessible documentation and run-time signals (errors, warnings) + +To prioritize developer time, we define what is in and out of current scope. Feature requests in issues and pull requests that are out of current scope should be closed immediately, because they are not the current priority. 
If someone wants to contribute code that is currently out of scope, they first have to make a pull request that changes the scope as defined below. + +The current scope of package functionality includes: +* data manipulation and analysis + * reshaping/pivoting + * aggregation/summarizing (via `[,, by=...]` and _grouping sets_) + * filtering rows + * all sorts of joins + * adding/updating/deleting columns + * set operations (union/rbind, intersection, difference) +* high-performance common functions (`frank`, `fcase`, `fifelse`, `transpose`, `chmatch`, `fsort`, `forder`, `uniqueN`, ...) +* common convenience functions (`%like%`, `%notin%`, `timetaken`, `substitute2`, ...) +* ordered data functions (`rleid`, `shift`, `fcoalesce`, _locf_/_nocb_ `nafill`, rolling functions) +* date and time related classes and functions (`IDate`, `ITime`) +* technical functions (`address`, `tables`, `update_dev_pkg`) +* Reading/writing of data from/to flat (plain text) files like CSV + +Functionality that is out of current scope: +* Plotting/graphics (like ggplot2) +* Manipulating out-of-memory data, e.g. data stored on disk or remote SQL DB, (as opposed e.g. to sqldf / dbplyr) +* Machine learning (like mlr3) +* Reading/writing of data from/to binary files like parquet + +# Roles + +## Contributor + +* Definition: a user who has written/commented on at least one issue, worked to label/triage issues, written a blog post, given a talk, etc. +* How this role is recognized: there is no central list of contributors / no formal recognition for contributors. + +## Project member + +* Definition: someone who has submitted at least one PR with substantial contributions, that has been merged into master. PRs improving documentation are welcome, and substantial contributions to the docs should count toward membership, but minor contributions such as spelling fixes do not count toward membership. 
+* How to obtain this role: any user/contributor can become a member by submitting a PR with substantial contributions, then having it reviewed and merged into master. Contributors who have written issues should be encouraged to submit their first PR to become a project member. Contributors can look at https://github.com/Rdatatable/data.table/labels/beginner-task for easy issues to work on. +* How this role is recognized: Members are credited via role="ctb" in DESCRIPTION (so they appear in Author list on CRAN), and they are added to https://github.com/orgs/Rdatatable/teams/project-members so they can create new branches in the Rdatatable/data.table GitHub repo. They also appear on https://github.com/Rdatatable/data.table/graphs/contributors (Contributions to master, excluding merge commits). + +## Reviewer + +* Definition: a member who has volunteered to do code reviews for some features/files. +* How to obtain this role: after one or more significant PRs to a given file, a member should be invited to add their name as a reviewer of that file in CODEOWNERS, and after that is merged into master, then they are considered a reviewer. +* How this role is recognized: same credit in DESCRIPTION as a regular member, role="ctb" (so they appear in Author list on CRAN). +* Note: having your name in CODEOWNERS does not give any special permission, but it does mean that you will be notified whenever there is a new PR with changes to that file. + +## Committer + +* Definition: permission to commit to, and merge PRs into, master branch. +* How to obtain this role: after a reviewer has a consistent history of careful reviews of others' PRs, then a current Committer should ask all other current Committers if they approve promoting the Reviewer to Committer, and it should be done if there is Consensus among active Committers. 
+* How this role is recognized: credited via role="aut" in DESCRIPTION (so they appear in Author list on CRAN), and added to https://github.com/orgs/Rdatatable/teams/maintainers which gives permission to merge PRs into master branch. + +## CRAN maintainer + +* Definition: in charge of communication with CRAN. Responsible for submitting releases to CRAN on a regular basis, and for responding to requests from CRAN. +* How to obtain this role: (1) merge into master a PR adding role="cre" to DESCRIPTION, and (2) submit updated package to CRAN (previous CRAN maintainer will have to confirm change by email to CRAN). +* How this role is recognized: credited via role="cre" in DESCRIPTION, so they appear as Maintainer on CRAN. + +# Decision-making processes + +## Definition of Consensus + +Most decisions in the project happen by Consensus, which means that no active people (typically Reviewers and/or Committers) have expressed major blocking concerns, in a public discussion (typically in a GitHub issue or pull request). In Consensus, non-response by inactive members indicates tacit agreement. + +## Pull Requests + +A pull request can be merged by any committer, if there is one approving review, and Consensus from active Reviewers and Committers. +* approving review must come from someone other than the author of the PR. +* approving review ideally comes from a reviewer of the affected files. +* approving review can and often will be by the committer who merges the PR. + +## CRAN updates + +* Regular CRAN releases should ideally occur twice per year, and can include new features. +* A hotfix/patch CRAN release should occur when CRAN asks for one, at which time the CRAN maintainer should post an issue on GitHub, and ask others to help fix/prepare the release. It should not include new features. +* Both kinds of releases should be discussed in an issue, and the release should happen only if there is Consensus among active Reviewers and Committers. 
+* It is the responsibility of the CRAN maintainer to ensure quality prior to release. This includes CRAN checks, unit tests, performance tests, etc., and these tasks can be delegated to others. + +## Changing this GOVERNANCE.md document + +There is no special process for changing this document (submit a PR +and ask for review). + +# Code of conduct + +As contributors of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities. + +We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, etc. + +Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct. + +Committers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. A person with special roles who does not follow the Code of Conduct may have their roles revoked. + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or emailing one or more of the Committers. + +This Code of Conduct is adapted from the Tidyverse code of conduct. + +# Version numbering + +data.table Version line in DESCRIPTION typically has the following meanings + +* x.y.z where x=major, y=minor, z=patch/hotfix/devel. +* x should be incremented only for major backwards-incompatible changes. +* z is even for CRAN releases, odd for GitHub development. 
+* z=99 for master branch with new features (for example 1.14.99 or 1.15.99), which eventually becomes a regular CRAN release, with incremented y and z=0 (for example 1.15.0 or 1.16.0). +* patch/hotfix development should occur on GitHub as z=odd (1.15.1) and release to CRAN as z=even (1.15.2). + +# Governance history + +Nov-Dec 2023: initial version drafted by Toby Dylan Hocking and +reviewed by Tyson Barrett, Jan Gorecki, Michael Chirico, Benjamin +Schwendinger. From 34e02f26cfecd63fe18a90e0150f7c5c2998c01f Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 15 Dec 2023 08:35:32 +0100 Subject: [PATCH 77/88] Update .Rbuildignore for governance file (#5831) --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index 9b64f6267..25e5424de 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -25,6 +25,7 @@ ^_pkgdown\.yml$ ^src/Makevars$ ^CODEOWNERS$ +^GOVERNANCE\.md$ ^\.RData$ ^\.Rhistory$ From 6c1fd839e0e0257d13f07975ead0fe6fdfee2f61 Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Tue, 19 Dec 2023 07:24:25 +0100 Subject: [PATCH 78/88] fix bit64 coercion in memrecycle (#5835) * fix bit64 coercion in memrecycle * update tests --- inst/tests/nafill.Rraw | 5 +++-- src/assign.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index d2ee592cc..98b1acbb9 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -181,14 +181,14 @@ if (test_bit64) { test(6.44, identical(coerceFill(NaN), list(NA_integer_, NaN, as.integer64(NA)))) test(6.45, identical(coerceFill(Inf), list(NA_integer_, Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.46, identical(coerceFill(-Inf), list(NA_integer_, -Inf, as.integer64(NA))), warning=c("precision lost","precision lost")) - test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), 
warning=c("precision lost","precision lost")) + test(6.47, identical(coerceFill(-(2^62)), list(NA_integer_, -(2^62), as.integer64("-4611686018427387904"))), warning="precision lost") test(6.48, identical(coerceFill(-(2^64)), list(NA_integer_, -(2^64), as.integer64(NA))), warning=c("precision lost","precision lost")) test(6.49, identical(coerceFill(x<-as.integer64(-2147483647)), list(-2147483647L, -2147483647, x))) test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) - test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning=c("precision lost","precision lost")) + test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning="precision lost") } # nan argument to treat NaN as NA in nafill, #4020 @@ -289,6 +289,7 @@ if (test_bit64) { test(10.87, coerceAs(x, 1L), 1L, output=c("double[integer64] into integer[integer]","Zero-copy coerce when assigning 'integer64' to 'integer'")) test(10.88, coerceAs(1L, x), x, output=c("integer[integer] into double[integer64]","Zero-copy coerce when assigning 'integer' to 'integer64'")) options(datatable.verbose=2L) + test(10.89, coerceAs(-2147483649, x), as.integer64(-2147483649), output="double[numeric] into double[integer64]") } if (test_nanotime) { x = nanotime(1L) diff --git a/src/assign.c b/src/assign.c index ce2c707df..cacf8206e 100644 --- a/src/assign.c +++ b/src/assign.c @@ -904,7 +904,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: switch (TYPEOF(source)) { case 
REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int64_t)val!=val), "f", "truncated (precision lost)", val) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && From 6782251bcbc38c5240bb41f3af1b5f7436dac948 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 21 Dec 2023 02:13:04 +0800 Subject: [PATCH 79/88] Change logical01 option default to TRUE (#5843) * Change logical01 option default to TRUE * fix tests --- NEWS.md | 4 +++- R/fread.R | 2 +- R/fwrite.R | 2 +- inst/tests/tests.Rraw | 36 ++++++++++++++++++------------------ man/fread.Rd | 2 +- man/fwrite.Rd | 2 +- 6 files changed, 25 insertions(+), 23 deletions(-) diff --git a/NEWS.md b/NEWS.md index 48f7c529e..4ca025d22 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,12 @@ # data.table [v1.14.99](https://github.com/Rdatatable/data.table/milestone/29) (in development) -## BREAKING CHANGE +## BREAKING CHANGES 1. `shift` and `nafill` will now raise error `input must not be matrix or array` when `matrix` or `array` is provided on input, rather than giving useless result, [#5287](https://github.com/Rdatatable/data.table/issues/5287). Thanks to @ethanbsmith for reporting. +2. The `logical01=` arguments in `fread()` and `fwrite()` change their default from `getOption("datatable.logical01", FALSE)` to `getOption("datatable.logical01", TRUE)`. That is, they change from `FALSE` to `TRUE`, but for now you can retain the old behavior by setting option `datatable.logical01`. See the discussion in v1.11.0 (May 2018) release notes where this planned deprecation was first discussed. In the future, the option will be removed. + ## NEW FEATURES 1. 
`nafill()` now applies `fill=` to the front/back of the vector when `type="locf|nocb"`, [#3594](https://github.com/Rdatatable/data.table/issues/3594). Thanks to @ben519 for the feature request. It also now returns a named object based on the input names. Note that if you are considering joining and then using `nafill(...,type='locf|nocb')` afterwards, please review `roll=`/`rollends=` which should achieve the same result in one step more efficiently. `nafill()` is for when filling-while-joining (i.e. `roll=`/`rollends=`/`nomatch=`) cannot be applied. diff --git a/R/fread.R b/R/fread.R index 8e9a11b12..9a1d9cbbd 100644 --- a/R/fread.R +++ b/R/fread.R @@ -4,7 +4,7 @@ na.strings=getOption("datatable.na.strings","NA"), stringsAsFactors=FALSE, verbo skip="__auto__", select=NULL, drop=NULL, colClasses=NULL, integer64=getOption("datatable.integer64","integer64"), col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, index=NULL, showProgress=getOption("datatable.showProgress",interactive()), data.table=getOption("datatable.fread.datatable",TRUE), -nThread=getDTthreads(verbose), logical01=getOption("datatable.logical01",FALSE), keepLeadingZeros=getOption("datatable.keepLeadingZeros",FALSE), +nThread=getDTthreads(verbose), logical01=getOption("datatable.logical01",TRUE), keepLeadingZeros=getOption("datatable.keepLeadingZeros",FALSE), yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") { if (missing(input)+is.null(file)+is.null(text)+is.null(cmd) < 3L) stopf("Used more than one of the arguments input=, file=, text= and cmd=.") diff --git a/R/fwrite.R b/R/fwrite.R index e1484b9e3..23f1605df 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -3,7 +3,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", sep2=c("","|",""), eol=if (.Platform$OS.type=="windows") "\r\n" else "\n", na="", dec=".", row.names=FALSE, col.names=TRUE, qmethod=c("double","escape"), - logical01=getOption("datatable.logical01", FALSE), # due to 
change to TRUE; see NEWS + logical01=getOption("datatable.logical01", TRUE), logicalAsInt=logical01, scipen=getOption('scipen', 0L), dateTimeAs = c("ISO","squash","epoch","write.csv"), diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 9d194346b..aa5f4860f 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7760,14 +7760,14 @@ read_table = function(str, ...) { test(1552.1, fread(str, na.strings="#N/A"), read_table(str, na.strings="#N/A")) test(1552.2, fread(str, na.strings=c("#N/A", "-999")), read_table(str, na.strings=c("#N/A", "-999"))) test(1552.3, fread(str, na.strings=c("#N/A", "-999", "+1")), read_table(str, na.strings=c("#N/A", "-999", "+1"))) -test(1552.4, fread(str, na.strings=c("#N/A", "-999", "+1", "1")), read_table(str, na.strings=c("#N/A", "-999", "+1", "1"))) # enabled by FR #2927 +test(1552.4, fread(str, na.strings=c("#N/A", "-999", "+1", "1"), logical01=FALSE), read_table(str, na.strings=c("#N/A", "-999", "+1", "1"))) # enabled by FR #2927 test(1552.5, fread(str, na.strings=c("#N/A", "-999", "FALSE")), error="NAstring <>.*boolean.*not permitted") test(1552.6, fread("A\n1.0\n2\n-", na.strings=c("-")), data.table(A=c(1.0, 2.0, NA))) test(1552.7, fread(str, na.strings=c("#N/A", "-999", "+1", "1"), logical01=TRUE), error="NAstring <<1>> and logical01=TRUE.*not permitted") str = "a,b,c\n0,1,2\n1,0,2" -test(1552.8, fread(str, na.strings = "0"), data.table(a=c(NA,1L), b=c(1L,NA), c=c(2L,2L))) -test(1552.9, fread(str, na.strings = c("0","1")), data.table(a=c(NA,NA), b=c(NA,NA), c=c(2L,2L))) +test(1552.8, fread(str, na.strings = "0", logical01=FALSE), data.table(a=c(NA,1L), b=c(1L,NA), c=c(2L,2L))) +test(1552.9, fread(str, na.strings = c("0","1"), logical01=FALSE), data.table(a=c(NA,NA), b=c(NA,NA), c=c(2L,2L))) # FR #1177: 'quote' option of 'print.data.table' DT1 <- data.table(s1=paste(" ",LETTERS[1:5],sep=""),s2=LETTERS[1:5]) @@ -10445,8 +10445,8 @@ d = tempfile("dir") test(1703.16, fread(text=c('a,b','1,2'), tmpdir=d), 
error=base_messages$cant_open_file, warning=base_messages$missing_file) dir.create(d) -test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=1L,b=2L)) -test(1703.18, fread(text=c('a,b','1,2')), data.table(a=1L, b=2L)) +test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=TRUE, b=2L)) +test(1703.18, fread(text=c('a,b','1,2')), data.table(a=TRUE, b=2L)) unlink(d) test(1703.19, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) # text= with no \n, #4689 @@ -10904,10 +10904,10 @@ test(1743.03, fread("a,b\n1,a", colClasses=c(NA, TRUE)), error="colClasses is.*l test(1743.04, fread("a,b\n1,a", colClasses=c("character", "factor")), data.table(a="1", b=factor("a"))) # and the length-1 character case; #4237 -test(1743.041, fread("a,b\n1,a", colClasses=NA_character_), data.table(a=1L, b="a")) -test(1743.042, fread("a,b\n1,a", colClasses=""), data.table(a=1L, b="a")) -test(1743.043, fread("a\n1", colClasses=NA_character_), data.table(a=1L)) -test(1743.044, fread("a\n1", colClasses=""), data.table(a=1L)) +test(1743.041, fread("a,b\n1,a", colClasses=NA_character_), data.table(a=TRUE, b="a")) +test(1743.042, fread("a,b\n1,a", colClasses=""), data.table(a=TRUE, b="a")) +test(1743.043, fread("a\n1", colClasses=NA_character_), data.table(a=TRUE)) +test(1743.044, fread("a\n1", colClasses=""), data.table(a=TRUE)) # Issue #1634: 'fread doesn't check colClasses to be valid type' # Currently using BioGenerics, which doesn't support USE.NAMES @@ -13255,8 +13255,8 @@ test(1957.3, fread("A,B\na,b\nc,d\n", stringsAsFactors=TRUE, verbose=TRUE), data # misc. 
coverage tests in fread test(1958.01, fread('\U0001f64d', encoding = 'UTF-16'), error = "Argument 'encoding' must be") -test(1958.02, fread('a,b\n1,2', nrows = NA_real_), data.table(a = 1L, b = 2L)) -test(1958.03, fread('a,b\n1,2', nrows = -1), data.table(a = 1L, b = 2L)) +test(1958.02, fread('a,b\n1,2', nrows = NA_real_), data.table(a=TRUE, b=2L)) +test(1958.03, fread('a,b\n1,2', nrows = -1), data.table(a=TRUE, b=2L)) test(1958.04, fread('a,b\n1,2', key = 1), error = 'must be a character vector naming columns') test(1958.05, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0), data.table(A=integer(), B=integer(), C=integer())) #2747 test(1958.06, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0, sep=','), data.table(A=integer(), B=integer(), C=integer())) @@ -13265,12 +13265,12 @@ test(1958.08, fread('A,B,C,D\n"a,b",4,5\n"c,d",6,7,8\n', fill=TRUE), data.table( test(1958.09, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0L), data.table(A=integer(), B=integer(), C=integer())) # nrows=0 vs 0L, 4686 test(1958.10, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0L, sep=','), data.table(A=integer(), B=integer(), C=integer())) # nrows=0 should perform a full sample to get the empty column types right as documented, #4029 -test(1958.11, fread('A,B,C,D\n1,CHAR,"CHAR",3.1', nrows=0L), data.table(A=integer(), B=character(), C=character(), D=numeric())) +test(1958.11, fread('A,B,C,D\n1,CHAR,"CHAR",3.1', nrows=0L), data.table(A=logical(), B=character(), C=character(), D=numeric())) # .. 
one different type in the middle of under 100 txt = paste(c("A,B\n1,2\n", rep("3,4\n",48), "3,4.1\n", rep("5,6\n",48)), collapse="") test(1958.12, fread(text=txt, nrows=0L), data.table(A=integer(), B=numeric())) test(1958.13, fread(text=txt, nrows=0L, skip=1L), data.table(V1=integer(), V2=numeric())) -test(1958.14, fread(text=txt, nrows=1L), data.table(A=1L, B=2L)) # B integer not numeric because sample is min(nrows,100) when nrows>=1 +test(1958.14, fread(text=txt, nrows=1L), data.table(A=TRUE, B=2L)) # B integer not numeric because sample is min(nrows,100) when nrows>=1 test(1958.15, fread(text=txt, nrows=1L, skip=1L), data.table(V1=1L, V2=2L)) test(1958.16, fread(text=txt, nrows=2L), data.table(A=c(1L,3L), B=c(2L,4L))) test(1958.17, fread(text=txt, nrows=2L, skip=1L), data.table(V1=c(1L,3L), V2=c(2L,4L))) @@ -13292,10 +13292,10 @@ eols = c("\n", "\r\n", "\r", "\n\r") for (i in 1:4) { eol = eols[i] src = paste(c("A", "B", "...", ",,,,,", "c1,c2,c3", "1,2,3"), collapse=eol) - test(1959 + (i*0.1), fread(text=src, skip=4), data.table(c1=1L, c2=2L, c3=3L)) + test(1959 + (i*0.1), fread(text=src, skip=4), data.table(c1=TRUE, c2=2L, c3=3L)) } test(1959.5, fread("A\n\nB\n\nC\n1\n", skip=2), data.table(B=c("", "C", "1"))) -test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=1L, C=2L)) +test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=TRUE, C=2L)) # empty set with constant j, #3173 DT = data.table( @@ -14519,9 +14519,9 @@ test(2013.3, DT[2], error="Column 2 ['b'] is length 4 but column 1 is length 3; ## new fread keepLeadingZeros parameter in v1.12.2 # leading zeros in both integer and float numbers are converted to character when keepLeadingZeros=TRUE test_data_single <- "0, 00, 01, 00010, 002.01\n" -test(2014.1, fread(test_data_single), data.table(0L, 0L, 1L, 10L, 2.01)) -test(2014.2, fread(test_data_single, keepLeadingZeros = FALSE), data.table(0L, 0L, 1L, 10L, 2.01)) -test(2014.3, fread(test_data_single, keepLeadingZeros 
= TRUE), data.table(0L, "00","01","00010","002.01")) +test(2014.1, fread(test_data_single), data.table(FALSE, 0L, 1L, 10L, 2.01)) +test(2014.2, fread(test_data_single, keepLeadingZeros = FALSE), data.table(FALSE, 0L, 1L, 10L, 2.01)) +test(2014.3, fread(test_data_single, keepLeadingZeros = TRUE), data.table(FALSE, "00","01","00010","002.01")) # converts whole column to character when keepLeadingZeros = TRUE and at least 1 value contains a leading zero test_data_mult <- paste0(c(sample(1:100),"0010",sample(1:100)), collapse="\n") test(2014.4, class(fread(test_data_mult, keepLeadingZeros = TRUE)[[1]]), "character") diff --git a/man/fread.Rd b/man/fread.Rd index 4456e11d1..7131eb50d 100644 --- a/man/fread.Rd +++ b/man/fread.Rd @@ -22,7 +22,7 @@ key=NULL, index=NULL, showProgress=getOption("datatable.showProgress", interactive()), data.table=getOption("datatable.fread.datatable", TRUE), nThread=getDTthreads(verbose), -logical01=getOption("datatable.logical01", FALSE), # due to change to TRUE; see NEWS +logical01=getOption("datatable.logical01", TRUE), keepLeadingZeros = getOption("datatable.keepLeadingZeros", FALSE), yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC" ) diff --git a/man/fwrite.Rd b/man/fwrite.Rd index 42ae44a29..ba4205591 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -11,7 +11,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", eol = if (.Platform$OS.type=="windows") "\r\n" else "\n", na = "", dec = ".", row.names = FALSE, col.names = TRUE, qmethod = c("double","escape"), - logical01 = getOption("datatable.logical01", FALSE), # due to change to TRUE; see NEWS + logical01 = getOption("datatable.logical01", TRUE), logicalAsInt = logical01, # deprecated scipen = getOption('scipen', 0L), dateTimeAs = c("ISO","squash","epoch","write.csv"), From eb7662b96c51d4d41ca5fdc33df5f0bd5e499870 Mon Sep 17 00:00:00 2001 From: Grant McDermott Date: Thu, 21 Dec 2023 17:37:05 -0800 Subject: [PATCH 80/88] Point to updated DuckDB benchmarks (#5846) --- 
_pkgdown.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_pkgdown.yml b/_pkgdown.yml index c69f920c0..d72231430 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,7 +52,7 @@ navbar: href: news/index.html benchmarks: text: Benchmarks - href: https://h2oai.github.io/db-benchmark + href: https://duckdblabs.github.io/db-benchmark presentations: text: Presentations href: https://github.com/Rdatatable/data.table/wiki/Presentations From 20a43afdb305f0b00ed92cfdf8f413d61dc9673b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Fri, 22 Dec 2023 10:46:15 +0800 Subject: [PATCH 81/88] Update more h2o URLs (#5851) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 19a812df4..9ac1d16a7 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ * fast and friendly delimited **file reader**: **[`?fread`](https://rdatatable.gitlab.io/data.table/reference/fread.html)**, see also [convenience features for _small_ data](https://github.com/Rdatatable/data.table/wiki/Convenience-features-of-fread) * fast and feature rich delimited **file writer**: **[`?fwrite`](https://rdatatable.gitlab.io/data.table/reference/fwrite.html)** * low-level **parallelism**: many common operations are internally parallelized to use multiple CPU threads -* fast and scalable aggregations; e.g. 100GB in RAM (see [benchmarks](https://h2oai.github.io/db-benchmark/) on up to **two billion rows**) +* fast and scalable aggregations; e.g. 100GB in RAM (see [benchmarks](https://duckdblabs.github.io/db-benchmark/) on up to **two billion rows**) * fast and feature rich joins: **ordered joins** (e.g. rolling forwards, backwards, nearest and limited staleness), **[overlapping range joins](https://github.com/Rdatatable/data.table/wiki/talks/EARL2014_OverlapRangeJoin_Arun.pdf)** (similar to `IRanges::findOverlaps`), **[non-equi joins](https://github.com/Rdatatable/data.table/wiki/talks/ArunSrinivasanUseR2016.pdf)** (i.e. 
joins using operators `>, >=, <, <=`), **aggregate on join** (`by=.EACHI`), **update on join** * fast add/update/delete columns **by reference** by group using no copies at all * fast and feature rich **reshaping** data: **[`?dcast`](https://rdatatable.gitlab.io/data.table/reference/dcast.data.table.html)** (_pivot/wider/spread_) and **[`?melt`](https://rdatatable.gitlab.io/data.table/reference/melt.data.table.html)** (_unpivot/longer/gather_) From 4333884715e7555eaf779c903f94578ef8279fcd Mon Sep 17 00:00:00 2001 From: Jan Gorecki Date: Fri, 22 Dec 2023 10:29:50 +0100 Subject: [PATCH 82/88] links to blog and fosstodon (#5852) --- README.md | 1 + _pkgdown.yml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/README.md b/README.md index 9ac1d16a7..811fb86c0 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,7 @@ DT[Petal.Width > 1.0, mean(Petal.Length), by = Species] - click the **Watch** button at the top and right of GitHub project page - read [NEWS file](https://github.com/Rdatatable/data.table/blob/master/NEWS.md) - follow [#rdatatable](https://twitter.com/hashtag/rdatatable) on twitter +- follow [#rdatatable](https://fosstodon.org/tags/rdatatable) on fosstodon - watch recent [Presentations](https://github.com/Rdatatable/data.table/wiki/Presentations) - read recent [Articles](https://github.com/Rdatatable/data.table/wiki/Articles) diff --git a/_pkgdown.yml b/_pkgdown.yml index d72231430..1f3c01ca1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -12,6 +12,8 @@ home: href: web/packages/data.table/index.html - text: CRAN-like checks href: web/checks/check_results_data.table.html + - text: Community blog + href: https://rdatatable-community.github.io/The-Raft/ navbar: structure: From eace83a40c3ae3e95c52d6ec92ef193c2ded6e9b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Fri, 22 Dec 2023 15:59:25 +0100 Subject: [PATCH 83/88] Add ubsan devcontainer (#5850) * add multiple containers add symlinks add RUN use 
echo to create .Rprofile add workdir symlink + echo add dependencies for check add char vec update echo update dockerfile fix FROM update dirs use rocker image rollback to jans clang image change docker order change flags update makevars update flags add workdir change C version clang working rm local .Rprofile * tidy spaces * update flags to #5509 * Robust deps install, use files over echo, tidy * no ARG * Wrong context? * need to COPY DESCRIPTION. Try more WORKDIR. * attempt to fix Makevars, .Rprofile * Try copying .Rprofile to /root. Remove ~/GitHub for now. * same changes to gcc container * Try adding REdtiorSupport extension by default * load cc conditionally * Don't install languageserver by default (slow) * udpate image and makevars * drop native * terminal newline * whitespace * whitespace --------- Co-authored-by: Michael Chirico Co-authored-by: Michael Chirico --- .devcontainer/Dockerfile | 4 ---- .devcontainer/devcontainer.json | 3 --- .devcontainer/r-devel-clang-ubsan/.Rprofile | 2 ++ .devcontainer/r-devel-clang-ubsan/Dockerfile | 23 +++++++++++++++++++ .devcontainer/r-devel-clang-ubsan/Makevars | 6 +++++ .../r-devel-clang-ubsan/devcontainer.json | 4 ++++ .devcontainer/r-devel-gcc/.Rprofile | 2 ++ .devcontainer/r-devel-gcc/Dockerfile | 18 +++++++++++++++ .devcontainer/r-devel-gcc/devcontainer.json | 4 ++++ 9 files changed, 59 insertions(+), 7 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/devcontainer.json create mode 100644 .devcontainer/r-devel-clang-ubsan/.Rprofile create mode 100644 .devcontainer/r-devel-clang-ubsan/Dockerfile create mode 100644 .devcontainer/r-devel-clang-ubsan/Makevars create mode 100644 .devcontainer/r-devel-clang-ubsan/devcontainer.json create mode 100644 .devcontainer/r-devel-gcc/.Rprofile create mode 100644 .devcontainer/r-devel-gcc/Dockerfile create mode 100644 .devcontainer/r-devel-gcc/devcontainer.json diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file 
mode 100644 index 0fb2392ae..000000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -FROM registry.gitlab.com/jangorecki/dockerfiles/r-devel-gcc - -RUN apt-get -qq update \ - && apt-get install -y --no-install-recommends git diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index a1447f19e..000000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "build": { "dockerfile": "Dockerfile" } -} diff --git a/.devcontainer/r-devel-clang-ubsan/.Rprofile b/.devcontainer/r-devel-clang-ubsan/.Rprofile new file mode 100644 index 000000000..344dd9f3c --- /dev/null +++ b/.devcontainer/r-devel-clang-ubsan/.Rprofile @@ -0,0 +1,2 @@ +Sys.setenv(PROJ_PATH='/workspaces/data.table') +if (file.exists('.dev/cc.R')) source('.dev/cc.R') diff --git a/.devcontainer/r-devel-clang-ubsan/Dockerfile b/.devcontainer/r-devel-clang-ubsan/Dockerfile new file mode 100644 index 000000000..edb4b245a --- /dev/null +++ b/.devcontainer/r-devel-clang-ubsan/Dockerfile @@ -0,0 +1,23 @@ +FROM rocker/r-devel-ubsan-clang:latest + +RUN apt-get -qq update \ + && apt-get install -y --no-install-recommends git + +COPY DESCRIPTION . + +# install dependencies without ubsan flags +RUN Rscript -e ' \ +read.dcf("DESCRIPTION", c("Imports", "Suggests")) |> \ + tools:::.split_dependencies() |> \ + names() |> \ + setdiff(tools:::.get_standard_package_names()$base) |> \ + install.packages() \ +' + +# setup cc() +WORKDIR /root +COPY .devcontainer/r-devel-clang-ubsan/.Rprofile . + +# set ubsan flags +WORKDIR .R +COPY .devcontainer/r-devel-clang-ubsan/Makevars . 
diff --git a/.devcontainer/r-devel-clang-ubsan/Makevars b/.devcontainer/r-devel-clang-ubsan/Makevars new file mode 100644 index 000000000..1d241cb42 --- /dev/null +++ b/.devcontainer/r-devel-clang-ubsan/Makevars @@ -0,0 +1,6 @@ +CC=clang -fsanitize=address,undefined -fno-sanitize=float-divide-by-zero -fno-sanitize=alignment -fno-omit-frame-pointer +CXX=clang++ -fsanitize=address,undefined -fno-sanitize=float-divide-by-zero -fno-sanitize=alignment -fno-omit-frame-pointer -frtti +CFLAGS=-g -O3 -Wall -pedantic +FFLAGS=-g -O2 +CXXFLAGS=-g -O3 -Wall -pedantic +LDFLAGS=-L/usr/lib/gcc/x86_64-linux-gnu/11/libubsan.so -lubsan diff --git a/.devcontainer/r-devel-clang-ubsan/devcontainer.json b/.devcontainer/r-devel-clang-ubsan/devcontainer.json new file mode 100644 index 000000000..f1d985f31 --- /dev/null +++ b/.devcontainer/r-devel-clang-ubsan/devcontainer.json @@ -0,0 +1,4 @@ +{ + "build": { "dockerfile": "Dockerfile", "context": "../.." }, + "customizations": { "vscode": { "extensions": [ "REditorSupport.r" ] } } +} diff --git a/.devcontainer/r-devel-gcc/.Rprofile b/.devcontainer/r-devel-gcc/.Rprofile new file mode 100644 index 000000000..344dd9f3c --- /dev/null +++ b/.devcontainer/r-devel-gcc/.Rprofile @@ -0,0 +1,2 @@ +Sys.setenv(PROJ_PATH='/workspaces/data.table') +if (file.exists('.dev/cc.R')) source('.dev/cc.R') diff --git a/.devcontainer/r-devel-gcc/Dockerfile b/.devcontainer/r-devel-gcc/Dockerfile new file mode 100644 index 000000000..8ddeed4d8 --- /dev/null +++ b/.devcontainer/r-devel-gcc/Dockerfile @@ -0,0 +1,18 @@ +FROM registry.gitlab.com/jangorecki/dockerfiles/r-devel-gcc + +RUN apt-get -qq update \ + && apt-get install -y --no-install-recommends git + +COPY DESCRIPTION . + +RUN Rscript -e ' \ +read.dcf("DESCRIPTION", c("Imports", "Suggests")) |> \ + tools:::.split_dependencies() |> \ + names() |> \ + setdiff(tools:::.get_standard_package_names()$base) |> \ + install.packages() \ +' + +# setup cc() +WORKDIR /root +COPY .devcontainer/r-devel-gcc/.Rprofile . 
diff --git a/.devcontainer/r-devel-gcc/devcontainer.json b/.devcontainer/r-devel-gcc/devcontainer.json new file mode 100644 index 000000000..f1d985f31 --- /dev/null +++ b/.devcontainer/r-devel-gcc/devcontainer.json @@ -0,0 +1,4 @@ +{ + "build": { "dockerfile": "Dockerfile", "context": "../.." }, + "customizations": { "vscode": { "extensions": [ "REditorSupport.r" ] } } +} From 78dee17e647e16ccd23120594ed53d3d5934a87e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sat, 23 Dec 2023 01:06:05 +0800 Subject: [PATCH 84/88] names<- retains sorting attributes (#5849) * names<- retains sorting attributes * Add tests from #5133 * Add colnames<- method, setalloccol() output to enable set() --- NAMESPACE | 1 + R/data.table.R | 8 +++++--- inst/tests/tests.Rraw | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 75b490068..c5a67095a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,6 +106,7 @@ S3method(dim, data.table) S3method(dimnames, data.table) S3method("dimnames<-", data.table) S3method("names<-", data.table) +S3method("colnames<-", data.table) S3method(duplicated, data.table) S3method(unique, data.table) S3method(merge, data.table) diff --git a/R/data.table.R b/R/data.table.R index 801482147..209e7aa37 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -2250,15 +2250,17 @@ dimnames.data.table = function(x) { x # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change. } -"names<-.data.table" = function(x,value) +"names<-.data.table" = "colnames<-.data.table" = function(x,value) { # When non data.table aware packages change names, we'd like to maintain the key. # If call is names(DT)[2]="newname", R will call this names<-.data.table function (notice no i) with 'value' already prepared to be same length as ncol - x = shallow(x) # `names<-` should not modify by reference. Related to #1015, #476 and #825. Needed for R v3.1.0+. 
TO DO: revisit + x = .shallow(x, retain.key=TRUE) # `names<-` should not modify by reference. Related to #1015, #476 and #825. Needed for R v3.1.0+. TO DO: revisit if (is.null(value)) setattr(x,"names",NULL) # e.g. plyr::melt() calls base::unname() - else + else { setnames(x,value) + setalloccol(x) + } x # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change. } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index aa5f4860f..2e49bb5f6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18111,3 +18111,35 @@ test(2238.9, NA %notin% c(1:5, NA), FALSE) # shift actionable error on matrix input #5287 test(2239.1, shift(matrix(1:10, ncol = 1)), error="consider wrapping") + +# names<- retains index and key, #5125, #5126, #5126, #5128 +DT <- data.table(a = 1, b = 2) +setkey(DT, a) +setindex(DT, b) +names(DT) <- c("c", "d") +test(2240.1, key(DT), "c") +test(2240.2, indices(DT), "d") + +# Test warnings for names<- and colnames<-, but only warnings when caller is data.table aware. 
+DT = data.table(a=1:3, b=4:6, key="a") +test(2241.01, names(DT)[1]<-"A", "A") +test(2241.02, DT, data.table(A=1:3, b=4:6, key="A")) # key wasn't retained in dev after #5084 +test(2241.03, DT[, C:=1], data.table(A=1:3, b=4:6, C=1, key="A")) # ensure over-allocated ok and no warning +test(2241.04, set(DT, j="D", value=2), data.table(A=1:3, b=4:6, C=1, D=2, key="A")) # using set() too +test(2241.05, colnames(DT)[2]<-"B", "B") +test(2241.06, DT, data.table(A=1:3, B=4:6, C=1, D=2, key="A")) +test(2241.07, set(DT, j="E", value=3), data.table(A=1:3, B=4:6, C=1, D=2, E=3, key="A")) +test(2241.08, names(DT)<-letters[1:5], letters[1:5]) # R doesn't copy *tmp* when assigning to all names +test(2241.09, DT, data.table(a=1:3, b=4:6, c=1, d=2, e=3, key="a")) +test(2241.10, set(DT, j="f", value=4), data.table(a=1:3, b=4:6, c=1, d=2, e=3, f=4, key="a")) + +# spotted by @ColeMiller1 in https://github.com/Rdatatable/data.table/pull/5133/files#r1320780851 +DT = data.table(id=1:2, x=1:2) +r = copy(DT)[, x := 5L] +test(2241.11, DT, data.table(id=1:2, x=1:2)) +test(2241.12, r, data.table(id=1:2, x=c(5L,5L))) + +DT = data.table(id=1:2, x=1:2) +r = copy(DT)[1L, x:= 5L] +test(2241.13, DT, data.table(id=1:2, x=1:2)) +test(2241.14, r, data.table(id=1:2, x=c(5L,2L))) From 92105e8c9a33efceaad83c4f4b10dcbbddb69f7f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 25 Dec 2023 23:05:51 +0800 Subject: [PATCH 85/88] Revert "Change logical01 option default to TRUE (#5843)" (#5855) This reverts commit 6782251bcbc38c5240bb41f3af1b5f7436dac948. 
--- NEWS.md | 4 +--- R/fread.R | 2 +- R/fwrite.R | 2 +- inst/tests/tests.Rraw | 36 ++++++++++++++++++------------------ man/fread.Rd | 2 +- man/fwrite.Rd | 2 +- 6 files changed, 23 insertions(+), 25 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4ca025d22..48f7c529e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,12 +2,10 @@ # data.table [v1.14.99](https://github.com/Rdatatable/data.table/milestone/29) (in development) -## BREAKING CHANGES +## BREAKING CHANGE 1. `shift` and `nafill` will now raise error `input must not be matrix or array` when `matrix` or `array` is provided on input, rather than giving useless result, [#5287](https://github.com/Rdatatable/data.table/issues/5287). Thanks to @ethanbsmith for reporting. -2. The `logical01=` arguments in `fread()` and `fwrite()` change their default from `getOption("datatable.logical01", FALSE)` to `getOption("datatable.logical01", TRUE)`. That is, they change from `FALSE` to `TRUE`, but for now you can retain the old behavior by setting option `datatable.logical01`. See the discussion in v1.11.0 (May 2018) release notes where this planned deprecation was first discussed. In the future, the option will be removed. - ## NEW FEATURES 1. `nafill()` now applies `fill=` to the front/back of the vector when `type="locf|nocb"`, [#3594](https://github.com/Rdatatable/data.table/issues/3594). Thanks to @ben519 for the feature request. It also now returns a named object based on the input names. Note that if you are considering joining and then using `nafill(...,type='locf|nocb')` afterwards, please review `roll=`/`rollends=` which should achieve the same result in one step more efficiently. `nafill()` is for when filling-while-joining (i.e. `roll=`/`rollends=`/`nomatch=`) cannot be applied. 
diff --git a/R/fread.R b/R/fread.R index 9a1d9cbbd..8e9a11b12 100644 --- a/R/fread.R +++ b/R/fread.R @@ -4,7 +4,7 @@ na.strings=getOption("datatable.na.strings","NA"), stringsAsFactors=FALSE, verbo skip="__auto__", select=NULL, drop=NULL, colClasses=NULL, integer64=getOption("datatable.integer64","integer64"), col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, index=NULL, showProgress=getOption("datatable.showProgress",interactive()), data.table=getOption("datatable.fread.datatable",TRUE), -nThread=getDTthreads(verbose), logical01=getOption("datatable.logical01",TRUE), keepLeadingZeros=getOption("datatable.keepLeadingZeros",FALSE), +nThread=getDTthreads(verbose), logical01=getOption("datatable.logical01",FALSE), keepLeadingZeros=getOption("datatable.keepLeadingZeros",FALSE), yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") { if (missing(input)+is.null(file)+is.null(text)+is.null(cmd) < 3L) stopf("Used more than one of the arguments input=, file=, text= and cmd=.") diff --git a/R/fwrite.R b/R/fwrite.R index 23f1605df..e1484b9e3 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -3,7 +3,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", sep2=c("","|",""), eol=if (.Platform$OS.type=="windows") "\r\n" else "\n", na="", dec=".", row.names=FALSE, col.names=TRUE, qmethod=c("double","escape"), - logical01=getOption("datatable.logical01", TRUE), + logical01=getOption("datatable.logical01", FALSE), # due to change to TRUE; see NEWS logicalAsInt=logical01, scipen=getOption('scipen', 0L), dateTimeAs = c("ISO","squash","epoch","write.csv"), diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 2e49bb5f6..52d8bbb80 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7760,14 +7760,14 @@ read_table = function(str, ...) 
{ test(1552.1, fread(str, na.strings="#N/A"), read_table(str, na.strings="#N/A")) test(1552.2, fread(str, na.strings=c("#N/A", "-999")), read_table(str, na.strings=c("#N/A", "-999"))) test(1552.3, fread(str, na.strings=c("#N/A", "-999", "+1")), read_table(str, na.strings=c("#N/A", "-999", "+1"))) -test(1552.4, fread(str, na.strings=c("#N/A", "-999", "+1", "1"), logical01=FALSE), read_table(str, na.strings=c("#N/A", "-999", "+1", "1"))) # enabled by FR #2927 +test(1552.4, fread(str, na.strings=c("#N/A", "-999", "+1", "1")), read_table(str, na.strings=c("#N/A", "-999", "+1", "1"))) # enabled by FR #2927 test(1552.5, fread(str, na.strings=c("#N/A", "-999", "FALSE")), error="NAstring <>.*boolean.*not permitted") test(1552.6, fread("A\n1.0\n2\n-", na.strings=c("-")), data.table(A=c(1.0, 2.0, NA))) test(1552.7, fread(str, na.strings=c("#N/A", "-999", "+1", "1"), logical01=TRUE), error="NAstring <<1>> and logical01=TRUE.*not permitted") str = "a,b,c\n0,1,2\n1,0,2" -test(1552.8, fread(str, na.strings = "0", logical01=FALSE), data.table(a=c(NA,1L), b=c(1L,NA), c=c(2L,2L))) -test(1552.9, fread(str, na.strings = c("0","1"), logical01=FALSE), data.table(a=c(NA,NA), b=c(NA,NA), c=c(2L,2L))) +test(1552.8, fread(str, na.strings = "0"), data.table(a=c(NA,1L), b=c(1L,NA), c=c(2L,2L))) +test(1552.9, fread(str, na.strings = c("0","1")), data.table(a=c(NA,NA), b=c(NA,NA), c=c(2L,2L))) # FR #1177: 'quote' option of 'print.data.table' DT1 <- data.table(s1=paste(" ",LETTERS[1:5],sep=""),s2=LETTERS[1:5]) @@ -10445,8 +10445,8 @@ d = tempfile("dir") test(1703.16, fread(text=c('a,b','1,2'), tmpdir=d), error=base_messages$cant_open_file, warning=base_messages$missing_file) dir.create(d) -test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=TRUE, b=2L)) -test(1703.18, fread(text=c('a,b','1,2')), data.table(a=TRUE, b=2L)) +test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=1L,b=2L)) +test(1703.18, fread(text=c('a,b','1,2')), data.table(a=1L, b=2L)) unlink(d) 
test(1703.19, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) # text= with no \n, #4689 @@ -10904,10 +10904,10 @@ test(1743.03, fread("a,b\n1,a", colClasses=c(NA, TRUE)), error="colClasses is.*l test(1743.04, fread("a,b\n1,a", colClasses=c("character", "factor")), data.table(a="1", b=factor("a"))) # and the length-1 character case; #4237 -test(1743.041, fread("a,b\n1,a", colClasses=NA_character_), data.table(a=TRUE, b="a")) -test(1743.042, fread("a,b\n1,a", colClasses=""), data.table(a=TRUE, b="a")) -test(1743.043, fread("a\n1", colClasses=NA_character_), data.table(a=TRUE)) -test(1743.044, fread("a\n1", colClasses=""), data.table(a=TRUE)) +test(1743.041, fread("a,b\n1,a", colClasses=NA_character_), data.table(a=1L, b="a")) +test(1743.042, fread("a,b\n1,a", colClasses=""), data.table(a=1L, b="a")) +test(1743.043, fread("a\n1", colClasses=NA_character_), data.table(a=1L)) +test(1743.044, fread("a\n1", colClasses=""), data.table(a=1L)) # Issue #1634: 'fread doesn't check colClasses to be valid type' # Currently using BioGenerics, which doesn't support USE.NAMES @@ -13255,8 +13255,8 @@ test(1957.3, fread("A,B\na,b\nc,d\n", stringsAsFactors=TRUE, verbose=TRUE), data # misc. 
coverage tests in fread test(1958.01, fread('\U0001f64d', encoding = 'UTF-16'), error = "Argument 'encoding' must be") -test(1958.02, fread('a,b\n1,2', nrows = NA_real_), data.table(a=TRUE, b=2L)) -test(1958.03, fread('a,b\n1,2', nrows = -1), data.table(a=TRUE, b=2L)) +test(1958.02, fread('a,b\n1,2', nrows = NA_real_), data.table(a = 1L, b = 2L)) +test(1958.03, fread('a,b\n1,2', nrows = -1), data.table(a = 1L, b = 2L)) test(1958.04, fread('a,b\n1,2', key = 1), error = 'must be a character vector naming columns') test(1958.05, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0), data.table(A=integer(), B=integer(), C=integer())) #2747 test(1958.06, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0, sep=','), data.table(A=integer(), B=integer(), C=integer())) @@ -13265,12 +13265,12 @@ test(1958.08, fread('A,B,C,D\n"a,b",4,5\n"c,d",6,7,8\n', fill=TRUE), data.table( test(1958.09, fread("A,B,C\n1,2,3\n3,4,5\n0,0,0\n", nrows=0L), data.table(A=integer(), B=integer(), C=integer())) # nrows=0 vs 0L, 4686 test(1958.10, fread("A,B,C\n1,2,3\n3,4,5\n0,0,100\n", nrows=0L, sep=','), data.table(A=integer(), B=integer(), C=integer())) # nrows=0 should perform a full sample to get the empty column types right as documented, #4029 -test(1958.11, fread('A,B,C,D\n1,CHAR,"CHAR",3.1', nrows=0L), data.table(A=logical(), B=character(), C=character(), D=numeric())) +test(1958.11, fread('A,B,C,D\n1,CHAR,"CHAR",3.1', nrows=0L), data.table(A=integer(), B=character(), C=character(), D=numeric())) # .. 
one different type in the middle of under 100 txt = paste(c("A,B\n1,2\n", rep("3,4\n",48), "3,4.1\n", rep("5,6\n",48)), collapse="") test(1958.12, fread(text=txt, nrows=0L), data.table(A=integer(), B=numeric())) test(1958.13, fread(text=txt, nrows=0L, skip=1L), data.table(V1=integer(), V2=numeric())) -test(1958.14, fread(text=txt, nrows=1L), data.table(A=TRUE, B=2L)) # B integer not numeric because sample is min(nrows,100) when nrows>=1 +test(1958.14, fread(text=txt, nrows=1L), data.table(A=1L, B=2L)) # B integer not numeric because sample is min(nrows,100) when nrows>=1 test(1958.15, fread(text=txt, nrows=1L, skip=1L), data.table(V1=1L, V2=2L)) test(1958.16, fread(text=txt, nrows=2L), data.table(A=c(1L,3L), B=c(2L,4L))) test(1958.17, fread(text=txt, nrows=2L, skip=1L), data.table(V1=c(1L,3L), V2=c(2L,4L))) @@ -13292,10 +13292,10 @@ eols = c("\n", "\r\n", "\r", "\n\r") for (i in 1:4) { eol = eols[i] src = paste(c("A", "B", "...", ",,,,,", "c1,c2,c3", "1,2,3"), collapse=eol) - test(1959 + (i*0.1), fread(text=src, skip=4), data.table(c1=TRUE, c2=2L, c3=3L)) + test(1959 + (i*0.1), fread(text=src, skip=4), data.table(c1=1L, c2=2L, c3=3L)) } test(1959.5, fread("A\n\nB\n\nC\n1\n", skip=2), data.table(B=c("", "C", "1"))) -test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=TRUE, C=2L)) +test(1959.6, fread("A,B\r\r\nX,Y\r\r\nB,C\r\r\n1,2", skip=4), data.table(B=1L, C=2L)) # empty set with constant j, #3173 DT = data.table( @@ -14519,9 +14519,9 @@ test(2013.3, DT[2], error="Column 2 ['b'] is length 4 but column 1 is length 3; ## new fread keepLeadingZeros parameter in v1.12.2 # leading zeros in both integer and float numbers are converted to character when keepLeadingZeros=TRUE test_data_single <- "0, 00, 01, 00010, 002.01\n" -test(2014.1, fread(test_data_single), data.table(FALSE, 0L, 1L, 10L, 2.01)) -test(2014.2, fread(test_data_single, keepLeadingZeros = FALSE), data.table(FALSE, 0L, 1L, 10L, 2.01)) -test(2014.3, fread(test_data_single, 
keepLeadingZeros = TRUE), data.table(FALSE, "00","01","00010","002.01")) +test(2014.1, fread(test_data_single), data.table(0L, 0L, 1L, 10L, 2.01)) +test(2014.2, fread(test_data_single, keepLeadingZeros = FALSE), data.table(0L, 0L, 1L, 10L, 2.01)) +test(2014.3, fread(test_data_single, keepLeadingZeros = TRUE), data.table(0L, "00","01","00010","002.01")) # converts whole column to character when keepLeadingZeros = TRUE and at least 1 value contains a leading zero test_data_mult <- paste0(c(sample(1:100),"0010",sample(1:100)), collapse="\n") test(2014.4, class(fread(test_data_mult, keepLeadingZeros = TRUE)[[1]]), "character") diff --git a/man/fread.Rd b/man/fread.Rd index 7131eb50d..4456e11d1 100644 --- a/man/fread.Rd +++ b/man/fread.Rd @@ -22,7 +22,7 @@ key=NULL, index=NULL, showProgress=getOption("datatable.showProgress", interactive()), data.table=getOption("datatable.fread.datatable", TRUE), nThread=getDTthreads(verbose), -logical01=getOption("datatable.logical01", TRUE), +logical01=getOption("datatable.logical01", FALSE), # due to change to TRUE; see NEWS keepLeadingZeros = getOption("datatable.keepLeadingZeros", FALSE), yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC" ) diff --git a/man/fwrite.Rd b/man/fwrite.Rd index ba4205591..42ae44a29 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -11,7 +11,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", eol = if (.Platform$OS.type=="windows") "\r\n" else "\n", na = "", dec = ".", row.names = FALSE, col.names = TRUE, qmethod = c("double","escape"), - logical01 = getOption("datatable.logical01", TRUE), + logical01 = getOption("datatable.logical01", FALSE), # due to change to TRUE; see NEWS logicalAsInt = logical01, # deprecated scipen = getOption('scipen', 0L), dateTimeAs = c("ISO","squash","epoch","write.csv"), From 19e1798bc80a83b3e465054b2614b64315f2fe0a Mon Sep 17 00:00:00 2001 From: HughParsonage Date: Tue, 26 Dec 2023 14:07:44 +1100 Subject: [PATCH 86/88] Closes #5510: undefined behaviour (#5832) * Fix 
UB via int conversion (#5510) Note that some double -> integer64 conversions are collapsed into the one warning, affecting tests, so these have been updated. * Ensure nan is not coerced. Closes #5510 * Fix '1' mistakenly removed from earlier commit * Re #5834 * Rename function more sensibly * Incorporate finite checks into the logic * Suspend int64 coerce tests subject to #5834 * Update CODEOWNERS * Revert CODEOWNERS * Update CODEOWNERS With extant file endings * Simplify logic for integer check * Check for NA_INTEGER superfluous * Test 6.53 should emit warning on -2^31 * C++ toolkit included by default * first pass at fix * Simplify/unite with within_int64_repres() * weak inequality --------- Co-authored-by: Michael Chirico --- .../r-devel-clang-ubsan/devcontainer.json | 7 ++++++- .devcontainer/r-devel-gcc/devcontainer.json | 7 ++++++- CODEOWNERS | 6 ++++++ inst/tests/nafill.Rraw | 2 +- inst/tests/tests.Rraw | 14 +++++++------- src/assign.c | 17 +++++++++-------- src/data.table.h | 2 ++ src/gsumm.c | 2 +- src/utils.c | 13 ++++++++++++- 9 files changed, 50 insertions(+), 20 deletions(-) diff --git a/.devcontainer/r-devel-clang-ubsan/devcontainer.json b/.devcontainer/r-devel-clang-ubsan/devcontainer.json index f1d985f31..de21d3dfe 100644 --- a/.devcontainer/r-devel-clang-ubsan/devcontainer.json +++ b/.devcontainer/r-devel-clang-ubsan/devcontainer.json @@ -1,4 +1,9 @@ { "build": { "dockerfile": "Dockerfile", "context": "../.." }, - "customizations": { "vscode": { "extensions": [ "REditorSupport.r" ] } } + "customizations": { "vscode": { + "extensions": [ + "REditorSupport.r", + "ms-vscode.cpptools-extension-pack" + ] + }} } diff --git a/.devcontainer/r-devel-gcc/devcontainer.json b/.devcontainer/r-devel-gcc/devcontainer.json index f1d985f31..de21d3dfe 100644 --- a/.devcontainer/r-devel-gcc/devcontainer.json +++ b/.devcontainer/r-devel-gcc/devcontainer.json @@ -1,4 +1,9 @@ { "build": { "dockerfile": "Dockerfile", "context": "../.." 
}, - "customizations": { "vscode": { "extensions": [ "REditorSupport.r" ] } } + "customizations": { "vscode": { + "extensions": [ + "REditorSupport.r", + "ms-vscode.cpptools-extension-pack" + ] + }} } diff --git a/CODEOWNERS b/CODEOWNERS index 5d98e0242..7d7a5ecaa 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -43,3 +43,9 @@ # .SD vignette /vignettes/datatable-sd-usage.Rmd @michaelchirico + +# assign +/src/assign.c @HughParsonage + +# utils +/src/utils.c @HughParsonage diff --git a/inst/tests/nafill.Rraw b/inst/tests/nafill.Rraw index 98b1acbb9..b72c0b506 100644 --- a/inst/tests/nafill.Rraw +++ b/inst/tests/nafill.Rraw @@ -187,7 +187,7 @@ if (test_bit64) { test(6.50, identical(coerceFill(x<-as.integer64(-2147483648)), list(NA_integer_, -2147483648, x)), warning="out-of-range") test(6.51, identical(coerceFill(x<-as.integer64(-2147483649)), list(NA_integer_, -2147483649, x)), warning="out-of-range") test(6.52, identical(coerceFill(-2147483647), list(-2147483647L, -2147483647, as.integer64("-2147483647")))) - test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648")))) + test(6.53, identical(coerceFill(-2147483648), list(NA_integer_, -2147483648, as.integer64("-2147483648"))), warning="precision lost") test(6.54, identical(coerceFill(-2147483649), list(NA_integer_, -2147483649, as.integer64("-2147483649"))), warning="precision lost") } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 52d8bbb80..35158857d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -103,7 +103,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { year = data.table::year # lubridate yearmon = data.table::yearmon # zoo yearqtr = data.table::yearqtr # zoo - + rm_all = function(env=parent.frame()) { tt = setdiff(ls(envir=env), .do_not_rm) rm(list=tt, envir=env) @@ -3886,7 +3886,7 @@ test(1133.75, DT[, new := .N, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(5, # on a new column with warning on 2nd assign 
DT[,new:=NULL] test(1133.8, DT[, new := if (.GRP==1L) 7L else 3.4, by=x], data.table(x=INT(1,1,1,1,1,2,2), new=INT(7,7,7,7,7,3,3)), - warning="Group 2 column 'new': 3.4.*double.*at RHS position 1 truncated.*precision lost.*integer") + warning="Group 2 column 'new': 3.4.*double.*at RHS position 1.*truncated.*precision lost.*integer") # Fix for FR #2496 - catch `{` in `:=` expression in `j`: DT <- data.table(x=c("A", "A", "B", "B"), val =1:4) @@ -5071,7 +5071,7 @@ dt <- data.table(a=1:3, b=c(7,8,9), c=c(TRUE, NA, FALSE), d=as.list(4:6), e=c("a test(1294.01, dt[, a := 1]$a, rep(1L, 3L)) test(1294.02, dt[, a := 1.5]$a, rep(1L, 3L), - warning="1.5.*double.*position 1 truncated.*integer.*column 1 named 'a'") + warning="1.5.*double.*position 1.*truncated.*integer.*column 1 named 'a'") test(1294.03, dt[, a := NA]$a, rep(NA_integer_, 3L)) test(1294.04, dt[, a := "a"]$a, rep(NA_integer_, 3L), warning=c("Coercing 'character' RHS to 'integer'.*column 1 named 'a'", @@ -9668,7 +9668,7 @@ nqjoin_test <- function(x, y, k=1L, test_no, mult="all") { gc() # no longer needed but left in place just in case, no harm } -dt1 = nq_fun(100L) # 400 reduced to 100, #5517 +dt1 = nq_fun(100L) # 400 reduced to 100, #5517 dt2 = nq_fun(50L) x = na.omit(dt1) y = na.omit(dt2) @@ -14365,7 +14365,7 @@ DT[,foo:=factor(c("a","b","c"))] test(2005.05, DT[2, foo:=8i], error="Can't assign to column 'foo' (type 'factor') a value of type 'complex' (not character, factor, integer or numeric)") test(2005.06, DT[2, a:=9, verbose=TRUE], notOutput="Coerced") test(2005.07, DT[2, a:=NA, verbose=TRUE], notOutput="Coerced") -test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1 truncated.*integer.*column 1 named 'a'") +test(2005.08, DT[2, a:=9.9]$a, INT(1,9,3), warning="9.9.*double.*position 1.*truncated.*integer.*column 1 named 'a'") test(2005.09, set(DT, 1L, "c", expression(x+2)), error="type 'expression' cannot be coerced to 'raw'") test(2005.10, set(DT, 1L, "d", expression(x+2)), error="type 
'expression' cannot be coerced to 'logical'") test(2005.11, set(DT, 1L, "e", expression(x+2)), error="type 'expression' cannot be coerced to 'double'") @@ -14417,7 +14417,7 @@ test(2006.2, rbindlist(list(data.table(x = as.raw(1:2), y=as.raw(5:6)), data.tab if (test_bit64) { test(2007.1, rbindlist(list( list(a=as.integer64(1), b=3L), list(a=2L, b=4L) )), data.table(a=as.integer64(1:2), b=3:4)) test(2007.2, rbindlist(list( list(a=3.4, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6), - warning="Column 1 of item 1: 3.4.*double.*position 1 truncated.*precision lost.*when assigning.*integer64.*column 1 named 'a'") + warning="Column 1 of item 1: 3.4.*double.*position 1.*truncated.*precision lost.*when assigning.*integer64.*column 1 named 'a'") test(2007.3, rbindlist(list( list(a=3.0, b=5L), list(a=as.integer64(4), b=6L) )), data.table(a=as.integer64(3:4), b=5:6)) test(2007.4, rbindlist(list( list(b=5:6), list(a=as.integer64(4), b=7L)), fill=TRUE), data.table(b=5:7, a=as.integer64(c(NA,NA,4)))) # tests writeNA of integer64 test(2007.5, rbindlist(list( list(a=INT(1,NA,-2)), list(a=as.integer64(c(3,NA))) )), data.table(a=as.integer64(c(1,NA,-2,3,NA)))) # int NAs combined with int64 NA @@ -18004,7 +18004,7 @@ test(2233.34, copy(DT)[, same_value:=value[1], by=.(by1, by2), verbose=TRUE], test(2233.35, copy(DT)[, same_value:=value[1], by=.(by2, by1), verbose=TRUE], ans, output=out) test(2233.36, copy(DT)[, same_value:=value[1], keyby=.(by2, by1), verbose=TRUE], setkey(ans,by2,by1), output=out) # similar to #5307 using integer -DT = data.table(A=INT(2,1,2,1), B=6:3, v=11:14) +DT = data.table(A=INT(2,1,2,1), B=6:3, v=11:14) test(2233.37, copy(DT)[, val:=v[1L], by=.(A,B), verbose=TRUE], copy(DT)[, val:=11:14], output=out) test(2233.38, copy(DT)[, val:=v[1L], keyby=.(A,B), verbose=TRUE], data.table(A=INT(1,1,2,2), B=INT(3,5,4,6), v=INT(14,12,13,11), val=INT(14,12,13,11), key=c("A","B")), output=out) # test from #5326 but with n=100 rather than 
n=100000; confirmed that n=100 fails tests 2233.403-405 before fix diff --git a/src/assign.c b/src/assign.c index cacf8206e..ef49fd230 100644 --- a/src/assign.c +++ b/src/assign.c @@ -239,7 +239,7 @@ int checkOverAlloc(SEXP x) error(_("getOption('datatable.alloccol') should be a number, by default 1024. But its type is '%s'."), type2char(TYPEOF(x))); if (LENGTH(x) != 1) error(_("getOption('datatable.alloc') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x)); - int ans = isInteger(x) ? INTEGER(x)[0] : (int)REAL(x)[0]; + int ans = asInteger(x); if (ans<0) error(_("getOption('datatable.alloc')==%d. It must be >=0 and not NA."), ans); return ans; @@ -742,7 +742,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con const double *sd = REAL(source); for (int i=0; inlevel)) { + // Since nlevel is an int, val < 1 || val > nlevel will deflect UB guarded against in PR #5832 + if (!ISNAN(val) && (val < 1 || val > nlevel || val != (int)val)) { error(_("Assigning factor numbers to %s. 
But %f is outside the level range [1,%d], or is not a whole number."), targetDesc(colnum, colname), val, nlevel); } } @@ -897,14 +898,14 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con switch (TYPEOF(source)) { case REALSXP: if (sourceIsI64) CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val) - else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)", val) + else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && - (ISNAN(val.r) || (R_FINITE(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) + (ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r) } break; case REALSXP: switch (TYPEOF(source)) { case REALSXP: if (targetIsI64 && !sourceIsI64) - CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int64_t)val!=val), "f", "truncated (precision lost)", val) + CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val) break; case CPLXSXP: if (targetIsI64) CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) && @@ -1004,8 +1005,8 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con case REALSXP: if (sourceIsI64) BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval) - else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval) - case CPLXSXP: BODY(Rcomplex, COMPLEX, int, ISNAN(val.r) ? 
NA_INTEGER : (int)val.r, td[i]=cval) + else BODY(double, REAL, int, (ISNAN(val) || !within_int32_repres(val)) ? NA_INTEGER : (int)val, td[i]=cval) + case CPLXSXP: BODY(Rcomplex, COMPLEX, int, (ISNAN(val.r) || !within_int32_repres(val.r)) ? NA_INTEGER : (int)val.r, td[i]=cval) default: COERCE_ERROR("integer"); // test 2005.4 } } break; @@ -1021,7 +1022,7 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con if (mc) { memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break; } else BODY(int64_t, REAL, int64_t, val, td[i]=cval) - } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval) + } else BODY(double, REAL, int64_t, within_int64_repres(val) ? val : NA_INTEGER64, td[i]=cval) case CPLXSXP: BODY(Rcomplex, COMPLEX, int64_t, ISNAN(val.r) ? NA_INTEGER64 : (int64_t)val.r, td[i]=cval) default: COERCE_ERROR("integer64"); } diff --git a/src/data.table.h b/src/data.table.h index 4c9df894c..0a6eb207a 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -232,6 +232,8 @@ SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP SEXP coalesce(SEXP x, SEXP inplace); // utils.c +bool within_int32_repres(double x); +bool within_int64_repres(double x); bool isRealReallyInt(SEXP x); SEXP isRealReallyIntR(SEXP x); SEXP isReallyReal(SEXP x); diff --git a/src/gsumm.c b/src/gsumm.c index 96d85999b..63c65d837 100644 --- a/src/gsumm.c +++ b/src/gsumm.c @@ -1164,7 +1164,7 @@ SEXP gprod(SEXP x, SEXP narmArg) { if (INHERITS(x, char_integer64)) { int64_t *ansd = (int64_t *)REAL(ans); for (int i=0; iINT64_MAX || s[i]<=INT64_MIN) ? NA_INTEGER64 : (int64_t)s[i]; + ansd[i] = (ISNAN(s[i]) || s[i]>INT64_MAX || s[i]<=INT64_MIN) ? NA_INTEGER64 : (int64_t)s[i]; } } else { double *ansd = REAL(ans); diff --git a/src/utils.c b/src/utils.c index e5e343ac9..1fba47cac 100644 --- a/src/utils.c +++ b/src/utils.c @@ -1,11 +1,22 @@ #include "data.table.h" +bool within_int32_repres(double x) { + // N.B. 
(int)2147483647.99 is not undefined behaviour since s 6.3.1.4 of the C + // standard states that behaviour is undefined only if the integral part of a + // finite value of standard floating type cannot be represented. + return R_FINITE(x) && x < 2147483648 && x > -2147483648; +} + +bool within_int64_repres(double x) { + return R_FINITE(x) && x <= (double)INT64_MAX && x >= (double)INT64_MIN; +} + static R_xlen_t firstNonInt(SEXP x) { R_xlen_t n=xlength(x), i=0; const double *dx = REAL(x); while (i Date: Wed, 27 Dec 2023 00:33:18 +0800 Subject: [PATCH 87/88] Progress some planned deprecations (#5841) * Progress some planned deprecations * remove y= from test() * update tests * vestigial * Fix implicit inter-test dependence --- NEWS.md | 5 +++++ R/fwrite.R | 2 +- R/onLoad.R | 7 +++---- R/setkey.R | 19 ++++--------------- inst/tests/tests.Rraw | 25 +++++++++---------------- man/data.table.Rd | 1 - 6 files changed, 22 insertions(+), 37 deletions(-) diff --git a/NEWS.md b/NEWS.md index 48f7c529e..3eea18c9a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -609,5 +609,10 @@ 15. Thanks to @ssh352, Václav Tlapák, Cole Miller, András Svraka and Toby Dylan Hocking for reporting and bisecting a significant performance regression in dev. This was fixed before release thanks to a PR by Jan Gorecki, [#5463](https://github.com/Rdatatable/data.table/pull/5463). +16. `key(x) <- value` is now fully deprecated (from warning to error). Use `setkey()` to set a table's key. We started warning not to use this approach in 2012, with a stronger warning starting in 2019 (1.12.2). This function will be removed in the next release. + +17. Argument `logicalAsInt` to `fwrite()` now warns. Use `logical01` instead. We stated the intention to begin removing this option in 2018 (v1.11.0). It will be upgraded to an error in the next release before being removed in the subsequent release. + +18. Option `datatable.CJ.names` no longer has any effect, after becoming `TRUE` by default in v1.12.2 (2019). 
Setting it now gives a warning, which will be dropped in the next release. # data.table v1.14.10 (Dec 2023) back to v1.10.0 (Dec 2016) has been moved to [NEWS.1.md](https://github.com/Rdatatable/data.table/blob/master/NEWS.1.md) diff --git a/R/fwrite.R b/R/fwrite.R index e1484b9e3..20f1c70f5 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -27,7 +27,7 @@ fwrite = function(x, file="", append=FALSE, quote="auto", if (!missing(logical01) && !missing(logicalAsInt)) stopf("logicalAsInt has been renamed logical01. Use logical01 only, not both.") if (!missing(logicalAsInt)) { - # TODO: warningf("logicalAsInt has been renamed logical01 for consistency with fread. It will work fine but please change to logical01 at your convenience so we can remove logicalAsInt in future.") + warningf("logicalAsInt has been renamed logical01 for consistency with fread. It works fine for now but please change to logical01 at your convenience so we can remove logicalAsInt in future.") logical01 = logicalAsInt logicalAsInt=NULL } diff --git a/R/onLoad.R b/R/onLoad.R index b4ebeafdf..9080e328f 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -90,10 +90,9 @@ eval(parse(text=paste0("options(",i,"=",opts[i],")"))) } - if (!is.null(getOption("datatable.old.bywithoutby"))) - warningf("Option 'datatable.old.bywithoutby' has been removed as warned for 2 years. It is now ignored. Please use by=.EACHI instead and stop using this option.") - if (!is.null(getOption("datatable.old.unique.by.key"))) - warningf("Option 'datatable.old.unique.by.key' has been removed as warned for 4 years. It is now ignored. Please use by=key(DT) instead and stop using this option.") + # default TRUE from v1.12.0, FALSE before. Now ineffectual. Remove this warning after 1.15.0. + if (!is.null(getOption("datatable.CJ.names"))) + warningf("Option 'datatable.CJ.names' no longer has any effect, as promised for 4 years. It is now ignored. 
Manually name `...` entries as needed if you still prefer the old behavior.") # Test R behaviour that changed in v3.1 and is now depended on x = 1L:3L diff --git a/R/setkey.R b/R/setkey.R index 5f3027a2d..84488a803 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -18,15 +18,9 @@ setindexv = function(x, cols, verbose=getOption("datatable.verbose")) { } } -# upgrade to error after Mar 2020. Has already been warning since 2012, and stronger warning in Mar 2019 (note in news for 1.12.2); #3399 +# Has been warning since 2012, with stronger warning in Mar 2019 (note in news for 1.12.2); #3399 "key<-" = function(x,value) { - warningf("key(x)<-value is deprecated and not supported. Please change to use setkey() with perhaps copy(). Has been warning since 2012 and will be an error in future.") - setkeyv(x,value) - # The returned value here from key<- is then copied by R before assigning to x, it seems. That's - # why we can't do anything about it without a change in R itself. If we return NULL (or invisible()) from this key<- - # method, the table gets set to NULL. So, although we call setkeyv(x,cols) here, and that doesn't copy, the - # returned value (x) then gets copied by R. - # So, solution is that caller has to call setkey or setkeyv directly themselves, to avoid <- dispatch and its copy. + stopf("key(x)<-value is deprecated and not supported. Please change to use setkey() with perhaps copy(). Has been warning since 2012.") } setkeyv = function(x, cols, verbose=getOption("datatable.verbose"), physical=TRUE) @@ -325,13 +319,8 @@ CJ = function(..., sorted = TRUE, unique = FALSE) # Cross Join will then produce a join table with the combination of all values (cross product). # The last vector is varied the quickest in the table, so dates should be last for roll for example l = list(...) - if (isFALSE(getOption("datatable.CJ.names", TRUE))) { # default TRUE from v1.12.0, FALSE before. 
TODO: remove option in v1.13.0 as stated in news - if (is.null(vnames <- names(l))) vnames = paste0("V", seq_len(length(l))) - else if (any(tt <- vnames=="")) vnames[tt] = paste0("V", which(tt)) - } else { - vnames = name_dots(...)$vnames - if (any(tt <- vnames=="")) vnames[tt] = paste0("V", which(tt)) - } + vnames = name_dots(...)$vnames + if (any(tt <- !nzchar(vnames))) vnames[tt] = paste0("V", which(tt)) dups = FALSE # fix for #1513 for (i in seq_along(l)) { y = l[[i]] diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 35158857d..a9000e492 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1591,11 +1591,9 @@ test(505, DT[J(a=1,b=6),sum(i.b*b),by=.EACHI]$V1, 24) # 24 now 'double' because # Test := after a key<- DT = data.table(a=3:1,b=4:6) -test(506, key(DT)<-"a", "a", warning="deprecated") -test(508, DT, data.table(a=1:3,b=6:4,key="a")) -test(509, DT[,b:=10L], data.table(a=1:3,b=10L,key="a")) -test(510, DT[,c:=11L], data.table(a=1:3,b=10L,c=11L,key="a"), # no warning between 1.8.3 and 1.12.2 due to (now removed) setmutable and SET_NAMED in setalloccol, #3729 - warning="Invalid .internal.selfref detected and fixed") # but the warning makes sense after the (deprecated) key(DT)<- above, so this warns again from 1.12.4 +test(506, key(DT)<-"a", error="deprecated") + +# tests 508, 509, 510 related to follow-up operations after key<-, which are now irrelevant # Test new functons chmatch and %chin% y=letters @@ -2950,12 +2948,8 @@ test(995, DT[CJ(c(5,3), c(5,1), sorted=FALSE)], OUT) xx <- factor(letters[1:2], ordered=TRUE) yy <- sample(2L) yy_sort = base::sort.int(yy) -old = options(datatable.CJ.names=FALSE) -test(996.01, CJ(xx, yy), setkey(data.table(rep(xx, each=2L), rep(yy_sort, 2L)))) -test(996.02, CJ(a = xx, yy), setkey(data.table(a = rep(xx, each=2L), V2 = rep(yy_sort, 2L)))) -options(datatable.CJ.names=TRUE) +# 996.01, 996.02 tested the now-ineffectual datatable.CJ.names option test(996.03, CJ(xx, yy), setkey(data.table(xx = rep(xx, 
each=2L), yy = rep(yy_sort, 2L)))) -options(old) # #3597 -- CJ properly informs sorted can't apply to list input test(996.04, CJ(list(1:2, 3L)), error = "non-atomic, which can't be sorted") @@ -7305,12 +7299,10 @@ test(1524, ans1, ans2) x = c(1, 2, 1) y = c(5, 8, 8, 4) w = c(10, 12, 12, 13) # already sorted but has dups; more efficient case to cover -options(datatable.CJ.names=FALSE) -test(1525.1, CJ(x, y, unique=TRUE), CJ(V1=c(1,2), V2=c(4,5,8))) -test(1525.2, CJ(x, z=y, unique=TRUE), ans<-data.table(V1=rep(c(1,2), each=3), z=c(4,5,8), key="V1,z")) # naming of one but not both, too -options(datatable.CJ.names=TRUE) +# tests 1525.1, 1525.2 tested the now-ineffectual datatable.CJ.names option. +ans<-data.table(V1=rep(c(1,2), each=3), z=c(4,5,8), key="V1,z") test(1525.3, CJ(x, y, unique=TRUE), CJ( x=c(1,2), y=c(4,5,8))) -test(1525.4, CJ(x, z=y, unique=TRUE), setnames(ans,c("x","z"))) +test(1525.4, CJ(x, z=y, unique=TRUE), setnames(copy(ans),c("x","z"))) test(1525.5, CJ(x, w, unique=TRUE), data.table(x=(rep(c(1,2), each=3)), w=c(10,12,13), key="x,w")) # `key` argument fix for `setDT` when input is already a `data.table`, #1169 @@ -10779,7 +10771,8 @@ test(1736.05, capture.output(fwrite(DT, sep='|', sep2=c("c(",",",")"), logical01 "2|c(15,16,17,18)|c(1.2,2.3,3.4,3.14159265358979,-9)", "3|c(7)|c(foo,bar)", "4|c(9,10)|c(TRUE,TRUE,FALSE)")) test(1736.06, capture.output(fwrite(DT, sep='|', sep2=c("{",",","}"), logicalAsInt=TRUE)), c("A|B|C", "1|{1,2,3,4,5,6,7,8,9,10}|{s,t,u,v,w}", - "2|{15,16,17,18}|{1.2,2.3,3.4,3.14159265358979,-9}", "3|{7}|{foo,bar}", "4|{9,10}|{1,1,0}")) + "2|{15,16,17,18}|{1.2,2.3,3.4,3.14159265358979,-9}", "3|{7}|{foo,bar}", "4|{9,10}|{1,1,0}"), + warning="logicalAsInt has been renamed logical01") DT = data.table(A=c("foo","ba|r","baz")) test(1736.07, capture.output(fwrite(DT,na="")), c("A","foo","ba|r","baz")) # no list column so no need to quote test(1736.08, capture.output(fwrite(DT)), c("A","foo","ba|r","baz")) diff --git a/man/data.table.Rd 
b/man/data.table.Rd index b8011b422..2e326fed0 100644 --- a/man/data.table.Rd +++ b/man/data.table.Rd @@ -344,7 +344,6 @@ setkey(kDT,x) # set a 1-column key. No quotes, for conve setkeyv(kDT,"x") # same (v in setkeyv stands for vector) v="x" setkeyv(kDT,v) # same -# key(kDT)<-"x" # copies whole table, please use set* functions instead haskey(kDT) # TRUE key(kDT) # "x" From 62b1044e48bd200f8224e412bd2c6f3b0206d56b Mon Sep 17 00:00:00 2001 From: Benjamin Schwendinger <52290390+ben-schwen@users.noreply.github.com> Date: Wed, 27 Dec 2023 17:02:54 +0100 Subject: [PATCH 88/88] remove use of rbindlist(..., use.names=FALSE, fill=TRUE) in merge (#5857) * add regression fix * add tests from #5309 * added comment about NA rectangle * emphasize subtle part about attributes too --------- Co-authored-by: Michael Chirico --- NEWS.md | 2 +- R/merge.R | 14 +++++++++++++- inst/tests/tests.Rraw | 13 +++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3eea18c9a..c06b7c699 100644 --- a/NEWS.md +++ b/NEWS.md @@ -202,7 +202,7 @@ # v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100 ``` -30. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur for testing dev and filing a bug report which was fixed before release. +30. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.`, [#5444](https://github.com/Rdatatable/data.table/issues/5444). Thanks to @sindribaldur, @dcaseykc, @fox34, @adrian-quintario and @berg-michael for testing dev and filing a bug report which was fixed before release. 
```R DT1 diff --git a/R/merge.R b/R/merge.R index cbc9b9e29..8062d91fc 100644 --- a/R/merge.R +++ b/R/merge.R @@ -96,8 +96,20 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed # Perhaps not very commonly used, so not a huge deal that the join is redone here. missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian] + # TO DO: replace by following once #5446 is merged + # if (length(missingyidx)) dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE, ignore.attr=TRUE) if (length(missingyidx)) { - dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE) + yy = y[missingyidx] + othercolsx = setdiff(nm_x, by) + if (length(othercolsx)) { + # create NA rectangle with correct types and attributes of x to cbind to y + tmp = rep.int(NA_integer_, length(missingyidx)) + # TO DO: use set() here instead.. + yy = cbind(yy, x[tmp, othercolsx, with = FALSE]) + } + # empty data.tables (nrow =0, ncol>0) doesn't skip names anymore in new rbindlist + # takes care of #24 without having to save names. This is how it should be, IMHO. + dt = rbind(dt, yy, use.names=FALSE) } } # X[Y] syntax puts JIS i columns at the end, merge likes them alongside i. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a9000e492..66292a25c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1919,6 +1919,11 @@ test(631.5, DT3, data.table(a=c(2), total=c(5), key="a")) # .. 
nrow(y)=1, i subset y with 1 and match with x test(631.6, merge(DT1,DT4,all.y=TRUE), data.table(a=c(3),total.x=c(1),total.y=c(1),key="a")) test(631.7, DT4, data.table(a=c(3), total=c(1), key="a")) +# merge columns with different attributes #5309 +x = data.table(a=1L, b=as.IDate(16801)) +y = data.table(a=2L, b=NA) +test(631.8, merge(x,y,by="a",all=TRUE), data.table(a=c(1L,2L), b.x=as.IDate(c(16801,NA)), b.y=NA, key="a")) +test(631.9, merge(y,x,by="a",all=TRUE), data.table(a=c(1L,2L), b.x=NA, b.y=as.IDate(c(16801,NA)), key="a")) test(632, merge(DT1,DT2,all=TRUE), data.table(a=c(1,2,3,4,5),total.x=c(2,NA,1,3,1),total.y=c(NA,5,1,NA,2),key="a")) test(632.1, merge(DT1,DT2,all=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all=TRUE)),a)) @@ -14335,6 +14340,14 @@ test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=T # rbindlist segfault with fill=TRUE and usenames=FALSE #5444 test(2003.6, rbindlist(list(list(1), list(2,3)), fill=TRUE, use.names=FALSE), data.table(c(1,2), c(NA, 3))) test(2003.7, rbindlist(list(list(1), list(2,factor(3))), fill=TRUE, use.names=FALSE), data.table(c(1,2), factor(c(NA, 3)))) +# rbind with different attributes #5309 +x=data.table(a=as.Date(NA)) +y=data.table(a=as.Date('2021-10-05'), b=as.POSIXct("2021-10-06 13:58:00 UTC")) +ans=data.table(a=as.Date(c(NA_character_, '2021-10-05')), b=as.POSIXct(c(NA_character_, "2021-10-06 13:58:00 UTC"))) +test(2003.81, rbind(x, y, fill=TRUE, use.names=TRUE), ans) +test(2003.82, rbind(y, x, fill=TRUE, use.names=TRUE), ans[2:1,]) +test(2003.83, rbind(x, y, fill=TRUE, use.names=FALSE), ans) +test(2003.84, rbind(y, x, fill=TRUE, use.names=FALSE), ans[2:1,]) # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111 x1 = "fa\xE7ile"