From 3130d51f90e04c0bfdf7f5877a18a6f5566ab803 Mon Sep 17 00:00:00 2001 From: oliverbock Date: Fri, 9 Apr 2021 22:35:02 +1000 Subject: [PATCH] Use old-style SPSS compression by default for .sav (SPSS) files, since almost nobody uses the inefficient uncompressed format. The newer ZCompressed format (compress=TRUE) is even better, but is much less used and less supported by third-party tools. (#544) Co-authored-by: numbers --- NEWS.md | 4 ++++ R/haven.R | 2 +- man/read_spss.Rd | 2 +- src/DfWriter.cpp | 2 ++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index a584b38f..017c816e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,6 +15,10 @@ * Updated to ReadStat 1.1.5. Most importantly this includes support for SAS binary compression. +* `write_sav(..., compress=FALSE)` now uses SPSS Bytecode compression instead + of the rarely-used uncompressed mode. `compress=TRUE` continues to use the new + (and not universally supported, but more compact) ZLIB format. (@oliverbock, #544) + # haven 2.3.1 * Add missing methods so `median()`, `quantile()` and `summary()` work diff --git a/R/haven.R b/R/haven.R index ec34e910..35ca60d8 100644 --- a/R/haven.R +++ b/R/haven.R @@ -217,7 +217,7 @@ read_por <- function(file, user_na = FALSE, col_select = NULL, skip = 0, n_max = #' @export #' @rdname read_spss #' @param compress If `TRUE`, will compress the file, resulting in a `.zsav` -#' file. +#' file. Otherwise the `.sav` file will be bytecode compressed. write_sav <- function(data, path, compress = FALSE) { validate_sav(data) write_sav_(data, normalizePath(path, mustWork = FALSE), compress = compress) diff --git a/man/read_spss.Rd b/man/read_spss.Rd index f7bed388..abb1ce30 100644 --- a/man/read_spss.Rd +++ b/man/read_spss.Rd @@ -91,7 +91,7 @@ to enforce them.} \item{path}{Path to a file where the data will be written.} \item{compress}{If \code{TRUE}, will compress the file, resulting in a \code{.zsav} -file.} +file. Otherwise the \code{.sav} file will be bytecode compressed.} } \value{ A tibble, data frame variant with nice defaults. diff --git a/src/DfWriter.cpp b/src/DfWriter.cpp index 52cef9c6..64bdaa14 100644 --- a/src/DfWriter.cpp +++ b/src/DfWriter.cpp @@ -403,6 +403,8 @@ void write_sav_(cpp11::list data, cpp11::strings path, bool compress) { Writer writer(HAVEN_SAV, data, path); if (compress) writer.setCompression(READSTAT_COMPRESS_BINARY); + else + writer.setCompression(READSTAT_COMPRESS_ROWS); writer.write(); }