-
Notifications
You must be signed in to change notification settings - Fork 982
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First version of the fwrite function #580
- Loading branch information
Showing
7 changed files
with
279 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
fwrite <- function(dt, file.path, append = FALSE, quote = TRUE, | ||
sep = ",", eol = "\n", col.names = TRUE, qmethod = "double", | ||
block.size = 10000) { | ||
|
||
# validate arguments | ||
stopifnot(is.data.frame(dt)) | ||
stopifnot(ncol(dt) > 0) | ||
stopifnot(identical(unique(names(dt)), names(dt))) | ||
|
||
stopifnot(length(quote) == 1 && class(quote) == "logical") | ||
stopifnot(length(sep) == 1 && class(sep) == "character" && nchar(sep) == 1) | ||
stopifnot(length(eol) == 1 && class(eol) == "character") | ||
stopifnot(length(qmethod) == 1 && qmethod %in% c("double", "escape")) | ||
stopifnot(length(col.names) == 1 && class(col.names) == "logical") | ||
stopifnot(length(append) == 1 && class(append) == "logical") | ||
stopifnot(length(block.size) == 1 && block.size > 0) | ||
|
||
quoted_cols <- rep(quote, ncol(dt)) | ||
|
||
# special case: single-column data.frame, doing dt[block_begin:block_end,] | ||
# for such data frame gives a vector | ||
if (!is.data.table(dt) && ncol(dt) == 1) dt <- as.data.table(dt) | ||
|
||
# write header row separately for correct quoting of row names | ||
if (col.names && !append) { | ||
.Call(Cwritefile, as.list(names(dt)), file.path, sep, eol, quoted_cols, qmethod == "escape", append) | ||
append <- TRUE | ||
} | ||
|
||
# handle empty dt | ||
if (nrow(dt) == 0) return() | ||
|
||
# determine from column types, which ones should be quoted | ||
if (quote) { | ||
column_types <- lapply(dt, class) | ||
quoted_cols <- column_types %in% c('character', 'factor') | ||
} | ||
|
||
# write in blocks of given size to avoid generating full copies | ||
# of columns in memory | ||
block_begin <- 1 | ||
|
||
repeat { | ||
block_end <- min(block_begin+(block.size-1), nrow(dt)) | ||
|
||
dt_block <- dt[c(block_begin:block_end),] | ||
|
||
# convert data.frame row block to a list of columns | ||
col_list <- lapply(dt_block, function(column) { | ||
str_col <- as.character(column) | ||
str_col[is.na(str_col)] <- '' | ||
str_col | ||
}) | ||
|
||
.Call(Cwritefile, col_list, file.path, sep, eol, quoted_cols, qmethod == "escape", append) | ||
|
||
if (block_end == nrow(dt)) break | ||
|
||
append <- TRUE | ||
block_begin <- block_end+1 | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
\name{fwrite} | ||
\alias{fwrite} | ||
\title{Fast CSV writer} | ||
\description{ | ||
Similar to \code{write.table} but faster and more limited in features. | ||
} | ||
\usage{ | ||
fwrite(dt, file.path, append = FALSE, quote = TRUE, sep = ",", eol = "\n", | ||
col.names = TRUE, qmethod = "double", block.size = 10000) | ||
} | ||
\arguments{ | ||
\item{dt}{The \code{data.table} or \code{data.frame} to be write} | ||
\item{file.path}{Output file name} | ||
\item{append}{If \code{TRUE}, the file is opened in append mode and column names (header row) are not written.} | ||
\item{quote}{If \code{TRUE}, all columns of character and factor types, as well as all column names, will be surrounded by double quotes. If \code{FALSE}, nothing is quoted, even if this would break the CSV (the column contents are not checked for separator characters).} | ||
\item{sep}{The separator between columns} | ||
\item{eol}{Line separator} | ||
\item{col.names}{A logical value indicating if the column names (header row) should be written} | ||
\item{qmethod}{A character string specifying how to deal with embedded double quote characters when quoting strings. Must be one of "escape", in which case the quote character (as well as the backslash character) is escaped in C style by a backslash, or "double" (default), in which case it is doubled.} | ||
\item{block.size}{The output is written in blocks, each of which contains at most this number of rows. This is to avoid making large copies in memory. Can be used to tweak performance and memory usage.} | ||
} | ||
\details{ | ||
\code{NA} values are always represented by empty strings, quoted for character and factor columns if \code{quote=TRUE}. | ||
|
||
The speed-up compared to \code{write.csv} depends on the parameters and column types. This version should be significantly faster if the table consists mainly of character columns. In addition, improved performance can be expected if one can make do with \code{quote=FALSE}. | ||
} | ||
\seealso{ \code{\link[utils]{write.csv}} } | ||
\examples{ | ||
\dontrun{ | ||
|
||
fwrite(data.table(first=c(1,2), second=c(NA, 'foo"bar')), "table.csv") | ||
|
||
# table.csv contains: | ||
|
||
# "first","second" | ||
# "1","" | ||
# "2","foo""bar" | ||
} | ||
} | ||
\keyword{ data } | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#include <R.h> | ||
#include <errno.h> | ||
#include <Rinternals.h> | ||
|
||
void writefile(SEXP list_of_columns, | ||
SEXP filename, | ||
SEXP col_sep_exp, | ||
SEXP row_sep_exp, | ||
SEXP quote_cols, | ||
SEXP qmethod_escape_exp, | ||
SEXP append) { | ||
|
||
int error_number = 0; | ||
int qmethod_escape = *LOGICAL(qmethod_escape_exp); | ||
|
||
errno = 0; /* clear flag possibly set by previous errors */ | ||
|
||
char col_sep = *CHAR(STRING_ELT(col_sep_exp, 0)); | ||
const char *row_sep = CHAR(STRING_ELT(row_sep_exp, 0)); | ||
const char QUOTE_CHAR = '"'; | ||
const char ESCAPE_CHAR = '\\'; | ||
|
||
/* open input file in correct mode */ | ||
const char *open_mode = "wb"; | ||
if (*LOGICAL(append)) open_mode = "ab"; | ||
FILE *f = fopen(CHAR(STRING_ELT(filename, 0)), open_mode); | ||
if (f == NULL) goto end; | ||
|
||
R_xlen_t ncols = LENGTH(list_of_columns); | ||
R_xlen_t nrows = LENGTH(VECTOR_ELT(list_of_columns, 0)); | ||
|
||
for (R_xlen_t row_i = 0; row_i < nrows; ++row_i) { | ||
for (int col_i = 0; col_i < ncols; ++col_i) { | ||
|
||
if (col_i > 0) fputc(col_sep, f); | ||
|
||
int quote = LOGICAL(quote_cols)[col_i]; | ||
|
||
if (quote) fputc(QUOTE_CHAR, f); | ||
for (const char *ch = CHAR(STRING_ELT(VECTOR_ELT(list_of_columns, col_i), row_i)); *ch != '\0'; ++ch) { | ||
if (quote) { | ||
if (*ch == QUOTE_CHAR) { | ||
if (qmethod_escape) fputc(ESCAPE_CHAR, f); | ||
else fputc(QUOTE_CHAR, f); /* qmethod = "double" */ | ||
} | ||
if (qmethod_escape && *ch == ESCAPE_CHAR) fputc(ESCAPE_CHAR, f); | ||
} | ||
fputc(*ch, f); | ||
} | ||
if (quote) fputc(QUOTE_CHAR, f); | ||
} | ||
if (fputs(row_sep, f) < 0) goto end; | ||
} | ||
|
||
end: | ||
error_number = errno; | ||
if (f != NULL) fclose(f); | ||
if (error_number) error(strerror(errno)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Great tests. Just small note that data.table's test() function already has an
error=
argument to test for expected error and that the error contains that text. Will replace fwrite_expect_error with using test(...,error="...").