From 263958f76b6b320bcce0eb79b0a87e3ae7b55759 Mon Sep 17 00:00:00 2001 From: Jim Hester Date: Tue, 15 Nov 2022 04:15:23 -0500 Subject: [PATCH] Fix memory leak in fread (#4710) --- NEWS.md | 5 +++++ R/onAttach.R | 4 ++++ inst/tests/benchmark.Rraw | 8 ++++++++ src/data.table.h | 4 ++++ src/freadR.c | 8 +++++--- src/init.c | 11 +++++++++++ 6 files changed, 37 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 68f826791..bee06b42e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,10 +2,15 @@ # data.table [v1.14.6](https://github.com/Rdatatable/data.table/milestone/27?closed=1) +## BUG FIXES + +1. `fread()` could leak memory, [#3292](https://github.com/Rdatatable/data.table/issues/3292). Thanks to @patrickhowerter for reporting, and Jim Hester for the fix. The fix requires R 3.4.0 or later. Loading `data.table` in earlier versions now highlights this issue on startup, asks users to upgrade R, and warns that we intend to upgrade `data.table`'s dependency from 8 year old R 3.1.0 (April 2014) to 5 year old R 3.4.0 (April 2017). + ## NOTES 1. Test 1962.098 has been modified to pass latest changes to `POSIXt` in R-devel. + # data.table [v1.14.4](https://github.com/Rdatatable/data.table/milestone/26?closed=1) (17 Oct 2022) ## NOTES diff --git a/R/onAttach.R b/R/onAttach.R index 4ccddfc09..a9f297569 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -32,6 +32,10 @@ else paste0("This is ", Sys.info()["sysname"], ". This warning should not normally occur on Windows or Linux where OpenMP is turned on by data.table's configure script by passing -fopenmp to the compiler. If you see this warning on Windows or Linux, please file a GitHub issue."), "\n**********") + if (.Call(CbeforeR340)) { + # not base::getRversion()<"3.4.0" in case the user upgrades R but does not reinstall data.table; a reasonable mistake since data.table would seem to be the latest version + packageStartupMessage("**********\nThis data.table installation was compiled for R < 3.4.0 (Apr 2017) where fread() is known to leak memory. Please upgrade R and reinstall data.table to fix the leak. Maintaining and testing code branches to support very old versions increases development time so please do upgrade R. We intend to bump data.table's dependency from 8 year old R 3.1.0 (Apr 2014) to 5 year old R 3.4.0 (Apr 2017).\n**********") + } } } diff --git a/inst/tests/benchmark.Rraw b/inst/tests/benchmark.Rraw index 1c8bf146a..971fdfe1c 100644 --- a/inst/tests/benchmark.Rraw +++ b/inst/tests/benchmark.Rraw @@ -168,3 +168,11 @@ test(1742.5, substring(x,nchar(x)-10,nchar(x)), c("50,28,95,76","62,87,23,40")) # Add scaled-up non-ASCII forder test 1896 +# fread leak, #3292 +dummy = rep("1\t2\t3\t4\t5", 10000000) +writeLines(dummy, "out.tsv") +start = gc()["Vcells",2] +for (i in 1:10) data.table::fread("out.tsv") +end = gc()["Vcells",2] +test(, end/start < 1.05) + diff --git a/src/data.table.h b/src/data.table.h index 094cbf72b..2d810478e 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -7,6 +7,9 @@ # define USE_RINTERNALS // #3301 # define DATAPTR_RO(x) ((const void *)DATAPTR(x)) #endif +#if !defined(R_VERSION) || R_VERSION < R_Version(3, 4, 0) +# define SET_GROWABLE_BIT(x) // #3292 +#endif #include #define SEXPPTR_RO(x) ((const SEXP *)DATAPTR_RO(x)) // to avoid overhead of looped STRING_ELT and VECTOR_ELT #include // for uint64_t rather than unsigned long long @@ -314,6 +317,7 @@ SEXP nqRecreateIndices(SEXP, SEXP, SEXP, SEXP, SEXP); SEXP fsort(SEXP, SEXP); SEXP inrange(SEXP, SEXP, SEXP, SEXP); SEXP hasOpenMP(void); +SEXP beforeR340(void); SEXP uniqueNlogical(SEXP, SEXP); SEXP dllVersion(void); SEXP initLastUpdated(SEXP); diff --git a/src/freadR.c b/src/freadR.c index 29c75db72..6e39d2e26 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -526,9 +526,11 @@ void setFinalNrow(size_t nrow) { if (length(DT)) { if (nrow == dtnrows) return; - for (int i=0; i