Skip to content

Commit 49ddf38

Browse files
authored
Merge branch 'master' into constCastMacro
2 parents a09a51b + 5483d48 commit 49ddf38

File tree

14 files changed

+103
-23
lines changed

14 files changed

+103
-23
lines changed

.gitlab-ci.yml

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ variables:
1313
TZ: "UTC" ## to avoid 'Failed to create bus connection' from timedatectl via Sys.timezone() on Docker with R 3.4.
1414
## Setting TZ for all GLCI jobs to isolate them from timezone. We could have a new GLCI job to test under
1515
## a non-UTC timezone, although, that's what we do routinely in dev.
16-
R_REL_VERSION: "4.4" # only raise when RTOOLS for REL is available
17-
R_REL_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.4.1/R-4.4.1-win.exe"
18-
R_DEV_VERSION: "4.5"
16+
R_REL_VERSION: "4.5" # only raise when RTOOLS for REL is available
17+
R_REL_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.5.0/R-4.5.0-win.exe"
18+
R_DEV_VERSION: "4.6"
1919
R_DEV_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/R-devel-win.exe"
20-
R_OLD_VERSION: "4.3"
21-
R_OLD_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.3.3/R-4.3.3-win.exe"
22-
R_REL_MAC_BIN: "https://cloud.r-project.org/bin/macosx/big-sur-arm64/base/R-4.4.1-arm64.pkg"
23-
R_OLD_MAC_BIN: "https://cloud.r-project.org/bin/macosx/big-sur-arm64/base/R-4.3.3-arm64.pkg"
20+
R_OLD_VERSION: "4.4"
21+
R_OLD_WIN_BIN: "https://cloud.r-project.org/bin/windows/base/old/4.4.3/R-4.4.3-win.exe"
22+
R_REL_MAC_BIN: "https://cloud.r-project.org/bin/macosx/big-sur-arm64/base/R-4.5.0-arm64.pkg"
23+
R_OLD_MAC_BIN: "https://cloud.r-project.org/bin/macosx/big-sur-arm64/base/R-4.4.3-arm64.pkg"
2424

2525
stages:
2626
- dependencies
@@ -48,6 +48,9 @@ mirror-packages:
4848
cache:
4949
paths:
5050
- bus/$CI_JOB_NAME/cran
51+
variables:
52+
# TODO(R-ancient>=3.5.0): remove this; let it save PACKAGES.rds in version-3 format
53+
R_DEFAULT_SERIALIZE_VERSION: 2
5154
script:
5255
- echo 'source(".ci/ci.R")' >> .Rprofile
5356
- mkdir -p bus/$CI_JOB_NAME/cran/src/contrib
@@ -104,21 +107,23 @@ build:
104107
# force all suggests
105108
# flags: gcc -O3 -flto=auto -fno-common -Wunused-result
106109
# tests for compilation warnings
110+
# runs the --as-cran check, including the URL checks
107111
test-lin-rel:
108112
<<: *test-lin
109113
image: registry.gitlab.com/rdatatable/dockerfiles/r-data.table
110114
variables:
111-
_R_CHECK_COMPILATION_FLAGS_KNOWN_: "-Wvla"
112-
_R_CHECK_CRAN_INCOMING_: "FALSE"
113-
_R_CHECK_CRAN_INCOMING_REMOTE_: "FALSE"
114115
_R_CHECK_FORCE_SUGGESTS_: "TRUE"
115116
OPENBLAS_MAIN_FREE: "1"
116117
script:
117118
- *install-deps
118119
- echo 'CFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
119120
- echo 'CXXFLAGS=-g -O3 -flto=auto -fno-common -fopenmp -Wall -Wvla -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
120-
- R CMD check $(ls -1t data.table_*.tar.gz | head -n 1)
121-
- (! grep "warning:" data.table.Rcheck/00install.out)
121+
- echo '_R_CHECK_COMPILATION_FLAGS_KNOWN_=-Wvla' >> ~/.Renviron
122+
- |
123+
res1=0; R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) || res1=$?
124+
res2=0; (! grep "warning:" data.table.Rcheck/00install.out) || res2=$?  # non-zero res2 iff a compiler warning was logged during install
125+
res3=0; Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 2 NOTEs")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 2 NOTEs"), " (CRAN incoming feasibility, non-API) but ", shQuote(l))' || res3=$?
126+
if [ $res1 -ne 0 ] || [ $res2 -ne 0 ] || [ $res3 -ne 0 ]; then exit 1; fi
122127
123128
## vanilla minimal
124129
# no zlib
@@ -151,9 +156,10 @@ test-lin-rel-cran:
151156
- *install-deps
152157
- echo 'CFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' > ~/.R/Makevars
153158
- echo 'CXXFLAGS=-g -O2 -fopenmp -Wall -pedantic -fstack-protector-strong -D_FORTIFY_SOURCE=2' >> ~/.R/Makevars
154-
- R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1)
155-
- >-
156-
Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: OK")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: OK"), " but ", shQuote(l)) else q("no")'
159+
- |
160+
res1=0; R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1) || res1=$?
161+
res2=0; Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); if (!identical(l, "Status: 1 NOTE")) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote("Status: 1 NOTE"), " (non-API) but ", shQuote(l))' || res2=$?
162+
if [ $res1 -ne 0 ] || [ $res2 -ne 0 ]; then exit 1; fi
157163
158164
## R-devel on Linux gcc strict
159165
# R built with --enable-strict-barrier --disable-long-double
@@ -175,7 +181,7 @@ test-lin-dev-gcc-strict-cran:
175181
- R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1)
176182
- (! grep "warning:" data.table.Rcheck/00install.out)
177183
- >-
178-
Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 3 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, installed package size, non-API calls) but ", shQuote(l)) else q("no")'
184+
Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 3 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, non-API calls, V8 package) but ", shQuote(l)) else q("no")'
179185
180186
## R-devel on Linux clang
181187
# R compiled with clang, flags removed: -flto=auto -fopenmp
@@ -198,7 +204,7 @@ test-lin-dev-clang-cran:
198204
- R CMD check --as-cran $(ls -1t data.table_*.tar.gz | head -n 1)
199205
- (! grep "warning:" data.table.Rcheck/00install.out)
200206
- >-
201-
Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 2 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, non-API calls) but ", shQuote(l)) else q("no")'
207+
Rscript -e 'l=tail(readLines("data.table.Rcheck/00check.log"), 1L); notes<-"Status: 3 NOTEs"; if (!identical(l, notes)) stop("Last line of ", shQuote("00check.log"), " is not ", shQuote(notes), " (size of tarball, non-API calls, V8 package) but ", shQuote(l)) else q("no")'
202208
203209
# stated dependency on R
204210
test-lin-ancient-cran:
@@ -290,11 +296,14 @@ test-win-old:
290296
before_script:
291297
- curl -O $R_BIN
292298
- sudo installer -pkg "$(ls -1t R-*-arm64.pkg | head -n 1)" -target /
299+
- sudo Rscript -e "source('https://mac.R-project.org/bin/install.R'); install.libs('gettext')"
293300
- *install-deps
294301
- cp $(ls -1t bus/build/data.table_*.tar.gz | head -n 1) .
295302
script:
296303
- R CMD check --no-manual $(ls -1t data.table_*.tar.gz | head -n 1)
297304
- R CMD INSTALL --build $(ls -1t data.table_*.tar.gz | head -n 1)
305+
- >-
306+
tail -n 1 data.table.Rcheck/00check.log | grep -q -e '^Status: [0-9]* NOTEs*$' -e '^Status: OK$'
298307
after_script:
299308
- mkdir -p bus/$CI_JOB_NAME
300309
- '[ -d data.table.Rcheck ] && mv data.table.Rcheck bus/$CI_JOB_NAME/'

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,5 +102,6 @@ Authors@R: c(
102102
person("Aljaž", "Sluga", role="ctb"),
103103
person("Bill", "Evans", role="ctb"),
104104
person("Reino", "Bruner", role="ctb"),
105-
person(comment=c(github="@badasahog"), role="ctb")
105+
person(comment=c(github="@badasahog"), role="ctb"),
106+
person("Vinit", "Thakur", role="ctb")
106107
)

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838

3939
9. Joins to extended data.frames, e.g. `x[i, col := x.col1 + i.col2]` where `i` is a `tbl`, can use the `x.` and `i.` prefix forms, [#6998](https://github.com/Rdatatable/data.table/issues/6998). Thanks @MichaelChirico for the bug and PR.
4040

41+
10. On a heavily loaded machine, a `forder` thread could try to perform a zero-length copy from a null pointer, which was de-facto harmless but is against the C standard and was caught by additional CRAN checks, [#7051](https://github.com/Rdatatable/data.table/issues/7051). Thanks to @helske for the report and @aitap for the PR.
42+
43+
11. Out of sample type bumps now respect `integer64=` selection, [#7032](https://github.com/Rdatatable/data.table/pull/7032).
44+
4145
### NOTES
4246

4347
1. Continued work to remove non-API C functions, [#6180](https://github.com/Rdatatable/data.table/issues/6180). Thanks Ivan Krylov for the PRs and for writing a clear and concise guide about the R API: https://aitap.codeberg.page/R-api/.

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@
1919

2020
The `data.table` project uses a [custom governance agreement](./GOVERNANCE.md)
2121
and is fiscally sponsored by [NumFOCUS](https://numfocus.org/). Consider making
22-
a [tax-deductible donation](https://numfocus.org/donate-to-data-table) to help the project
22+
a [tax-deductible donation](https://numfocus.org/project/data-table) to help the project
2323
pay for developer time, professional services, travel, workshops, and a variety of other needs.
2424

2525
<div align="center">
2626
<a href="https://numfocus.org/project/data-table">
27-
<img height="60px"
27+
<img width="25%"
2828
src="https://raw.githubusercontent.com/numfocus/templates/master/images/numfocus-logo.png"
2929
align="center">
3030
</a>

inst/tests/tests.Rraw

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3070,6 +3070,16 @@ if (test_bit64) test(1017.1, fread(f), copy(DT)[,A:=as.integer64(A)])
30703070
test(1017.2, fread(f, integer64="character"), DT)
30713071
unlink(f)
30723072

3073+
DT = data.table(a=seq(10000), b="100")
3074+
DT[111, b := "1000000000000"]
3075+
f = tempfile()
3076+
fwrite(DT, f)
3077+
3078+
test(1017.3, fread(f, integer64="numeric"), fread(f, colClasses=c("integer", "numeric")))
3079+
test(1017.4, fread(f, integer64="character"), fread(f, colClasses=c("integer", "character")))
3080+
3081+
unlink(f)
3082+
30733083
# ERANGE errno handled, #106 #4165
30743084
test(1018.1, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313)))
30753085
test(1018.2, identical(fread("1.46761e+313\n"), data.table(V1=1.46761e+313)))

man/IDateTime.Rd

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,11 @@ hours. Because \code{ITime} is stored in seconds, you can add it to a
137137
\code{POSIXct} object, but you should not add it to a \code{Date}
138138
object.
139139

140+
For \code{as.ITime}, note that the string \code{"24:00:00"} is parsed as \code{"00:00:00"}.
141+
This is because the conversion uses \code{as.POSIXct}, which treats \code{"24:00:00"} as midnight of the next day.
142+
This differs from ISO 8601 (which allows \code{"24:00:00"} to represent end-of-day), but aligns with POSIX standards.
143+
To represent end-of-day intervals, use \code{"23:59:59"} or arithmetic (e.g., \code{as.ITime("23:59:59") + 1L}).
144+
140145
We also provide S3 methods to convert to and from \code{Date} and \code{POSIXct}.
141146

142147
\code{ITime} is time zone-agnostic. When converting \code{ITime} and
@@ -245,6 +250,12 @@ identical(as.ITime("10:45"), methods::as("10:45", "ITime"))
245250
246251
(t <- as.ITime("10:45:04", format = "\%H:\%M:\%S"))
247252
253+
# "24:00:00" is parsed as "00:00:00"
254+
as.ITime("24:00:00")
255+
256+
# Workaround for end-of-day: add 1 second to "23:59:59"
257+
as.ITime("23:59:59") + 1L
258+
248259
as.POSIXct("2001-01-01") + as.ITime("10:45")
249260
250261
datetime <- seq(as.POSIXct("2001-01-01"), as.POSIXct("2001-01-03"), by = "5 hour")

man/data.table.Rd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
174174
\item For convenience during interactive scenarios, it is also possible to use \code{.()} syntax as \code{X[Y, on=.(a, b)]}.
175175
\item From v1.9.8, (non-equi) joins using binary operators \code{>=, >, <=, <} are also possible, e.g., \code{X[Y, on=c("x>=a", "y<=b")]}, or for interactive use as \code{X[Y, on=.(x>=a, y<=b)]}.
176176
}
177-
See examples as well as \href{../doc/datatable-secondary-indices-and-auto-indexing.html}{\code{vignette("datatable-secondary-indices-and-auto-indexing")}}.
177+
Note that providing \code{on} is \emph{required} for \code{X[Y]} joins when \code{X} is unkeyed. See examples as well as \href{../doc/datatable-secondary-indices-and-auto-indexing.html}{\code{vignette("datatable-secondary-indices-and-auto-indexing")}}.
178178
}
179179
180180
\item{env}{ List or an environment, passed to \code{\link{substitute2}} for substitution of parameters in \code{i}, \code{j} and \code{by} (or \code{keyby}). Use \code{verbose} to preview constructed expressions. For more details see \href{../doc/datatable-programming.html}{\code{vignette("datatable-programming")}}. }
@@ -298,7 +298,9 @@ DT[, sum(v), by=x][order(x)] # same but by chaining expressions together
298298

299299
# fast ad hoc row subsets (subsets as joins)
300300
DT["a", on="x"] # same as x == "a" but uses binary search (fast)
301+
# NB: without on=, i.e. plain DT["a"], this requires DT to be keyed!
301302
DT["a", on=.(x)] # same, for convenience, no need to quote every column
303+
# NB: works regardless of whether or not DT is keyed!
302304
DT[.("a"), on="x"] # same
303305
DT[x=="a"] # same, single "==" internally optimised to use binary search (fast)
304306
DT[x!="b" | y!=3] # not yet optimized, currently vector scan subset

man/last.Rd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ of \code{xts::first} is deployed. }
1818
\item{\dots}{ Not applicable for \code{data.table} first/last. Any arguments here
1919
are passed through to \code{xts}'s first/last. }
2020
}
21+
\note{
22+
For zero-length vectors, \code{first(x)} and \code{last(x)} mimic \code{head(x, 1)} and \code{tail(x, 1)} by returning an empty vector instead of \code{NA}. However, unlike \code{head()}/\code{tail()} and base R subsetting (e.g., \code{x[1]}), they do not preserve attributes like names.
23+
}
2124
\value{
2225
If no other arguments are supplied it depends on the type of \code{x}. The first/last item
2326
of a vector or list. The first/last row of a \code{data.frame} or \code{data.table}.

man/setkey.Rd

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,14 @@ The sort is \emph{stable}; i.e., the order of ties (if any) is preserved.
7474
For character vectors, \code{data.table} takes advantage of R's internal global string cache, also exported as \code{\link{chorder}}.
7575
}
7676

77+
\section{Keys vs. Indices}{
78+
Setting a key (with \code{setkey}) and an index (with \code{setindex}) are similar, but have very important distinctions.
79+
80+
Setting a key physically reorders the data in RAM.
81+
82+
Setting an index computes the sort order, but instead of applying the reordering, simply \emph{stores} this computed ordering. That means that multiple indices can coexist, and that the original row order is preserved.
83+
}
84+
7785
\section{Good practice}{
7886
In general, it's good practice to use column names rather than numbers. This is
7987
why \code{setkey} and \code{setkeyv} only accept column names.

src/forder.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,8 @@ static void flush(void) {
128128
if (!retgrp) return;
129129
int me = omp_get_thread_num();
130130
int n = gs_thread_n[me];
131+
// normally doesn't happen, can be encountered under heavy load, #7051
132+
if (!n) return; // # nocov
131133
int newn = gs_n + n;
132134
if (gs_alloc < newn) {
133135
gs_alloc = (newn < nrow/3) ? (1+(newn*2)/4096)*4096 : nrow;

0 commit comments

Comments
 (0)