Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

names<- retain key and option to turn new warning on #5133

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 16 additions & 9 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -1158,7 +1158,7 @@ replace_dot_alias = function(e) {
if (verbose) {
catf("No rows match i. No new columns to add so not evaluating RHS of :=\nAssigning to 0 row subset of %d rows\n", nrow(x))
}
.Call(Cassign, x, irows, NULL, NULL, NULL) # only purpose is to write 0 to .Last.updated
x = .Call(Cassign, x, irows, NULL, NULL, NULL) # only purpose is to write 0 to .Last.updated
.global$print = address(x)
return(invisible(x))
}
Expand Down Expand Up @@ -2145,6 +2145,10 @@ tail.data.table = function(x, n=6L, ...) {
return(setalloccol(x)) # over-allocate (again). Avoid all this by using :=.
}
# TO DO: warningf("Please use DT[i,j:=value] syntax instead of DT[i,j]<-value, for efficiency. See ?':='")

# x has already been copied by R before entry to this method, and selfrefok is already false. If an error occurs
# R already changed the x in calling scope. Which is odd and difficult to deal with, hence set*.

if (!missing(i)) {
isub=substitute(i)
i = eval(.massagei(isub), x, parent.frame())
Expand Down Expand Up @@ -2183,12 +2187,12 @@ tail.data.table = function(x, n=6L, ...) {
# code did).
reinstatekey=key(x)
}
if (!selfrefok(x) || truelength(x) < ncol(x)+length(newnames)) {
x = setalloccol(x, length(x)+length(newnames)) # because [<- copies via *tmp* and main/duplicate.c copies at length but copies truelength over too
# search for one other .Call to assign in [.data.table to see how it differs
}
#if (!selfrefok(x) || truelength(x) < ncol(x)+length(newnames)) {
# x = setalloccol(x, length(x)+length(newnames)) # because [<- copies via *tmp* and main/duplicate.c copies at length but copies truelength over too
# # search for one other .Call to assign in [.data.table to see how it differs
#}
x = .Call(Cassign,copy(x),i,cols,newnames,value) # From 3.1.0, DF[2,"b"] = 7 no longer copies DF$a (so in this [<-.data.table method we need to copy)
setalloccol(x) # can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe.
#setalloccol(x) # can maybe avoid this realloc, but this is (slow) [<- anyway, so just be safe.
if (length(reinstatekey)) setkeyv(x,reinstatekey)
invisible(x)
# no copy at all if user calls directly; i.e. `[<-.data.table`(x,i,j,value)
Expand Down Expand Up @@ -2252,9 +2256,12 @@ dimnames.data.table = function(x) {

"names<-.data.table" = function(x,value)
{
# When non data.table aware packages change names, we'd like to maintain the key.
# Support names<- on a data.table the best we can so that names<- works for non-data.table-aware packages
# i) if a key or index column is having its name changed, update the sorted/index attribute which setnames() does
# ii) restore over-allocation because R drops overallocation in subassign via *tmp* before this method is reached
# If call is names(DT)[2]="newname", R will call this names<-.data.table function (notice no i) with 'value' already prepared to be same length as ncol
x = shallow(x) # `names<-` should not modify by reference. Related to #1015, #476 and #825. Needed for R v3.1.0+. TO DO: revisit
x = .shallow(x, retain.key=TRUE) # `names<-` (i.e. no subassign) should not modify by reference in R 3.1.0+. Related to #1015, #476 and #825.
# TO DO: ********** optional warning when cedta() *************
if (is.null(value))
setattr(x,"names",NULL) # e.g. plyr::melt() calls base::unname()
else
Expand Down Expand Up @@ -2509,7 +2516,7 @@ copy = function(x) {
.shallow = function(x, cols = NULL, retain.key = FALSE, unlock = FALSE) {
wasnull = is.null(cols)
cols = colnamesInt(x, cols, check_dups=FALSE)
ans = .Call(Cshallowwrapper, x, cols) # copies VECSXP only
ans = .Call(Cshallow, x, cols) # copies VECSXP only

if(retain.key){
if (wasnull) return(ans) # handle most frequent case first
Expand Down
54 changes: 35 additions & 19 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1323,6 +1323,7 @@ old = getOption("datatable.alloccol") # Test that unsetting datatable.alloccol
options(datatable.alloccol=NULL) # In this =NULL case, options() in R 3.0.0 returned TRUE rather than the old value. This R bug was fixed in R 3.1.1.
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
# This is why getOption is called first rather than just using the result of option() like elsewhere in this test file.
# TODO: simplify this test if/when R dependency >= 3.1.1
# try() outside test() otherwise test() itself fails on the timings[,:=] step which calls CsubsetDT and uses datatable.alloccol
err1 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol="1024")
err2 = try(data.table(a=1:3), silent=TRUE)
Expand All @@ -1332,12 +1333,12 @@ options(datatable.alloccol=NA_integer_)
err4 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol=-1)
err5 = try(data.table(a=1:3), silent=TRUE)
options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error
test(432.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
test(432.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
test(432.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
test(432.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
test(432.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
options(datatable.alloccol=1024L)
test(432.1, inherits(err1,"try-error") && grep("datatable.alloccol.*should be a single number but it is length 0 and type 'NULL'", err1))
test(432.2, inherits(err2,"try-error") && grep("datatable.alloccol.*should be a single number but it is length 1 and type 'character'", err2))
test(432.3, inherits(err3,"try-error") && grep("datatable.alloccol.*should be a single number but it is length 2 and type 'integer'", err3))
test(432.4, inherits(err4,"try-error") && grep("datatable.alloccol.*is -2147483648.*must be >=0 and not NA", err4))
test(432.5, inherits(err5,"try-error") && grep("datatable.alloccol.*is -1.*must be >=0 and not NA", err5))
# Repeat the tests but this time with subsetting, to ensure the validity check on option happens for those too
DT = data.table(a=1:3, b=4:6)
options(datatable.alloccol=NULL)
Expand All @@ -1350,12 +1351,12 @@ options(datatable.alloccol=NA_integer_)
err4 = try(DT[,"b"], silent=TRUE)
options(datatable.alloccol=-1)
err5 = try(DT[2,"b"], silent=TRUE)
options(datatable.alloccol=1024L) # otherwise test() itself fails in its internals with the alloc.col error
test(433.1, inherits(err1,"try-error") && grep("Has getOption[(]'datatable.alloccol'[)] somehow become unset?", err1))
test(433.2, inherits(err2,"try-error") && grep("getOption[(]'datatable.alloccol'[)] should be a number, by default 1024. But its type is 'character'.", err2))
test(433.3, inherits(err3,"try-error") && grep("is a numeric vector ok but its length is 2. Its length should be 1.", err3))
test(433.4, inherits(err4,"try-error") && grep("It must be >=0 and not NA.", err4))
test(433.5, inherits(err5,"try-error") && grep("It must be >=0 and not NA.", err5))
options(datatable.alloccol=1024L)
test(433.1, inherits(err1,"try-error") && grep("length 0 and type 'NULL'", err1))
test(433.2, inherits(err2,"try-error") && grep("length 1 and type 'character'", err2))
test(433.3, inherits(err3,"try-error") && grep("length 2 and type 'integer'", err3))
test(433.4, inherits(err4,"try-error") && grep("must be >=0 and not NA", err4))
test(433.5, inherits(err5,"try-error") && grep("must be >=0 and not NA", err5))

# simple realloc test
DT = data.table(a=1:3,b=4:6)
Expand Down Expand Up @@ -1528,16 +1529,31 @@ DT <- data.table(ID=rep(1:3, each=3), TIME=rep(1:3, 3), X=1:9, Y=10:18)
test(488, data.table(stats::reshape(DT, idvar="ID", timevar="TIME", direction="wide")),
data.table(ID=1:3,X.1=INT(1,4,7),Y.1=INT(10,13,16),X.2=INT(2,5,8),Y.2=INT(11,14,17),X.3=INT(3,6,9),Y.3=INT(12,15,18)))

# Test warnings for names<- and colnames<-, but only warnings when caller is data.table aware.
DT = data.table(a=1:3,b=4:6)
test(489, names(DT)[1]<-"A", "A") # needs R >= 3.1.0, which is stated dependency now
test(490, names(DT), c("A","b"))
test(491, colnames(DT)[2]<-"B", "B")
test(492, names(DT), c("A","B"))
# Test warnings for names<- and colnames<-, but only warnings when caller is data.table aware.
DT = data.table(a=1:3, b=4:6, key="a")
test(489.1, names(DT)[1]<-"A", "A") # needs R >= 3.1.0, which is stated dependency now
test(489.2, DT, data.table(A=1:3, b=4:6, key="A")) # key wasn't retained in dev after #5084
test(489.3, DT[, C:=1], data.table(A=1:3, b=4:6, C=1, key="A")) # ensure over-allocated ok and no warning
test(489.4, set(DT, j="D", value=2), data.table(A=1:3, b=4:6, C=1, D=2, key="A")) # using set() too
test(490.1, colnames(DT)[2]<-"B", "B")
test(490.2, DT, data.table(A=1:3, B=4:6, C=1, D=2, key="A"))
test(490.3, set(DT, j="E", value=3), data.table(A=1:3, B=4:6, C=1, D=2, E=3, key="A"))
test(491.1, names(DT)<-letters[1:5], letters[1:5]) # R doesn't copy *tmp* when assigning to all names
test(491.2, DT, data.table(a=1:3, b=4:6, c=1, d=2, e=3, key="a"))
test(491.3, set(DT, j="f", value=4), data.table(a=1:3, b=4:6, c=1, d=2, e=3, f=4, key="a"))
# spotted by @ColeMiller1 in https://github.com/Rdatatable/data.table/pull/5133/files#r1320780851
dt = data.table(id=1:2, x=1:2)
r = copy(dt)[, x := 5L]
test(492.1, dt, data.table(id=1:2, x=1:2))
test(492.2, r, data.table(id=1:2, x=c(5L,5L)))
dt = data.table(id=1:2, x=1:2)
r = copy(dt)[1L, x:= 5L]
test(492.3, dt, data.table(id=1:2, x=1:2))
test(492.4, r, data.table(id=1:2, x=c(5L,2L)))

# Check setnames out of bounds errors
test(493, setnames(DT,"foo","bar"), error="not found.*foo")
test(494, setnames(DT,3,"bar"), error="NA (or out of bounds) in 'old' at positions [1]")
test(494, setnames(DT,7,"bar"), error="NA (or out of bounds) in 'old' at positions [1]")

# Test setcolorder() and allowance of length(neworder)<length(x) in v1.10.5 (#592)
DT = data.table(a=1:2,b=3:4,c=5:6)
Expand Down
Loading
Loading