diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c966a5efc..058c7db3b 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17127,3 +17127,14 @@ test(2151, dcast(DT, 1 ~ a, value.var='survmean'), data.table('.'='.', s=1L, x=2 y = person(given='Joel', family='Mossong') test(2152, copy(y), y) +# .N and .GRP special statics copied correctly when placed as a vector in a list column; part of PR#4655 +# see comments in anySpecialStatic() at the top of dogroups.c +# .SD, .I and .BY are covered by previous tests +DT = data.table(x=c(1L,2L,2L), y=1:3) +test(2153.1, DT[, .(list(.N)), by=x], data.table(x=1:2, V1=as.list(1:2))) +test(2153.2, DT[, .(list(.GRP)), by=x], data.table(x=1:2, V1=as.list(1:2))) +test(2153.3, ans<-DT[, .(list(.NGRP)), by=x], data.table(x=1:2, V1=list(2L,2L))) +test(2153.4, address(ans$V1[[1L]]), address(ans$V1[[2L]])) # .NGRP doesn't change group to group so the same object can be referenced many times unlike .N and .GRP +test(2153.5, DT[, .(list(c(0L,.N,0L))), by=x], # c() here will create new object so this is ok anyway; i.e. address(.N) is not present in j's result + data.table(x=1:2, V1=list(c(0L,1L,0L), c(0L,2L,0L)))) + diff --git a/src/dogroups.c b/src/dogroups.c index 603752ad9..9b7b4cbc9 100644 --- a/src/dogroups.c +++ b/src/dogroups.c @@ -4,15 +4,17 @@ #include static bool anySpecialStatic(SEXP x) { - // Special refers to .SD, .BY and .I - // Static because these are like C static arrays which are the same memory for each group; i.e., dogroups - // creates .SD for the largest group once up front, overwriting the contents for each group + // Special refers to special symbols .BY, .I, .N, and .GRP; see special-symbols.Rd + // Static because these are like C static arrays which are the same memory for each group; e.g., dogroups + // creates .SD for the largest group once up front, overwriting the contents for each group. Their + // value changes across group but not their memory address. (.NGRP is also special static but its value + // is constant across groups so that's excluded here.) // This works well, other than a relatively rare case when two conditions are both true : // 1) the j expression returns a group column as-is without doing any aggregation // 2) that result is placed in a list column result // The list column result can then incorrectly contain the result for the last group repeated for all - // groups because the list column ends up holding a pointer to these special static vectors: SD, BY, I. - // To illustrate, consider a simplified test 1341: + // groups because the list column ends up holding a pointer to these special static vectors. + // See test 2153, and to illustrate here, consider a simplified test 1341 // > DT // x y // @@ -89,7 +91,9 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX // TO DO: check this check above. N = PROTECT(findVar(install(".N"), env)); nprotect++; // PROTECT for rchk + SET_TRUELENGTH(N, -1); // marker for anySpecialStatic(); see its comments GRP = PROTECT(findVar(install(".GRP"), env)); nprotect++; + SET_TRUELENGTH(GRP, -1); // marker for anySpecialStatic(); see its comments iSD = PROTECT(findVar(install(".iSD"), env)); nprotect++; // 1-row and possibly no cols (if no i variables are used via JIS) xSD = PROTECT(findVar(install(".xSD"), env)); nprotect++; R_len_t maxGrpSize = 0; @@ -430,6 +434,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX SEXP this = VECTOR_ELT(BY, i); SET_TRUELENGTH(this, length(this)); // might be 0 or 1; see its allocVector above } + SET_TRUELENGTH(N, 1); + SET_TRUELENGTH(GRP, 1); if (verbose) { if (nblock[0] && nblock[1]) error(_("Internal error: block 0 [%d] and block 1 [%d] have both run"), nblock[0], nblock[1]); // # nocov int w = nblock[1]>0;