Skip to content

Commit

Permalink
improvement for scalability: fintersect fsetdiff #547
Browse files: browse the repository at this point in the history
  • Loading branch information
jangorecki committed Mar 9, 2016
1 parent 9741af4 commit d99c967
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions R/setops.R
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,7 +43,7 @@ setdiff_ <- function(x, y, by.x=seq_along(x), by.y=seq_along(y), use.names=FALSE
.Call("CsubsetDT", ux, which_(ix, FALSE), seq_along(ux)) # more memory efficient version of which(!ix)
}

# set operators ---
# set operators ----

funique <- function(x) {
stopifnot(is.data.table(x))
Expand All @@ -64,7 +64,8 @@ fintersect <- function(x, y, all=FALSE) {
if (all) {
x = shallow(x)[, ".seqn" := rowidv(x)]
y = shallow(y)[, ".seqn" := rowidv(y)]
x[y, .SD, .SDcols=setdiff(names(x),".seqn"), nomatch=0L, on=names(x)]
jn.on = c(".seqn",setdiff(names(x),".seqn"))
x[y, .SD, .SDcols=setdiff(names(x),".seqn"), nomatch=0L, on=jn.on]
} else {
x[funique(y), nomatch=0L, on=names(x), mult="first"]
}
Expand All @@ -84,7 +85,8 @@ fsetdiff <- function(x, y, all=FALSE) {
if (all) {
x = shallow(x)[, ".seqn" := rowidv(x)]
y = shallow(y)[, ".seqn" := rowidv(y)]
x[!y, .SD, .SDcols=setdiff(names(x),".seqn"), on=names(x)]
jn.on = c(".seqn",setdiff(names(x),".seqn"))
x[!y, .SD, .SDcols=setdiff(names(x),".seqn"), on=jn.on]
} else {
funique(x[!y, on=names(x)])
}
Expand Down

0 comments on commit d99c967

Please sign in to comment.