-
Notifications
You must be signed in to change notification settings - Fork 1k
Closed
Description
Hi,
after upgrading to today’s dev version from GitHub, some of my merges
stopped working correctly. It took me some time to track down; it seems to be connected to fread and using two keys. With previous dev versions of 1.9.5 it worked, so I think it is connected to commit 14e39e4 or 44b1e00.
Reproduce with:
library('data.table')
(x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1, 3, 2)))
write.csv(x1, 'x1.csv', row.names = FALSE)
(x2 <- fread('x1.csv'))
y <- data.table(a2 = 1:3)
setkey(y, a2)
setkey(x1, a1, a2)
setkey(x2, a1, a2)
merge(x1, y) # OK: 3 rows
merge(x2, y) # FAIL: 2 rows
merge(x2, y, by = 'a2') # FAIL: 2 rows
setkey(x2, a2)
merge(x2, y) # OK: 3 rows
# different behaviour with character keys!
(x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c('a', 'c', 'b')))
write.csv(x1, 'x1.csv', row.names = FALSE)
(x2 <- fread('x1.csv'))
y <- data.table(a2 = c('a', 'b', 'c'))
setkey(y, a2)
setkey(x1, a1, a2)
setkey(x2, a1, a2)
merge(x1, y) # FAIL: 2 rows
merge(x2, y) # FAIL: 2 rows
merge(x2, y, by = 'a2') # FAIL: 2 rows
setkey(x2, a2)
merge(x2, y) # OK: 3 rows
This is my output:
> library('data.table')
data.table 1.9.5 For help type ?data.table or https://github.com/Rdatatable/data.table/wiki
> (x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1, 3, 2)))
a1 a2
1: a 1
2: b 3
3: c 2
> write.csv(x1, 'x1.csv', row.names = FALSE)
> (x2 <- fread('x1.csv'))
a1 a2
1: a 1
2: b 3
3: c 2
> y <- data.table(a2 = 1:3)
> setkey(y, a2)
> setkey(x1, a1, a2)
> setkey(x2, a1, a2)
> merge(x1, y) # OK: 3 rows
a2 a1
1: 1 a
2: 2 c
3: 3 b
> merge(x2, y) # FAIL: 2 rows
a2 a1
1: 1 a
2: 3 b
> merge(x2, y, by = 'a2') # FAIL: 2 rows
a2 a1
1: 1 a
2: 3 b
> setkey(x2, a2)
> merge(x2, y) # OK: 3 rows
a2 a1
1: 1 a
2: 2 c
3: 3 b
> # different behaviour with character keys!
>
> (x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c('a', 'c', 'b')))
a1 a2
1: a a
2: b c
3: c b
> write.csv(x1, 'x1.csv', row.names = FALSE)
> (x2 <- fread('x1.csv'))
a1 a2
1: a a
2: b c
3: c b
> y <- data.table(a2 = c('a', 'b', 'c'))
> setkey(y, a2)
> setkey(x1, a1, a2)
> setkey(x2, a1, a2)
> merge(x1, y) # FAIL: 2 rows
a2 a1
1: a a
2: c b
> merge(x2, y) # FAIL: 2 rows
a2 a1
1: a a
2: c b
> merge(x2, y, by = 'a2') # FAIL: 2 rows
a2 a1
1: a a
2: c b
> setkey(x2, a2)
> merge(x2, y) # OK: 3 rows
a2 a1
1: a a
2: b c
3: c b
Thanks!
Bela