Skip to content

fread + two keys causes merge problems #1258

@nr0cinu

Description

@nr0cinu

Hi,

After upgrading to today’s dev version from GitHub, some of my merges stopped working correctly. It took me some time to track down; it seems to be connected to fread and using two keys. With previous dev versions of 1.9.5 it worked, so I think it is connected to 14e39e4 or 44b1e00.

Reproduce with:

library('data.table')

(x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1, 3, 2)))
write.csv(x1, 'x1.csv', row.names = FALSE)
(x2 <- fread('x1.csv'))

y <- data.table(a2 = 1:3)

setkey(y, a2)
setkey(x1, a1, a2)
setkey(x2, a1, a2)

merge(x1, y) # OK: 3 rows
merge(x2, y) # FAIL: 2 rows
merge(x2, y, by = 'a2') # FAIL: 2 rows

setkey(x2, a2)
merge(x2, y) # OK: 3 rows


# different behaviour with character keys!

(x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c('a', 'c', 'b')))
write.csv(x1, 'x1.csv', row.names = FALSE)
(x2 <- fread('x1.csv'))

y <- data.table(a2 = c('a', 'b', 'c'))

setkey(y, a2)
setkey(x1, a1, a2)
setkey(x2, a1, a2)

merge(x1, y) # FAIL: 2 rows
merge(x2, y) # FAIL: 2 rows
merge(x2, y, by = 'a2') # FAIL: 2 rows

setkey(x2, a2)
merge(x2, y) # OK: 3 rows

This is my output:

> library('data.table')
data.table 1.9.5  For help type ?data.table or https://github.com/Rdatatable/data.table/wiki

> (x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c(1, 3, 2)))
   a1 a2
1:  a  1
2:  b  3
3:  c  2

> write.csv(x1, 'x1.csv', row.names = FALSE)

> (x2 <- fread('x1.csv'))
   a1 a2
1:  a  1
2:  b  3
3:  c  2

> y <- data.table(a2 = 1:3)

> setkey(y, a2)

> setkey(x1, a1, a2)

> setkey(x2, a1, a2)

> merge(x1, y) # OK: 3 rows
   a2 a1
1:  1  a
2:  2  c
3:  3  b

> merge(x2, y) # FAIL: 2 rows
   a2 a1
1:  1  a
2:  3  b

> merge(x2, y, by = 'a2') # FAIL: 2 rows
   a2 a1
1:  1  a
2:  3  b

> setkey(x2, a2)

> merge(x2, y) # OK: 3 rows
   a2 a1
1:  1  a
2:  2  c
3:  3  b

> # different behaviour with character keys!
> 
> (x1 <- data.table(a1 = c('a', 'b', 'c'), a2 = c('a', 'c', 'b')))
   a1 a2
1:  a  a
2:  b  c
3:  c  b

> write.csv(x1, 'x1.csv', row.names = FALSE)

> (x2 <- fread('x1.csv'))
   a1 a2
1:  a  a
2:  b  c
3:  c  b

> y <- data.table(a2 = c('a', 'b', 'c'))

> setkey(y, a2)

> setkey(x1, a1, a2)

> setkey(x2, a1, a2)

> merge(x1, y) # FAIL: 2 rows
   a2 a1
1:  a  a
2:  c  b

> merge(x2, y) # FAIL: 2 rows
   a2 a1
1:  a  a
2:  c  b

> merge(x2, y, by = 'a2') # FAIL: 2 rows
   a2 a1
1:  a  a
2:  c  b

> setkey(x2, a2)

> merge(x2, y) # OK: 3 rows
   a2 a1
1:  a  a
2:  b  c
3:  c  b

Thanks!
Bela

Metadata

Metadata

Labels

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions