Skip to content

Commit fd2a915

Browse files
Improve merge.data.table error messages for missing keys (#6713)
* fixed the error * corrected code * introduced teh changes * introduced teh latest changes * lint-r corrected * lintr * corrected test case * indentation correction * restore unrelated changes * More changes for uniformity, style, translation * use sequential check over big check of both tables * fix up some tests * fix remaining test comparisons * further shrink diff * Further shrink diff * missed '{' * shrink diff again * remove whitespace changes --------- Co-authored-by: Michael Chirico <chiricom@google.com> Co-authored-by: Michael Chirico <michaelchirico4@gmail.com>
1 parent 2714cb4 commit fd2a915

File tree

2 files changed

+27
-22
lines changed

2 files changed

+27
-22
lines changed

R/merge.R

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
1111
by = key(x)
1212
}
1313
}
14-
x0 = length(x)==0L
15-
y0 = length(y)==0L
14+
x0 = length(x) == 0L
15+
y0 = length(y) == 0L
1616
if (x0 || y0) {
1717
if (x0 && y0)
1818
warningf("Neither of the input data.tables to join have columns.")
@@ -28,17 +28,19 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
2828
nm_y = names(y)
2929

3030
## set up 'by'/'by.x'/'by.y'
31-
if ( (!is.null(by.x) || !is.null(by.y)) && length(by.x)!=length(by.y) )
31+
if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y))
3232
stopf("`by.x` and `by.y` must be of same length.")
3333
if (!missing(by) && !missing(by.x))
34-
warningf("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.")
34+
warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
3535
if (!is.null(by.x)) {
36-
if (length(by.x)==0L || !is.character(by.x) || !is.character(by.y))
36+
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
3737
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
38-
if (!all(by.x %chin% nm_x))
39-
stopf("Elements listed in `by.x` must be valid column names in x.")
40-
if (!all(by.y %chin% nm_y))
41-
stopf("Elements listed in `by.y` must be valid column names in y.")
38+
if (!all(idx <- by.x %chin% nm_x)) {
39+
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
40+
}
41+
if (!all(idx <- by.y %chin% nm_y)) {
42+
stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx]))
43+
}
4244
by = by.x
4345
names(by) = by.y
4446
} else {
@@ -50,8 +52,12 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
5052
by = intersect(nm_x, nm_y)
5153
if (length(by) == 0L || !is.character(by))
5254
stopf("A non-empty vector of column names for `by` is required.")
53-
if (!all(by %chin% intersect(nm_x, nm_y)))
54-
stopf("Elements listed in `by` must be valid column names in x and y")
55+
if (!all(idx <- by %in% nm_x)) {
56+
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx]))
57+
}
58+
if (!all(idx <- by %in% nm_y)) {
59+
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx]))
60+
}
5561
by = unname(by)
5662
by.x = by.y = by
5763
}

inst/tests/tests.Rraw

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8567,14 +8567,13 @@ DT1 = data.table(a=1)
85678567
test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a"))
85688568
test(1601.2, merge(DT1, DT0, by="a"),
85698569
warning="Input data.table 'y' has no columns.",
8570-
error="Elements listed in `by`")
8570+
error="The following columns listed in `by` are missing from y: [a]")
85718571
test(1601.3, merge(DT0, DT1, by="a"),
85728572
warning="Input data.table 'x' has no columns.",
8573-
error="Elements listed in `by`")
8573+
error="The following columns listed in `by` are missing from x: [a]")
85748574
test(1601.4, merge(DT0, DT0, by="a"),
85758575
warning="Neither of the input data.tables to join have columns.",
8576-
error="Elements listed in `by`")
8577-
8576+
error="The following columns listed in `by` are missing from x: [a]")
85788577
# fix for #1549
85798578
d1 <- data.table(v1=1:2,x=x)
85808579
d2 <- data.table(v1=3:4)
@@ -13546,14 +13545,14 @@ test(1962.016, merge(DT1, DT2, by.x = 'a', by.y = c('a', 'V')),
1354613545
test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'),
1354713546
data.table(a = 2:3, V.x = c("a", "a"), V.y = c("b", "b"), key = 'a'),
1354813547
warning = 'Supplied both.*argument will be ignored')
13549-
test(1962.018, merge(DT1, DT2, by.x = 'z', by.y = 'a'),
13550-
error = 'Elements listed in `by.x`')
13551-
test(1962.019, merge(DT1, DT2, by.x = 'a', by.y = 'z'),
13552-
error = 'Elements listed in `by.y`')
13548+
test(1962.018, merge(DT1, DT2, by.x='z', by.y='a'),
13549+
error="The following columns listed in `by.x` are missing from x: [z]")
13550+
test(1962.019, merge(DT1, DT2, by.x='a', by.y='z'),
13551+
error="The following columns listed in `by.y` are missing from y: [z]")
1355313552
test(1962.0201, merge(DT1, DT2, by=character(0L)), ans) # was error before PR#5183
1355413553
test(1962.0202, merge(DT1, DT2, by=NULL), ans) # test explicit NULL too as missing() could be used inside merge()
13555-
test(1962.021, merge(DT1, DT2, by = 'z'),
13556-
error = 'must be valid column names in x and y')
13554+
test(1962.021, merge(DT1, DT2, by='z'),
13555+
error='The following columns listed in `by` are missing from x: [z]')
1355713556

1355813557
## frank.R
1355913558
x = c(1, 1, 2, 5, 4, 3, 4, NA, 6)
@@ -18014,7 +18013,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x
1801418013
test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5)))
1801518014
test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.")
1801618015
test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L),
18017-
merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
18016+
merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
1801818017

1801918018
# weighted.mean GForce optimized, #3977
1802018019
old = options(datatable.optimize=1L)

0 commit comments

Comments
 (0)