Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions R/merge.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
by = key(x)
}
}
x0 = length(x)==0L
y0 = length(y)==0L
x0 = length(x) == 0L
y0 = length(y) == 0L
if (x0 || y0) {
if (x0 && y0)
warningf("Neither of the input data.tables to join have columns.")
Expand All @@ -28,17 +28,19 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
nm_y = names(y)

## set up 'by'/'by.x'/'by.y'
if ( (!is.null(by.x) || !is.null(by.y)) && length(by.x)!=length(by.y) )
if ((!is.null(by.x) || !is.null(by.y)) && length(by.x) != length(by.y))
stopf("`by.x` and `by.y` must be of same length.")
if (!missing(by) && !missing(by.x))
warningf("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.")
warningf("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.")
if (!is.null(by.x)) {
if (length(by.x)==0L || !is.character(by.x) || !is.character(by.y))
if (length(by.x) == 0L || !is.character(by.x) || !is.character(by.y))
stopf("A non-empty vector of column names is required for `by.x` and `by.y`.")
if (!all(by.x %chin% nm_x))
stopf("Elements listed in `by.x` must be valid column names in x.")
if (!all(by.y %chin% nm_y))
stopf("Elements listed in `by.y` must be valid column names in y.")
if (!all(idx <- by.x %chin% nm_x)) {
stopf("The following columns listed in `%s` are missing from %s: %s", "by.x", "x", brackify(by.x[!idx]))
}
if (!all(idx <- by.y %chin% nm_y)) {
stopf("The following columns listed in `%s` are missing from %s: %s", "by.y", "y", brackify(by.y[!idx]))
}
by = by.x
names(by) = by.y
} else {
Expand All @@ -50,8 +52,12 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL
by = intersect(nm_x, nm_y)
if (length(by) == 0L || !is.character(by))
stopf("A non-empty vector of column names for `by` is required.")
if (!all(by %chin% intersect(nm_x, nm_y)))
stopf("Elements listed in `by` must be valid column names in x and y")
if (!all(idx <- by %in% nm_x)) {
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "x", brackify(by[!idx]))
}
if (!all(idx <- by %in% nm_y)) {
stopf("The following columns listed in `%s` are missing from %s: %s", "by", "y", brackify(by[!idx]))
}
by = unname(by)
by.x = by.y = by
}
Expand Down
21 changes: 10 additions & 11 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -8567,14 +8567,13 @@ DT1 = data.table(a=1)
test(1601.1, merge(DT1, DT1, by="a"), data.table(a=1, key="a"))
test(1601.2, merge(DT1, DT0, by="a"),
warning="Input data.table 'y' has no columns.",
error="Elements listed in `by`")
error="The following columns listed in `by` are missing from y: [a]")
test(1601.3, merge(DT0, DT1, by="a"),
warning="Input data.table 'x' has no columns.",
error="Elements listed in `by`")
error="The following columns listed in `by` are missing from x: [a]")
test(1601.4, merge(DT0, DT0, by="a"),
warning="Neither of the input data.tables to join have columns.",
error="Elements listed in `by`")

error="The following columns listed in `by` are missing from x: [a]")
# fix for #1549
d1 <- data.table(v1=1:2,x=x)
d2 <- data.table(v1=3:4)
Expand Down Expand Up @@ -13546,14 +13545,14 @@ test(1962.016, merge(DT1, DT2, by.x = 'a', by.y = c('a', 'V')),
test(1962.017, merge(DT1, DT2, by = 'V', by.x = 'a', by.y = 'a'),
data.table(a = 2:3, V.x = c("a", "a"), V.y = c("b", "b"), key = 'a'),
warning = 'Supplied both.*argument will be ignored')
test(1962.018, merge(DT1, DT2, by.x = 'z', by.y = 'a'),
error = 'Elements listed in `by.x`')
test(1962.019, merge(DT1, DT2, by.x = 'a', by.y = 'z'),
error = 'Elements listed in `by.y`')
test(1962.018, merge(DT1, DT2, by.x='z', by.y='a'),
error="The following columns listed in `by.x` are missing from x: [z]")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please remove backticks, which are for markdown, not error messages

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi Toby, sorry, I disagree.

The backticks serve to highlight that this is a code object, and not a plain English word. Without them, a reader can easily be confused into thinking there's some grammatical mistake "in by", or otherwise struggle to parse the message they're given.

Of course, we could choose some other convention (single/double quotes, e.g.), and we should try and pick one and stick to it throughout the codebase... but that's a separate issue.

Personally, these days I am using `arg=` for function arguments to highlight that (1) it's code with the backticks and (2) it's a keyword argument with =.

test(1962.019, merge(DT1, DT2, by.x='a', by.y='z'),
error="The following columns listed in `by.y` are missing from y: [z]")
test(1962.0201, merge(DT1, DT2, by=character(0L)), ans) # was error before PR#5183
test(1962.0202, merge(DT1, DT2, by=NULL), ans) # test explicit NULL too as missing() could be used inside merge()
test(1962.021, merge(DT1, DT2, by = 'z'),
error = 'must be valid column names in x and y')
test(1962.021, merge(DT1, DT2, by='z'),
error='The following columns listed in `by` are missing from x: [z]')

## frank.R
x = c(1, 1, 2, 5, 4, 3, 4, NA, 6)
Expand Down Expand Up @@ -18014,7 +18013,7 @@ test(2230.4, setDF(merge(DT, y, by="k2", incomparables=c(1, NA, 4, 5))), merge(x
test(2230.5, setDF(merge(DT, y, by="k2", incomparables=c(NA, 3, 4, 5))), merge(x, y, by="k2", incomparables=c(NA,3,4,5)))
test(2230.6, merge(DT, y, by="k2", unk=1), merge(DT, y, by="k2"), warning="Unknown argument 'unk' has been passed.")
test(2230.7, merge(DT, y, by="k2", NULL, NULL, FALSE, FALSE, FALSE, TRUE, c(".x", ".y"), TRUE, getOption("datatable.allow.cartesian"), NULL, 1L),
merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x/by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))
merge(DT, y, by="k2"), warning=c("Supplied both `by` and `by.x`/`by.y`. `by` argument will be ignored.", "Passed 1 unknown and unnamed arguments."))

# weighted.mean GForce optimized, #3977
old = options(datatable.optimize=1L)
Expand Down
Loading