diff --git a/R/mergelist.R b/R/mergelist.R
index 435ee0a60..52ce68493 100644
--- a/R/mergelist.R
+++ b/R/mergelist.R
@@ -125,6 +125,116 @@ dtmerge = function(x, i, on, how, mult, join.many, void=FALSE, verbose) {
   return(list(ans=ans, irows=irows, xrows=xrows))
 }
 
+# atomic join between two tables
+mergepair = function(lhs, rhs, on, how, mult, lhs.cols=names(lhs), rhs.cols=names(rhs), copy=TRUE, join.many=TRUE, verbose=FALSE) {
+  semianti = how=="semi" || how=="anti"
+  innerfull = how=="inner" || how=="full"
+  {
+    if (how!="cross") {
+      if (is.null(on)) {
+        if (how=="left" || semianti) on = key(rhs)
+        else if (how=="right") on = key(lhs)
+        else if (innerfull) on = onkeys(key(lhs), key(rhs))
+        if (is.null(on))
+          stopf("'on' is missing and necessary key is not present")
+      }
+      if (any(bad.on <- !on %chin% names(lhs)))
+        stopf("'on' argument specify columns to join [%s] that are not present in LHS table [%s]", brackify(on[bad.on]), brackify(names(lhs)))
+      if (any(bad.on <- !on %chin% names(rhs)))
+        stopf("'on' argument specify columns to join [%s] that are not present in RHS table [%s]", brackify(on[bad.on]), brackify(names(rhs)))
+    } else if (is.null(on)) {
+      on = character() ## cross join only
+    }
+  } ## on
+  {
+    if (how!="right") {
+      jnfm = lhs; fm.cols = lhs.cols; jnto = rhs; to.cols = rhs.cols
+    } else {
+      jnfm = rhs; fm.cols = rhs.cols; jnto = lhs; to.cols = lhs.cols
+    }
+  } ## join-to and join-from tables and columns (right outer join swap)
+
+  ## ensure symmetric join for inner|full join, apply mult on both tables, bmerge do only 'x' table
+  cp.i = FALSE ## copy marker of out.i
+  if ((innerfull) && !is.null(mult) && (mult=="first" || mult=="last")) {
+    jnfm = fdistinct(jnfm, on=on, mult=mult, cols=fm.cols, copy=FALSE) ## might not copy when already unique by 'on'
+    cp.i = nrow(jnfm)!=nrow(lhs) ## nrow(lhs) bc how='inner|full' so jnfm=lhs
+  } else if (how=="inner" && (is.null(mult) || mult=="error")) { ## we do this branch only to raise error from bmerge, we cannot use forder to just find duplicates because those duplicates might not have matching rows in another table, full join checks mult='error' during two non-void bmerges
+    dtmerge(x=jnfm, i=jnto, on=on, how=how, mult=mult, verbose=verbose, join.many=join.many, void=TRUE)
+  }
+
+  ## binary merge
+  ans = dtmerge(x=jnto, i=jnfm, on=on, how=how, mult=mult, verbose=verbose, join.many=join.many)
+
+  ## make i side
+  out.i = if (is.null(ans$irows))
+    .shallow(jnfm, cols=someCols(jnfm, fm.cols, keep=on, retain.order=semianti), retain.key=TRUE)
+  else
+    .Call(CsubsetDT, jnfm, ans$irows, someCols(jnfm, fm.cols, keep=on, retain.order=semianti))
+  cp.i = cp.i || !is.null(ans$irows)
+
+  ## make x side
+  if (semianti) {
+    out.x = list(); cp.x = TRUE
+  } else {
+    out.x = if (is.null(ans$xrows)) ## as of now xrows cannot be NULL #4409 thus nocov below
+      internal_error("dtmerge()$xrows returned NULL, #4409 been resolved but related code has not been updated?") #.shallow(jnto, cols=someCols(jnto, to.cols, drop=on), retain.key=TRUE) # nocov ## as of now nocov does not make difference r-lib/covr#279
+    else
+      .Call(CsubsetDT, jnto, ans$xrows, someCols(jnto, to.cols, drop=on))
+    cp.x = !is.null(ans$xrows)
+    ## ensure no duplicated column names in merge results
+    if (any(dup.i<-names(out.i) %chin% names(out.x)))
+      stopf("merge result has duplicated column names, use 'cols' argument or rename columns in 'l' tables, duplicated column(s): %s", brackify(names(out.i)[dup.i]))
+  }
+
+  ## stack i and x
+  if (how!="full") {
+    if (!cp.i && copy) out.i = copy(out.i)
+    #if (!cp.x && copy) out.x = copy(out.x) ## as of now cp.x always TRUE, search for #4409 here
+    out = .Call(Ccbindlist, list(out.i, out.x), FALSE)
+    if (how=="right") setcolorder(out, neworder=c(on, names(out.x))) ## arrange columns: i.on, x.cols, i.cols
+  } else { # how=="full"
+    ## we made left join side above, proceed to right join side, so swap tbls
+    jnfm = rhs; fm.cols = rhs.cols; jnto = lhs; to.cols = lhs.cols
+
+    cp.r = FALSE
+    if (!is.null(mult) && (mult=="first" || mult=="last")) {
+      jnfm = fdistinct(jnfm, on=on, mult=mult, cols=fm.cols, copy=FALSE)
+      cp.r = nrow(jnfm)!=nrow(rhs) ## nrow(rhs) bc jnfm=rhs
+    } ## mult=="error" check was made on one side already, below we do on the second side, test 101.43
+
+    ## binary merge anti join
+    bns = dtmerge(x=jnto, i=jnfm, on=on, how="anti", mult=if (!is.null(mult) && mult!="all") mult, verbose=verbose, join.many=join.many)
+
+    ## make anti join side
+    out.r = if (is.null(bns$irows))
+      .shallow(jnfm, cols=someCols(jnfm, fm.cols, keep=on), retain.key=TRUE) ## retain.key is used only in the edge case when !nrow(out.i)
+    else
+      .Call(CsubsetDT, jnfm, bns$irows, someCols(jnfm, fm.cols, keep=on))
+    cp.r = cp.r || !is.null(bns$irows)
+
+    ## short circuit to avoid rbindlist to empty sets and retains keys
+    if (!nrow(out.r)) { ## possibly also !nrow(out.i)
+      if (!cp.i && copy) out.i = copy(out.i)
+      #if (!cp.x && copy) out.x = copy(out.x) ## as of now cp.x always TRUE, search for #4409 here
+      out = .Call(Ccbindlist, list(out.i, out.x), FALSE)
+    } else if (!nrow(out.i)) { ## but not !nrow(out.r)
+      if (!cp.r && copy) out.r = copy(out.r)
+      if (length(add<-setdiff(names(out.i), names(out.r)))) { ## add missing columns of proper types NA
+        neworder = copy(names(out.i)) #set(out.r, NULL, add, lapply(unclass(out.i)[add], `[`, 1L)) ## 291.04 overalloc exceed fail during set()
+        out.i = lapply(unclass(out.i)[add], `[`, seq_len(nrow(out.r))) ## could eventually remove this when cbindlist recycle 0 rows up, note that we need out.r not to be copied
+        out.r = .Call(Ccbindlist, list(out.r, out.i), FALSE)
+        setcolorder(out.r, neworder=neworder)
+      }
+      out = out.r
+    } else { ## all might have not been copied yet, rbindlist will copy
+      out.l = .Call(Ccbindlist, list(out.i, out.x), FALSE)
+      out = rbindlist(list(out.l, out.r), use.names=TRUE, fill=TRUE)
+    }
+  }
+  setDT(out)
+}
+
 # Previously, we had a custom C implementation here, which is ~2x faster,
 #   but this is fast enough we don't bother maintaining a new routine.
 #   Hopefully in the future rep() can recognize the ALTREP and use that, too.
diff --git a/R/onLoad.R b/R/onLoad.R
index ef96849e8..ff8b18c02 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -82,6 +82,7 @@
        "datatable.print.trunc.cols"="FALSE",   # for print.data.table
        "datatable.show.indices"="FALSE",       # for print.data.table
        "datatable.allow.cartesian"="FALSE",    # datatable.<argument name>
+       "datatable.join.many"="TRUE",           # mergelist, [.data.table #4383 #914
        "datatable.dfdispatchwarn"="TRUE",                   # not a function argument
        "datatable.warnredundantby"="TRUE",                  # not a function argument
        "datatable.alloccol"="1024L",           # argument 'n' of alloc.col. Over-allocate 1024 spare column slots
diff --git a/inst/tests/mergelist.Rraw b/inst/tests/mergelist.Rraw
index a35c4f410..422d8d709 100644
--- a/inst/tests/mergelist.Rraw
+++ b/inst/tests/mergelist.Rraw
@@ -6,6 +6,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
 } else {
   require(data.table)
   test = data.table:::test
+  mergepair = data.table:::mergepair
   perhaps.data.table = data.table:::perhaps.data.table
   hasindex = data.table:::hasindex
   fdistinct = data.table:::fdistinct
@@ -13,6 +14,16 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
 }
 
 addresses = function(x) vapply(x, address, "")
+copied = function(ans, l) {
+  all(!addresses(ans) %chin% unlist(recursive=FALSE, lapply(l, addresses)))
+}
+notcopied = function(ans, l, how="left", unless=character()) {
+  if (how %chin% unless) return(copied(ans, l)) ## used during looping tests for easier escape
+  if (how=="full") return( ## either side, left|right, notcopied is fine
+    all(addresses(l[[1L]]) %chin% addresses(ans)) || all(addresses(l[[length(l)]]) %chin% addresses(ans))
+  )
+  all(addresses(if (how=="right") l[[length(l)]] else l[[1L]]) %chin% addresses(ans))
+}
 
 # internal helpers
 
@@ -110,6 +121,189 @@ test(13.04, key(ans), "id1")
 test(13.05, indices(ans), c("id1","id2","id3","id1__id2__id3","id6","id7","id9"))
 test(13.06, ii, lapply(l, indices)) ## this tests that original indices have not been touched, shallow_duplicate in mergeIndexAttrib
 
+# mergepair
+
+## test copy-ness argument in mergepair
+
+### LHS equal to RHS: no copy in all cases
+num = 21.000
+l = list(
+  lhs = data.table(id1=1:2, v1=1:2),
+  rhs = data.table(id1=1:2, v2=1:2)
+)
+expected = data.table(id1=1:2, v1=1:2, v2=1:2)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected) ## copy=TRUE: no shared columns
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected) ## copy=FALSE: LHS shared but no RHS
+    test(num<-num+0.001, notcopied(ans, l, how=how))
+  }
+}
+### RHS includes LHS: no copy in inner, left, right
+num = 22.000
+unless = "full"
+l = list(
+  lhs = data.table(id1=1:2, v1=1:2),
+  rhs = data.table(id1=1:3, v2=1:3)
+)
+expected = list(
+  inner = data.table(id1=1:2, v1=1:2, v2=1:2),
+  left = data.table(id1=1:2, v1=1:2, v2=1:2),
+  right = data.table(id1=1:3, v1=c(1:2,NA), v2=1:3),
+  full = data.table(id1=1:3, v1=c(1:2,NA), v2=1:3)
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+### LHS includes RHS: no copy in left, right, full
+num = 23.000
+unless = "inner"
+l = list(
+  lhs = data.table(id1=1:3, v1=1:3),
+  rhs = data.table(id1=1:2, v2=1:2)
+)
+expected = list(
+  inner = data.table(id1=1:2, v1=1:2, v2=1:2),
+  left = data.table(id1=1:3, v1=1:3, v2=c(1:2,NA)),
+  right = data.table(id1=1:2, v1=1:2, v2=1:2),
+  full = data.table(id1=1:3, v1=1:3, v2=c(1:2,NA))
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+### LHS single nonmatch RHS on both sides: no copy in left, right
+num = 24.000
+unless = c("inner","full")
+l = list(
+  lhs = data.table(id1=3:1, v1=1:3),
+  rhs = data.table(id1=c(4L,2:1), v2=1:3)
+)
+expected = list(
+  inner = data.table(id1=2:1, v1=2:3, v2=2:3),
+  left = data.table(id1=3:1, v1=1:3, v2=c(NA,2:3)),
+  right = data.table(id1=c(4L,2:1), v1=c(NA,2:3), v2=1:3),
+  full = data.table(id1=c(3:1,4L), v1=c(1:3,NA), v2=c(NA,2:3,1L))
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+### LHS zero match RHS: no copy in left, right
+num = 25.000
+unless = c("inner","full")
+l = list(
+  lhs = data.table(id1=2:1, v1=1:2),
+  rhs = data.table(id1=3:4, v2=1:2)
+)
+expected = list(
+  inner = data.table(id1=integer(), v1=integer(), v2=integer()),
+  left = data.table(id1=2:1, v1=1:2, v2=c(NA_integer_,NA)),
+  right = data.table(id1=3:4, v1=c(NA_integer_,NA), v2=1:2),
+  full = data.table(id1=c(2:1,3:4), v1=c(1:2,NA,NA), v2=c(NA,NA,1:2))
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+### LHS and RHS zero nrow: no copies
+num = 26.000
+unless = character()
+l = list(
+  lhs = data.table(id1=integer(), v1=integer()),
+  rhs = data.table(id1=integer(), v2=integer())
+)
+expected = list(
+  inner = data.table(id1=integer(), v1=integer(), v2=integer()),
+  left = data.table(id1=integer(), v1=integer(), v2=integer()),
+  right = data.table(id1=integer(), v1=integer(), v2=integer()),
+  full = data.table(id1=integer(), v1=integer(), v2=integer())
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+### LHS has zero nrow: no copies
+num = 27.000
+unless = character()
+l = list(
+  lhs = data.table(id1=integer(), v1=integer()),
+  rhs = data.table(id1=2:1, v2=1:2)
+)
+expected = list(
+  inner = data.table(id1=integer(), v1=integer(), v2=integer()),
+  left = data.table(id1=integer(), v1=integer(), v2=integer()),
+  right = data.table(id1=2:1, v1=c(NA_integer_,NA), v2=1:2),
+  full = data.table(id1=2:1, v1=c(NA_integer_,NA), v2=1:2)
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+### RHS has zero nrow
+num = 28.000
+unless = "inner"
+l = list(
+  lhs = data.table(id1=2:1, v1=1:2),
+  rhs = data.table(id1=integer(), v2=integer())
+)
+expected = list(
+  inner = data.table(id1=integer(), v1=integer(), v2=integer()),
+  left = data.table(id1=2:1, v1=1:2, v2=c(NA_integer_,NA)),
+  right = data.table(id1=integer(), v1=integer(), v2=integer()),
+  full = data.table(id1=2:1, v1=1:2, v2=c(NA_integer_,NA))
+)
+for (how in c("inner","left","right","full")) {
+  num = trunc(num*10)/10 + 0.1
+  for (mult in c("all","first","last","error")) {
+    num = trunc(num*100)/100 + 0.01
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=TRUE), expected[[how]])
+    test(num<-num+0.001, copied(ans, l))
+    test(num<-num+0.001, ans <- mergepair(l$lhs, l$rhs, on="id1", how=how, mult=mult, copy=FALSE), expected[[how]])
+    test(num<-num+0.001, notcopied(ans, l, how=how, unless=unless))
+  }
+}
+
 ## fdistinct, another round
 
 dt = data.table(x =