diff --git a/R/data.table.R b/R/data.table.R
index b1cc0812b..66c5879d8 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -125,7 +125,7 @@ replace_dot_alias = function(e) {
   }
 }
 
-"[.data.table" = function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch", NA), mult="all", roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL)
+"[.data.table" = function (x, i, j, by, keyby, with=TRUE, nomatch=getOption("datatable.nomatch", NA), mult, roll=FALSE, rollends=if (roll=="nearest") c(TRUE,TRUE) else if (roll>=0) c(FALSE,TRUE) else c(TRUE,FALSE), which=FALSE, .SDcols, verbose=getOption("datatable.verbose"), allow.cartesian=getOption("datatable.allow.cartesian"), drop=NULL, on=NULL)
 {
   # ..selfcount <<- ..selfcount+1  # in dev, we check no self calls, each of which doubles overhead, or could
   # test explicitly if the caller is [.data.table (even stronger test. TO DO.)
@@ -183,7 +183,12 @@ replace_dot_alias = function(e) {
     }
     return(x)
   }
-  if (!mult %chin% c("first","last","all","error")) stop("mult argument can only be 'first', 'last', 'all' or 'error'")
+  missingmult = missing(mult)
+  if (!missingmult) {
+    if (!mult %chin% c("first","last","all","error")) stop("mult argument can only be 'first', 'last', 'all' or 'error'")
+  } else {
+    mult = "all"
+  }
   missingroll = missing(roll)
   if (length(roll)!=1L || is.na(roll)) stop("roll must be a single TRUE, FALSE, positive/negative integer/double including +Inf and -Inf or 'nearest'")
   if (is.character(roll)) {
@@ -212,6 +217,7 @@ replace_dot_alias = function(e) {
   ..syms = NULL
   av = NULL
   jsub = NULL
+  jassign = FALSE
   if (!missing(j)) {
     jsub = replace_dot_alias(substitute(j))
     root = if (is.call(jsub)) as.character(jsub[[1L]])[1L] else ""
@@ -284,6 +290,7 @@ replace_dot_alias = function(e) {
         warning("nomatch isn't relevant together with :=, ignoring nomatch")
         nomatch=0L
       }
+      jassign = TRUE
     }
   }
 
@@ -442,7 +449,7 @@ replace_dot_alias = function(e) {
         ops = rep(1L, length(leftcols))
       }
       # Implementation for not-join along with by=.EACHI, #604
-      if (notjoin && (byjoin || (mult=="first" || mult=="last"))) { # mult != "all" needed for #1571
+      if (notjoin && (byjoin || (mult=="first" || mult=="last"))) { # mult != "all|error" needed for #1571
         notjoin = FALSE
         if (verbose) {last.started.at=proc.time();cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff_(x,i) ...");flush.console()}
         orignames = copy(names(i))
@@ -452,8 +459,22 @@ replace_dot_alias = function(e) {
         setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted
       }
       i = .shallow(i, retain.key = TRUE)
+      if (!missingmult && jassign && missingby) { ## if we just could swap x and i for jassign, all 'mult' cases, dups check via ans$allLens1 would already work
+        if (mult=="first") stop("Argument mult='first' during update-on-join not yet implemented")
+        else if (mult=="all") {
+          warning("Argument 'mult' during update-on-join must not be equal to 'all'. For backward compatibility it will be set to 'last'. To avoid this warning do not use mult arg or provide value other than 'all'.")
+          mult = "last"
+        } # mult=="error" # handled after bmerge, not instantly
+      }
       ans = bmerge(i, x, leftcols, rightcols, roll, rollends, nomatch, mult, ops, verbose=verbose)
-      if (mult=="error") mult="all" ## there was no multiple matches so we can proceed as if 'all'
+      dups = NULL # so we can re-use later
+      if (jassign && missingby && mult!="last" && (dups<-anyDuplicated(ans$starts, incomparables = c(0L, NA_integer_)))) { # warn here if duplicated matches occured, as proposed in #3747, ask users to use mult='last' explicitly
+        if (missingmult)
+          warning("During update-on-join there were multiple matches, in such case the last matching row will be used to lookup the value from. To avoid this warning use mult argument 'last' (default), 'first' (not yet implemented) or 'error'.")
+        else if (mult=="error")
+          stop("mult='error' and multiple matches during merge") # same error as in bmerge
+      }
+      if (mult=="error") mult="all" ## there were no multiple matches (error would have been raised already) so we can proceed as if 'all', or probably even as if any first or last as well
       xo = ans$xo ## to make it available for further use.
       # temp fix for issue spotted by Jan, test #1653.1. TODO: avoid this
       # 'setorder', as there's another 'setorder' in generating 'irows' below...
@@ -480,7 +501,7 @@ replace_dot_alias = function(e) {
           irows = if (allLen1) f__ else vecseq(f__,len__,
             if (allow.cartesian ||
                 notjoin || # #698. When notjoin=TRUE, ignore allow.cartesian. Rows in answer will never be > nrow(x).
-                !anyDuplicated(f__, incomparables = c(0L, NA_integer_))) {
+                !(if (!is.null(dups)) dups else anyDuplicated(f__, incomparables = c(0L, NA_integer_)))) {
               NULL # #742. If 'i' has no duplicates, ignore
             } else as.double(nrow(x)+nrow(i))) # rows in i might not match to x so old max(nrow(x),nrow(i)) wasn't enough. But this limit now only applies when there are duplicates present so the reason now for nrow(x)+nrow(i) is just to nail it down and be bigger than max(nrow(x),nrow(i)).
           if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()}
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 7cc6819e8..160cff2e6 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5814,7 +5814,7 @@ DT <- data.table(id=rep(letters[1:2], 2), var = rnorm(4), key="id")
 test(1383.1, DT[letters[1:3], list(var)], DT[1:5, list(var)])
 # Fix for #800 - allow.cartesian should be ignored if jsub[1L] has `:=`.
 DT=data.table(id=c(1,1), date=c(1992,1991), value=c(4.1,4.5), key="id")
-test(1383.2, copy(DT)[DT, a:=1], DT[, a := 1])
+test(1383.2, copy(DT)[DT, a:=1], DT[, a := 1], warning="last matching row will be used") #4370
 
 # Somehow DT[col==max(col)] was never tested, broken by auto-indexing new in v1.9.4, #858
 DT = data.table(a = c(1,1,1,2,2,2,3,3,3), b = rnorm(9))
diff --git a/man/data.table.Rd b/man/data.table.Rd
index 8c8e0d537..c4caf2b88 100644
--- a/man/data.table.Rd
+++ b/man/data.table.Rd
@@ -22,7 +22,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
 
 \method{[}{data.table}(x, i, j, by, keyby, with = TRUE,
   nomatch = getOption("datatable.nomatch", NA),
-  mult = "all",
+  mult,
   roll = FALSE,
   rollends = if (roll=="nearest") c(TRUE,TRUE)
              else if (roll>=0) c(FALSE,TRUE)
@@ -57,7 +57,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
         If \code{i} is a \code{data.table}, the columns in \code{i} to be matched against \code{x} can be specified using one of these ways:
 
         \itemize{
-            \item{\code{on} argument (see below). It allows for both \code{equi-} and the newly implemented \code{non-equi} joins.}
+            \item{\code{on} argument (see below). It allows for both \emph{equi} and \emph{non-equi} joins.}
 
             \item{If not, \code{x} \emph{must be keyed}. Key can be set using \code{\link{setkey}}. If \code{i} is also keyed, then first \emph{key} column of \code{i} is matched against first \emph{key} column of \code{x}, second against second, etc..
 
@@ -67,9 +67,9 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
         }
         Using \code{on=} is recommended (even during keyed joins) as it helps understand the code better and also allows for \emph{non-equi} joins.
 
-        When the binary operator \code{==} alone is used, an \emph{equi} join is performed. In SQL terms, \code{x[i]} then performs a \emph{right join} by default. \code{i} prefixed with \code{!} signals a \emph{not-join} or \emph{not-select}.
+        When the binary operator \code{==} alone is used, an \emph{equi} join is performed. In SQL terms, \code{x[i]} then performs a \emph{right outer join} by default. \code{i} prefixed with \code{!} signals a \emph{not-join} or \emph{not-select}.
 
-        Support for \emph{non-equi} join was recently implemented, which allows for other binary operators \code{>=, >, <= and <}.
+        \emph{Non-equi} joins are also supported, allowing the other binary operators \code{>=}, \code{>}, \code{<=} and \code{<}.
 
         See \href{../doc/datatable-keys-fast-subset.html}{\code{vignette("datatable-keys-fast-subset")}} and \href{../doc/datatable-secondary-indices-and-auto-indexing.html}{\code{vignette("datatable-secondary-indices-and-auto-indexing")}}.
 
@@ -118,7 +118,7 @@ data.table(\dots, keep.rownames=FALSE, check.names=FALSE, key=NULL, stringsAsFac
 
     \item{nomatch}{ When a row in \code{i} has no match to \code{x}, \code{nomatch=NA} (default) means \code{NA} is returned. \code{NULL} (or \code{0} for backward compatibility) means no rows will be returned for that row of \code{i}. Use \code{options(datatable.nomatch=NULL)} to change the default value (used when \code{nomatch} is not supplied).}
 
-    \item{mult}{ When \code{i} is a \code{list} (or \code{data.frame} or \code{data.table}) and \emph{multiple} rows in \code{x} match to the row in \code{i}, \code{mult} controls which are returned: \code{"all"} (default), \code{"first"} or \code{"last"}.}
+    \item{mult}{ When \code{i} is a \code{list} (or \code{data.frame} or \code{data.table}) and \emph{multiple} rows in \code{x} match the row in \code{i}, \code{mult} controls which are returned: \code{"all"}, \code{"first"}, \code{"last"} or \code{"error"}; \code{"error"} raises an error when multiple matches are found. The default is \code{"all"}, except during \emph{update-on-join} (a join combined with \code{:=}), where the default is \code{"last"} and \code{"first"} is not yet implemented. }
 
     \item{roll}{ When \code{i} is a \code{data.table} and its row matches to all but the last \code{x} join column, and its value in the last \code{i} join column falls in a gap (including after the last observation in \code{x} for that group), then:
 
@@ -216,7 +216,7 @@ A \code{data.table} is a \code{list} of vectors, just like a \code{data.frame}.
 \item it has enhanced functionality in \code{[.data.table} for fast joins of keyed tables, fast aggregation, fast last observation carried forward (LOCF) and fast add/modify/delete of columns by reference with no copy at all.
 }
 
-See the \code{see also} section for the several other \emph{methods} that are available for operating on data.tables efficiently.
+See the \emph{see also} section for the several other \emph{methods} that are available for operating on data.tables efficiently.
 
 }
 \references{