
Commit 9c04e42

Felix Cheung authored and committed
[SPARK-18823][SPARKR] add support for assigning to column
## What changes were proposed in this pull request?

Support for

```
df[[myname]] <- 1
df[[2]] <- df$eruptions
```

## How was this patch tested?

manual tests, unit tests

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #16663 from felixcheung/rcolset.

(cherry picked from commit f27e024)
Signed-off-by: Felix Cheung <felixcheung@apache.org>
1 parent 570e5e1 commit 9c04e42
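
For readers unfamiliar with the new syntax, here is a minimal SparkR sketch of what this change enables; it assumes an active Spark session and uses the built-in `faithful` dataset from the description (the `rating` column name is illustrative, not part of the patch):

```r
library(SparkR)
sparkR.session()

df <- createDataFrame(faithful)

# assign a length-one literal to a column selected by name
df[["rating"]] <- 1

# assign a Column expression to a column selected by position
df[[2]] <- df$eruptions

# assigning NULL drops the column again
df[["rating"]] <- NULL

head(df)
```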

2 files changed (+55, −13 lines)


R/pkg/R/DataFrame.R

Lines changed: 35 additions & 13 deletions
@@ -1711,6 +1711,23 @@ getColumn <- function(x, c) {
   column(callJMethod(x@sdf, "col", c))
 }
 
+setColumn <- function(x, c, value) {
+  if (class(value) != "Column" && !is.null(value)) {
+    if (isAtomicLengthOne(value)) {
+      value <- lit(value)
+    } else {
+      stop("value must be a Column, literal value as atomic in length of 1, or NULL")
+    }
+  }
+
+  if (is.null(value)) {
+    nx <- drop(x, c)
+  } else {
+    nx <- withColumn(x, c, value)
+  }
+  nx
+}
+
 #' @param name name of a Column (without being wrapped by \code{""}).
 #' @rdname select
 #' @name $
@@ -1729,19 +1746,7 @@ setMethod("$", signature(x = "SparkDataFrame"),
 #' @note $<- since 1.4.0
 setMethod("$<-", signature(x = "SparkDataFrame"),
           function(x, name, value) {
-            if (class(value) != "Column" && !is.null(value)) {
-              if (isAtomicLengthOne(value)) {
-                value <- lit(value)
-              } else {
-                stop("value must be a Column, literal value as atomic in length of 1, or NULL")
-              }
-            }
-
-            if (is.null(value)) {
-              nx <- drop(x, name)
-            } else {
-              nx <- withColumn(x, name, value)
-            }
+            nx <- setColumn(x, name, value)
             x@sdf <- nx@sdf
             x
           })
@@ -1761,6 +1766,21 @@ setMethod("[[", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
             getColumn(x, i)
           })
 
+#' @rdname subset
+#' @name [[<-
+#' @aliases [[<-,SparkDataFrame,numericOrcharacter-method
+#' @note [[<- since 2.1.1
+setMethod("[[<-", signature(x = "SparkDataFrame", i = "numericOrcharacter"),
+          function(x, i, value) {
+            if (is.numeric(i)) {
+              cols <- columns(x)
+              i <- cols[[i]]
+            }
+            nx <- setColumn(x, i, value)
+            x@sdf <- nx@sdf
+            x
+          })
+
 #' @rdname subset
 #' @name [
 #' @aliases [,SparkDataFrame-method
@@ -1808,6 +1828,8 @@ setMethod("[", signature(x = "SparkDataFrame"),
 #' @param j,select expression for the single Column or a list of columns to select from the SparkDataFrame.
 #' @param drop if TRUE, a Column will be returned if the resulting dataset has only one column.
 #'             Otherwise, a SparkDataFrame will always be returned.
+#' @param value a Column or an atomic vector in the length of 1 as literal value, or \code{NULL}.
+#'              If \code{NULL}, the specified Column is dropped.
 #' @param ... currently not used.
 #' @return A new SparkDataFrame containing only the rows that meet the condition with selected columns.
 #' @export
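
The refactoring above pulls the value handling out of `$<-` into the shared `setColumn` helper, so `$<-` and the new `[[<-` behave the same way: a `Column` is passed through to `withColumn`, a length-one atomic value is wrapped with `lit`, `NULL` drops the column, and anything else raises the error string exercised in the tests below. A hedged sketch of those branches, using hypothetical data and assuming an active Spark session:

```r
library(SparkR)
sparkR.session()

df <- createDataFrame(data.frame(name = c("a", "b", "c"), age = c(19, 23, 23)))

df[["age2"]] <- df$age * 2   # Column expression -> withColumn()
df[["flag"]] <- TRUE         # length-one atomic  -> lit()
df[["flag"]] <- NULL         # NULL               -> drop()

# anything else is rejected with the documented error message
tryCatch(df[["bad"]] <- c(1, 2),
         error = function(e) message(conditionMessage(e)))

columns(df)  # "name" "age" "age2"
```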

R/pkg/inst/tests/testthat/test_sparkSQL.R

Lines changed: 20 additions & 0 deletions
@@ -1021,6 +1021,9 @@ test_that("select operators", {
   df$age2 <- df$age * 2
   expect_equal(columns(df), c("name", "age", "age2"))
   expect_equal(count(where(df, df$age2 == df$age * 2)), 2)
+  df$age2 <- df[["age"]] * 3
+  expect_equal(columns(df), c("name", "age", "age2"))
+  expect_equal(count(where(df, df$age2 == df$age * 3)), 2)
 
   df$age2 <- 21
   expect_equal(columns(df), c("name", "age", "age2"))
@@ -1033,6 +1036,23 @@ test_that("select operators", {
   expect_error(df$age3 <- c(22, NA),
                "value must be a Column, literal value as atomic in length of 1, or NULL")
 
+  df[["age2"]] <- 23
+  expect_equal(columns(df), c("name", "age", "age2"))
+  expect_equal(count(where(df, df$age2 == 23)), 3)
+
+  df[[3]] <- 24
+  expect_equal(columns(df), c("name", "age", "age2"))
+  expect_equal(count(where(df, df$age2 == 24)), 3)
+
+  df[[3]] <- df$age
+  expect_equal(count(where(df, df$age2 == df$age)), 2)
+
+  df[["age2"]] <- df[["name"]]
+  expect_equal(count(where(df, df$age2 == df$name)), 3)
+
+  expect_error(df[["age3"]] <- c(22, 23),
+               "value must be a Column, literal value as atomic in length of 1, or NULL")
+
   # Test parameter drop
   expect_equal(class(df[, 1]) == "SparkDataFrame", T)
   expect_equal(class(df[, 1, drop = T]) == "Column", T)
