Skip to content

Commit

Permalink
[SPARK-10328] [SPARKR] Fix generic for na.omit
Browse files Browse the repository at this point in the history
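The S3 generic `na.omit` from the stats package is documented at https://stat.ethz.ch/R-manual/R-patched/library/stats/html/na.fail.html; this change makes the SparkR generic use the same `(object, ...)` signature.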

Author: Shivaram Venkataraman <shivaram@cs.berkeley.edu>
Author: Shivaram Venkataraman <shivaram.venkataraman@gmail.com>
Author: Yu ISHIKAWA <yuu.ishikawa@gmail.com>

Closes apache#8495 from shivaram/na-omit-fix.
  • Loading branch information
shivaram committed Aug 28, 2015
1 parent 7583681 commit 2f99c37
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 6 deletions.
6 changes: 3 additions & 3 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -1699,9 +1699,9 @@ setMethod("dropna",
#' @name na.omit
#' @export
setMethod("na.omit",
signature(x = "DataFrame"),
function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
dropna(x, how, minNonNulls, cols)
signature(object = "DataFrame"),
function(object, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
dropna(object, how, minNonNulls, cols)
})

#' fillna
Expand Down
2 changes: 1 addition & 1 deletion R/pkg/R/generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ setGeneric("dropna",
#' @rdname nafunctions
#' @export
setGeneric("na.omit",
function(x, how = c("any", "all"), minNonNulls = NULL, cols = NULL) {
function(object, ...) {
standardGeneric("na.omit")
})

Expand Down
23 changes: 22 additions & 1 deletion R/pkg/inst/tests/test_sparkSQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -1083,7 +1083,7 @@ test_that("describe() and summarize() on a DataFrame", {
expect_equal(collect(stats2)[5, "age"], "30")
})

test_that("dropna() on a DataFrame", {
test_that("dropna() and na.omit() on a DataFrame", {
df <- jsonFile(sqlContext, jsonPathNa)
rows <- collect(df)

Expand All @@ -1092,6 +1092,8 @@ test_that("dropna() on a DataFrame", {
expected <- rows[!is.na(rows$name),]
actual <- collect(dropna(df, cols = "name"))
expect_identical(expected, actual)
actual <- collect(na.omit(df, cols = "name"))
expect_identical(expected, actual)

expected <- rows[!is.na(rows$age),]
actual <- collect(dropna(df, cols = "age"))
Expand All @@ -1101,48 +1103,67 @@ test_that("dropna() on a DataFrame", {
expect_identical(expected$age, actual$age)
expect_identical(expected$height, actual$height)
expect_identical(expected$name, actual$name)
actual <- collect(na.omit(df, cols = "age"))

expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
actual <- collect(dropna(df, cols = c("age", "height")))
expect_identical(expected, actual)
actual <- collect(na.omit(df, cols = c("age", "height")))
expect_identical(expected, actual)

expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df))
expect_identical(expected, actual)
actual <- collect(na.omit(df))
expect_identical(expected, actual)

# drop with how

expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df))
expect_identical(expected, actual)
actual <- collect(na.omit(df))
expect_identical(expected, actual)

expected <- rows[!is.na(rows$age) | !is.na(rows$height) | !is.na(rows$name),]
actual <- collect(dropna(df, "all"))
expect_identical(expected, actual)
actual <- collect(na.omit(df, "all"))
expect_identical(expected, actual)

expected <- rows[!is.na(rows$age) & !is.na(rows$height) & !is.na(rows$name),]
actual <- collect(dropna(df, "any"))
expect_identical(expected, actual)
actual <- collect(na.omit(df, "any"))
expect_identical(expected, actual)

expected <- rows[!is.na(rows$age) & !is.na(rows$height),]
actual <- collect(dropna(df, "any", cols = c("age", "height")))
expect_identical(expected, actual)
actual <- collect(na.omit(df, "any", cols = c("age", "height")))
expect_identical(expected, actual)

expected <- rows[!is.na(rows$age) | !is.na(rows$height),]
actual <- collect(dropna(df, "all", cols = c("age", "height")))
expect_identical(expected, actual)
actual <- collect(na.omit(df, "all", cols = c("age", "height")))
expect_identical(expected, actual)

# drop with threshold

expected <- rows[as.integer(!is.na(rows$age)) + as.integer(!is.na(rows$height)) >= 2,]
actual <- collect(dropna(df, minNonNulls = 2, cols = c("age", "height")))
expect_identical(expected, actual)
actual <- collect(na.omit(df, minNonNulls = 2, cols = c("age", "height")))
expect_identical(expected, actual)

expected <- rows[as.integer(!is.na(rows$age)) +
as.integer(!is.na(rows$height)) +
as.integer(!is.na(rows$name)) >= 3,]
actual <- collect(dropna(df, minNonNulls = 3, cols = c("name", "age", "height")))
expect_identical(expected, actual)
actual <- collect(na.omit(df, minNonNulls = 3, cols = c("name", "age", "height")))
expect_identical(expected, actual)
})

test_that("fillna() on a DataFrame", {
Expand Down
2 changes: 1 addition & 1 deletion dev/lint-r
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ fi

`which Rscript` --vanilla "$SPARK_ROOT_DIR/dev/lint-r.R" "$SPARK_ROOT_DIR" | tee "$LINT_R_REPORT_FILE_NAME"

NUM_LINES=`wc -l < "$LINT_R_REPORT_FILE_NAME"`
NUM_LINES=`wc -l < "$LINT_R_REPORT_FILE_NAME" | awk '{print $1}'`
if [ "$NUM_LINES" = "0" ] ; then
lint_status=0
echo "lintr checks passed."
Expand Down

0 comments on commit 2f99c37

Please sign in to comment.