Commit 1f703aa

Merge branch 'master' into issues/SPARK-26060/set_command
2 parents: 336a331 + affe809

670 files changed (+26122, -19103 lines)


R/WINDOWS.md

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 To build SparkR on Windows, the following steps are required
 
 1. Install R (>= 3.1) and [Rtools](http://cran.r-project.org/bin/windows/Rtools/). Make sure to
-include Rtools and R in `PATH`.
+include Rtools and R in `PATH`. Note that support for R prior to version 3.4 is deprecated as of Spark 3.0.0.
 
 2. Install
 [JDK8](http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html) and set

R/pkg/DESCRIPTION

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ URL: http://www.apache.org/ http://spark.apache.org/
 BugReports: http://spark.apache.org/contributing.html
 SystemRequirements: Java (== 8)
 Depends:
-    R (>= 3.0),
+    R (>= 3.1),
     methods
 Suggests:
     knitr,

R/pkg/NAMESPACE

Lines changed: 1 addition & 0 deletions
@@ -169,6 +169,7 @@ exportMethods("arrange",
               "toJSON",
               "transform",
               "union",
+              "unionAll",
               "unionByName",
               "unique",
               "unpersist",

R/pkg/R/DataFrame.R

Lines changed: 22 additions & 0 deletions
@@ -767,6 +767,14 @@ setMethod("repartition",
 #' using \code{spark.sql.shuffle.partitions} as number of partitions.}
 #'}
 #'
+#' At least one partition-by expression must be specified.
+#' When no explicit sort order is specified, "ascending nulls first" is assumed.
+#'
+#' Note that due to performance reasons this method uses sampling to estimate the ranges.
+#' Hence, the output may not be consistent, since sampling can return different values.
+#' The sample size can be controlled by the config
+#' \code{spark.sql.execution.rangeExchange.sampleSizePerPartition}.
+#'
 #' @param x a SparkDataFrame.
 #' @param numPartitions the number of partitions to use.
 #' @param col the column by which the range partitioning will be performed.
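
For orientation, here is a minimal SparkR sketch of the range repartitioning these docs describe, including the sampling config named above. The data, column name, partition count, and sample-size value are illustrative, not part of the commit:

```r
library(SparkR)

# A larger per-partition sample gives more stable range boundaries
# (illustrative value; the setting is the one named in the docs above).
sparkR.session(sparkConfig = list(
  spark.sql.execution.rangeExchange.sampleSizePerPartition = "200"))

df <- createDataFrame(data.frame(age = c(21, 35, 52, 18, 44, 60)))

# Range-partition by age into 3 partitions; because the ranges are estimated
# by sampling, the exact boundaries may differ between runs.
repartitioned <- repartitionByRange(df, 3L, df$age)
getNumPartitions(repartitioned)  # 3
```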
@@ -2724,6 +2732,20 @@ setMethod("union",
             dataFrame(unioned)
           })
 
+#' Return a new SparkDataFrame containing the union of rows
+#'
+#' This is an alias for `union`.
+#'
+#' @rdname union
+#' @name unionAll
+#' @aliases unionAll,SparkDataFrame,SparkDataFrame-method
+#' @note unionAll since 1.4.0
+setMethod("unionAll",
+          signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+          function(x, y) {
+            union(x, y)
+          })
+
 #' Return a new SparkDataFrame containing the union of rows, matched by column names
 #'
 #' Return a new SparkDataFrame containing the union of rows in this SparkDataFrame
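
A quick sketch of the restored alias in use, assuming an active SparkR session; the example data is illustrative:

```r
df1 <- createDataFrame(data.frame(name = c("Michael", "Andy"), age = c(29, 30)))
df2 <- createDataFrame(data.frame(name = "Justin", age = 19))

# unionAll is now just an alias for union: it resolves columns by position
# and keeps duplicate rows (no deduplication, unlike SQL's UNION).
all_rows <- unionAll(df1, df2)
count(all_rows)  # 3
```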

R/pkg/R/functions.R

Lines changed: 1 addition & 1 deletion
@@ -3370,7 +3370,7 @@ setMethod("flatten",
 #'
 #' @rdname column_collection_functions
 #' @aliases map_entries map_entries,Column-method
-#' @note map_entries since 2.4.0
+#' @note map_entries since 3.0.0
 setMethod("map_entries",
           signature(x = "Column"),
           function(x) {
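
For reference, a small sketch of the map_entries function whose since-version is corrected here, assuming an active SparkR session. Building the map column through a SQL literal is just one convenient route, not something this commit prescribes:

```r
# map(...) in SQL builds a MapType column to demonstrate on.
df <- sql("SELECT map('a', 1, 'b', 2) AS m")

# map_entries turns the map into an array of <key, value> structs.
head(select(df, map_entries(df$m)))
```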

R/pkg/R/generics.R

Lines changed: 3 additions & 0 deletions
@@ -631,6 +631,9 @@ setGeneric("toRDD", function(x) { standardGeneric("toRDD") })
 #' @rdname union
 setGeneric("union", function(x, y) { standardGeneric("union") })
 
+#' @rdname union
+setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") })
+
 #' @rdname unionByName
 setGeneric("unionByName", function(x, y) { standardGeneric("unionByName") })

R/pkg/R/stats.R

Lines changed: 2 additions & 2 deletions
@@ -109,7 +109,7 @@ setMethod("corr",
 #'
 #' Finding frequent items for columns, possibly with false positives.
 #' Using the frequent element count algorithm described in
-#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
+#' \url{https://doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou.
 #'
 #' @param x A SparkDataFrame.
 #' @param cols A vector column names to search frequent items in.
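
A short usage sketch for freqItems, assuming an active SparkR session; the data and support threshold are illustrative. My understanding is that SparkR's freqItems returns a local R data.frame directly, so no collect() is needed:

```r
df <- createDataFrame(data.frame(a = c(1, 1, 1, 2, 3),
                                 b = c("x", "x", "y", "y", "x")))

# Items appearing in at least 40% of rows of each column; the algorithm
# may report false positives, per the docs above.
freqItems(df, c("a", "b"), support = 0.4)
```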
@@ -143,7 +143,7 @@ setMethod("freqItems", signature(x = "SparkDataFrame", cols = "character"),
 #' *exact* rank of x is close to (p * N). More precisely,
 #' floor((p - err) * N) <= rank(x) <= ceil((p + err) * N).
 #' This method implements a variation of the Greenwald-Khanna algorithm (with some speed
-#' optimizations). The algorithm was first present in [[http://dx.doi.org/10.1145/375663.375670
+#' optimizations). The algorithm was first present in [[https://doi.org/10.1145/375663.375670
 #' Space-efficient Online Computation of Quantile Summaries]] by Greenwald and Khanna.
 #' Note that NA values will be ignored in numerical columns before calculation. For
 #' columns only containing NA values, an empty list is returned.
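
A companion sketch for approxQuantile under the same assumptions (active session, illustrative data):

```r
df <- createDataFrame(data.frame(value = c(1, 2, 3, 4, 5, NA)))

# Quartiles with at most 1% relative rank error; NA values are ignored,
# as the docs above note.
approxQuantile(df, "value", probabilities = c(0.25, 0.5, 0.75),
               relativeError = 0.01)
```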

R/pkg/inst/profile/general.R

Lines changed: 4 additions & 0 deletions
@@ -16,6 +16,10 @@
 #
 
 .First <- function() {
+  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
+    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
+  }
+
   packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
   dirs <- strsplit(packageDir, ",")[[1]]
   .libPaths(c(dirs, .libPaths()))
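
The guard added here hinges on utils::compareVersion, a base-R comparator over version strings; a quick illustration of the values involved:

```r
# compareVersion returns -1, 0, or 1, like a classic comparator.
utils::compareVersion("3.3.3", "3.4.0")  # -1  -> warning fires
utils::compareVersion("3.4.0", "3.4.0")  #  0  -> no warning
utils::compareVersion("3.5.1", "3.4.0")  #  1  -> no warning

# R.version$minor carries the patch level too (e.g. major "3", minor "5.1"),
# so paste0(R.version$major, ".", R.version$minor) yields "3.5.1".
```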

R/pkg/inst/profile/shell.R

Lines changed: 4 additions & 0 deletions
@@ -16,6 +16,10 @@
 #
 
 .First <- function() {
+  if (utils::compareVersion(paste0(R.version$major, ".", R.version$minor), "3.4.0") == -1) {
+    warning("Support for R prior to version 3.4 is deprecated since Spark 3.0.0")
+  }
+
   home <- Sys.getenv("SPARK_HOME")
   .libPaths(c(file.path(home, "R", "lib"), .libPaths()))
   Sys.setenv(NOAWT = 1)

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 2 additions & 1 deletion
@@ -1674,7 +1674,7 @@ test_that("column functions", {
 
   # check for unparseable
   df <- as.DataFrame(list(list("a" = "")))
-  expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]], NA)
+  expect_equal(collect(select(df, from_json(df$a, schema)))[[1]][[1]]$a, NA)
 
   # check if array type in string is correctly supported.
   jsonArr <- "[{\"name\":\"Bob\"}, {\"name\":\"Alice\"}]"
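
The corrected assertion reflects that from_json yields a struct column, so the unparseable-input check must reach into the struct's field. A standalone sketch, assuming an active SparkR session; the one-field schema is a reconstruction for illustration, not the exact schema variable from the test:

```r
schema <- structType(structField("a", "string"))
df <- as.DataFrame(list(list("a" = "")))  # "" is not parseable JSON

parsed <- collect(select(df, from_json(df$a, schema)))
# The result cell is a struct; for unparseable input its field "a" is NA.
parsed[[1]][[1]]$a  # NA
```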
@@ -2458,6 +2458,7 @@ test_that("union(), unionByName(), rbind(), except(), and intersect() on a DataFrame", {
   expect_equal(count(unioned), 6)
   expect_equal(first(unioned)$name, "Michael")
   expect_equal(count(arrange(suppressWarnings(union(df, df2)), df$age)), 6)
+  expect_equal(count(arrange(suppressWarnings(unionAll(df, df2)), df$age)), 6)
 
   df1 <- select(df2, "age", "name")
   unioned1 <- arrange(unionByName(df1, df), df1$age)
