Commit ee4aa1d

Merge branch 'master' into SPARK-23429.2

2 parents 2897281 + da6fa38
576 files changed: +15451 additions, -6397 deletions


.gitignore (1 addition, 0 deletions)

@@ -77,6 +77,7 @@ target/
 unit-tests.log
 work/
 docs/.jekyll-metadata
+*.crc
 
 # For Hive
 TempStatsStore/

LICENSE-binary (0 additions, 2 deletions)

@@ -228,7 +228,6 @@ org.apache.xbean:xbean-asm5-shaded
 com.squareup.okhttp3:logging-interceptor
 com.squareup.okhttp3:okhttp
 com.squareup.okio:okio
-net.java.dev.jets3t:jets3t
 org.apache.spark:spark-catalyst_2.11
 org.apache.spark:spark-kvstore_2.11
 org.apache.spark:spark-launcher_2.11
@@ -447,7 +446,6 @@ org.slf4j:jul-to-slf4j
 org.slf4j:slf4j-api
 org.slf4j:slf4j-log4j12
 com.github.scopt:scopt_2.11
-org.bouncycastle:bcprov-jdk15on
 
 core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js
 core/src/main/resources/org/apache/spark/ui/static/*dataTables*

NOTICE (0 additions, 2 deletions)

@@ -26,5 +26,3 @@ The following provides more details on the included cryptographic software:
 This software uses Apache Commons Crypto (https://commons.apache.org/proper/commons-crypto/) to
 support authentication, and encryption and decryption of data sent across the network between
 services.
-
-This software includes Bouncy Castle (http://bouncycastle.org/) to support the jets3t library.

NOTICE-binary (0 additions, 21 deletions)

@@ -27,8 +27,6 @@ This software uses Apache Commons Crypto (https://commons.apache.org/proper/comm
 support authentication, and encryption and decryption of data sent across the network between
 services.
 
-This software includes Bouncy Castle (http://bouncycastle.org/) to support the jets3t library.
-
 
 // ------------------------------------------------------------------
 // NOTICE file corresponding to the section 4d of The Apache License,
@@ -1162,25 +1160,6 @@ NonlinearMinimizer class in package breeze.optimize.proximal is distributed with
 2015, Debasish Das (Verizon), all rights reserved.
 
 
-=========================================================================
-== NOTICE file corresponding to section 4(d) of the Apache License, ==
-== Version 2.0, in this case for the distribution of jets3t. ==
-=========================================================================
-
-This product includes software developed by:
-
-The Apache Software Foundation (http://www.apache.org/).
-
-The ExoLab Project (http://www.exolab.org/)
-
-Sun Microsystems (http://www.sun.com/)
-
-Codehaus (http://castor.codehaus.org)
-
-Tatu Saloranta (http://wiki.fasterxml.com/TatuSaloranta)
-
-
-
 stream-lib
 Copyright 2016 AddThis
 

R/README.md (1 addition, 1 deletion)

@@ -17,7 +17,7 @@ export R_HOME=/home/username/R
 
 #### Build Spark
 
-Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
+Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 
 ```bash
 build/mvn -DskipTests -Psparkr package

R/WINDOWS.md (1 addition, 1 deletion)

@@ -14,7 +14,7 @@ directory in Maven in `PATH`.
 
 4. Set `MAVEN_OPTS` as described in [Building Spark](http://spark.apache.org/docs/latest/building-spark.html).
 
-5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
+5. Open a command shell (`cmd`) in the Spark directory and build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run
 
 ```bash
 mvn.cmd -DskipTests -Psparkr package

R/pkg/NAMESPACE (6 additions, 0 deletions)

@@ -117,6 +117,7 @@ exportMethods("arrange",
               "dropna",
               "dtypes",
               "except",
+              "exceptAll",
               "explain",
               "fillna",
               "filter",
@@ -131,6 +132,7 @@ exportMethods("arrange",
               "hint",
               "insertInto",
               "intersect",
+              "intersectAll",
               "isLocal",
               "isStreaming",
               "join",
@@ -202,6 +204,8 @@ exportMethods("%<=>%",
               "approxQuantile",
               "array_contains",
               "array_distinct",
+              "array_except",
+              "array_intersect",
               "array_join",
               "array_max",
               "array_min",
@@ -210,6 +214,7 @@ exportMethods("%<=>%",
               "array_repeat",
               "array_sort",
               "arrays_overlap",
+              "array_union",
               "arrays_zip",
               "asc",
               "ascii",
@@ -353,6 +358,7 @@ exportMethods("%<=>%",
               "shiftLeft",
               "shiftRight",
               "shiftRightUnsigned",
+              "shuffle",
               "sd",
               "sign",
               "signum",

R/pkg/R/DataFrame.R (58 additions, 1 deletion)

@@ -2848,6 +2848,35 @@ setMethod("intersect",
             dataFrame(intersected)
           })
 
+#' intersectAll
+#'
+#' Return a new SparkDataFrame containing rows in both this SparkDataFrame
+#' and another SparkDataFrame while preserving the duplicates.
+#' This is equivalent to \code{INTERSECT ALL} in SQL. Also as standard in
+#' SQL, this function resolves columns by position (not by name).
+#'
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
+#' @return A SparkDataFrame containing the result of the intersect all operation.
+#' @family SparkDataFrame functions
+#' @aliases intersectAll,SparkDataFrame,SparkDataFrame-method
+#' @rdname intersectAll
+#' @name intersectAll
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df1 <- read.json(path)
+#' df2 <- read.json(path2)
+#' intersectAllDF <- intersectAll(df1, df2)
+#' }
+#' @note intersectAll since 2.4.0
+setMethod("intersectAll",
+          signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+          function(x, y) {
+            intersected <- callJMethod(x@sdf, "intersectAll", y@sdf)
+            dataFrame(intersected)
+          })
+
 #' except
 #'
 #' Return a new SparkDataFrame containing rows in this SparkDataFrame
@@ -2867,7 +2896,6 @@ setMethod("intersect",
 #' df2 <- read.json(path2)
 #' exceptDF <- except(df, df2)
 #' }
-#' @rdname except
 #' @note except since 1.4.0
 setMethod("except",
           signature(x = "SparkDataFrame", y = "SparkDataFrame"),
@@ -2876,6 +2904,35 @@ setMethod("except",
             dataFrame(excepted)
           })
 
+#' exceptAll
+#'
+#' Return a new SparkDataFrame containing rows in this SparkDataFrame
+#' but not in another SparkDataFrame while preserving the duplicates.
+#' This is equivalent to \code{EXCEPT ALL} in SQL. Also as standard in
+#' SQL, this function resolves columns by position (not by name).
+#'
+#' @param x a SparkDataFrame.
+#' @param y a SparkDataFrame.
+#' @return A SparkDataFrame containing the result of the except all operation.
+#' @family SparkDataFrame functions
+#' @aliases exceptAll,SparkDataFrame,SparkDataFrame-method
+#' @rdname exceptAll
+#' @name exceptAll
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df1 <- read.json(path)
+#' df2 <- read.json(path2)
+#' exceptAllDF <- exceptAll(df1, df2)
+#' }
+#' @note exceptAll since 2.4.0
+setMethod("exceptAll",
+          signature(x = "SparkDataFrame", y = "SparkDataFrame"),
+          function(x, y) {
+            excepted <- callJMethod(x@sdf, "exceptAll", y@sdf)
+            dataFrame(excepted)
+          })
+
 #' Save the contents of SparkDataFrame to a data source.
 #'
 #' The data source is specified by the \code{source} and a set of options (...).
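The difference between the new `exceptAll`/`intersectAll` methods and the existing `except`/`intersect` is multiset semantics: duplicates are preserved rather than removed, matching SQL's `EXCEPT ALL` and `INTERSECT ALL`. As a plain-Python illustration of those semantics (not the Spark implementation; the row tuples are made-up stand-ins for DataFrame rows):

```python
def except_all(left, right):
    """EXCEPT ALL semantics: each row in `left` survives only as many
    times as it exceeds its multiplicity in `right`."""
    counts = {}
    for row in right:
        counts[row] = counts.get(row, 0) + 1
    out = []
    for row in left:
        if counts.get(row, 0) > 0:
            counts[row] -= 1  # cancelled by one matching row on the right
        else:
            out.append(row)
    return out

def intersect_all(left, right):
    """INTERSECT ALL semantics: each row appears min(count in left,
    count in right) times."""
    counts = {}
    for row in right:
        counts[row] = counts.get(row, 0) + 1
    out = []
    for row in left:
        if counts.get(row, 0) > 0:
            counts[row] -= 1
            out.append(row)
    return out

left = [("a", 1), ("a", 1), ("b", 2), ("c", 3)]
right = [("a", 1), ("b", 2), ("b", 2)]
print(except_all(left, right))     # [('a', 1), ('c', 3)]
print(intersect_all(left, right))  # [('a', 1), ('b', 2)]
```

Note that one of the two `("a", 1)` duplicates survives `except_all`, whereas a deduplicating `except` would drop both.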

R/pkg/R/context.R (4 additions, 5 deletions)

@@ -138,11 +138,10 @@ parallelize <- function(sc, coll, numSlices = 1) {
 
   sizeLimit <- getMaxAllocationLimit(sc)
   objectSize <- object.size(coll)
+  len <- length(coll)
 
   # For large objects we make sure the size of each slice is also smaller than sizeLimit
-  numSerializedSlices <- max(numSlices, ceiling(objectSize / sizeLimit))
-  if (numSerializedSlices > length(coll))
-    numSerializedSlices <- length(coll)
+  numSerializedSlices <- min(len, max(numSlices, ceiling(objectSize / sizeLimit)))
 
   # Generate the slice ids to put each row
   # For instance, for numSerializedSlices of 22, length of 50
@@ -153,8 +152,8 @@ parallelize <- function(sc, coll, numSlices = 1) {
   splits <- if (numSerializedSlices > 0) {
     unlist(lapply(0: (numSerializedSlices - 1), function(x) {
       # nolint start
-      start <- trunc((x * length(coll)) / numSerializedSlices)
-      end <- trunc(((x + 1) * length(coll)) / numSerializedSlices)
+      start <- trunc((as.numeric(x) * len) / numSerializedSlices)
+      end <- trunc(((as.numeric(x) + 1) * len) / numSerializedSlices)
       # nolint end
       rep(start, end - start)
     }))
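The revised `parallelize` logic does two things: it clamps the slice count to the collection length in a single `min(len, max(...))` expression, and it casts `x` to double (`as.numeric`) so that `x * len` cannot overflow R's 32-bit integers for large collections. A plain-Python sketch of the same arithmetic (the object-size and limit values below are made-up; Python's arbitrary-precision integers make the overflow cast unnecessary here):

```python
import math

def num_serialized_slices(num_slices, length, object_size, size_limit):
    # Single-expression clamp replacing the old max()-then-if pattern:
    # at least num_slices, enough slices that each fits in size_limit,
    # but never more slices than there are elements
    return min(length, max(num_slices, math.ceil(object_size / size_limit)))

def slice_ids(length, n_slices):
    # Mirror of the R code: slice x covers indices
    # [trunc(x * len / n), trunc((x + 1) * len / n)), and each element
    # is tagged with its slice's start offset (rep(start, end - start))
    ids = []
    for x in range(n_slices):
        start = (x * length) // n_slices
        end = ((x + 1) * length) // n_slices
        ids.extend([start] * (end - start))
    return ids

# The example from the R comment: numSerializedSlices of 22, length of 50.
# Every element gets a tag and there are 22 distinct slices.
ids = slice_ids(50, 22)
print(len(ids), len(set(ids)))  # 50 22
```

The boundaries partition the collection exactly: the last slice's `end` is `trunc(n * len / n) = len`, so no element is dropped or duplicated regardless of how `len` divides by the slice count.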

R/pkg/R/functions.R (60 additions, 3 deletions)

@@ -208,7 +208,7 @@ NULL
 #' # Dataframe used throughout this doc
 #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
 #' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
-#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
+#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1), shuffle(tmp$v1)))
 #' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1), array_distinct(tmp$v1)))
 #' head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
 #' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1), array_remove(tmp$v1, 21)))
@@ -223,6 +223,8 @@ NULL
 #' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
 #' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
 #' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
+#' head(select(tmp4, array_except(tmp4$v4, tmp4$v5), array_intersect(tmp4$v4, tmp4$v5)))
+#' head(select(tmp4, array_union(tmp4$v4, tmp4$v5)))
 #' head(select(tmp4, arrays_zip(tmp4$v4, tmp4$v5), map_from_arrays(tmp4$v4, tmp4$v5)))
 #' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
 #' tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
@@ -1697,8 +1699,8 @@ setMethod("to_date",
           })
 
 #' @details
-#' \code{to_json}: Converts a column containing a \code{structType}, array of \code{structType},
-#' a \code{mapType} or array of \code{mapType} into a Column of JSON string.
+#' \code{to_json}: Converts a column containing a \code{structType}, a \code{mapType}
+#' or an \code{arrayType} into a Column of JSON string.
 #' Resolving the Column can fail if an unsupported type is encountered.
 #'
 #' @rdname column_collection_functions
@@ -3024,6 +3026,34 @@ setMethod("array_distinct",
             column(jc)
           })
 
+#' @details
+#' \code{array_except}: Returns an array of the elements in the first array but not in the second
+#' array, without duplicates. The order of elements in the result is not determined.
+#'
+#' @rdname column_collection_functions
+#' @aliases array_except array_except,Column-method
+#' @note array_except since 2.4.0
+setMethod("array_except",
+          signature(x = "Column", y = "Column"),
+          function(x, y) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "array_except", x@jc, y@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{array_intersect}: Returns an array of the elements in the intersection of the given two
+#' arrays, without duplicates.
+#'
+#' @rdname column_collection_functions
+#' @aliases array_intersect array_intersect,Column-method
+#' @note array_intersect since 2.4.0
+setMethod("array_intersect",
+          signature(x = "Column", y = "Column"),
+          function(x, y) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "array_intersect", x@jc, y@jc)
+            column(jc)
+          })
+
 #' @details
 #' \code{array_join}: Concatenates the elements of column using the delimiter.
 #' Null values are replaced with nullReplacement if set, otherwise they are ignored.
@@ -3149,6 +3179,20 @@ setMethod("arrays_overlap",
             column(jc)
           })
 
+#' @details
+#' \code{array_union}: Returns an array of the elements in the union of the given two arrays,
+#' without duplicates.
+#'
+#' @rdname column_collection_functions
+#' @aliases array_union array_union,Column-method
+#' @note array_union since 2.4.0
+setMethod("array_union",
+          signature(x = "Column", y = "Column"),
+          function(x, y) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "array_union", x@jc, y@jc)
+            column(jc)
+          })
+
 #' @details
 #' \code{arrays_zip}: Returns a merged array of structs in which the N-th struct contains all N-th
 #' values of input arrays.
@@ -3167,6 +3211,19 @@ setMethod("arrays_zip",
             column(jc)
           })
 
+#' @details
+#' \code{shuffle}: Returns a random permutation of the given array.
+#'
+#' @rdname column_collection_functions
+#' @aliases shuffle shuffle,Column-method
+#' @note shuffle since 2.4.0
+setMethod("shuffle",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "shuffle", x@jc)
+            column(jc)
+          })
+
 #' @details
 #' \code{flatten}: Creates a single array from an array of arrays.
 #' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
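The documented semantics of the new array set operations, per the roxygen docs above, are: results contain no duplicates, and for `array_except` the element order is not guaranteed. A plain-Python sketch of those semantics (an illustration only, not the Catalyst implementation; this sketch happens to keep first-seen order, which Spark does not promise):

```python
import random

def _dedup(seq):
    # Drop duplicates while keeping first-seen order
    seen = set()
    return [x for x in seq if not (x in seen or seen.add(x))]

def array_except(a, b):
    # Elements of a that do not appear in b, without duplicates
    exclude = set(b)
    return _dedup(x for x in a if x not in exclude)

def array_intersect(a, b):
    # Elements appearing in both arrays, without duplicates
    keep = set(b)
    return _dedup(x for x in a if x in keep)

def array_union(a, b):
    # Elements of either array, without duplicates
    return _dedup(list(a) + list(b))

def shuffle_array(a):
    # Random permutation of the array, as `shuffle` is documented to return
    return random.sample(list(a), len(a))

a, b = [1, 2, 2, 3], [2, 4]
print(array_except(a, b))     # [1, 3]
print(array_intersect(a, b))  # [2]
print(array_union(a, b))      # [1, 2, 3, 4]
```

Unlike `exceptAll`/`intersectAll` on DataFrames, these column functions deduplicate: the repeated `2` in the left array contributes only one element to each result.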

R/pkg/R/generics.R (22 additions, 0 deletions)

@@ -471,6 +471,9 @@ setGeneric("explain", function(x, ...) { standardGeneric("explain") })
 #' @rdname except
 setGeneric("except", function(x, y) { standardGeneric("except") })
 
+#' @rdname exceptAll
+setGeneric("exceptAll", function(x, y) { standardGeneric("exceptAll") })
+
 #' @rdname nafunctions
 setGeneric("fillna", function(x, value, cols = NULL) { standardGeneric("fillna") })
 
@@ -495,6 +498,9 @@ setGeneric("insertInto", function(x, tableName, ...) { standardGeneric("insertIn
 #' @rdname intersect
 setGeneric("intersect", function(x, y) { standardGeneric("intersect") })
 
+#' @rdname intersectAll
+setGeneric("intersectAll", function(x, y) { standardGeneric("intersectAll") })
+
 #' @rdname isLocal
 setGeneric("isLocal", function(x) { standardGeneric("isLocal") })
 
@@ -761,6 +767,14 @@ setGeneric("array_contains", function(x, value) { standardGeneric("array_contain
 #' @name NULL
 setGeneric("array_distinct", function(x) { standardGeneric("array_distinct") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("array_except", function(x, y) { standardGeneric("array_except") })
+
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("array_intersect", function(x, y) { standardGeneric("array_intersect") })
+
 #' @rdname column_collection_functions
 #' @name NULL
 setGeneric("array_join", function(x, delimiter, ...) { standardGeneric("array_join") })
@@ -793,6 +807,10 @@ setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
 #' @name NULL
 setGeneric("arrays_overlap", function(x, y) { standardGeneric("arrays_overlap") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("array_union", function(x, y) { standardGeneric("array_union") })
+
 #' @rdname column_collection_functions
 #' @name NULL
 setGeneric("arrays_zip", function(x, ...) { standardGeneric("arrays_zip") })
@@ -1214,6 +1232,10 @@ setGeneric("shiftRight", function(y, x) { standardGeneric("shiftRight") })
 #' @name NULL
 setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUnsigned") })
 
+#' @rdname column_collection_functions
+#' @name NULL
+setGeneric("shuffle", function(x) { standardGeneric("shuffle") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("signum", function(x) { standardGeneric("signum") })
