
Commit 9d354c2

Merge branch 'master' into SPARK-31102

2 parents d69d271 + a6e6fbf

620 files changed (+24854, -6345 lines)


.asf.yaml

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# https://cwiki.apache.org/confluence/display/INFRA/.asf.yaml+features+for+git+repositories
+---
+github:
+  description: "Apache Spark - A unified analytics engine for large-scale data processing"
+  homepage: https://spark.apache.org/
+  labels:
+    - python
+    - scala
+    - r
+    - java
+    - big-data
+    - jdbc
+    - sql
+    - spark
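
Note: per the INFRA page linked in the file header, .asf.yaml lets ASF projects manage GitHub repository metadata (description, homepage, and topic labels) declaratively from the repository itself rather than through the GitHub UI.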

.gitignore

Lines changed: 0 additions & 2 deletions
@@ -18,8 +18,6 @@
 .idea_modules/
 .project
 .pydevproject
-.python-version
-.ruby-version
 .scala_dependencies
 .settings
 /lib/

R/pkg/NAMESPACE

Lines changed: 12 additions & 1 deletion
@@ -28,6 +28,7 @@ importFrom("utils", "download.file", "object.size", "packageVersion", "tail", "u
 
 # S3 methods exported
 export("sparkR.session")
+export("sparkR.init")
 export("sparkR.session.stop")
 export("sparkR.stop")
 export("sparkR.conf")
@@ -41,6 +42,9 @@ export("sparkR.callJStatic")
 
 export("install.spark")
 
+export("sparkRSQL.init",
+       "sparkRHive.init")
+
 # MLlib integration
 exportMethods("glm",
               "spark.glm",
@@ -68,7 +72,10 @@ exportMethods("glm",
               "spark.freqItemsets",
               "spark.associationRules",
               "spark.findFrequentSequentialPatterns",
-              "spark.assignClusters")
+              "spark.assignClusters",
+              "spark.fmClassifier",
+              "spark.lm",
+              "spark.fmRegressor")
 
 # Job group lifecycle management methods
 export("setJobGroup",
@@ -148,6 +155,7 @@ exportMethods("arrange",
               "printSchema",
               "randomSplit",
               "rbind",
+              "registerTempTable",
               "rename",
               "repartition",
               "repartitionByRange",
@@ -345,6 +353,7 @@ exportMethods("%<=>%",
               "over",
               "overlay",
               "percent_rank",
+              "percentile_approx",
               "pmod",
               "posexplode",
               "posexplode_outer",
@@ -430,8 +439,10 @@ export("as.DataFrame",
       "cacheTable",
       "clearCache",
       "createDataFrame",
+      "createExternalTable",
       "createTable",
       "currentDatabase",
+      "dropTempTable",
       "dropTempView",
       "listColumns",
       "listDatabases",

R/pkg/R/DataFrame.R

Lines changed: 26 additions & 0 deletions
@@ -521,6 +521,32 @@ setMethod("createOrReplaceTempView",
             invisible(callJMethod(x@sdf, "createOrReplaceTempView", viewName))
           })
 
+#' (Deprecated) Register Temporary Table
+#'
+#' Registers a SparkDataFrame as a Temporary Table in the SparkSession
+#' @param x A SparkDataFrame
+#' @param tableName A character vector containing the name of the table
+#'
+#' @seealso \link{createOrReplaceTempView}
+#' @rdname registerTempTable-deprecated
+#' @name registerTempTable
+#' @aliases registerTempTable,SparkDataFrame,character-method
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' path <- "path/to/file.json"
+#' df <- read.json(path)
+#' registerTempTable(df, "json_df")
+#' new_df <- sql("SELECT * FROM json_df")
+#'}
+#' @note registerTempTable since 1.4.0
+setMethod("registerTempTable",
+          signature(x = "SparkDataFrame", tableName = "character"),
+          function(x, tableName) {
+            .Deprecated("createOrReplaceTempView")
+            invisible(callJMethod(x@sdf, "createOrReplaceTempView", tableName))
+          })
+
 #' insertInto
 #'
 #' Insert the contents of a SparkDataFrame into a table registered in the current SparkSession.
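
For anyone tracking the deprecation: the restored method only warns and forwards. A minimal sketch of old vs. new usage (the JSON path is a placeholder):

    library(SparkR)
    sparkR.session()

    df <- read.json("path/to/file.json")  # placeholder path

    # Deprecated: emits a .Deprecated() warning, then forwards to the view API
    registerTempTable(df, "json_df")

    # Preferred equivalent
    createOrReplaceTempView(df, "json_df")
    new_df <- sql("SELECT * FROM json_df")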

R/pkg/R/catalog.R

Lines changed: 54 additions & 0 deletions
@@ -17,6 +17,35 @@
 
 # catalog.R: SparkSession catalog functions
 
+#' (Deprecated) Create an external table
+#'
+#' Creates an external table based on the dataset in a data source,
+#' Returns a SparkDataFrame associated with the external table.
+#'
+#' The data source is specified by the \code{source} and a set of options(...).
+#' If \code{source} is not specified, the default data source configured by
+#' "spark.sql.sources.default" will be used.
+#'
+#' @param tableName a name of the table.
+#' @param path the path of files to load.
+#' @param source the name of external data source.
+#' @param schema the schema of the data required for some data sources.
+#' @param ... additional argument(s) passed to the method.
+#' @return A SparkDataFrame.
+#' @rdname createExternalTable-deprecated
+#' @seealso \link{createTable}
+#' @examples
+#'\dontrun{
+#' sparkR.session()
+#' df <- createExternalTable("myjson", path="path/to/json", source="json", schema)
+#' }
+#' @name createExternalTable
+#' @note createExternalTable since 1.4.0
+createExternalTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ...) {
+  .Deprecated("createTable", old = "createExternalTable")
+  createTable(tableName, path, source, schema, ...)
+}
+
 #' Creates a table based on the dataset in a data source
 #'
 #' Creates a table based on the dataset in a data source. Returns a SparkDataFrame associated with
@@ -130,6 +159,31 @@ clearCache <- function() {
   invisible(callJMethod(catalog, "clearCache"))
 }
 
+#' (Deprecated) Drop Temporary Table
+#'
+#' Drops the temporary table with the given table name in the catalog.
+#' If the table has been cached/persisted before, it's also unpersisted.
+#'
+#' @param tableName The name of the SparkSQL table to be dropped.
+#' @seealso \link{dropTempView}
+#' @rdname dropTempTable-deprecated
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' df <- read.df(path, "parquet")
+#' createOrReplaceTempView(df, "table")
+#' dropTempTable("table")
+#' }
+#' @name dropTempTable
+#' @note dropTempTable since 1.4.0
+dropTempTable <- function(tableName) {
+  .Deprecated("dropTempView", old = "dropTempTable")
+  if (class(tableName) != "character") {
+    stop("tableName must be a string.")
+  }
+  dropTempView(tableName)
+}
+
 #' Drops the temporary view with the given view name in the catalog.
 #'
 #' Drops the temporary view with the given view name in the catalog.
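
Same pattern for the two restored catalog functions: both warn and delegate to their replacements. A minimal sketch (the parquet path is a placeholder):

    library(SparkR)
    sparkR.session()

    # Deprecated: warns, then calls createTable()
    tbl <- createExternalTable("people", path = "path/to/parquet", source = "parquet")

    # Deprecated: warns, then calls dropTempView()
    df <- read.df("path/to/parquet", "parquet")
    createOrReplaceTempView(df, "people_view")
    dropTempTable("people_view")  # prefer dropTempView("people_view")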

R/pkg/R/functions.R

Lines changed: 58 additions & 6 deletions
@@ -77,7 +77,13 @@ NULL
 #'          days to be added to or subtracted from \code{y}. For class \code{character}, it is
 #'          \itemize{
 #'          \item \code{date_format}: date format specification.
-#'          \item \code{from_utc_timestamp}, \code{to_utc_timestamp}: time zone to use.
+#'          \item \code{from_utc_timestamp}, \code{to_utc_timestamp}: A string detailing
+#'              the time zone ID that the input should be adjusted to. It should be in the format
+#'              of either region-based zone IDs or zone offsets. Region IDs must have the form
+#'              'area/city', such as 'America/Los_Angeles'. Zone offsets must be in the format
+#'              '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are supported
+#'              as aliases of '+00:00'. Other short names are not recommended to use
+#'              because they can be ambiguous.
 #'          \item \code{next_day}: day of the week string.
 #'          }
 #' @param ... additional argument(s).
@@ -1410,6 +1416,52 @@ setMethod("quarter",
             column(jc)
           })
 
+#' @details
+#' \code{percentile_approx} Returns the approximate percentile value of
+#' numeric column at the given percentage.
+#'
+#' @param percentage Numeric percentage at which percentile should be computed
+#'                   All values should be between 0 and 1.
+#'                   If length equals to 1 resulting column is of type double,
+#'                   otherwise, array type of double.
+#' @param accuracy A positive numeric literal (default: 10000) which
+#'                 controls approximation accuracy at the cost of memory.
+#'                 Higher value of accuracy yields better accuracy, 1.0/accuracy
+#'                 is the relative error of the approximation.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases percentile_approx percentile_approx,Column-method
+#' @note percentile_approx since 3.1.0
+setMethod("percentile_approx",
+          signature(x = "characterOrColumn", percentage = "numericOrColumn"),
+          function(x, percentage, accuracy = 10000) {
+            col <- if (class(x) == "Column") {
+              x@jc
+            } else {
+              column(x)@jc
+            }
+
+            percentage <- if (class(percentage) == "Column") {
+              percentage@jc
+            } else if (length(percentage) > 1) {
+              do.call(create_array, lapply(percentage, lit))@jc
+            } else {
+              lit(percentage)@jc
+            }
+
+            accuracy <- if (class(accuracy) == "Column") {
+              accuracy@jc
+            } else {
+              lit(as.integer(accuracy))@jc
+            }
+
+            jc <- callJStatic(
+              "org.apache.spark.sql.functions", "percentile_approx",
+              col, percentage, accuracy
+            )
+            column(jc)
+          })
+
 #' @details
 #' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
 #'
@@ -1833,7 +1885,7 @@ setMethod("radians",
 #' @details
 #' \code{to_date}: Converts the column into a DateType. You may optionally specify
 #' a format according to the rules in:
-#' \url{https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html}.
+#' \href{https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html}{Datetime Pattern}
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
 #' By default, it follows casting rules to a DateType if the format is omitted
@@ -1929,7 +1981,7 @@ setMethod("to_csv", signature(x = "Column"),
 #' @details
 #' \code{to_timestamp}: Converts the column into a TimestampType. You may optionally specify
 #' a format according to the rules in:
-#' \url{https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html}.
+#' \href{https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html}{Datetime Pattern}
 #' If the string cannot be parsed according to the specified format (or default),
 #' the value of the column will be null.
 #' By default, it follows casting rules to a TimestampType if the format is omitted
@@ -2801,8 +2853,8 @@ setMethod("format_string", signature(format = "character", x = "Column"),
 #' \code{from_unixtime}: Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC)
 #' to a string representing the timestamp of that moment in the current system time zone in the JVM
 #' in the given format.
-#' See \href{https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html}{
-#' Customizing Formats} for available options.
+#' See \href{https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html}{
+#' Datetime Pattern} for available options.
 #'
 #' @rdname column_datetime_functions
 #'
@@ -2923,7 +2975,7 @@ setMethod("lpad", signature(x = "Column", len = "numeric", pad = "character"),
 
 #' @details
 #' \code{rand}: Generates a random column with independent and identically distributed (i.i.d.)
-#' samples from U[0.0, 1.0].
+#' samples uniformly distributed in [0.0, 1.0).
 #' Note: the function is non-deterministic in general case.
 #'
 #' @rdname column_nonaggregate_functions
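
A quick usage sketch of the new percentile_approx wrapper (the data and column names are illustrative):

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(v = as.numeric(1:100)))

    # Single percentage: result is a double column (approximate median)
    head(select(df, percentile_approx(df$v, 0.5)))

    # Multiple percentages: result is an array<double> column;
    # lower accuracy trades precision for memory (relative error = 1/accuracy)
    head(select(df, percentile_approx("v", c(0.25, 0.5, 0.75), accuracy = 100)))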

R/pkg/R/generics.R

Lines changed: 20 additions & 0 deletions
@@ -528,6 +528,9 @@ setGeneric("persist", function(x, newLevel) { standardGeneric("persist") })
 #' @rdname printSchema
 setGeneric("printSchema", function(x) { standardGeneric("printSchema") })
 
+#' @rdname registerTempTable-deprecated
+setGeneric("registerTempTable", function(x, tableName) { standardGeneric("registerTempTable") })
+
 #' @rdname rename
 setGeneric("rename", function(x, ...) { standardGeneric("rename") })
 
@@ -1189,6 +1192,11 @@ setGeneric("overlay", function(x, replace, pos, ...) { standardGeneric("overlay"
 #' @name NULL
 setGeneric("percent_rank", function(x = "missing") { standardGeneric("percent_rank") })
 
+#' @rdname column_aggregate_functions
+#' @name NULL
+setGeneric("percentile_approx",
+           function(x, percentage, ...) { standardGeneric("percentile_approx") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("pmod", function(y, x) { standardGeneric("pmod") })
@@ -1471,6 +1479,14 @@ setGeneric("spark.als", function(data, ...) { standardGeneric("spark.als") })
 setGeneric("spark.bisectingKmeans",
            function(data, formula, ...) { standardGeneric("spark.bisectingKmeans") })
 
+#' @rdname spark.fmClassifier
+setGeneric("spark.fmClassifier",
+           function(data, formula, ...) { standardGeneric("spark.fmClassifier") })
+
+#' @rdname spark.fmRegressor
+setGeneric("spark.fmRegressor",
+           function(data, formula, ...) { standardGeneric("spark.fmRegressor") })
+
 #' @rdname spark.gaussianMixture
 setGeneric("spark.gaussianMixture",
            function(data, formula, ...) { standardGeneric("spark.gaussianMixture") })
@@ -1539,6 +1555,10 @@ setGeneric("spark.findFrequentSequentialPatterns",
 setGeneric("spark.assignClusters",
            function(data, ...) { standardGeneric("spark.assignClusters") })
 
+#' @rdname spark.lm
+setGeneric("spark.lm",
+           function(data, formula, ...) { standardGeneric("spark.lm") })
+
 #' @param object a fitted ML model object.
 #' @param path the directory where the model is saved.
 #' @param ... additional argument(s) passed to the method.
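
The three new ML generics follow the established spark.* signature (data first, then a formula). A minimal sketch against spark.lm, assuming the implementation added elsewhere in this 620-file commit supports the usual summary()/predict() flow like its sibling estimators:

    library(SparkR)
    sparkR.session()

    training <- createDataFrame(iris)  # SparkR replaces '.' in column names with '_'

    # Linear regression via the new spark.lm generic
    model <- spark.lm(training, Sepal_Length ~ Sepal_Width + Petal_Length)
    summary(model)
    head(predict(model, training))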
