
Commit c2be400

Merge branch 'master' of github.com:apache/spark into SPARK-25959

2 parents: 7d24f33 + 6abe906

125 files changed: +1118 / -1001 lines


R/pkg/NAMESPACE
3 additions & 0 deletions

@@ -194,6 +194,7 @@ exportMethods("%<=>%",
               "acos",
               "add_months",
               "alias",
+              "approx_count_distinct",
               "approxCountDistinct",
               "approxQuantile",
               "array_contains",
@@ -252,6 +253,7 @@ exportMethods("%<=>%",
               "dayofweek",
               "dayofyear",
               "decode",
+              "degrees",
               "dense_rank",
               "desc",
               "element_at",
@@ -334,6 +336,7 @@ exportMethods("%<=>%",
               "posexplode",
               "posexplode_outer",
               "quarter",
+              "radians",
               "rand",
               "randn",
               "rank",

R/pkg/R/functions.R
64 additions & 9 deletions

@@ -112,7 +112,7 @@ NULL
 #' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
 #' tmp <- mutate(df, v1 = log(df$mpg), v2 = cbrt(df$disp),
 #'               v3 = bround(df$wt, 1), v4 = bin(df$cyl),
-#'               v5 = hex(df$wt), v6 = toDegrees(df$gear),
+#'               v5 = hex(df$wt), v6 = degrees(df$gear),
 #'               v7 = atan2(df$cyl, df$am), v8 = hypot(df$cyl, df$am),
 #'               v9 = pmod(df$hp, df$cyl), v10 = shiftLeft(df$disp, 1),
 #'               v11 = conv(df$hp, 10, 16), v12 = sign(df$vs - 0.5),
@@ -320,23 +320,37 @@ setMethod("acos",
           })
 
 #' @details
-#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
+#' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
-#' @aliases approxCountDistinct approxCountDistinct,Column-method
+#' @aliases approx_count_distinct approx_count_distinct,Column-method
 #' @examples
 #'
 #' \dontrun{
-#' head(select(df, approxCountDistinct(df$gear)))
-#' head(select(df, approxCountDistinct(df$gear, 0.02)))
+#' head(select(df, approx_count_distinct(df$gear)))
+#' head(select(df, approx_count_distinct(df$gear, 0.02)))
 #' head(select(df, countDistinct(df$gear, df$cyl)))
 #' head(select(df, n_distinct(df$gear)))
 #' head(distinct(select(df, "gear")))}
+#' @note approx_count_distinct(Column) since 3.0.0
+setMethod("approx_count_distinct",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{approxCountDistinct}: Returns the approximate number of distinct items in a group.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases approxCountDistinct approxCountDistinct,Column-method
 #' @note approxCountDistinct(Column) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc)
+            .Deprecated("approx_count_distinct")
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc)
             column(jc)
           })
 
@@ -1651,7 +1665,22 @@ setMethod("tanh",
 setMethod("toDegrees",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "toDegrees", x@jc)
+            .Deprecated("degrees")
+            jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{degrees}: Converts an angle measured in radians to an approximately equivalent angle
+#' measured in degrees.
+#'
+#' @rdname column_math_functions
+#' @aliases degrees degrees,Column-method
+#' @note degrees since 3.0.0
+setMethod("degrees",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "degrees", x@jc)
             column(jc)
           })
 
@@ -1665,7 +1694,22 @@ setMethod("toDegrees",
 setMethod("toRadians",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "toRadians", x@jc)
+            .Deprecated("radians")
+            jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{radians}: Converts an angle measured in degrees to an approximately equivalent angle
+#' measured in radians.
+#'
+#' @rdname column_math_functions
+#' @aliases radians radians,Column-method
+#' @note radians since 3.0.0
+setMethod("radians",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "radians", x@jc)
             column(jc)
           })
 
@@ -2065,13 +2109,24 @@ setMethod("pmod", signature(y = "Column"),
 
 #' @param rsd maximum estimation error allowed (default = 0.05).
 #'
+#' @rdname column_aggregate_functions
+#' @aliases approx_count_distinct,Column-method
+#' @note approx_count_distinct(Column, numeric) since 3.0.0
+setMethod("approx_count_distinct",
+          signature(x = "Column"),
+          function(x, rsd = 0.05) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
+            column(jc)
+          })
+
 #' @rdname column_aggregate_functions
 #' @aliases approxCountDistinct,Column-method
 #' @note approxCountDistinct(Column, numeric) since 1.4.0
 setMethod("approxCountDistinct",
           signature(x = "Column"),
           function(x, rsd = 0.05) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "approxCountDistinct", x@jc, rsd)
+            .Deprecated("approx_count_distinct")
+            jc <- callJStatic("org.apache.spark.sql.functions", "approx_count_distinct", x@jc, rsd)
             column(jc)
           })
 
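
These SparkR wrappers are thin: each one delegates through callJStatic to the same-named function in org.apache.spark.sql.functions, while approxCountDistinct, toDegrees, and toRadians now emit .Deprecated() warnings pointing at the snake_case names. For orientation, here is a minimal Scala sketch of the equivalent JVM-side calls; the DataFrame and column names are illustrative stand-ins for the mtcars-based df used in the roxygen examples above, not code from this commit:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{approx_count_distinct, col, degrees, lit, radians}

object ApproxAndAnglesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("approx-and-angles").master("local[*]").getOrCreate()

    // Small demo table standing in for the R examples' DataFrame.
    val df = spark.range(0, 1000).toDF("gear")

    df.agg(
      approx_count_distinct(col("gear")).as("approx_gears"),          // default rsd = 0.05
      approx_count_distinct(col("gear"), 0.02).as("tighter_estimate") // caller-supplied error bound
    ).show()

    // degrees/radians mirror the renamed R helpers (formerly toDegrees/toRadians).
    df.select(degrees(lit(math.Pi)).as("pi_as_degrees"), radians(lit(180.0)).as("pi_as_radians")).show()

    spark.stop()
  }
}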

R/pkg/R/generics.R
12 additions & 0 deletions

@@ -746,6 +746,10 @@ setGeneric("windowOrderBy", function(col, ...) { standardGeneric("windowOrderBy"
 #' @name NULL
 setGeneric("add_months", function(y, x) { standardGeneric("add_months") })
 
+#' @rdname column_aggregate_functions
+#' @name NULL
+setGeneric("approx_count_distinct", function(x, ...) { standardGeneric("approx_count_distinct") })
+
 #' @rdname column_aggregate_functions
 #' @name NULL
 setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCountDistinct") })
@@ -1287,10 +1291,18 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst
 #' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
 
+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("degrees", function(x) { standardGeneric("degrees") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("toDegrees", function(x) { standardGeneric("toDegrees") })
 
+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("radians", function(x) { standardGeneric("radians") })
+
 #' @rdname column_math_functions
 #' @name NULL
 setGeneric("toRadians", function(x) { standardGeneric("toRadians") })

R/pkg/tests/fulltests/test_sparkSQL.R
2 additions & 2 deletions

@@ -1379,7 +1379,7 @@ test_that("column operators", {
 
 test_that("column functions", {
   c <- column("a")
-  c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c)
+  c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c)
   c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
   c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
   c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
@@ -1388,7 +1388,7 @@ test_that("column functions", {
   c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c)
   c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id()
   c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
-  c10 <- sumDistinct(c) + tan(c) + tanh(c) + toDegrees(c) + toRadians(c)
+  c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c)
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
   c12 <- variance(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)

bin/docker-image-tool.sh
3 additions & 0 deletions

@@ -197,6 +197,9 @@ do
       if ! which minikube 1>/dev/null; then
         error "Cannot find minikube."
       fi
+      if ! minikube status 1>/dev/null; then
+        error "Cannot contact minikube. Make sure it's running."
+      fi
       eval $(minikube docker-env)
       ;;
   esac

bin/spark-shell
4 additions & 1 deletion

@@ -32,7 +32,10 @@ if [ -z "${SPARK_HOME}" ]; then
   source "$(dirname "$0")"/find-spark-home
 fi
 
-export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]"
+export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]
+
+Scala REPL options:
+  -I <file>                   preload <file>, enforcing line-by-line interpretation"
 
 # SPARK-4161: scala does not assume use of the java classpath,
 # so we need to add the "-Dscala.usejavacp=true" flag manually. We

bin/spark-shell2.cmd
7 additions & 1 deletion

@@ -20,7 +20,13 @@ rem
 rem Figure out where the Spark framework is installed
 call "%~dp0find-spark-home.cmd"
 
-set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]
+set LF=^
+
+
+rem two empty lines are required
+set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]^%LF%%LF%^%LF%%LF%^
+Scala REPL options:^%LF%%LF%^
+  -I ^<file^>                  preload ^<file^>, enforcing line-by-line interpretation
 
 rem SPARK-4161: scala does not assume use of the java classpath,
 rem so we need to add the "-Dscala.usejavacp=true" flag manually. We

core/pom.xml
1 addition & 1 deletion

@@ -56,7 +56,7 @@
     </dependency>
     <dependency>
       <groupId>org.apache.xbean</groupId>
-      <artifactId>xbean-asm6-shaded</artifactId>
+      <artifactId>xbean-asm7-shaded</artifactId>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>

core/src/main/java/org/apache/spark/ExecutorPlugin.java
3 additions & 3 deletions

@@ -20,18 +20,18 @@
 import org.apache.spark.annotation.DeveloperApi;
 
 /**
- * A plugin which can be automaticaly instantiated within each Spark executor. Users can specify
+ * A plugin which can be automatically instantiated within each Spark executor. Users can specify
  * plugins which should be created with the "spark.executor.plugins" configuration. An instance
  * of each plugin will be created for every executor, including those created by dynamic allocation,
  * before the executor starts running any tasks.
  *
  * The specific api exposed to the end users still considered to be very unstable. We will
- * hopefully be able to keep compatability by providing default implementations for any methods
+ * hopefully be able to keep compatibility by providing default implementations for any methods
 * added, but make no guarantees this will always be possible across all Spark releases.
 *
 * Spark does nothing to verify the plugin is doing legitimate things, or to manage the resources
 * it uses. A plugin acquires the same privileges as the user running the task. A bad plugin
- * could also intefere with task execution and make the executor fail in unexpected ways.
+ * could also interfere with task execution and make the executor fail in unexpected ways.
 */
 @DeveloperApi
 public interface ExecutorPlugin {
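
The typo fixes above sit in the Javadoc of the DeveloperApi ExecutorPlugin interface. For orientation, a no-op plugin might look like the following Scala sketch; the class name is made up, and it assumes the interface's default init()/shutdown() hooks, which are not visible in this hunk:

import org.apache.spark.ExecutorPlugin

// Hypothetical plugin that only logs executor lifecycle events; it does no real work.
class LifecycleLoggingPlugin extends ExecutorPlugin {
  // Assumed hook: called once when the executor instantiates the plugin.
  override def init(): Unit = println("LifecycleLoggingPlugin initialized")

  // Assumed hook: called when the executor shuts down.
  override def shutdown(): Unit = println("LifecycleLoggingPlugin shutting down")
}

Per the Javadoc, such a class would be enabled by listing its fully qualified name in the "spark.executor.plugins" configuration.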

core/src/main/scala/org/apache/spark/BarrierTaskContext.scala
0 additions & 2 deletions

@@ -158,8 +158,6 @@ class BarrierTaskContext private[spark] (
 
   override def isInterrupted(): Boolean = taskContext.isInterrupted()
 
-  override def isRunningLocally(): Boolean = taskContext.isRunningLocally()
-
   override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
     taskContext.addTaskCompletionListener(listener)
     this

core/src/main/scala/org/apache/spark/SparkConf.scala
1 addition & 1 deletion

@@ -25,9 +25,9 @@ import scala.collection.mutable.LinkedHashSet
 
 import org.apache.avro.{Schema, SchemaNormalization}
 
-import org.apache.spark.deploy.history.config._
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
+import org.apache.spark.internal.config.History._
 import org.apache.spark.serializer.KryoSerializer
 import org.apache.spark.util.Utils
 

core/src/main/scala/org/apache/spark/TaskContext.scala
0 additions & 7 deletions

@@ -96,13 +96,6 @@ abstract class TaskContext extends Serializable {
    */
  def isInterrupted(): Boolean
 
-  /**
-   * Returns true if the task is running locally in the driver program.
-   * @return false
-   */
-  @deprecated("Local execution was removed, so this always returns false", "2.0.0")
-  def isRunningLocally(): Boolean
-
  /**
   * Adds a (Java friendly) listener to be executed on task completion.
   * This will be called in all situations - success, failure, or cancellation. Adding a listener

core/src/main/scala/org/apache/spark/TaskContextImpl.scala
0 additions & 2 deletions

@@ -157,8 +157,6 @@ private[spark] class TaskContextImpl(
   @GuardedBy("this")
   override def isCompleted(): Boolean = synchronized(completed)
 
-  override def isRunningLocally(): Boolean = false
-
   override def isInterrupted(): Boolean = reasonIfKilled.isDefined
 
   override def getLocalProperty(key: String): String = localProperties.getProperty(key)

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
2 additions & 2 deletions

@@ -42,13 +42,14 @@ import org.fusesource.leveldbjni.internal.NativeDB
 import org.apache.spark.{SecurityManager, SparkConf, SparkException}
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.History._
+import org.apache.spark.internal.config.Status._
 import org.apache.spark.io.CompressionCodec
 import org.apache.spark.scheduler._
 import org.apache.spark.scheduler.ReplayListenerBus._
 import org.apache.spark.status._
 import org.apache.spark.status.KVUtils._
 import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo}
-import org.apache.spark.status.config._
 import org.apache.spark.ui.SparkUI
 import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils}
 import org.apache.spark.util.kvstore._
@@ -86,7 +87,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     this(conf, new SystemClock())
   }
 
-  import config._
   import FsHistoryProvider._
 
   // Interval between safemode checks.

core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala
1 addition & 1 deletion

@@ -28,9 +28,9 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder}
 
 import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.deploy.SparkHadoopUtil
-import org.apache.spark.deploy.history.config.HISTORY_SERVER_UI_PORT
 import org.apache.spark.internal.Logging
 import org.apache.spark.internal.config._
+import org.apache.spark.internal.config.History.HISTORY_SERVER_UI_PORT
 import org.apache.spark.status.api.v1.{ApiRootResource, ApplicationInfo, UIRoot}
 import org.apache.spark.ui.{SparkUI, UIUtils, WebUI}
 import org.apache.spark.ui.JettyUtils._

core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala
1 addition & 2 deletions

@@ -27,6 +27,7 @@ import org.apache.commons.io.FileUtils
 
 import org.apache.spark.SparkConf
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config.History._
 import org.apache.spark.status.KVUtils._
 import org.apache.spark.util.{Clock, Utils}
 import org.apache.spark.util.kvstore.KVStore
@@ -50,8 +51,6 @@ private class HistoryServerDiskManager(
     listing: KVStore,
     clock: Clock) extends Logging {
 
-  import config._
-
   private val appStoreDir = new File(path, "apps")
   if (!appStoreDir.isDirectory() && !appStoreDir.mkdir()) {
     throw new IllegalArgumentException(s"Failed to create app directory ($appStoreDir).")

core/src/main/scala/org/apache/spark/executor/ShuffleWriteMetrics.scala
0 additions & 10 deletions

@@ -56,14 +56,4 @@ class ShuffleWriteMetrics private[spark] () extends Serializable {
   private[spark] def decRecordsWritten(v: Long): Unit = {
     _recordsWritten.setValue(recordsWritten - v)
   }
-
-  // Legacy methods for backward compatibility.
-  // TODO: remove these once we make this class private.
-  @deprecated("use bytesWritten instead", "2.0.0")
-  def shuffleBytesWritten: Long = bytesWritten
-  @deprecated("use writeTime instead", "2.0.0")
-  def shuffleWriteTime: Long = writeTime
-  @deprecated("use recordsWritten instead", "2.0.0")
-  def shuffleRecordsWritten: Long = recordsWritten
-
 }

core/src/main/scala/org/apache/spark/deploy/history/config.scala renamed to core/src/main/scala/org/apache/spark/internal/config/History.scala
2 additions & 4 deletions

@@ -15,14 +15,13 @@
  * limitations under the License.
  */
 
-package org.apache.spark.deploy.history
+package org.apache.spark.internal.config
 
 import java.util.concurrent.TimeUnit
 
-import org.apache.spark.internal.config.ConfigBuilder
 import org.apache.spark.network.util.ByteUnit
 
-private[spark] object config {
+private[spark] object History {
 
   val DEFAULT_LOG_DIR = "file:/tmp/spark-events"
 
@@ -63,5 +62,4 @@ private[spark] object config {
     "parts of event log files. It can be disabled by setting this config to 0.")
     .bytesConf(ByteUnit.BYTE)
     .createWithDefaultString("1m")
-
 }
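
With the object relocated from org.apache.spark.deploy.history.config to org.apache.spark.internal.config.History, the callers above (SparkConf, FsHistoryProvider, HistoryServer, HistoryServerDiskManager) simply switch to the new import. As a hedged sketch of how a further entry could be declared alongside the ones in History, following the ConfigBuilder pattern visible in this hunk: the key, doc string, and default below are illustrative, not part of this commit, and these builders are private[spark], so code like this only compiles inside Spark's own source tree.

package org.apache.spark.internal.config

import java.util.concurrent.TimeUnit

private[spark] object HistorySketch {
  // Illustrative entry only: declared the same way as the real settings in History.
  val CLEANER_INTERVAL_EXAMPLE = ConfigBuilder("spark.history.fs.cleaner.interval.example")
    .doc("How often the hypothetical cleaner task would run.")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString("1d")
}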
