
Commit cf62b95

Merge remote-tracking branch 'upstream/master' into expr_bin

2 parents 0cf20f2 + 9db73ec
99 files changed: 1819 additions, 696 deletions


R/log4j.properties
Lines changed: 1 addition & 1 deletion

```diff
@@ -19,7 +19,7 @@
 log4j.rootCategory=INFO, file
 log4j.appender.file=org.apache.log4j.FileAppender
 log4j.appender.file.append=true
-log4j.appender.file.file=R-unit-tests.log
+log4j.appender.file.file=R/target/unit-tests.log
 log4j.appender.file.layout=org.apache.log4j.PatternLayout
 log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n
```

build/mvn
Lines changed: 13 additions & 22 deletions

```diff
@@ -69,11 +69,14 @@ install_app() {

 # Install maven under the build/ folder
 install_mvn() {
+  local MVN_VERSION="3.3.3"
+
   install_app \
-    "http://archive.apache.org/dist/maven/maven-3/3.2.5/binaries" \
-    "apache-maven-3.2.5-bin.tar.gz" \
-    "apache-maven-3.2.5/bin/mvn"
-  MVN_BIN="${_DIR}/apache-maven-3.2.5/bin/mvn"
+    "http://archive.apache.org/dist/maven/maven-3/${MVN_VERSION}/binaries" \
+    "apache-maven-${MVN_VERSION}-bin.tar.gz" \
+    "apache-maven-${MVN_VERSION}/bin/mvn"
+
+  MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
 }

 # Install zinc under the build/ folder
@@ -105,28 +108,16 @@ install_scala() {
   SCALA_LIBRARY="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-library.jar"
 }

-# Determines if a given application is already installed. If not, will attempt
-# to install
-## Arg1 - application name
-## Arg2 - Alternate path to local install under build/ dir
-check_and_install_app() {
-  # create the local environment variable in uppercase
-  local app_bin="`echo $1 | awk '{print toupper(\$0)}'`_BIN"
-  # some black magic to set the generated app variable (i.e. MVN_BIN) into the
-  # environment
-  eval "${app_bin}=`which $1 2>/dev/null`"
-
-  if [ -z "`which $1 2>/dev/null`" ]; then
-    install_$1
-  fi
-}
-
 # Setup healthy defaults for the Zinc port if none were provided from
 # the environment
 ZINC_PORT=${ZINC_PORT:-"3030"}

-# Check and install all applications necessary to build Spark
-check_and_install_app "mvn"
+# Install Maven if necessary
+MVN_BIN="$(command -v mvn)"
+
+if [ ! "$MVN_BIN" ]; then
+  install_mvn
+fi

 # Install the proper version of Scala and Zinc for the build
 install_zinc
```

core/src/main/scala/org/apache/spark/SecurityManager.scala
Lines changed: 14 additions & 3 deletions

```diff
@@ -192,7 +192,7 @@ private[spark] class SecurityManager(sparkConf: SparkConf)
   // key used to store the spark secret in the Hadoop UGI
   private val sparkSecretLookupKey = "sparkCookie"

-  private val authOn = sparkConf.getBoolean("spark.authenticate", false)
+  private val authOn = sparkConf.getBoolean(SecurityManager.SPARK_AUTH_CONF, false)
   // keep spark.ui.acls.enable for backwards compatibility with 1.0
   private var aclsOn =
     sparkConf.getBoolean("spark.acls.enable", sparkConf.getBoolean("spark.ui.acls.enable", false))
@@ -365,10 +365,12 @@ private[spark] class SecurityManager(sparkConf: SparkConf)
       cookie
     } else {
       // user must have set spark.authenticate.secret config
-      sparkConf.getOption("spark.authenticate.secret") match {
+      // For Master/Worker, auth secret is in conf; for Executors, it is in env variable
+      sys.env.get(SecurityManager.ENV_AUTH_SECRET)
+        .orElse(sparkConf.getOption(SecurityManager.SPARK_AUTH_SECRET_CONF)) match {
         case Some(value) => value
         case None => throw new Exception("Error: a secret key must be specified via the " +
-          "spark.authenticate.secret config")
+          SecurityManager.SPARK_AUTH_SECRET_CONF + " config")
       }
     }
     sCookie
@@ -449,3 +451,12 @@ private[spark] class SecurityManager(sparkConf: SparkConf)
   override def getSaslUser(appId: String): String = getSaslUser()
   override def getSecretKey(appId: String): String = getSecretKey()
 }
+
+private[spark] object SecurityManager {
+
+  val SPARK_AUTH_CONF: String = "spark.authenticate"
+  val SPARK_AUTH_SECRET_CONF: String = "spark.authenticate.secret"
+  // This is used to set auth secret to an executor's env variable. It should have the same
+  // value as SPARK_AUTH_SECRET_CONF set in SparkConf
+  val ENV_AUTH_SECRET = "_SPARK_AUTH_SECRET"
+}
```
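
Note: the new companion object centralizes the auth-related keys and adds an env-variable channel for executors. A minimal sketch of how the pieces fit together, written as if from inside the `org.apache.spark` package (both classes are `private[spark]`); the secret value is hypothetical:

```scala
import org.apache.spark.{SecurityManager, SparkConf}

// The constants replace the bare string literals used previously.
val conf = new SparkConf()
  .set(SecurityManager.SPARK_AUTH_CONF, "true")            // "spark.authenticate"
  .set(SecurityManager.SPARK_AUTH_SECRET_CONF, "s3cr3t")   // "spark.authenticate.secret"

// Lookup order used by getSecretKey: executors receive the secret via the
// _SPARK_AUTH_SECRET environment variable; Master/Worker read it from SparkConf.
val secret = sys.env.get(SecurityManager.ENV_AUTH_SECRET)
  .orElse(conf.getOption(SecurityManager.SPARK_AUTH_SECRET_CONF))
```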

core/src/main/scala/org/apache/spark/SparkConf.scala
Lines changed: 1 addition & 1 deletion

```diff
@@ -557,7 +557,7 @@ private[spark] object SparkConf extends Logging {
   def isExecutorStartupConf(name: String): Boolean = {
     isAkkaConf(name) ||
     name.startsWith("spark.akka") ||
-    name.startsWith("spark.auth") ||
+    (name.startsWith("spark.auth") && name != SecurityManager.SPARK_AUTH_SECRET_CONF) ||
     name.startsWith("spark.ssl") ||
     isSparkPortConf(name)
   }
```
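
Note: the effect of this one-line change is that every `spark.auth*` setting except the secret itself still counts as an executor startup config. A minimal standalone sketch of the rule, simplified to the two prefixes touched here:

```scala
// Simplified model of isExecutorStartupConf after this change: the auth secret is
// excluded, so it is no longer forwarded to executors through their startup config.
def isExecutorStartupConf(name: String): Boolean =
  (name.startsWith("spark.auth") && name != "spark.authenticate.secret") ||
  name.startsWith("spark.ssl")

assert(isExecutorStartupConf("spark.authenticate"))
assert(!isExecutorStartupConf("spark.authenticate.secret"))
```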

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
Lines changed: 15 additions & 7 deletions

```diff
@@ -35,7 +35,8 @@ import org.apache.ivy.core.resolve.ResolveOptions
 import org.apache.ivy.core.retrieve.RetrieveOptions
 import org.apache.ivy.core.settings.IvySettings
 import org.apache.ivy.plugins.matcher.GlobPatternMatcher
-import org.apache.ivy.plugins.resolver.{ChainResolver, IBiblioResolver}
+import org.apache.ivy.plugins.repository.file.FileRepository
+import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
 import org.apache.spark.SPARK_VERSION
 import org.apache.spark.deploy.rest._
 import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils}
@@ -735,8 +736,14 @@ private[spark] object SparkSubmitUtils {
   }

   /** Path of the local Maven cache. */
-  private[spark] def m2Path: File = new File(System.getProperty("user.home"),
-    ".m2" + File.separator + "repository" + File.separator)
+  private[spark] def m2Path: File = {
+    if (Utils.isTesting) {
+      // test builds delete the maven cache, and this can cause flakiness
+      new File("dummy", ".m2" + File.separator + "repository")
+    } else {
+      new File(System.getProperty("user.home"), ".m2" + File.separator + "repository")
+    }
+  }

   /**
    * Extracts maven coordinates from a comma-delimited string
@@ -756,12 +763,13 @@ private[spark] object SparkSubmitUtils {
     localM2.setName("local-m2-cache")
     cr.add(localM2)

-    val localIvy = new IBiblioResolver
-    localIvy.setRoot(new File(ivySettings.getDefaultIvyUserDir,
-      "local" + File.separator).toURI.toString)
+    val localIvy = new FileSystemResolver
+    val localIvyRoot = new File(ivySettings.getDefaultIvyUserDir, "local")
+    localIvy.setLocal(true)
+    localIvy.setRepository(new FileRepository(localIvyRoot))
     val ivyPattern = Seq("[organisation]", "[module]", "[revision]", "[type]s",
       "[artifact](-[classifier]).[ext]").mkString(File.separator)
-    localIvy.setPattern(ivyPattern)
+    localIvy.addIvyPattern(localIvyRoot.getAbsolutePath + File.separator + ivyPattern)
     localIvy.setName("local-ivy-cache")
     cr.add(localIvy)

```

core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
Lines changed: 23 additions & 15 deletions

```diff
@@ -160,7 +160,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       replayBus.addListener(appListener)
       val appInfo = replay(fs.getFileStatus(new Path(logDir, attempt.logPath)), replayBus)

-      ui.setAppName(s"${appInfo.name} ($appId)")
+      appInfo.foreach { app => ui.setAppName(s"${app.name} ($appId)") }

       val uiAclsEnabled = conf.getBoolean("spark.history.ui.acls.enable", false)
       ui.getSecurityManager.setAcls(uiAclsEnabled)
@@ -282,8 +282,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
     val newAttempts = logs.flatMap { fileStatus =>
       try {
         val res = replay(fileStatus, bus)
-        logInfo(s"Application log ${res.logPath} loaded successfully.")
-        Some(res)
+        res match {
+          case Some(r) => logDebug(s"Application log ${r.logPath} loaded successfully.")
+          case None => logWarning(s"Failed to load application log ${fileStatus.getPath}. " +
+            "The application may have not started.")
+        }
+        res
       } catch {
         case e: Exception =>
           logError(
@@ -429,9 +433,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)

   /**
    * Replays the events in the specified log file and returns information about the associated
-   * application.
+   * application. Return `None` if the application ID cannot be located.
    */
-  private def replay(eventLog: FileStatus, bus: ReplayListenerBus): FsApplicationAttemptInfo = {
+  private def replay(
+      eventLog: FileStatus,
+      bus: ReplayListenerBus): Option[FsApplicationAttemptInfo] = {
     val logPath = eventLog.getPath()
     logInfo(s"Replaying log path: $logPath")
     val logInput =
@@ -445,16 +451,18 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
       val appCompleted = isApplicationCompleted(eventLog)
       bus.addListener(appListener)
       bus.replay(logInput, logPath.toString, !appCompleted)
-      new FsApplicationAttemptInfo(
-        logPath.getName(),
-        appListener.appName.getOrElse(NOT_STARTED),
-        appListener.appId.getOrElse(logPath.getName()),
-        appListener.appAttemptId,
-        appListener.startTime.getOrElse(-1L),
-        appListener.endTime.getOrElse(-1L),
-        getModificationTime(eventLog).get,
-        appListener.sparkUser.getOrElse(NOT_STARTED),
-        appCompleted)
+      appListener.appId.map { appId =>
+        new FsApplicationAttemptInfo(
+          logPath.getName(),
+          appListener.appName.getOrElse(NOT_STARTED),
+          appId,
+          appListener.appAttemptId,
+          appListener.startTime.getOrElse(-1L),
+          appListener.endTime.getOrElse(-1L),
+          getModificationTime(eventLog).get,
+          appListener.sparkUser.getOrElse(NOT_STARTED),
+          appCompleted)
+      }
     } finally {
       logInput.close()
     }
```
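
Note: since `replay()` now returns an `Option`, callers can drop event logs that never recorded an application ID instead of failing on them. A simplified, self-contained model of that control flow (the map-based "event log" here is purely illustrative):

```scala
// Standalone model: flatMap over an Option-returning replay() silently drops attempts
// whose event log has no application ID (i.e. the application never started).
case class AttemptInfo(appId: String, name: String)

def replay(eventLog: Map[String, String]): Option[AttemptInfo] =
  eventLog.get("appId").map(id => AttemptInfo(id, eventLog.getOrElse("name", "<Not Started>")))

val logs = Seq(
  Map("appId" -> "app-001", "name" -> "WordCount"),
  Map("name" -> "NeverStarted")   // no appId recorded
)

val attempts = logs.flatMap(replay)   // only app-001 survives
assert(attempts.map(_.appId) == Seq("app-001"))
```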

core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala
Lines changed: 13 additions & 3 deletions

```diff
@@ -24,6 +24,7 @@ import scala.collection.JavaConversions._
 import scala.collection.Map

 import org.apache.spark.Logging
+import org.apache.spark.SecurityManager
 import org.apache.spark.deploy.Command
 import org.apache.spark.launcher.WorkerCommandBuilder
 import org.apache.spark.util.Utils
@@ -40,12 +41,14 @@ object CommandUtils extends Logging {
    */
   def buildProcessBuilder(
       command: Command,
+      securityMgr: SecurityManager,
       memory: Int,
       sparkHome: String,
       substituteArguments: String => String,
       classPaths: Seq[String] = Seq[String](),
       env: Map[String, String] = sys.env): ProcessBuilder = {
-    val localCommand = buildLocalCommand(command, substituteArguments, classPaths, env)
+    val localCommand = buildLocalCommand(
+      command, securityMgr, substituteArguments, classPaths, env)
     val commandSeq = buildCommandSeq(localCommand, memory, sparkHome)
     val builder = new ProcessBuilder(commandSeq: _*)
     val environment = builder.environment()
@@ -69,27 +72,34 @@
    */
   private def buildLocalCommand(
       command: Command,
+      securityMgr: SecurityManager,
       substituteArguments: String => String,
       classPath: Seq[String] = Seq[String](),
       env: Map[String, String]): Command = {
     val libraryPathName = Utils.libraryPathEnvName
     val libraryPathEntries = command.libraryPathEntries
     val cmdLibraryPath = command.environment.get(libraryPathName)

-    val newEnvironment = if (libraryPathEntries.nonEmpty && libraryPathName.nonEmpty) {
+    var newEnvironment = if (libraryPathEntries.nonEmpty && libraryPathName.nonEmpty) {
       val libraryPaths = libraryPathEntries ++ cmdLibraryPath ++ env.get(libraryPathName)
       command.environment + ((libraryPathName, libraryPaths.mkString(File.pathSeparator)))
     } else {
       command.environment
     }

+    // set auth secret to env variable if needed
+    if (securityMgr.isAuthenticationEnabled) {
+      newEnvironment += (SecurityManager.ENV_AUTH_SECRET -> securityMgr.getSecretKey)
+    }
+
     Command(
       command.mainClass,
       command.arguments.map(substituteArguments),
       newEnvironment,
       command.classPathEntries ++ classPath,
       Seq[String](), // library path already captured in environment variable
-      command.javaOpts)
+      // filter out auth secret from java options
+      command.javaOpts.filterNot(_.startsWith("-D" + SecurityManager.SPARK_AUTH_SECRET_CONF)))
   }

   /** Spawn a thread that will redirect a given stream to a file */
```
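
Note: the net effect of the CommandUtils changes is that the auth secret reaches the child process only through its environment, never through a `-D` java option that would be visible in process listings. A simplified sketch of that transformation (values are hypothetical):

```scala
// Simplified model: the secret is moved from javaOpts into the child environment.
val secret = "s3cr3t"                                                 // hypothetical value
val javaOpts = Seq("-Xmx1g", "-Dspark.authenticate.secret=" + secret)

val filteredOpts = javaOpts.filterNot(_.startsWith("-Dspark.authenticate.secret"))
val childEnv = Map("_SPARK_AUTH_SECRET" -> secret)

assert(filteredOpts == Seq("-Xmx1g"))
assert(childEnv("_SPARK_AUTH_SECRET") == secret)
```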

core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala
Lines changed: 2 additions & 2 deletions

```diff
@@ -85,8 +85,8 @@ private[deploy] class DriverRunner(
       }

       // TODO: If we add ability to submit multiple jars they should also be added here
-      val builder = CommandUtils.buildProcessBuilder(driverDesc.command, driverDesc.mem,
-        sparkHome.getAbsolutePath, substituteVariables)
+      val builder = CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
+        driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)
       launchDriver(builder, driverDir, driverDesc.supervise)
     }
     catch {
```

core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala
Lines changed: 3 additions & 3 deletions

```diff
@@ -25,7 +25,7 @@ import akka.actor.ActorRef
 import com.google.common.base.Charsets.UTF_8
 import com.google.common.io.Files

-import org.apache.spark.{SparkConf, Logging}
+import org.apache.spark.{SecurityManager, SparkConf, Logging}
 import org.apache.spark.deploy.{ApplicationDescription, ExecutorState}
 import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged
 import org.apache.spark.util.Utils
@@ -125,8 +125,8 @@ private[deploy] class ExecutorRunner(
   private def fetchAndRunExecutor() {
     try {
       // Launch the process
-      val builder = CommandUtils.buildProcessBuilder(appDesc.command, memory,
-        sparkHome.getAbsolutePath, substituteVariables)
+      val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
+        memory, sparkHome.getAbsolutePath, substituteVariables)
       val command = builder.command()
       logInfo("Launch command: " + command.mkString("\"", "\" \"", "\""))

```

core/src/main/scala/org/apache/spark/storage/BlockManager.scala
Lines changed: 3 additions & 1 deletion

```diff
@@ -83,8 +83,10 @@ private[spark] class BlockManager(
   private var externalBlockStoreInitialized = false
   private[spark] val memoryStore = new MemoryStore(this, maxMemory)
   private[spark] val diskStore = new DiskStore(this, diskBlockManager)
-  private[spark] lazy val externalBlockStore: ExternalBlockStore =
+  private[spark] lazy val externalBlockStore: ExternalBlockStore = {
+    externalBlockStoreInitialized = true
     new ExternalBlockStore(this, executorId)
+  }

   private[spark]
   val externalShuffleServiceEnabled = conf.getBoolean("spark.shuffle.service.enabled", false)
```
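
Note: wrapping the lazy val in a block means `externalBlockStoreInitialized` flips exactly when the store is first forced, so later code can consult the flag without accidentally creating the store. A small standalone illustration of that lazy-initialization pattern (toy code, not Spark's):

```scala
object LazyInitDemo extends App {
  class Store { println("external store created") }

  var initialized = false
  lazy val externalStore: Store = {
    initialized = true   // side effect runs only on first access
    new Store
  }

  println(initialized)   // false: checking the flag does not force the lazy val
  externalStore          // first access: prints "external store created"
  println(initialized)   // true
}
```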

core/src/main/scala/org/apache/spark/ui/UIUtils.scala
Lines changed: 1 addition & 1 deletion

```diff
@@ -362,7 +362,7 @@ private[spark] object UIUtils extends Logging {
         { g.incomingEdges.map { e => <div class="incoming-edge">{e.fromId},{e.toId}</div> } }
         { g.outgoingEdges.map { e => <div class="outgoing-edge">{e.fromId},{e.toId}</div> } }
         {
-          g.rootCluster.getAllNodes.filter(_.cached).map { n =>
+          g.rootCluster.getCachedNodes.map { n =>
             <div class="cached-rdd">{n.id}</div>
           }
         }
```

core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala
Lines changed: 3 additions & 3 deletions

```diff
@@ -66,9 +66,9 @@ private[ui] class RDDOperationCluster(val id: String, private var _name: String)
     _childClusters += childCluster
   }

-  /** Return all the nodes container in this cluster, including ones nested in other clusters. */
-  def getAllNodes: Seq[RDDOperationNode] = {
-    _childNodes ++ _childClusters.flatMap(_.childNodes)
+  /** Return all the nodes which are cached. */
+  def getCachedNodes: Seq[RDDOperationNode] = {
+    _childNodes.filter(_.cached) ++ _childClusters.flatMap(_.getCachedNodes)
   }
 }
```
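
Note: unlike the removed `getAllNodes`, which only looked one level into child clusters, `getCachedNodes` filters for cached nodes and recurses through arbitrarily deep nesting. A simplified standalone model of that recursion (toy `Node`/`Cluster` types, not the real ones):

```scala
// Toy model of the recursive collection: cached nodes are gathered at every depth.
case class Node(id: Int, cached: Boolean)

class Cluster(nodes: Seq[Node], children: Seq[Cluster]) {
  def getCachedNodes: Seq[Node] =
    nodes.filter(_.cached) ++ children.flatMap(_.getCachedNodes)
}

val leaf = new Cluster(Seq(Node(3, cached = true)), Nil)
val root = new Cluster(Seq(Node(1, cached = false), Node(2, cached = true)), Seq(leaf))

assert(root.getCachedNodes.map(_.id) == Seq(2, 3))
```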
