Commit 837cc96

Author: David Navas (committed)

Merge pull request #127 from markhamstra/csd-1.5
SPY-875: backported SPARK-11863 and merged Apache branch-1.5
2 parents: 6166f05 + 2c0c771

62 files changed: +766 -168 lines


core/src/main/scala/org/apache/spark/TestUtils.scala

Lines changed: 6 additions & 5 deletions
@@ -19,6 +19,7 @@ package org.apache.spark
 
 import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream}
 import java.net.{URI, URL}
+import java.nio.file.Paths
 import java.util.jar.{JarEntry, JarOutputStream}
 
 import scala.collection.JavaConversions._
@@ -78,15 +79,15 @@ private[spark] object TestUtils {
   }
 
   /**
-   * Create a jar file that contains this set of files. All files will be located at the root
-   * of the jar.
+   * Create a jar file that contains this set of files. All files will be located in the specified
+   * directory or at the root of the jar.
    */
-  def createJar(files: Seq[File], jarFile: File): URL = {
+  def createJar(files: Seq[File], jarFile: File, directoryPrefix: Option[String] = None): URL = {
     val jarFileStream = new FileOutputStream(jarFile)
     val jarStream = new JarOutputStream(jarFileStream, new java.util.jar.Manifest())
 
     for (file <- files) {
-      val jarEntry = new JarEntry(file.getName)
+      val jarEntry = new JarEntry(Paths.get(directoryPrefix.getOrElse(""), file.getName).toString)
       jarStream.putNextEntry(jarEntry)
 
       val in = new FileInputStream(file)
@@ -118,7 +119,7 @@ private[spark] object TestUtils {
       classpathUrls: Seq[URL]): File = {
     val compiler = ToolProvider.getSystemJavaCompiler
 
-    // Calling this outputs a class file in pwd. It's easier to just rename the file than
+    // Calling this outputs a class file in pwd. It's easier to just rename the files than
     // build a custom FileManager that controls the output location.
     val options = if (classpathUrls.nonEmpty) {
       Seq("-classpath", classpathUrls.map { _.getFile }.mkString(File.pathSeparator))
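
A usage sketch of the new optional directoryPrefix parameter. It assumes compilation inside the org.apache.spark package (TestUtils is private[spark]); the object name and the temp files are illustrative only:

package org.apache.spark

import java.io.File

object CreateJarSketch {
  def main(args: Array[String]): Unit = {
    // Any existing files will do; temp files keep the sketch self-contained.
    val files = Seq.tabulate(2) { i =>
      val f = File.createTempFile(s"entry$i", ".txt")
      f.deleteOnExit()
      f
    }
    val jarFile = File.createTempFile("sketch", ".jar")

    // Unchanged default: entries land at the root of the jar.
    TestUtils.createJar(files, jarFile)

    // New behaviour: entries are placed under "lib/" inside the jar.
    TestUtils.createJar(files, jarFile, directoryPrefix = Some("lib"))
  }
}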

core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

Lines changed: 14 additions & 3 deletions
@@ -39,7 +39,7 @@ import org.apache.ivy.plugins.matcher.GlobPatternMatcher
 import org.apache.ivy.plugins.repository.file.FileRepository
 import org.apache.ivy.plugins.resolver.{FileSystemResolver, ChainResolver, IBiblioResolver}
 
-import org.apache.spark.{SparkUserAppException, SPARK_VERSION}
+import org.apache.spark.{SparkException, SparkUserAppException, SPARK_VERSION}
 import org.apache.spark.api.r.RUtils
 import org.apache.spark.deploy.rest._
 import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, Utils}
@@ -521,8 +521,19 @@ object SparkSubmit {
         sysProps.put("spark.yarn.isPython", "true")
       }
       if (args.principal != null) {
-        require(args.keytab != null, "Keytab must be specified when the keytab is specified")
-        UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
+        require(args.keytab != null, "Keytab must be specified when principal is specified")
+        if (!new File(args.keytab).exists()) {
+          throw new SparkException(s"Keytab file: ${args.keytab} does not exist")
+        } else {
+          // Add keytab and principal configurations in sysProps to make them available
+          // for later use; e.g. in spark sql, the isolated class loader used to talk
+          // to HiveMetastore will use these settings. They will be set as Java system
+          // properties and then loaded by SparkConf
+          sysProps.put("spark.yarn.keytab", args.keytab)
+          sysProps.put("spark.yarn.principal", args.principal)
+
+          UserGroupInformation.loginUserFromKeytab(args.principal, args.keytab)
+        }
       }
     }
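
A minimal sketch, not Spark's own code, of why the two sysProps.put calls matter: a SparkConf built with loadDefaults = true (the default) reads every spark.* Java system property, so components created later in the same JVM, such as the isolated classloader that talks to the Hive metastore, see the keytab and principal without receiving them explicitly. The object name, path, and principal below are hypothetical:

import org.apache.spark.SparkConf

object KeytabPropsSketch {
  def main(args: Array[String]): Unit = {
    // Normally set by SparkSubmit from --keytab / --principal.
    sys.props("spark.yarn.keytab") = "/etc/security/keytabs/spark.keytab"
    sys.props("spark.yarn.principal") = "spark/host.example.com@EXAMPLE.COM"

    val conf = new SparkConf() // loadDefaults = true: reads spark.* system properties
    println(conf.get("spark.yarn.keytab"))    // /etc/security/keytabs/spark.keytab
    println(conf.get("spark.yarn.principal")) // spark/host.example.com@EXAMPLE.COM
  }
}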

core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala

Lines changed: 4 additions & 6 deletions
@@ -66,12 +66,10 @@ private[spark] class AppClient(
   // A thread pool for registering with masters. Because registering with a master is a blocking
   // action, this thread pool must be able to create "masterRpcAddresses.size" threads at the same
   // time so that we can register with all masters.
-  private val registerMasterThreadPool = new ThreadPoolExecutor(
-    0,
-    masterRpcAddresses.size, // Make sure we can register with all masters at the same time
-    60L, TimeUnit.SECONDS,
-    new SynchronousQueue[Runnable](),
-    ThreadUtils.namedThreadFactory("appclient-register-master-threadpool"))
+  private val registerMasterThreadPool = ThreadUtils.newDaemonCachedThreadPool(
+    "appclient-register-master-threadpool",
+    masterRpcAddresses.length // Make sure we can register with all masters at the same time
+  )
 
   // A scheduled executor for scheduling the registration actions
   private val registrationRetryThread =
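
A small sketch of the registration pattern behind the pool sizing, using plain java.util.concurrent instead of the private[spark] ThreadUtils helper: each registration attempt blocks, so registering with all masters at the same time needs one thread per master. The object name and master addresses are hypothetical:

import java.util.concurrent.{Executors, TimeUnit}

object RegisterAllMastersSketch {
  def main(args: Array[String]): Unit = {
    val masterAddresses = Seq("host1:7077", "host2:7077", "host3:7077")
    // One thread per master, mirroring the sizing of registerMasterThreadPool.
    val pool = Executors.newFixedThreadPool(masterAddresses.size)
    masterAddresses.foreach { addr =>
      pool.execute(new Runnable {
        override def run(): Unit = {
          // A blocking registration RPC to `addr` would go here.
          println(s"registering with $addr")
        }
      })
    }
    pool.shutdown()
    pool.awaitTermination(10, TimeUnit.SECONDS)
  }
}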

core/src/main/scala/org/apache/spark/deploy/master/Master.scala

Lines changed: 1 addition & 1 deletion
@@ -925,7 +925,7 @@ private[deploy] class Master(
     }
 
     val eventLogFilePrefix = EventLoggingListener.getLogPath(
-      eventLogDir, app.id, app.desc.eventLogCodec)
+      eventLogDir, app.id, appAttemptId = None, compressionCodecName = app.desc.eventLogCodec)
     val fs = Utils.getHadoopFileSystem(eventLogDir, hadoopConf)
     val inProgressExists = fs.exists(new Path(eventLogFilePrefix +
       EventLoggingListener.IN_PROGRESS))
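
The switch to named arguments matters because getLogPath gained an appAttemptId parameter of the same Option[String] type as the codec: a positional call would still compile but would pass the codec into the new slot. A sketch of that hazard, with a hypothetical function whose signature only mirrors the shape of the call above:

object NamedArgsSketch {
  // Hypothetical signature, shaped like getLogPath after the change.
  def logPath(
      dir: String,
      appId: String,
      appAttemptId: Option[String] = None,
      compressionCodecName: Option[String] = None): String = {
    val base = (Seq(dir, appId) ++ appAttemptId).mkString("/")
    base + compressionCodecName.map("." + _).getOrElse("")
  }

  def main(args: Array[String]): Unit = {
    println(logPath("/logs", "app-1", Some("lz4")))                        // /logs/app-1/lz4 (codec silently treated as an attempt id)
    println(logPath("/logs", "app-1", compressionCodecName = Some("lz4"))) // /logs/app-1.lz4 (named argument keeps it in the right slot)
  }
}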

core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala

Lines changed: 4 additions & 6 deletions
@@ -147,12 +147,10 @@ private[deploy] class Worker(
   // A thread pool for registering with masters. Because registering with a master is a blocking
   // action, this thread pool must be able to create "masterRpcAddresses.size" threads at the same
   // time so that we can register with all masters.
-  private val registerMasterThreadPool = new ThreadPoolExecutor(
-    0,
-    masterRpcAddresses.size, // Make sure we can register with all masters at the same time
-    60L, TimeUnit.SECONDS,
-    new SynchronousQueue[Runnable](),
-    ThreadUtils.namedThreadFactory("worker-register-master-threadpool"))
+  private val registerMasterThreadPool = ThreadUtils.newDaemonCachedThreadPool(
+    "worker-register-master-threadpool",
+    masterRpcAddresses.size // Make sure we can register with all masters at the same time
+  )
 
   var coresUsed = 0
   var memoryUsed = 0

core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala

Lines changed: 1 addition & 1 deletion
@@ -267,7 +267,7 @@ private[akka] class ErrorMonitor extends Actor with ActorLogReceive with Logging
   }
 
   override def receiveWithLogging: Actor.Receive = {
-    case Error(cause: Throwable, _, _, message: String) => logError(message, cause)
+    case Error(cause: Throwable, _, _, message: String) => logDebug(message, cause)
   }
 }

core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala

Lines changed: 4 additions & 2 deletions
@@ -818,7 +818,9 @@ class DAGScheduler(
       stage.resetInternalAccumulators()
     }
 
-    val properties = jobIdToActiveJob.get(stage.firstJobId).map(_.properties).orNull
+    // Use the scheduling pool, job group, description, etc. from an ActiveJob associated
+    // with this Stage
+    val properties = jobIdToActiveJob(jobId).properties
 
     runningStages += stage
     // SparkListenerStageSubmitted should be posted before testing whether tasks are
@@ -913,7 +915,7 @@
       stage.pendingPartitions ++= tasks.map(_.partitionId)
       logDebug("New pending partitions: " + stage.pendingPartitions)
       taskScheduler.submitTasks(new TaskSet(
-        tasks.toArray, stage.id, stage.latestInfo.attemptId, stage.firstJobId, properties))
+        tasks.toArray, stage.id, stage.latestInfo.attemptId, jobId, properties))
       stage.latestInfo.submissionTime = Some(clock.getTimeMillis())
     } else {
       // Because we posted SparkListenerStageSubmitted earlier, we should mark
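
The stage now takes its properties from the ActiveJob that is submitting it (jobId) rather than from whichever job first created it, so per-job local properties reach the TaskSet. A small sketch of the standard driver-side API that sets those properties; the pool name, group id, and description are hypothetical:

import org.apache.spark.{SparkConf, SparkContext}

object JobPropertiesSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("job-properties-sketch"))

    sc.setLocalProperty("spark.scheduler.pool", "production")
    sc.setJobGroup("nightly-etl", "nightly ETL job")

    // The stages of this job are submitted with the pool, group, and description set above.
    sc.parallelize(1 to 100).count()

    sc.stop()
  }
}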

core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala

Lines changed: 2 additions & 2 deletions
@@ -103,16 +103,16 @@ private[spark] class TaskResultGetter(sparkEnv: SparkEnv, scheduler: TaskSchedul
     try {
       getTaskResultExecutor.execute(new Runnable {
         override def run(): Unit = Utils.logUncaughtExceptions {
+          val loader = Utils.getContextOrSparkClassLoader
           try {
             if (serializedData != null && serializedData.limit() > 0) {
               reason = serializer.get().deserialize[TaskEndReason](
-                serializedData, Utils.getSparkClassLoader)
+                serializedData, loader)
             }
           } catch {
             case cnd: ClassNotFoundException =>
               // Log an error but keep going here -- the task failed, so not catastrophic
               // if we can't deserialize the reason.
-              val loader = Utils.getContextOrSparkClassLoader
               logError(
                 "Could not deserialize TaskEndReason: ClassNotFound with classloader " + loader)
             case ex: Exception => {}
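
The fix resolves the classloader once, before the try block, and prefers the thread's context classloader so that user-defined TaskEndReason classes (for example from the REPL) can be deserialized; the same loader is then reused in the error message. A minimal sketch of that fallback pattern, an assumption about the shape of Utils.getContextOrSparkClassLoader rather than its exact code:

object ClassLoaderSketch {
  // Prefer the calling thread's context classloader, which may see user or
  // REPL classes; fall back to the loader that loaded this class.
  def contextOrFallback(fallback: ClassLoader = getClass.getClassLoader): ClassLoader =
    Option(Thread.currentThread().getContextClassLoader).getOrElse(fallback)

  def main(args: Array[String]): Unit = {
    val loader = contextOrFallback()
    println(s"Deserializing with classloader: $loader")
  }
}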

core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala

Lines changed: 19 additions & 18 deletions
@@ -57,7 +57,9 @@ private[spark] object ShutdownHookManager extends Logging {
   // Add a shutdown hook to delete the temp dirs when the JVM exits
   addShutdownHook(TEMP_DIR_SHUTDOWN_PRIORITY) { () =>
     logInfo("Shutdown hook called")
-    shutdownDeletePaths.foreach { dirPath =>
+    // we need to materialize the paths to delete because deleteRecursively removes items from
+    // shutdownDeletePaths as we are traversing through it.
+    shutdownDeletePaths.toArray.foreach { dirPath =>
       try {
         logInfo("Deleting directory " + dirPath)
         Utils.deleteRecursively(new File(dirPath))
@@ -204,7 +206,7 @@ private[spark] object ShutdownHookManager extends Logging {
 private [util] class SparkShutdownHookManager {
 
   private val hooks = new PriorityQueue[SparkShutdownHook]()
-  private var shuttingDown = false
+  @volatile private var shuttingDown = false
 
   /**
    * Install a hook to run at shutdown and run all registered hooks in order. Hadoop 1.x does not
@@ -228,28 +230,27 @@ private [util] class SparkShutdownHookManager {
     }
   }
 
-  def runAll(): Unit = synchronized {
+  def runAll(): Unit = {
     shuttingDown = true
-    while (!hooks.isEmpty()) {
-      Try(Utils.logUncaughtExceptions(hooks.poll().run()))
+    var nextHook: SparkShutdownHook = null
+    while ({ nextHook = hooks.synchronized { hooks.poll() }; nextHook != null }) {
+      Try(Utils.logUncaughtExceptions(nextHook.run()))
     }
   }
 
-  def add(priority: Int, hook: () => Unit): AnyRef = synchronized {
-    checkState()
-    val hookRef = new SparkShutdownHook(priority, hook)
-    hooks.add(hookRef)
-    hookRef
-  }
-
-  def remove(ref: AnyRef): Boolean = synchronized {
-    hooks.remove(ref)
+  def add(priority: Int, hook: () => Unit): AnyRef = {
+    hooks.synchronized {
+      if (shuttingDown) {
+        throw new IllegalStateException("Shutdown hooks cannot be modified during shutdown.")
+      }
+      val hookRef = new SparkShutdownHook(priority, hook)
+      hooks.add(hookRef)
+      hookRef
+    }
   }
 
-  private def checkState(): Unit = {
-    if (shuttingDown) {
-      throw new IllegalStateException("Shutdown hooks cannot be modified during shutdown.")
-    }
+  def remove(ref: AnyRef): Boolean = {
+    hooks.synchronized { hooks.remove(ref) }
   }
 
 }
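
runAll no longer holds one lock for the whole drain: it takes one hook at a time under hooks.synchronized and runs it outside the lock, so add and remove stay usable while shutdown is in progress, and the @volatile flag keeps the shutting-down check visible across threads. A condensed, self-contained sketch of that drain pattern; the Hook type and the priorities are illustrative stand-ins, not Spark's classes:

import java.util.{Comparator, PriorityQueue}
import scala.util.Try

object ShutdownDrainSketch {
  // Illustrative stand-in for SparkShutdownHook: a priority and a body.
  final case class Hook(priority: Int, body: () => Unit)

  // Take one hook at a time under the queue's lock, then run it outside the
  // lock, so other threads can still add or remove hooks during the drain.
  def drain(hooks: PriorityQueue[Hook]): Unit = {
    var next: Hook = null
    while ({ next = hooks.synchronized { hooks.poll() }; next != null }) {
      Try(next.body()) // one failing hook must not stop the rest
    }
  }

  def main(args: Array[String]): Unit = {
    // Higher priority runs first, mirroring SparkShutdownHook's ordering.
    val queue = new PriorityQueue[Hook](11, new Comparator[Hook] {
      override def compare(a: Hook, b: Hook): Int = b.priority - a.priority
    })
    queue.add(Hook(25, () => println("delete temp dirs")))
    queue.add(Hook(50, () => println("stop SparkContext")))
    drain(queue) // prints "stop SparkContext" then "delete temp dirs"
  }
}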

core/src/main/scala/org/apache/spark/util/ThreadUtils.scala

Lines changed: 11 additions & 3 deletions
@@ -57,10 +57,18 @@ private[spark] object ThreadUtils {
    * Create a cached thread pool whose max number of threads is `maxThreadNumber`. Thread names
    * are formatted as prefix-ID, where ID is a unique, sequentially assigned integer.
    */
-  def newDaemonCachedThreadPool(prefix: String, maxThreadNumber: Int): ThreadPoolExecutor = {
+  def newDaemonCachedThreadPool(
+      prefix: String, maxThreadNumber: Int, keepAliveSeconds: Int = 60): ThreadPoolExecutor = {
     val threadFactory = namedThreadFactory(prefix)
-    new ThreadPoolExecutor(
-      0, maxThreadNumber, 60L, TimeUnit.SECONDS, new SynchronousQueue[Runnable], threadFactory)
+    val threadPool = new ThreadPoolExecutor(
+      maxThreadNumber, // corePoolSize: the max number of threads to create before queuing the tasks
+      maxThreadNumber, // maximumPoolSize: because we use LinkedBlockingDeque, this one is not used
+      keepAliveSeconds,
+      TimeUnit.SECONDS,
+      new LinkedBlockingQueue[Runnable],
+      threadFactory)
+    threadPool.allowCoreThreadTimeOut(true)
+    threadPool
   }
 
   /**
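
The rebuilt pool keeps corePoolSize equal to maximumPoolSize, queues extra work on an unbounded LinkedBlockingQueue instead of rejecting it (which a SynchronousQueue does once all threads are busy), and lets idle core threads expire after keepAliveSeconds. A sketch of an equivalent construction in plain java.util.concurrent for readers outside the private[spark] helper; it omits the named daemon ThreadFactory that Spark supplies:

import java.util.concurrent.{LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit}

object BoundedCachedPoolSketch {
  def boundedCachedPool(maxThreads: Int, keepAliveSeconds: Int = 60): ThreadPoolExecutor = {
    val pool = new ThreadPoolExecutor(
      maxThreads,                // corePoolSize: threads created before work is queued
      maxThreads,                // maximumPoolSize: unused beyond core with an unbounded queue
      keepAliveSeconds,
      TimeUnit.SECONDS,
      new LinkedBlockingQueue[Runnable])
    pool.allowCoreThreadTimeOut(true) // reclaim idle core threads after the keep-alive
    pool
  }

  def main(args: Array[String]): Unit = {
    val pool = boundedCachedPool(maxThreads = 4)
    (1 to 8).foreach { i =>
      pool.execute(new Runnable {
        override def run(): Unit = println(s"task $i on ${Thread.currentThread().getName}")
      })
    }
    pool.shutdown()
    pool.awaitTermination(10, TimeUnit.SECONDS)
  }
}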
