
Commit da4856e

Author/committer: Marcelo Vanzin
Message: Merge branch 'master' into SPARK-25875
Parents: 4152c76, e9d3ca0

File tree: 100 files changed, +2348 additions, -1262 deletions

Note: large commits have some content hidden by default, so only a subset of the 100 changed files is shown below.


R/pkg/tests/fulltests/test_sparkSQL_eager.R

Lines changed: 8 additions & 8 deletions
(All changed lines in this file are blank; the edits are whitespace-only.)

@@ -22,30 +22,30 @@ context("test show SparkDataFrame when eager execution is enabled.")
 test_that("eager execution is not enabled", {
   # Start Spark session without eager execution enabled
   sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE)
-
+
   df <- createDataFrame(faithful)
   expect_is(df, "SparkDataFrame")
   expected <- "eruptions:double, waiting:double"
   expect_output(show(df), expected)
-
+
   # Stop Spark session
   sparkR.session.stop()
 })
 
 test_that("eager execution is enabled", {
   # Start Spark session with eager execution enabled
   sparkConfig <- list(spark.sql.repl.eagerEval.enabled = "true")
-
+
   sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, sparkConfig = sparkConfig)
-
+
   df <- createDataFrame(faithful)
   expect_is(df, "SparkDataFrame")
   expected <- paste0("(+---------+-------+\n",
                      "|eruptions|waiting|\n",
                      "+---------+-------+\n)*",
                      "(only showing top 20 rows)")
   expect_output(show(df), expected)
-
+
   # Stop Spark session
   sparkR.session.stop()
 })
@@ -55,9 +55,9 @@ test_that("eager execution is enabled with maxNumRows and truncate set", {
   sparkConfig <- list(spark.sql.repl.eagerEval.enabled = "true",
                       spark.sql.repl.eagerEval.maxNumRows = as.integer(5),
                       spark.sql.repl.eagerEval.truncate = as.integer(2))
-
+
   sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE, sparkConfig = sparkConfig)
-
+
   df <- arrange(createDataFrame(faithful), "waiting")
   expect_is(df, "SparkDataFrame")
   expected <- paste0("(+---------+-------+\n",
@@ -66,7 +66,7 @@ test_that("eager execution is enabled with maxNumRows and truncate set", {
                      "| 1.| 43|\n)*",
                      "(only showing top 5 rows)")
   expect_output(show(df), expected)
-
+
   # Stop Spark session
   sparkR.session.stop()
 })
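
The three tests above toggle eager evaluation purely through session configuration. For reference, a minimal Scala sketch of a standalone session carrying the same keys; the builder setup and local master are assumptions, not part of this commit, and the spark.sql.repl.eagerEval.* keys are consumed by REPL display code (such as SparkR's show()), so this only illustrates the configuration surface:

import org.apache.spark.sql.SparkSession

// Hypothetical standalone app that sets the same eager-evaluation keys the R test uses.
val spark = SparkSession.builder()
  .master("local[2]")                                    // assumed local master, for illustration only
  .config("spark.sql.repl.eagerEval.enabled", "true")    // enable eager rendering in REPL frontends
  .config("spark.sql.repl.eagerEval.maxNumRows", "5")    // cap the rows shown, as in the test
  .config("spark.sql.repl.eagerEval.truncate", "2")      // truncate cell values to 2 characters
  .getOrCreate()

val df = spark.range(5).toDF("waiting")
df.show()      // explicit show(); the eager rendering itself is a REPL-frontend behaviour
spark.stop()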

bin/pyspark

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ export PYSPARK_PYTHON
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.7-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.8.1-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"

bin/pyspark2.cmd

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.7-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.8.1-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py

core/pom.xml

Lines changed: 1 addition & 1 deletion
@@ -350,7 +350,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.7</version>
+      <version>0.10.8.1</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>

core/src/main/scala/org/apache/spark/SparkConf.scala

Lines changed: 3 additions & 1 deletion
@@ -731,7 +731,9 @@ private[spark] object SparkConf extends Logging {
     KEYTAB.key -> Seq(
       AlternateConfig("spark.yarn.keytab", "3.0")),
     PRINCIPAL.key -> Seq(
-      AlternateConfig("spark.yarn.principal", "3.0"))
+      AlternateConfig("spark.yarn.principal", "3.0")),
+    KERBEROS_RELOGIN_PERIOD.key -> Seq(
+      AlternateConfig("spark.yarn.kerberos.relogin.period", "3.0"))
   )
 
   /**
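
The new entry maps the old YARN-era name onto the current config constant, so applications that still set the deprecated key keep working (with a deprecation warning). A minimal sketch of how that fallback behaves; the new key name spark.kerberos.relogin.period is an assumption, as the diff only shows the constant KERBEROS_RELOGIN_PERIOD.key:

import org.apache.spark.SparkConf

// Only the deprecated key is set; reading the current key is expected to fall back
// to it through the AlternateConfig table shown above.
val conf = new SparkConf()
  .set("spark.yarn.kerberos.relogin.period", "2m")                    // deprecated name from the diff
val period = conf.get("spark.kerberos.relogin.period", "not set")     // assumed new key name
println(period)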

core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala

Lines changed: 2 additions & 1 deletion
@@ -32,7 +32,8 @@ private[spark] object PythonUtils {
     val pythonPath = new ArrayBuffer[String]
     for (sparkHome <- sys.env.get("SPARK_HOME")) {
       pythonPath += Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator)
-      pythonPath += Seq(sparkHome, "python", "lib", "py4j-0.10.7-src.zip").mkString(File.separator)
+      pythonPath +=
+        Seq(sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").mkString(File.separator)
     }
     pythonPath ++= SparkContext.jarOfObject(this)
     pythonPath.mkString(File.pathSeparator)
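
This Scala helper mirrors what bin/pyspark and bin/pyspark2.cmd do for the shell: it appends the pyspark and py4j zips to the Python path. A small standalone sketch of the same joining pattern; the SPARK_HOME value is illustrative, not taken from the commit:

import java.io.File

val sparkHome = "/opt/spark"   // illustrative; the real code reads SPARK_HOME from the environment
val entries = Seq(
  Seq(sparkHome, "python", "lib", "pyspark.zip").mkString(File.separator),
  Seq(sparkHome, "python", "lib", "py4j-0.10.8.1-src.zip").mkString(File.separator))
// On Linux this prints:
// /opt/spark/python/lib/pyspark.zip:/opt/spark/python/lib/py4j-0.10.8.1-src.zip
println(entries.mkString(File.pathSeparator))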

core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala

Lines changed: 31 additions & 15 deletions
@@ -18,6 +18,7 @@
 package org.apache.spark.deploy
 
 import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream, File, IOException}
+import java.lang.reflect.Method
 import java.security.PrivilegedExceptionAction
 import java.text.DateFormat
 import java.util.{Arrays, Comparator, Date, Locale}
@@ -30,7 +31,7 @@ import scala.util.control.NonFatal
 
 import com.google.common.primitives.Longs
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter}
+import org.apache.hadoop.fs._
 import org.apache.hadoop.mapred.JobConf
 import org.apache.hadoop.security.{Credentials, UserGroupInformation}
 import org.apache.hadoop.security.token.{Token, TokenIdentifier}
@@ -412,20 +413,6 @@ object SparkHadoopUtil {
 
   def get: SparkHadoopUtil = instance
 
-  /**
-   * Given an expiration date for the current set of credentials, calculate the time when new
-   * credentials should be created.
-   *
-   * @param expirationDate Drop-dead expiration date
-   * @param conf Spark configuration
-   * @return Timestamp when new credentials should be created.
-   */
-  private[spark] def nextCredentialRenewalTime(expirationDate: Long, conf: SparkConf): Long = {
-    val ct = System.currentTimeMillis
-    val ratio = conf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO)
-    (ct + (ratio * (expirationDate - ct))).toLong
-  }
-
   /**
    * Returns a Configuration object with Spark configuration applied on top. Unlike
    * the instance method, this will always return a Configuration instance, and not a
@@ -471,4 +458,33 @@ object SparkHadoopUtil {
       hadoopConf.set(key.substring("spark.hadoop.".length), value)
     }
   }
+
+  // scalastyle:off line.size.limit
+  /**
+   * Create a path that uses replication instead of erasure coding (ec), regardless of the default
+   * configuration in hdfs for the given path. This can be helpful as hdfs ec doesn't support
+   * hflush(), hsync(), or append().
+   * https://hadoop.apache.org/docs/r3.0.0/hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html#Limitations
+   */
+  // scalastyle:on line.size.limit
+  def createNonECFile(fs: FileSystem, path: Path): FSDataOutputStream = {
+    try {
+      // Use reflection as this uses APIs only available in Hadoop 3.
+      val builderMethod = fs.getClass().getMethod("createFile", classOf[Path])
+      val builder = builderMethod.invoke(fs, path)
+      val builderCls = builder.getClass()
+      // This may throw a NoSuchMethodException if the path is not on HDFS.
+      val replicateMethod = builderCls.getMethod("replicate")
+      val buildMethod = builderCls.getMethod("build")
+      val b2 = replicateMethod.invoke(builder)
+      buildMethod.invoke(b2).asInstanceOf[FSDataOutputStream]
+    } catch {
+      case _: NoSuchMethodException =>
+        // No createFile() method, so we're on an older HDFS client, which doesn't give us control
+        // over EC vs. replication. Older HDFS doesn't have EC anyway, so just create the file with
+        // the old APIs.
+        fs.create(path)
+    }
+  }
+
 }
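
The new createNonECFile helper returns an ordinary FSDataOutputStream, so a caller can use it as a drop-in for fs.create(path) wherever hflush()/hsync() must keep working on directories whose HDFS default is erasure coding; the limitation called out in the comment matters mostly for long-lived, incrementally flushed files such as event logs. A hedged usage sketch: the object name, path, and payload are illustrative, and since SparkHadoopUtil is Spark-internal the caller is assumed to live inside the org.apache.spark tree:

package org.apache.spark.deploy

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object NonECFileExample {
  def main(args: Array[String]): Unit = {
    val fs = FileSystem.get(new Configuration())   // whichever filesystem the default conf points at
    // Force replication for this file even if the parent directory defaults to erasure coding.
    val out = SparkHadoopUtil.createNonECFile(fs, new Path("/tmp/spark-events/app.inprogress"))
    out.writeUTF("event")   // FSDataOutputStream extends DataOutputStream
    out.hflush()            // works on replicated files; erasure-coded files cannot support this
    out.close()
  }
}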
