[SPARK-10500][SPARKR] sparkr.zip cannot be created if /R/lib is unwritable #9390
Changes from all commits: 3eb8743, 3fbb2db, 865471c, 4530e7f, 65b3d16
@@ -17,6 +17,7 @@
 .First <- function() {
   packageDir <- Sys.getenv("SPARKR_PACKAGE_DIR")
-  .libPaths(c(packageDir, .libPaths()))
+  dirs <- strsplit(packageDir, ",")[[1]]
+  .libPaths(c(dirs, .libPaths()))
   Sys.setenv(NOAWT=1)
 }

Review comment: also comment what exists in
Review comment: same as above
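For context, the change above turns SPARKR_PACKAGE_DIR into a comma-separated list rather than a single path. Below is a minimal sketch of the JVM-side counterpart this relies on, assuming the launcher joins all R library paths with commas before starting R so that the strsplit(packageDir, ",") call recovers each path; the object name and paths are illustrative, not code from this PR.

import java.util.Arrays

// Hypothetical launcher fragment: join every R library dir with "," so the
// R-side .First hook can split SPARKR_PACKAGE_DIR back into a vector of paths.
object RLauncherSketch {
  def main(args: Array[String]): Unit = {
    val pkgDirs = Seq("/opt/spark/R/lib", "/tmp/rpkg") // illustrative paths
    val builder = new ProcessBuilder(Arrays.asList("R", "--vanilla"))
    builder.environment().put("SPARKR_PACKAGE_DIR", pkgDirs.mkString(","))
    // builder.start()  // launch R with the environment variable set
  }
}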
@@ -23,6 +23,10 @@ import java.util.Arrays

 import org.apache.spark.{SparkEnv, SparkException}

 private[spark] object RUtils {
+  // Local path where R binary packages built from R source code contained in the spark
+  // packages specified with "--packages" or "--jars" command line option reside.
+  var rPackages: Option[String] = None
+
   /**
    * Get the SparkR package path in the local spark distribution.
    */

Review comment: I find this hard to maintain. Where
Reply: This path is set only on localhost and never changes. It is similar to, and a companion of, the local SparkR package path (<SPARK_HOME>/R/lib), and is only retrieved when running SparkR in client modes.
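To make the lifecycle of rPackages concrete, here is a sketch of how the package-build step could populate it, presumably alongside the RPackageUtils.zipRLibraries helper referenced later in this diff. The method name and the temp-directory layout are assumptions for illustration, not code from this PR.

object RPackageBuildSketch {
  import java.nio.file.Files

  // Hypothetical: install R packages from Spark Packages into a scratch
  // directory (never the possibly read-only <SPARK_HOME>/R/lib), then record
  // that directory so sparkRPackagePath can hand it to the driver/executors.
  def buildSparkPackageRLibs(): Unit = {
    val libDir = Files.createTempDirectory("rpkg").toFile
    // ... run `R CMD INSTALL --library=<libDir> <pkg>` for each package ...
    RUtils.rPackages = Some(libDir.getAbsolutePath)
  }
}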
@@ -34,11 +38,15 @@
   }

   /**
-   * Get the SparkR package path in various deployment modes.
+   * Get the list of paths for R packages in various deployment modes, of which the first
+   * path is for the SparkR package itself. The second path is for R packages built as
+   * part of Spark Packages, if any exist. Spark Packages can be provided through the
+   * "--packages" or "--jars" command line options.
    *
    * This assumes that Spark properties `spark.master` and `spark.submit.deployMode`
    * and environment variable `SPARK_HOME` are set.
    */
-  def sparkRPackagePath(isDriver: Boolean): String = {
+  def sparkRPackagePath(isDriver: Boolean): Seq[String] = {
     val (master, deployMode) =
       if (isDriver) {
         (sys.props("spark.master"), sys.props("spark.submit.deployMode"))
@@ -51,15 +59,30 @@
     val isYarnClient = master != null && master.contains("yarn") && deployMode == "client"

     // In YARN mode, the SparkR package is distributed as an archive symbolically
-    // linked to the "sparkr" file in the current directory. Note that this does not apply
-    // to the driver in client mode because it is run outside of the cluster.
+    // linked to the "sparkr" file in the current directory and additional R packages
+    // are distributed as an archive symbolically linked to the "rpkg" file in the
+    // current directory.
+    //
+    // Note that this does not apply to the driver in client mode because it is run
+    // outside of the cluster.
     if (isYarnCluster || (isYarnClient && !isDriver)) {
-      new File("sparkr").getAbsolutePath
+      val sparkRPkgPath = new File("sparkr").getAbsolutePath
+      val rPkgPath = new File("rpkg")
+      if (rPkgPath.exists()) {
+        Seq(sparkRPkgPath, rPkgPath.getAbsolutePath)
+      } else {
+        Seq(sparkRPkgPath)
+      }
     } else {
       // Otherwise, assume the package is local
       // TODO: support this for Mesos
-      localSparkRPackagePath.getOrElse {
-        throw new SparkException("SPARK_HOME not set. Can't locate SparkR package.")
+      val sparkRPkgPath = localSparkRPackagePath.getOrElse {
+        throw new SparkException("SPARK_HOME not set. Can't locate SparkR package.")
+      }
+      if (!rPackages.isEmpty) {
+        Seq(sparkRPkgPath, rPackages.get)
+      } else {
+        Seq(sparkRPkgPath)
+      }
     }
   }

Review comment: I think we need to check and fail Mesos explicitly.
Reply: Added a check for running SparkR with Mesos; SparkSubmit exits in this case.
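A small consumer-side sketch of the new contract: the returned Seq is ordered, with the SparkR package path first and the Spark Packages library path second when present. The match arms and printlns are illustrative only, and the fragment assumes Spark's classpath is available.

RUtils.sparkRPackagePath(isDriver = true) match {
  case Seq(sparkrLib)         => println(s"SparkR only: $sparkrLib")
  case Seq(sparkrLib, extras) => println(s"SparkR: $sparkrLib; Spark Packages libs: $extras")
  case other                  => println(s"unexpected result: $other")
}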
@@ -83,6 +83,7 @@
   private val PYSPARK_SHELL = "pyspark-shell"
   private val SPARKR_SHELL = "sparkr-shell"
   private val SPARKR_PACKAGE_ARCHIVE = "sparkr.zip"
+  private val R_PACKAGE_ARCHIVE = "rpkg.zip"

   private val CLASS_NOT_FOUND_EXIT_STATUS = 101
@@ -362,22 +363,46 @@
     }
   }

-    // In YARN mode for an R app, add the SparkR package archive to archives
-    // that can be distributed with the job
+    // In YARN mode for an R app, add the SparkR package archive and the R package
+    // archive containing all of the built R libraries to archives so that they can
+    // be distributed with the job
     if (args.isR && clusterManager == YARN) {
-      val rPackagePath = RUtils.localSparkRPackagePath
-      if (rPackagePath.isEmpty) {
+      val sparkRPackagePath = RUtils.localSparkRPackagePath
+      if (sparkRPackagePath.isEmpty) {
         printErrorAndExit("SPARK_HOME does not exist for R application in YARN mode.")
       }
-      val rPackageFile =
-        RPackageUtils.zipRLibraries(new File(rPackagePath.get), SPARKR_PACKAGE_ARCHIVE)
-      if (!rPackageFile.exists()) {
+      val sparkRPackageFile = new File(sparkRPackagePath.get, SPARKR_PACKAGE_ARCHIVE)
+      if (!sparkRPackageFile.exists()) {
         printErrorAndExit(s"$SPARKR_PACKAGE_ARCHIVE does not exist for R application in YARN mode.")
       }
-      val localURI = Utils.resolveURI(rPackageFile.getAbsolutePath)
+      val sparkRPackageURI = Utils.resolveURI(sparkRPackageFile.getAbsolutePath).toString

       // Distribute the SparkR package.
       // Assigns a symbol link name "sparkr" to the shipped package.
-      args.archives = mergeFileLists(args.archives, localURI.toString + "#sparkr")
+      args.archives = mergeFileLists(args.archives, sparkRPackageURI + "#sparkr")

+      // Distribute the R package archive containing all the built R packages.
+      if (!RUtils.rPackages.isEmpty) {
+        val rPackageFile =
+          RPackageUtils.zipRLibraries(new File(RUtils.rPackages.get), R_PACKAGE_ARCHIVE)
+        if (!rPackageFile.exists()) {
+          printErrorAndExit("Failed to zip all the built R packages.")
+        }
+
+        val rPackageURI = Utils.resolveURI(rPackageFile.getAbsolutePath).toString
+        // Assigns a symbol link name "rpkg" to the shipped package.
+        args.archives = mergeFileLists(args.archives, rPackageURI + "#rpkg")
+      }
     }

+    // TODO: Support distributing R packages with standalone cluster
+    if (args.isR && clusterManager == STANDALONE && !RUtils.rPackages.isEmpty) {
+      printErrorAndExit("Distributing R packages with standalone cluster is not supported.")
+    }
+
+    // TODO: Support SparkR with mesos cluster
+    if (args.isR && clusterManager == MESOS) {
+      printErrorAndExit("SparkR is not supported for Mesos cluster.")
+    }

     // If we're running a R app, set the main class to our specific R runner

Review comment (on the standalone check): what about MESOS?
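As background on the "#sparkr" and "#rpkg" suffixes above: for entries in args.archives, the URI fragment is the link name YARN gives the archive when localizing it into the container working directory, which is why RUtils.sparkRPackagePath looks for ./sparkr and ./rpkg on executors. A self-contained check of the fragment parsing follows; the file path is illustrative.

object ArchiveFragmentSketch extends App {
  import java.net.URI
  // "archive.zip#name": the fragment "name" becomes the symlink when YARN
  // localizes the archive into the container working directory.
  val uri = new URI("file:/opt/spark/R/lib/sparkr.zip#sparkr")
  assert(uri.getFragment == "sparkr")        // link name on YARN
  assert(uri.getPath.endsWith("sparkr.zip")) // archive actually shipped
}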
Review comment: If R/lib is not writable, how are you going to write it here?
Reply: This happens in the build process, not at runtime.