-
Notifications
You must be signed in to change notification settings - Fork 28.6k
[SPARK-19231][SPARKR] add error handling for download and untar for Spark release #16589
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -54,7 +54,7 @@ | |
#' } | ||
#' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir | ||
#' and force re-install Spark (in case the local directory or file is corrupted) | ||
#' @return \code{install.spark} returns the local directory where Spark is found or installed | ||
#' @return the (invisible) local directory where Spark is found or installed | ||
#' @rdname install.spark | ||
#' @name install.spark | ||
#' @aliases install.spark | ||
|
@@ -115,17 +115,35 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, | |
} else { | ||
if (releaseUrl != "") { | ||
message("Downloading from alternate URL:\n- ", releaseUrl) | ||
downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl)) | ||
success <- downloadUrl(releaseUrl, packageLocalPath) | ||
if (!success) { | ||
unlink(packageLocalPath) | ||
stop(paste0("Fetch failed from ", releaseUrl)) | ||
} | ||
} else { | ||
robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) | ||
} | ||
} | ||
|
||
message(sprintf("Installing to %s", localDir)) | ||
untar(tarfile = packageLocalPath, exdir = localDir) | ||
if (!tarExists || overwrite) { | ||
# There are two ways untar can fail - untar could stop() on errors like incomplete block on file | ||
# or, tar command can return failure code | ||
success <- tryCatch(untar(tarfile = packageLocalPath, exdir = localDir) == 0, | ||
error = function(e) { | ||
message(e) | ||
message() | ||
FALSE | ||
}, | ||
warning = function(w) { | ||
# Treat warning as error, add an empty line with message() | ||
message(w) | ||
message() | ||
FALSE | ||
}) | ||
if (!tarExists || overwrite || !success) { | ||
unlink(packageLocalPath) | ||
} | ||
if (!success) stop("Extract archive failed.") | ||
message("DONE.") | ||
Sys.setenv(SPARK_HOME = packageLocalDir) | ||
message(paste("SPARK_HOME set to", packageLocalDir)) | ||
|
@@ -135,8 +153,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, | |
robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) { | ||
# step 1: use user-provided url | ||
if (!is.null(mirrorUrl)) { | ||
msg <- sprintf("Use user-provided mirror site: %s.", mirrorUrl) | ||
message(msg) | ||
message("Use user-provided mirror site: ", mirrorUrl) | ||
success <- directDownloadTar(mirrorUrl, version, hadoopVersion, | ||
packageName, packageLocalPath) | ||
if (success) { | ||
|
@@ -156,7 +173,7 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa | |
packageName, packageLocalPath) | ||
if (success) return() | ||
} else { | ||
message("Unable to find preferred mirror site.") | ||
message("Unable to download from preferred mirror site: ", mirrorUrl) | ||
} | ||
|
||
# step 3: use backup option | ||
|
@@ -165,8 +182,11 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa | |
success <- directDownloadTar(mirrorUrl, version, hadoopVersion, | ||
packageName, packageLocalPath) | ||
if (success) { | ||
return(packageLocalPath) | ||
return() | ||
} else { | ||
# remove any partially downloaded file | ||
unlink(packageLocalPath) | ||
message("Unable to download from default mirror site: ", mirrorUrl) | ||
msg <- sprintf(paste("Unable to download Spark %s for Hadoop %s.", | ||
"Please check network connection, Hadoop version,", | ||
"or provide other mirror sites."), | ||
|
@@ -201,14 +221,20 @@ directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa | |
msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion), | ||
packageRemotePath) | ||
message(msg) | ||
downloadUrl(packageRemotePath, packageLocalPath, paste0("Fetch failed from ", mirrorUrl)) | ||
downloadUrl(packageRemotePath, packageLocalPath) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason to not use this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't relate this to the update in L176 - I think this is fine. In general I think this file has gotten a little unwieldy with error messages coming from different functions. I wonder if there is a better way to refactor things to setup some expectations on where errors are thrown etc. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yea I agree. I guess I'm trying to bubble up error messages to the top level but generally without exception to throw is making this non-trivial (never thought I'd say that!) passing all the extra pieces of info around just to display them in error messages doesn't seem very intuitive either.. |
||
} | ||
|
||
downloadUrl <- function(remotePath, localPath, errorMessage) { | ||
downloadUrl <- function(remotePath, localPath) { | ||
isFail <- tryCatch(download.file(remotePath, localPath), | ||
error = function(e) { | ||
message(errorMessage) | ||
print(e) | ||
message(e) | ||
message() | ||
TRUE | ||
}, | ||
warning = function(w) { | ||
# Treat warning as error, add an empty line with message() | ||
message(w) | ||
message() | ||
TRUE | ||
}) | ||
!isFail | ||
|
@@ -234,10 +260,9 @@ sparkCachePath <- function() { | |
if (.Platform$OS.type == "windows") { | ||
winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) | ||
if (is.na(winAppPath)) { | ||
msg <- paste("%LOCALAPPDATA% not found.", | ||
stop(paste("%LOCALAPPDATA% not found.", | ||
"Please define the environment variable", | ||
"or restart and enter an installation path in localDir.") | ||
stop(msg) | ||
"or restart and enter an installation path in localDir.")) | ||
} else { | ||
path <- file.path(winAppPath, "spark", "spark", "Cache") | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any reason to change the return value here ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it wasn't actually - it was always invisible - see here, just not documented as such. Also referencing the method name and saying
returns
again is redundant as this would go to the return section of the generated doc.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it. Thanks