
Commit 77202a6

Felix Cheung authored and committed
[SPARK-19231][SPARKR] add error handling for download and untar for Spark release
## What changes were proposed in this pull request?

When R is starting as a package and needs to download the Spark release distribution, we need to handle errors for the download and untar steps and clean up afterwards; otherwise the install gets stuck.

## How was this patch tested?

Manually.

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #16589 from felixcheung/rtarreturncode.

(cherry picked from commit 278fa1e)

Signed-off-by: Felix Cheung <felixcheung@apache.org>
1 parent 29b954b commit 77202a6
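
In outline, the patch makes each step report failure and remove its partial artifact before stopping. A condensed sketch of that flow, simplified from the diff below (variable and function names follow the patch):

```r
# Condensed sketch of the patched flow in R/pkg/R/install.R (simplified):
# every failed step unlinks its partial artifact so a retry starts clean.
if (!downloadUrl(releaseUrl, packageLocalPath)) {
  unlink(packageLocalPath)              # drop the partial download
  stop(paste0("Fetch failed from ", releaseUrl))
}
success <- tryCatch(untar(tarfile = packageLocalPath, exdir = localDir) == 0,
                    error = function(e) FALSE,
                    warning = function(w) FALSE)  # warnings count as failures
if (!success) {
  unlink(packageLocalPath)              # the tarball is likely corrupt
  stop("Extract archive failed.")
}
```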

File tree

1 file changed: +40, -15 lines


R/pkg/R/install.R

Lines changed: 40 additions & 15 deletions
```diff
@@ -54,7 +54,7 @@
 #' }
 #' @param overwrite If \code{TRUE}, download and overwrite the existing tar file in localDir
 #'                  and force re-install Spark (in case the local directory or file is corrupted)
-#' @return \code{install.spark} returns the local directory where Spark is found or installed
+#' @return the (invisible) local directory where Spark is found or installed
 #' @rdname install.spark
 #' @name install.spark
 #' @aliases install.spark
```
```diff
@@ -115,17 +115,35 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
   } else {
     if (releaseUrl != "") {
       message("Downloading from alternate URL:\n- ", releaseUrl)
-      downloadUrl(releaseUrl, packageLocalPath, paste0("Fetch failed from ", releaseUrl))
+      success <- downloadUrl(releaseUrl, packageLocalPath)
+      if (!success) {
+        unlink(packageLocalPath)
+        stop(paste0("Fetch failed from ", releaseUrl))
+      }
     } else {
       robustDownloadTar(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath)
     }
   }
 
   message(sprintf("Installing to %s", localDir))
-  untar(tarfile = packageLocalPath, exdir = localDir)
-  if (!tarExists || overwrite) {
+  # There are two ways untar can fail - untar could stop() on errors like incomplete block on file
+  # or, tar command can return failure code
+  success <- tryCatch(untar(tarfile = packageLocalPath, exdir = localDir) == 0,
+                      error = function(e) {
+                        message(e)
+                        message()
+                        FALSE
+                      },
+                      warning = function(w) {
+                        # Treat warning as error, add an empty line with message()
+                        message(w)
+                        message()
+                        FALSE
+                      })
+  if (!tarExists || overwrite || !success) {
     unlink(packageLocalPath)
   }
+  if (!success) stop("Extract archive failed.")
   message("DONE.")
   Sys.setenv(SPARK_HOME = packageLocalDir)
   message(paste("SPARK_HOME set to", packageLocalDir))
```
```diff
@@ -135,8 +153,7 @@ install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
 robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, packageLocalPath) {
   # step 1: use user-provided url
   if (!is.null(mirrorUrl)) {
-    msg <- sprintf("Use user-provided mirror site: %s.", mirrorUrl)
-    message(msg)
+    message("Use user-provided mirror site: ", mirrorUrl)
     success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                  packageName, packageLocalPath)
     if (success) {
```
```diff
@@ -156,7 +173,7 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa
                                  packageName, packageLocalPath)
     if (success) return()
   } else {
-    message("Unable to find preferred mirror site.")
+    message("Unable to download from preferred mirror site: ", mirrorUrl)
   }
 
   # step 3: use backup option
```
```diff
@@ -165,8 +182,11 @@ robustDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa
   success <- directDownloadTar(mirrorUrl, version, hadoopVersion,
                                packageName, packageLocalPath)
   if (success) {
-    return(packageLocalPath)
+    return()
   } else {
+    # remove any partially downloaded file
+    unlink(packageLocalPath)
+    message("Unable to download from default mirror site: ", mirrorUrl)
     msg <- sprintf(paste("Unable to download Spark %s for Hadoop %s.",
                          "Please check network connection, Hadoop version,",
                          "or provide other mirror sites."),
```
```diff
@@ -201,14 +221,20 @@ directDownloadTar <- function(mirrorUrl, version, hadoopVersion, packageName, pa
   msg <- sprintf(fmt, version, ifelse(hadoopVersion == "without", "Free build", hadoopVersion),
                  packageRemotePath)
   message(msg)
-  downloadUrl(packageRemotePath, packageLocalPath, paste0("Fetch failed from ", mirrorUrl))
+  downloadUrl(packageRemotePath, packageLocalPath)
 }
 
-downloadUrl <- function(remotePath, localPath, errorMessage) {
+downloadUrl <- function(remotePath, localPath) {
   isFail <- tryCatch(download.file(remotePath, localPath),
                      error = function(e) {
-                       message(errorMessage)
-                       print(e)
+                       message(e)
+                       message()
+                       TRUE
+                     },
+                     warning = function(w) {
+                       # Treat warning as error, add an empty line with message()
+                       message(w)
+                       message()
                        TRUE
                      })
   !isFail
```
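
With the errorMessage parameter gone, downloadUrl() just returns a logical and each caller decides how to clean up and fail. A hypothetical call site (the URL and destination path are placeholders, not from the patch):

```r
# Hypothetical call site for the refactored downloadUrl(); the URL and
# destination path are placeholders.
remote <- "https://example.org/spark-2.1.0-bin-hadoop2.7.tgz"
local <- file.path(tempdir(), basename(remote))
if (!downloadUrl(remote, local)) {
  unlink(local)  # drop any partial download before reporting failure
  stop(paste0("Fetch failed from ", remote))
}
```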
```diff
@@ -234,10 +260,9 @@ sparkCachePath <- function() {
   if (.Platform$OS.type == "windows") {
     winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA)
     if (is.na(winAppPath)) {
-      msg <- paste("%LOCALAPPDATA% not found.",
+      stop(paste("%LOCALAPPDATA% not found.",
                    "Please define the environment variable",
-                   "or restart and enter an installation path in localDir.")
-      stop(msg)
+                 "or restart and enter an installation path in localDir."))
     } else {
       path <- file.path(winAppPath, "Apache", "Spark", "Cache")
     }
```
