Skip to content

Commit 6c5fcac

Browse files
committed
[SPARK-35373][BUILD] Check Maven artifact checksum in build/mvn
### What changes were proposed in this pull request?

`./build/mvn` now downloads the `.sha512` checksum of each Maven artifact it downloads, and verifies the checksum after the download completes.

### Why are the changes needed?

This ensures the integrity of the Maven artifact during a user's build, which may come from several non-ASF mirrors.

### Does this PR introduce _any_ user-facing change?

Should not affect anything about Spark per se, just the build.

### How was this patch tested?

Manual testing wherein I forced Maven/Scala download, verified checksums are downloaded and checked, and verified it fails on error with a corrupted checksum.

Closes #32505 from srowen/SPARK-35373.

Authored-by: Sean Owen <srowen@gmail.com>
Signed-off-by: Sean Owen <srowen@gmail.com>
1 parent f7704ec commit 6c5fcac

File tree

1 file changed

+65
-25
lines changed

1 file changed

+65
-25
lines changed

build/mvn

Lines changed: 65 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -26,36 +26,67 @@ _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
2626

2727
# Installs any application tarball given a URL, the expected tarball name,
2828
# and, optionally, a checkable binary path to determine if the binary has
29-
# already been installed
30-
## Arg1 - URL
31-
## Arg2 - Tarball Name
32-
## Arg3 - Checkable Binary
29+
# already been installed. Arguments:
30+
# 1 - Mirror host
31+
# 2 - URL path on host
32+
# 3 - URL query string
33+
# 4 - checksum suffix
34+
# 5 - Tarball Name
35+
# 6 - Checkable Binary
3336
install_app() {
34-
local remote_tarball="$1"
35-
local local_tarball="${_DIR}/$2"
36-
local binary="${_DIR}/$3"
37+
local mirror_host="$1"
38+
local url_path="$2"
39+
local url_query="$3"
40+
local checksum_suffix="$4"
41+
local local_tarball="${_DIR}/$5"
42+
local binary="${_DIR}/$6"
43+
local remote_tarball="${mirror_host}/${url_path}${url_query}"
44+
local local_checksum="${local_tarball}.${checksum_suffix}"
45+
local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}"
3746

3847
local curl_opts="--silent --show-error -L"
3948
local wget_opts="--no-verbose"
4049

41-
if [ -z "$3" -o ! -f "$binary" ]; then
50+
if [ ! -f "$binary" ]; then
4251
# check if we already have the tarball
4352
# check if we have curl installed
4453
# download application
45-
[ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \
46-
echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \
54+
if [ ! -f "${local_tarball}" -a $(command -v curl) ]; then
55+
echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2
4756
curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
57+
if [ ! -z "${checksum_suffix}" ]; then
58+
echo "exec: curl ${curl_opts} ${remote_checksum}" 1>&2
59+
curl ${curl_opts} "${remote_checksum}" > "${local_checksum}"
60+
fi
61+
fi
4862
# if the file still doesn't exist, let's try `wget` and cross our fingers
49-
[ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \
50-
echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \
63+
if [ ! -f "${local_tarball}" -a $(command -v wget) ]; then
64+
echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2
5165
wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
66+
if [ ! -z "${checksum_suffix}" ]; then
67+
echo "exec: wget ${wget_opts} ${remote_checksum}" 1>&2
68+
wget ${wget_opts} -O "${local_checksum}" "${remote_checksum}"
69+
fi
70+
fi
5271
# if both were unsuccessful, exit
53-
[ ! -f "${local_tarball}" ] && \
54-
echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
55-
echo "please install manually and try again." && \
72+
if [ ! -f "${local_tarball}" ]; then
73+
echo -n "ERROR: Cannot download ${remote_tarball} with cURL or wget; please install manually and try again."
5674
exit 2
57-
cd "${_DIR}" && tar -xzf "$2"
58-
rm -rf "$local_tarball"
75+
fi
76+
# Checksum may not have been specified; don't check if it doesn't exist
77+
if [ -f "${local_checksum}" ]; then
78+
echo " ${local_tarball}" >> ${local_checksum} # two spaces + file are important!
79+
# Assuming SHA512 here for now
80+
echo "Veryfing checksum from ${local_checksum}" 1>&2
81+
if ! shasum -a 512 -q -c "${local_checksum}" ; then
82+
echo "Bad checksum from ${remote_checksum}"
83+
exit 2
84+
fi
85+
fi
86+
87+
cd "${_DIR}" && tar -xzf "${local_tarball}"
88+
rm -rf "${local_tarball}"
89+
rm -f "${local_checksum}"
5990
fi
6091
}
6192

@@ -71,21 +102,26 @@ install_mvn() {
71102
local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
72103
fi
73104
if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
74-
local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/apache-maven-${MVN_VERSION}-bin.tar.gz"
105+
local MVN_TARBALL="apache-maven-${MVN_VERSION}-bin.tar.gz"
106+
local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/${MVN_TARBALL}"
75107
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua'}
76-
local MIRROR_URL="${APACHE_MIRROR}/${FILE_PATH}?action=download"
108+
local MIRROR_URL_QUERY="?action=download"
77109

78110
if [ $(command -v curl) ]; then
79-
if ! curl -L --output /dev/null --silent --head --fail "${MIRROR_URL}" ; then
111+
if ! curl -L --output /dev/null --silent --head --fail "${APACHE_MIRROR}/${FILE_PATH}${MIRROR_URL_QUERY}" ; then
80112
# Fall back to archive.apache.org for older Maven
81113
echo "Falling back to archive.apache.org to download Maven"
82-
MIRROR_URL="https://archive.apache.org/dist/${FILE_PATH}"
114+
APACHE_MIRROR="https://archive.apache.org/dist"
115+
MIRROR_URL_QUERY=""
83116
fi
84117
fi
85118

86119
install_app \
87-
"${MIRROR_URL}" \
88-
"apache-maven-${MVN_VERSION}-bin.tar.gz" \
120+
"${APACHE_MIRROR}" \
121+
"${FILE_PATH}" \
122+
"${MIRROR_URL_QUERY}" \
123+
"sha512" \
124+
"${MVN_TARBALL}" \
89125
"apache-maven-${MVN_VERSION}/bin/mvn"
90126

91127
MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
@@ -101,10 +137,14 @@ install_scala() {
101137
local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | grep ${scala_binary_version} | head -n1 | awk -F '[<>]' '{print $3}'`
102138
local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"
103139
local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com}
140+
local SCALA_TARBALL="scala-${scala_version}.tgz"
104141

105142
install_app \
106-
"${TYPESAFE_MIRROR}/scala/${scala_version}/scala-${scala_version}.tgz" \
107-
"scala-${scala_version}.tgz" \
143+
"${TYPESAFE_MIRROR}" \
144+
"scala/${scala_version}/${SCALA_TARBALL}" \
145+
"" \
146+
"" \
147+
${SCALA_TARBALL} \
108148
"scala-${scala_version}/bin/scala"
109149

110150
SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar"

0 commit comments

Comments
 (0)