Skip to content
This repository was archived by the owner on May 27, 2025. It is now read-only.

Commit 0ff0c5c

Browse files
authored
PR #1832: ch-fromhost: tidy
1 parent 48c8b36 commit 0ff0c5c

File tree

2 files changed

+116
-84
lines changed

2 files changed

+116
-84
lines changed

bin/ch-fromhost

Lines changed: 81 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44
# source:destination pairs separated by newlines, then walk through them and
55
# copy them into the image.
66
#
7-
# The colon separator is to avoid the difficulty of iterating through a sequence
8-
# of pairs with no arrays or structures in POSIX sh. We could avoid it by
9-
# taking action immediately upon encountering each file in the argument list,
10-
# but that would (a) yield a half-injected image for basic errors like
7+
# The colon separator is to avoid the difficulty of iterating through a
8+
# sequence of pairs with no arrays or structures in POSIX sh. We could avoid
9+
# it by taking action immediately upon encountering each file in the argument
10+
# list, but that would (a) yield a half-injected image for basic errors like
1111
# misspellings on the command line and (b) would require the image to be first
1212
# on the command line, which seems awkward.
1313
#
@@ -62,8 +62,8 @@ Destination within image:
6262
6363
Options:
6464
65-
--print-fi print inferred destination for libfabric provider(s)
6665
--print-cray-fi print inferred destination for libfabric replacement
66+
--print-fi print inferred destination for libfabric provider(s)
6767
--print-lib print inferred destination for shared libraries
6868
--no-ldconfig don’t run ldconfig even if we injected shared libraries
6969
-h, --help print this help and exit
@@ -93,10 +93,6 @@ print_fi_dest=
9393
print_lib_dest=
9494
no_ldconfig=
9595

96-
debug_indent () {
97-
DEBUG ' %s\n' "$1"
98-
}
99-
10096
ensure_nonempty () {
10197
[ "$2" ] || FATAL -- "$1 must not be empty"
10298
}
@@ -128,7 +124,7 @@ enqueue_file () {
128124
old_ifs="$IFS"
129125
IFS="$newline"
130126
d="${dest:-$2}"
131-
DEBUG "enqueue file(s)"
127+
VERBOSE "enqueue file(s)"
132128
for f in $1; do
133129
case $f in
134130
*:*)
@@ -139,16 +135,16 @@ enqueue_file () {
139135
case $f in
140136
*libfabric.so)
141137
if ldd "$f" | grep libcxi > /dev/null 2>&1; then
142-
debug_indent "cray libfabric: ${f}"
138+
DEBUG "cray libfabric: ${f}"
143139
cray_fi_found=yes
144140
host_libfabric=$f
145141
else
146-
debug_indent "libfabric: ${f}"
142+
DEBUG "libfabric: ${f}"
147143
lib_found=yes
148144
fi
149145
;;
150146
*-fi.so)
151-
debug_indent "libfabric shared provider: ${f}"
147+
DEBUG "libfabric shared provider: ${f}"
152148
fi_prov_found=yes
153149
# Providers, like Cray's libgnix-fi.so, link against paths that
154150
# need to be bind-mounted at run-time. Some of these paths need
@@ -159,13 +155,14 @@ enqueue_file () {
159155
ld=$(dirname "$(readlink -f "$l")")
160156
# Avoid duplicates and host libfabric.so.
161157
if [ "$(echo "$ld_conf" | grep -c "$ld")" -eq 0 ] \
162-
&& [ "$(echo "$ld" | grep -c "libfabric.so")" -eq 0 ]; then
158+
&& [ "$(echo "$ld" | grep -c "libfabric.so")" -eq 0 ]; \
159+
then
163160
enqueue_ldconf "$ld"
164161
fi
165162
done
166163
;;
167164
*)
168-
debug_indent "shared library: ${f}"
165+
DEBUG "shared library: ${f}"
169166
lib_found=yes
170167
;;
171168
esac
@@ -297,27 +294,27 @@ fi
297294
if [ -n "$cray_fi_found" ]; then
298295
# There is no Slingshot provider CXI; to leverage slingshot we need to
299296
# replace the image libfabric.so with Cray's.
300-
DEBUG "searching image for inferred libfabric destiation"
297+
VERBOSE "searching image for inferred libfabric destiation"
301298
img_libfabric=$(find "$image" -name "libfabric.so")
302299
[ -n "$img_libfabric" ] || FATAL "libfabric.so not found in $image"
303-
debug_indent "found $img_libfabric"
300+
DEBUG "found $img_libfabric"
304301
if [ "$(echo "$img_libfabric" | wc -l)" -ne 1 ]; then
305302
warn 'found more than one libfabric.so'
306303
fi
307304
img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@")
308305
cray_fi_dest=$(dirname "/$img_libfabric_path")
309306

310-
# Since cray's libfabric isn't a standard provider, to use slingshot we must
311-
# also add any missing linked libraries from the host.
312-
DEBUG "adding cray libfabric libraries"
307+
# Since cray's libfabric isn't a standard provider, to use slingshot we
308+
# must also add any missing linked libraries from the host.
309+
VERBOSE "adding cray libfabric libraries"
313310
ldds=$(ldd "$host_libfabric" 2>&1 | grep lib | awk '{print $3}' | sort -u)
314311
for l in $ldds; do
315312
# Do not replace any libraries found in the image, experimentation has
316313
# shown this to be problematic. Perhaps revisit in the future. For now,
317314
# both MPICH and OpenMPI examples work with this conservative approach.
318315
file_found=$(find "${image}" -name "$(basename "$l")")
319316
if [ -n "$file_found" ]; then
320-
debug_indent "skipping $l"
317+
DEBUG "skipping $l"
321318
continue
322319
fi
323320
enqueue_file "$l"
@@ -333,33 +330,37 @@ if [ -n "$lib_found" ]; then
333330
# We want to put the libraries in the first directory that ldconfig
334331
# searches, so that we can override (or overwrite) any of the same library
335332
# that may already be in the image.
336-
DEBUG "asking ldconfig for inferred shared library destination"
337-
# "ldconfig -Nv" gives some pointless warnings on stderr even if
338-
# successful; we don't want to show those to users. However, we don't want
339-
# to simply pipe stderr to /dev/null because this hides real errors. Thus,
340-
# use the following abomination to pipe stdout and stderr to *separate
341-
# grep commands*. See: https://stackoverflow.com/a/31151808
333+
VERBOSE "asking ldconfig for inferred shared library destination"
334+
# "ldconfig -Nv" gives pointless warnings on stderr even if successful; we
335+
# don't want to show those to users (unless -vv or higher). However, we
336+
# don't want to simply pipe stderr to /dev/null because this hides real
337+
# errors. Thus, use the following abomination to pipe stdout and stderr to
338+
# *separate grep commands*. See: https://stackoverflow.com/a/31151808
339+
if [ "$log_level" -lt 2 ]; then # VERBOSE or lower
340+
stderr_filter='(^|dynamic linker, ignoring|given more than once|No such file or directory)$'
341+
else # DEBUG or higher
342+
stderr_filter=weird_al_yankovic_will_not_appear_in_ldconfig_output
343+
fi
342344
lib_dest=$( { "${ch_bin}/ch-run" "$image" -- /sbin/ldconfig -Nv \
343-
2>&1 1>&3 3>&- | grep -Ev '(^|dynamic linker, ignoring|given more than once)$' ; } \
345+
2>&1 1>&3 3>&- | grep -Ev "$stderr_filter" ; } \
344346
3>&1 1>&2 | grep -E '^/' | cut -d: -f1 | head -1 )
345347
[ -n "$lib_dest" ] || FATAL 'empty path from ldconfig'
346348
[ -z "${lib_dest%%/*}" ] || FATAL "bad path from ldconfig: ${lib_dest}"
347-
DEBUG "inferred shared library destination: ${image}/${lib_dest}"
349+
VERBOSE "inferred shared library destination: ${image}/${lib_dest}"
348350
fi
349351

350352
if [ -n "$fi_prov_found" ]; then
351353
# The libfabric provider can be specified with FI_PROVIDER. The path to
352354
# search for shared providers can be specified with FI_PROVIDER_PATH
353355
# (undocumented). This complicates the inferred destination because these
354-
# variables can be inherited from the host or explicitly set in the image's
355-
# /ch/environment
356-
# file.
356+
# variables can be inherited from the host or explicitly set in the
357+
# image's /ch/environment file.
357358
#
358359
# For simplicity, the inferred injection destination is always the
359-
# 'libfabric' directory at the path where libfabric.so is found. If it does
360-
# not exist, create it. Warn if FI_PROVIDER_PATH or FI_PROVIDER is found
361-
# in the the image's /ch/environment file.
362-
DEBUG "searching ${image} for libfabric shared provider destination"
360+
# 'libfabric' directory at the path where libfabric.so is found. If it
361+
# does not exist, create it. Warn if FI_PROVIDER_PATH or FI_PROVIDER is
362+
# found in the image's /ch/environment file.
363+
VERBOSE "searching ${image} for libfabric shared provider destination"
363364
ch_env_p=$(grep -E '^FI_PROVIDER_PATH=' "${image}/ch/environment") \
364365
|| true # avoid -e exit
365366
ch_env_p=${ch_env_p##*=}
@@ -368,11 +369,11 @@ if [ -n "$fi_prov_found" ]; then
368369
fi
369370
img_libfabric=$(find "$image" -name 'libfabric.so')
370371
img_libfabric_path=$(echo "$img_libfabric" | sed "s@$image@@")
371-
debug_indent "found: ${image}${img_libfabric_path}"
372+
DEBUG "found: ${image}${img_libfabric_path}"
372373
fi_prov_dest=$(dirname "/${img_libfabric_path}")
373374
fi_prov_dest="${fi_prov_dest}/libfabric"
374375
queue_mkdir "$fi_prov_dest"
375-
DEBUG "inferred provider destination: $fi_prov_dest"
376+
VERBOSE "inferred provider destination: $fi_prov_dest"
376377
fi
377378

378379
if [ -n "$print_lib_dest" ]; then
@@ -394,70 +395,71 @@ if [ -f /etc/opt/cray/release/cle-release ]; then
394395
queue_mkdir /var/lib/hugetlbfs
395396
# UGNI
396397
if [ ! -L /etc/opt/cray/release/cle-release ]; then
397-
# ALPS libraries require the contents of this directory to be present at
398-
# the same path as the host. Create the mount point here, then ch-run
399-
# bind-mounts it later.
398+
# ALPS libraries require the contents of this directory to be present
399+
# at the same path as the host. Create the mount point here, then
400+
# ch-run bind-mounts it later.
400401
queue_mkdir /var/opt/cray/alps/spool
401402

402-
# The cray-ugni provider will link against cray's libwlm_detect so. Create
403-
# the mount point for ch-run.
403+
# The cray-ugni provider will link against cray's libwlm_detect.so.
404+
# Create the mount point for ch-run.
404405
queue_mkdir /opt/cray/wlm_detect
405406

406-
# libwlm_detect.so requires file(s) to present at the same path as the host.
407-
# Create mount point for ch-run.
407+
# libwlm_detect.so requires file(s) to be present at the same path as the
408+
# host. Create mount point for ch-run.
408409
queue_mkdir /etc/opt/cray/wlm_detect
409410

410-
# OFI uGNI provider, libgnix-fi.so, links against the Cray host's
411-
# libxpmem, libudreg, libalpsutil, libalpslli, and libugni; create mount
412-
# points for ch-run to use later.
411+
# OFI uGNI provider, libgnix-fi.so, links against the Cray host's
412+
# libxpmem, libudreg, libalpsutil, libalpslli, and libugni; create
413+
# mount points for ch-run to use later.
413414
queue_mkdir /opt/cray/udreg
414415
queue_mkdir /opt/cray/xpmem
415416
queue_mkdir /opt/cray/ugni
416417
queue_mkdir /opt/cray/alps
417418
fi
418419
# CXI (slingshot)
419420
if [ -f /opt/cray/etc/release/cos ]; then
420-
# Newer Cray Shasta environments require the contents of this directory
421-
# to be present at the same path as the host. Create mount points for
422-
# ch-run to use later.
421+
# Newer Cray Shasta environments require the contents of this
422+
# directory to be present at the same path as the host. Create mount
423+
# points for ch-run to use later.
423424
queue_mkdir /var/spool/slurmd
424425
fi
425426
fi
426427

427428
[ "$inject_files" ] || FATAL "empty file list"
428429

429-
DEBUG "injecting into image: ${image}"
430+
VERBOSE "injecting into image: ${image}"
430431

431432
old_ifs="$IFS"
432433
IFS="$newline"
433434

434435
# Process unlink list.
435436
for u in $inject_unlinks; do
436-
debug_indent "rm -f ${image}${u}"
437+
DEBUG "deleting: ${image}${u}"
437438
rm -f "${image}${u}"
438439
done
439440

440441
# Process bind-mount destination targets.
441442
for d in $inject_mkdirs; do
442-
debug_indent "mkdir -p ${image}${d}"
443+
DEBUG "mkdir: ${image}${d}"
443444
mkdir -p "${image}${d}"
444445
done
445446

446447
# Process ldconfig targets.
447448
if [ "$fi_prov_found" ] || [ "$cray_fi_found" ]; then
448449
if [ ! -f "${image}/etc/ld.so.conf" ]; then
449-
debug_indent "touch ${image}/etc/ld.so.conf"
450+
DEBUG "creating empty ld.so.conf"
450451
touch "${image}/etc/ld.so.conf"
451452
fi
452-
if ! grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" > /dev/null 2>&1; then
453-
debug_indent "echo 'include ld.so.conf.d/*.conf' >> ${image}/etc/ld.so.conf"
453+
if ! grep -F 'include ld.so.conf.d/*.conf' "${image}/etc/ld.so.conf" \
454+
> /dev/null 2>&1; then
455+
DEBUG "ld.so.conf: adding 'include ld.so.conf.d/*.conf'"
454456
echo 'include ld.so.conf.d/*.conf' >> "${image}/etc/ld.so.conf"
455457
fi
456458
# Prepare image ch-ofi.conf.
457459
printf '' > "${image}/etc/ld.so.conf.d/ch-ofi.conf"
458460
# add ofi dso provider ld library dirs.
459461
for c in $ld_conf; do
460-
debug_indent "echo '$c' >> ${image}/etc/ld.so.conf.d/ch-ofi.conf"
462+
DEBUG "ld.so.conf: adding ${c}"
461463
echo "$c" >> "${image}/etc/ld.so.conf.d/ch-ofi.conf"
462464
done
463465
fi
@@ -476,17 +478,17 @@ for file in $inject_files; do
476478
if ldd "$f" | grep libcxi > /dev/null 2>&1; then
477479
d=$cray_fi_dest
478480
fi
479-
;;
481+
;;
480482
*-fi.so)
481483
d=$fi_prov_dest
482-
;;
484+
;;
483485
*)
484486
d=$lib_dest
485-
;;
487+
;;
486488
esac
487489
infer=" (inferred)"
488490
fi
489-
debug_indent "${f} -> ${d}${infer}"
491+
VERBOSE "${f} -> ${d}${infer}"
490492
[ "$d" ] || FATAL "no destination for: ${f}"
491493
[ -z "${d%%/*}" ] || FATAL "not an absolute path: ${d}"
492494
[ -d "${image}${d}" ] || FATAL "not a directory: ${image}${d}"
@@ -503,28 +505,31 @@ done
503505
IFS="$old_ifs"
504506

505507
if [ -z "$no_ldconfig" ] \
506-
&& { [ "$lib_found" ] || [ "$fi_prov_found" ] || [ "$cray_fi_found" ] ;} then
507-
DEBUG "running ldconfig"
508-
debug_indent "${ch_bin}/ch-run -w $image -- /sbin/ldconfig"
509-
"${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null || FATAL 'ldconfig error'
508+
&& { [ "$lib_found" ] \
509+
|| [ "$fi_prov_found" ] \
510+
|| [ "$cray_fi_found" ] ;} then
511+
VERBOSE "running ldconfig"
512+
"${ch_bin}/ch-run" -w "$image" -- /sbin/ldconfig 2> /dev/null \
513+
|| FATAL 'ldconfig error'
510514
if [ -n "$fi_prov_found" ] || [ -n "$cray_fi_found" ]; then
511-
DEBUG "validating ldconfig cache"
515+
VERBOSE "validating ldconfig cache"
512516
for file in $inject_files; do
513517
f="$(basename "${file%%:*}")"
514-
f=$("${ch_bin}/ch-run" "$image" -- find / \
515-
-not \( -path /proc -prune \) \
516-
-not \( -path /dev -prune \) \
517-
-not \( -path /tmp -prune \) \
518-
-not \( -path /sys -prune \) \
519-
-not \( -path /var/opt/cray -prune \) \
520-
-not \( -path /etc/opt/cray -prune \) \
521-
-name "$f")
518+
f=$( "${ch_bin}/ch-run" "$image" \
519+
-- find / \
520+
-not \( -path /proc -prune \) \
521+
-not \( -path /dev -prune \) \
522+
-not \( -path /tmp -prune \) \
523+
-not \( -path /sys -prune \) \
524+
-not \( -path /var/opt/cray -prune \) \
525+
-not \( -path /etc/opt/cray -prune \) \
526+
-name "$f")
522527
if [ "$("${ch_bin}/ch-run" "$image" -- ldd "$f" | grep -c 'not found ')" -ne 0 ]; then
523528
FATAL "ldconfig: '${ch_bin}/ch-run $image -- ldd $f' failed"
524529
fi
525530
done
526531
fi
527532
else
528-
DEBUG "not running ldconfig"
533+
VERBOSE "not running ldconfig"
529534
fi
530535
echo 'done'

0 commit comments

Comments
 (0)