forked from SALMON-TDDFT/SALMON
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmpirun.symm
executable file
·266 lines (242 loc) · 6.14 KB
/
mpirun.symm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#! /bin/bash
#
# Copyright 2017 SALMON developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Wrapper that helps launch Intel MPI jobs within SLURM using MICs in native and symmetric modes.
# mpiexec.hydra needs passwordless ssh access to all involved nodes.
#
# This script is inspired by two toolsets.
# 1. ibrun.symm command by TACC lariat (https://github.com/TACC/lariat)
# 2. mpirun-mic command by SLURM job scheduler (https://github.com/SchedMD/slurm)
#
# Name of this script as invoked; shown in the usage and verbose output.
BASENAME=$(basename "$0")
# Absolute directory containing this script; env_wrapper is launched from it.
# The inner substitution is quoted so a path containing spaces survives, and
# cd's stdout is discarded because cd prints the target directory when it
# resolves it through CDPATH, which would otherwise end up in BASEDIR.
BASEDIR=$(cd "$(dirname "$0")" > /dev/null && pwd)
# Print the command-line help text on stdout.
# Globals: BASENAME (read) - script name shown in the help text.
# The synopsis lists every flag accepted by the getopts loop, including -s.
function usage() {
  echo "
${BASENAME} : Wrapper scripts for MICs native mode execution using Intel MPI in SLURM.
Usage :
${BASENAME} [-hvcmsn] [binary] [arguments for binary...]
Options :
-h Print this message.
-v Verbose mode.
-c Use CPU.
-m Use MIC.
-s Use CPU and MIC (Symmetric).
-n MPI command not execute.
Environment Variables :
MIC_PPN Number of Processes per MIC.
MIC_NPN Number of MIC per Host node.
"
}
# Print a "[verbose]"-prefixed message on stdout when verbose mode is on.
# Globals:   MPIRUN_SYMM_VERBOSE (read) - any non-empty value enables output.
# Arguments: $1 - message text.
# Returns:   non-zero, printing nothing, when verbose mode is off.
function wecho () {
  if [[ -z "$MPIRUN_SYMM_VERBOSE" ]]; then
    return 1
  fi
  echo "[verbose] $1"
}
# Report a fatal error on stderr and terminate the script with status 1.
# Arguments: $1 - error description.
function abort() {
  printf '[ABORT] %s\n' "$1" 1>&2
  exit 1
}
# At least one argument (an option or the binary) is required; otherwise the
# help text is written to stderr and the script fails.
(( $# >= 1 )) || {
  usage >&2
  exit 1
}
# Execution-mode flags and run controls, filled in from the command line.
USE_CPU=0             # 1 when -c or -s was given
USE_MIC=0             # 1 when -m or -s was given
MPIRUN_SYMM_VERBOSE=  # non-empty (set by -v) enables wecho output
MPIRUN_EXECUTE=1      # 0 (set by -n) prints the MPI command instead of running it

while getopts "vhcmsn" OPTION; do
  case "$OPTION" in
    c) USE_CPU=1 ;;
    m) USE_MIC=1 ;;
    s)
      # Symmetric mode is CPU and MIC together.
      USE_CPU=1
      USE_MIC=1
      ;;
    v) MPIRUN_SYMM_VERBOSE=1 ;;
    n) MPIRUN_EXECUTE=0 ;;
    h)
      usage
      exit 0
      ;;
    \?)
      # Unknown option: the help text goes to stderr, matching the other
      # usage-error paths of this script, so stdout stays clean for callers.
      usage >&2
      exit 1
      ;;
  esac
done
# Drop the parsed options so the positional parameters start at the binary.
shift $((OPTIND - 1))
wecho "###########################################"
wecho "${BASENAME} - MPI execution wrapper command"
wecho "###########################################"
wecho ""
##############
# check the execution mode.
##############
# At least one of CPU (-c), MIC (-m) or both (-s) must have been requested;
# otherwise print the help text on stderr and fail. The selected mode is
# announced on stdout.
if (( USE_CPU == 0 && USE_MIC == 0 )); then
  usage >&2
  exit 1
elif (( USE_CPU == 0 && USE_MIC == 1 )); then
  echo "run MIC-native mode."
elif (( USE_CPU == 1 && USE_MIC == 0 )); then
  echo "run CPU-only mode."
elif (( USE_CPU == 1 && USE_MIC == 1 )); then
  echo "run Symmetric mode."
fi
##############
# search paths.
##############
# Locate the Intel MPI launcher on PATH. 'command -v' is a shell builtin
# lookup, so this no longer depends on an external 'which' being installed.
exec_command=$(command -v mpiexec.hydra 2> /dev/null)
if [[ -z "$exec_command" ]]; then
  # abort terminates the script itself; no additional exit is needed.
  abort "'mpiexec.hydra' command not found."
fi
##############
# check SLURM env. variables.
##############
# Every variable below must be non-empty; the first missing one aborts the run.
wecho "check SLURM environment variables."
[[ -n "$SLURM_NNODES" ]] || abort "SLURM_NNODES not found."
if [[ -z "$SLURM_NTASKS_PER_NODE" ]]; then
  # When CPU ranks are not requested the value defaults to 1; otherwise it
  # is mandatory.
  [[ $USE_CPU -eq 0 ]] || abort "SLURM_NTASKS_PER_NODE not found."
  export SLURM_NTASKS_PER_NODE=1
fi
[[ -n "$SLURM_JOBID" ]] || abort "SLURM_JOBID not found."
[[ -n "$SLURM_SUBMIT_DIR" ]] || abort "SLURM_SUBMIT_DIR not found."
[[ -n "$SLURM_PROCID" ]] || abort "SLURM_PROCID not found."
##############
# Intel MPI settings for COMA.
##############
if [[ -n "$I_MPI_MIC_POSTFIX" ]]; then
  wecho "unset I_MPI_MIC_POSTFIX"
  unset I_MPI_MIC_POSTFIX
fi
# The wrapper sets I_MPI_ENV_PREFIX_LIST itself and refuses to run when the
# caller has already defined it.
if [[ -z "$I_MPI_ENV_PREFIX_LIST" ]]; then
  export I_MPI_ENV_PREFIX_LIST=knc:MIC
else
  abort "I_MPI_ENV_PREFIX_LIST exists."
fi
# Default the MIC-side OpenMP thread count only when MIC ranks are requested.
if [[ -z "$MIC_OMP_NUM_THREADS" ]] && [[ $USE_MIC -eq 1 ]]; then
  wecho "MIC_OMP_NUM_THREADS not found."
  export MIC_OMP_NUM_THREADS=240
  wecho "set default value of $MIC_OMP_NUM_THREADS"
fi
if [[ $USE_CPU -eq 1 ]] && [[ $USE_MIC -eq 1 ]]; then
  # for Symmetric mode.
  export I_MPI_ADJUST_ALLREDUCE=9
fi
# Append the host library path to the MIC library path. The ':' separator is
# inserted only when both sides are non-empty: a leading or trailing ':'
# creates an empty path element, which the dynamic loader treats as the
# current directory.
if [[ -n "$MIC_LD_LIBRARY_PATH" && -n "$LD_LIBRARY_PATH" ]]; then
  export MIC_LD_LIBRARY_PATH="${MIC_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}"
else
  export MIC_LD_LIBRARY_PATH="${MIC_LD_LIBRARY_PATH}${LD_LIBRARY_PATH}"
fi
##############
# Intel MPI default settings.
##############
wecho "check default Intel MPI and OpenMP settings."
# Each export below is only a fallback: a non-empty value already present in
# the environment is left untouched.
[[ -n "$I_MPI_FABRICS" ]] || export I_MPI_FABRICS=shm:dapl
[[ -n "$I_MPI_OFA_ADAPTER_NAME" ]] || export I_MPI_OFA_ADAPTER_NAME=mlx4_0
[[ -n "$I_MPI_MIC" ]] || export I_MPI_MIC=enable
# using uDAPL UCM provider.
# this provider is scalable.
[[ -n "$I_MPI_DAPL_PROVIDER_LIST" ]] || export I_MPI_DAPL_PROVIDER_LIST=ofa-v2-mlx4_0-1u
[[ -n "$I_MPI_DYNAMIC_CONNECTION" ]] || export I_MPI_DYNAMIC_CONNECTION=enable
#if [[ -z "$I_MPI_HYDRA_BRANCH_COUNT" ]]; then
# # large node support. (# of Node <= 128)
# export I_MPI_HYDRA_BRANCH_COUNT=384
#fi
# A non-empty I_MPI_PMI_LIBRARY is removed from the environment.
[[ -z "$I_MPI_PMI_LIBRARY" ]] || unset I_MPI_PMI_LIBRARY
##############
# Generate hostfile
##############
wecho "check default environment variables."
# Per-node process counts; non-empty values set by the caller take precedence.
if [[ -z "$CPU_PPN" ]]; then
  export CPU_PPN=$SLURM_NTASKS_PER_NODE
fi
if [[ -z "$MIC_PPN" ]]; then
  export MIC_PPN=1
fi
if [[ -z "$MIC_NPN" ]]; then
  export MIC_NPN=2
fi

# Machine file handed to the launcher: one "<host>:<processes>" line per host
# CPU and per MIC card, written to the current directory.
hostfile=.machines.$SLURM_JOBID
# The node list is quoted so a bracketed range such as "node[01-04]" reaches
# scontrol verbatim instead of being glob-expanded against local file names;
# the ':+' form passes no argument at all when the variable is empty.
# Word splitting of the command output is intentional: one hostname per word.
host_nodelist=($(scontrol show hostname ${SLURM_NODELIST:+"$SLURM_NODELIST"}))
# Start from an empty file: appending to a hostfile left behind by an earlier
# run of the same job would list every host more than once.
: > "$hostfile"
for host in "${host_nodelist[@]}"; do
  if [[ $USE_CPU -eq 1 ]]; then
    echo "${host}:${CPU_PPN}" >> "$hostfile"
  fi
  if [[ $USE_MIC -eq 1 ]]; then
    # MIC cards are addressed as <host>-mic0 .. <host>-mic(MIC_NPN - 1).
    for (( micN = 0; micN < MIC_NPN; micN++ )); do
      echo "${host}-mic${micN}:$MIC_PPN" >> "$hostfile"
    done
  fi
done

# Total number of MPI processes across all nodes of the job.
num_procs=0
if [[ $USE_CPU -eq 1 ]]; then
  num_procs=$(( num_procs + CPU_PPN * SLURM_NNODES ))
fi
if [[ $USE_MIC -eq 1 ]]; then
  num_procs=$(( num_procs + (MIC_NPN * MIC_PPN) * SLURM_NNODES ))
fi
#############
# Running.
#############
# Only the task with SLURM_PROCID 0 launches (or, with -n, prints) the MPI
# command. The status is saved right after the command because any later
# test or command overwrites $?.
ret=0
if [[ $SLURM_PROCID -eq 0 ]]; then
  if [[ $MPIRUN_EXECUTE -eq 1 ]]; then
    # "$@" forwards the binary and its arguments exactly as given, so
    # arguments containing spaces or glob characters are not re-split.
    "$exec_command" -machinefile "${hostfile}" -np "${num_procs}" "${BASEDIR}/env_wrapper" "$@"
    ret=$?
  else
    echo "$exec_command -machinefile ${hostfile} -np ${num_procs} ${BASEDIR}/env_wrapper $*"
    cat "${hostfile}"
    ret=$?
  fi
fi
##############
# cleanup.
##############
# Remove the hostfile before reporting a failure: abort exits immediately,
# which would otherwise leave the file behind.
rm -f "$hostfile"
if [[ $ret -ne 0 ]]; then
  abort "catch error! ret=$ret"
fi