forked from LLNL/magpie
-
Notifications
You must be signed in to change notification settings - Fork 0
/
magpie-run-hadoop-terasort
executable file
·132 lines (112 loc) · 4.12 KB
/
magpie-run-hadoop-terasort
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/bin/bash
#############################################################################
# Copyright (C) 2013 Lawrence Livermore National Security, LLC.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Albert Chu <chu11@llnl.gov>
# LLNL-CODE-644248
#
# This file is part of Magpie, scripts for running Hadoop on
# traditional HPC systems. For details, see <URL>.
#
# Magpie is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# Magpie is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Magpie. If not, see <http://www.gnu.org/licenses/>.
#############################################################################
# This script is the core terasort running script. For the most part,
# it shouldn't be edited. See job submission files for configuration
# details.
# Load the two common Magpie files (exports and functions, going by
# their names) from MAGPIE_SCRIPTS_HOME.  The paths are quoted so a
# directory name containing whitespace or glob characters is still
# passed to `source` as a single file name.
source "${MAGPIE_SCRIPTS_HOME}/magpie-common-exports"
source "${MAGPIE_SCRIPTS_HOME}/magpie-common-functions"
# Choose the examples jar and the `hadoop fs` removal option that go
# with HADOOP_SETUP_TYPE.  Any other value leaves both variables
# untouched.
case "${HADOOP_SETUP_TYPE}" in
    MR1|HDFS1)
        rmoption="-rmr"
        terasortexamples="hadoop-examples-${HADOOP_VERSION}.jar"
        ;;
    MR2|HDFS2)
        rmoption="-rm -r"
        terasortexamples="share/hadoop/mapreduce/hadoop-mapreduce-examples-${HADOOP_VERSION}.jar"
        ;;
esac
# Size argument later handed to teragen: HADOOP_TERASORT_SIZE when it
# is set and non-empty, otherwise 50000000.
terasortsize="${HADOOP_TERASORT_SIZE:-50000000}"
# Prefix put in front of the terasort directory names.  It is only
# assigned for the rawnetworkfs and intellustre filesystem modes; for
# any other mode pathprefix keeps whatever value it already had.
case "${HADOOP_FILESYSTEM_MODE}" in
    rawnetworkfs)
        pathprefix="${HADOOP_RAWNETWORKFS_PATH}/"
        ;;
    intellustre)
        pathprefix="${HADOOP_INTELLUSTRE_PATH}/"
        ;;
esac
# Options asking the datanodes to drop cache behind reads and writes.
# They are used unless HADOOP_TERASORT_CLEAR_CACHE is set to a
# non-empty value other than "yes"; unset or empty counts as "yes".
clearcache=""
if [ "${HADOOP_TERASORT_CLEAR_CACHE:-yes}" == "yes" ]
then
    clearcache="-Ddfs.datanode.drop.cache.behind.reads=true -Ddfs.datanode.drop.cache.behind.writes=true"
fi
# With UDA enabled on an MR2 setup, add HADOOP_UDA_JAR to the -libjars
# option string: extend the existing comma-separated list if there is
# one, otherwise start a new "-libjars" option.
if [ "${HADOOP_UDA_SETUP}" == "yes" ] && [ "${HADOOP_SETUP_TYPE}" == "MR2" ]
then
    if [ -n "${extralibjars}" ]
    then
        extralibjars="${extralibjars},${HADOOP_UDA_JAR}"
    else
        extralibjars="-libjars ${HADOOP_UDA_JAR}"
    fi
fi
# With Tachyon enabled, build the path of its client jar from
# TACHYON_HOME and TACHYON_VERSION and add it to the -libjars option
# string, extending an existing list or starting a new one.
if [ "${TACHYON_SETUP}" == "yes" ]
then
    tachyonjar="${TACHYON_HOME}/client/target/tachyon-client-${TACHYON_VERSION}-jar-with-dependencies.jar"
    if [ -n "${extralibjars}" ]
    then
        extralibjars="${extralibjars},${tachyonjar}"
    else
        extralibjars="-libjars ${tachyonjar}"
    fi
fi
# Work from HADOOP_HOME; the examples jar is named by a relative path,
# presumably resolved against this directory.  The path is quoted and
# the result checked: with an unquoted empty HADOOP_HOME, cd would
# silently switch to the user's home directory, and a failed cd would
# let the jobs below run from the wrong place.
if [ -z "${HADOOP_HOME}" ] || ! cd "${HADOOP_HOME}"
then
    echo "Cannot change directory to HADOOP_HOME '${HADOOP_HOME}'" >&2
    exit 1
fi
#
# Remove previous runs if they are lingering
#
# For each terasort directory name in turn: list ${pathprefix} and, if
# the name appears in the listing, delete that directory.
for staledir in terasort-teragen terasort-sort
do
    if ${hadoopcmdprefix}/hadoop fs -ls ${pathprefix} | grep -q ${staledir}
    then
        command="${hadoopcmdprefix}/hadoop fs ${rmoption} ${pathprefix}${staledir}"
        # Unquoted on purpose: the string is word-split into argv.
        $command
    fi
done
# Run teragen to produce ${pathprefix}terasort-teragen, echoing the
# exact command line to stderr first.
teragencmd="${hadoopcmdprefix}/hadoop jar ${terasortexamples} teragen ${extralibjars} ${clearcache} $terasortsize ${pathprefix}terasort-teragen"
printf 'Running %s\n' "${teragencmd}" >&2
# Unquoted on purpose: the string is word-split into argv.
${teragencmd}
# Wait 30 seconds before the sort step.
# NOTE(review): the reason for this pause is not stated in the script.
sleep 30
# Reducer count for the sort job.  A non-zero
# HADOOP_TERASORT_REDUCER_COUNT is used as is; otherwise the count is
# twice HADOOP_SLAVE_COUNT.  Shell arithmetic is used for the
# multiplication, and an unset or empty HADOOP_SLAVE_COUNT is treated
# as 1 so that rtasks never ends up empty.
if [ "${HADOOP_TERASORT_REDUCER_COUNT:-0}" -ne "0" ]
then
    rtasks=$HADOOP_TERASORT_REDUCER_COUNT
else
    rtasks=$(( ${HADOOP_SLAVE_COUNT:-1} * 2 ))
fi
# Run terasort from ${pathprefix}terasort-teragen into
# ${pathprefix}terasort-sort with $rtasks reducers and replication 1,
# echoing the exact command line to stderr first.
sortcmd="${hadoopcmdprefix}/hadoop jar ${terasortexamples} terasort ${extralibjars} -Dmapred.reduce.tasks=$rtasks -Ddfs.replication=1 ${clearcache} ${pathprefix}terasort-teragen ${pathprefix}terasort-sort"
printf 'Running %s\n' "${sortcmd}" >&2
# Unquoted on purpose: the string is word-split into argv.
${sortcmd}
# Delete both terasort directories now that the run is over, the
# generated input first and the sorted output second.
for donedir in terasort-teragen terasort-sort
do
    command="${hadoopcmdprefix}/hadoop fs ${rmoption} ${pathprefix}${donedir}"
    # Unquoted on purpose: the string is word-split into argv.
    $command
done
# The exit statuses of the teragen, terasort and cleanup commands run
# above are not checked, so once execution reaches this line the
# script reports success regardless of how they fared.
exit 0