forked from KarthikTunga/impala
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate-test-configuration.sh
executable file
·187 lines (159 loc) · 6.26 KB
/
create-test-configuration.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#!/bin/bash
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Create the test environment needed by Impala. Includes generation of the
# Hadoop config files: core-site.xml, hbase-site.xml, hive-site.xml as well
# as creation of the Hive metastore.
# Strict mode: abort on a failing command, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail
# Remember the directory the script was started from: $0 may be a relative path and
# the script changes directory later (pushd), so the ERR handler has to go back there
# to re-read the script. The directory is referenced through a variable instead of
# being spliced into the handler text, so a path containing spaces or quotes cannot
# break the trap definition.
SCRIPT_INVOCATION_DIR="${PWD}"
# On any failing command, report the script name, the line number and the text of
# that source line.
trap 'echo "Error in $0 at line $LINENO: $(cd "${SCRIPT_INVOCATION_DIR}" && awk "NR == $LINENO" "$0")"' ERR
# Defaults for the command line switches (0 = disabled).
CREATE_METASTORE=0
CREATE_SENTRY_POLICY_DB=0
# Keep IMPALA_KERBERIZE as-is when it is already set in the environment; otherwise
# define it as empty so later expansions do not trip `set -u`.
: "${IMPALA_KERBERIZE=}"
# Parse the command line options. Every option is a stand-alone switch; anything
# that is not recognised (including -help) prints the usage text and exits with
# status 1.
# "$@" hands each argument over exactly as the caller wrote it, without the word
# splitting and glob expansion an unquoted $* would apply.
for ARG in "$@"; do
  case "${ARG}" in
    '')
      # An unquoted $* silently dropped empty arguments; keep tolerating them.
      ;;
    -create_metastore)
      CREATE_METASTORE=1
      ;;
    -create_sentry_policy_db)
      CREATE_SENTRY_POLICY_DB=1
      ;;
    -k|-kerberize|-kerberos|-kerb)
      # This could also come in through the environment...
      export IMPALA_KERBERIZE=1
      ;;
    -help|*)
      echo "[-create_metastore] : If true, creates a new metastore."
      echo "[-create_sentry_policy_db] : If true, creates a new sentry policy db."
      echo "[-kerberize] : Enable kerberos on the cluster"
      exit 1
      ;;
  esac
done
# If this isn't sourced, bad things will always happen.
# IMPALA_CONFIG_SOURCED=1 is taken as the sign that bin/impala-config.sh was sourced.
# The ":-" default matters under `set -u`: without it a missing variable aborts the
# script with a bare "unbound variable" error and the actionable message below is
# never printed.
if [[ "${IMPALA_CONFIG_SOURCED:-}" != "1" ]]; then
  echo "You must source bin/impala-config.sh"
  exit 1
fi
# Ask the cluster admin script to create the cluster. Paths are quoted so that a
# CLUSTER_DIR containing whitespace is passed through intact.
"${CLUSTER_DIR}/admin" create_cluster
# Pick the HiveServer2 authentication mode depending on whether kerberos was
# requested (IMPALA_KERBERIZE non-empty).
if [[ -n "${IMPALA_KERBERIZE}" ]]; then
  # Sanity check...
  if ! "${CLUSTER_DIR}/admin" is_kerberized; then
    echo "Kerberized cluster not created, even though told to."
    exit 1
  fi
  # Set some more environment variables.
  . "${MINIKDC_ENV}"
  # For hive-site.xml further down...
  export HIVE_S2_AUTH=KERBEROS
else
  export HIVE_S2_AUTH=NONE
fi
# Assign first and export afterwards: `export VAR=$(cmd)` always reports the status
# of `export`, which would hide a failing `whoami` from `set -e`.
CURRENT_USER=$(whoami)
export CURRENT_USER
# Directory holding the config templates and receiving the generated files.
CONFIG_DIR="${IMPALA_HOME}/fe/src/test/resources"
echo "Config dir: ${CONFIG_DIR}"
echo "Current user: ${CURRENT_USER}"
echo "Metastore DB: ${METASTORE_DB}"
# Everything up to the matching popd works relative to CONFIG_DIR.
pushd "${CONFIG_DIR}"
# Cleanup any existing files
rm -f {core,hdfs,hbase,hive,yarn,mapred}-site.xml
rm -f authz-provider.ini
# Recreate the PostgreSQL database backing the Hive metastore when -create_metastore
# was given. All expansions are quoted so database names and paths are passed to the
# tools as single arguments.
if [[ "${CREATE_METASTORE}" -eq 1 ]]; then
  echo "Creating postgresql database for Hive metastore"
  # Best effort: the drop fails when the database does not exist yet, so its error
  # output is discarded and its status ignored on purpose.
  dropdb -U hiveuser "${METASTORE_DB}" 2> /dev/null || true
  createdb -U hiveuser "${METASTORE_DB}"
  # Load the Hive 0.13.0 metastore schema found under HIVE_HOME.
  psql -q -U hiveuser -d "${METASTORE_DB}" \
    -f "${HIVE_HOME}/scripts/metastore/upgrade/postgres/hive-schema-0.13.0.postgres.sql"
  # Increase the size limit of PARAM_VALUE from SERDE_PARAMS table to be able to create
  # HBase tables with large number of columns.
  echo "alter table \"SERDE_PARAMS\" alter column \"PARAM_VALUE\" type character varying" \
    | psql -q -U hiveuser -d "${METASTORE_DB}"
fi
# Recreate the Sentry policy database when -create_sentry_policy_db was given.
if [[ ${CREATE_SENTRY_POLICY_DB} -eq 1 ]]; then
  echo "Creating Sentry Policy Server DB"
  # The drop is best effort: errors are silenced and never abort the script.
  dropdb -U hiveuser sentry_policy 2> /dev/null || true
  createdb -U hiveuser sentry_policy
fi
# Perform search-replace on $1, output to $2.
# Search $1 ($GCIN) for strings that look like "${FOO}". If FOO is defined in
# the environment then replace "${FOO}" with the environment value. Also
# remove or leave special kerberos settings as desired. Sanity check at end.
function generate_config {
GCIN="$1"
GCOUT="$2"
perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
"${GCIN}" > "${GCOUT}.tmp"
if [ "${IMPALA_KERBERIZE}" = "" ]; then
sed '/<!-- BEGIN Kerberos/,/END Kerberos settings -->/d' \
"${GCOUT}.tmp" > "${GCOUT}"
else
cp "${GCOUT}.tmp" "${GCOUT}"
fi
rm -f "${GCOUT}.tmp"
# Check for anything that might have been missed.
# Assumes that environment variables will be ALL CAPS...
if grep '\${[A-Z_]*}' "${GCOUT}"; then
echo "Found undefined variables in ${GCOUT}, aborting"
exit 1
fi
echo "Generated `pwd`/${GCOUT}"
}
# Symlink the Hadoop client configuration of the local cluster into the current
# directory. `ln -s TARGET` with a single argument creates the link in the current
# directory under the target's base name. Paths are quoted so directories containing
# whitespace are handled.
echo "Linking core-site.xml from local cluster"
CLUSTER_HADOOP_CONF_DIR=$("${CLUSTER_DIR}/admin" get_hadoop_client_conf_dir)
ln -s "${CLUSTER_HADOOP_CONF_DIR}/core-site.xml"
echo "Linking hdfs-site.xml from local cluster"
ln -s "${CLUSTER_HADOOP_CONF_DIR}/hdfs-site.xml"
if "${CLUSTER_DIR}/admin" is_kerberized; then
  # KERBEROS TODO: Without this, the yarn daemons can see these
  # files, but mapreduce jobs *cannot* see these files. This seems
  # strange, but making these symlinks also results in data loading
  # failures in the non-kerberized case. Without these, mapreduce
  # jobs die in a kerberized cluster because they can't find their
  # kerberos principals. Obviously this has to be sorted out before
  # a kerberized cluster can load data.
  echo "Linking yarn and mapred from local cluster"
  ln -s "${CLUSTER_HADOOP_CONF_DIR}/yarn-site.xml"
  ln -s "${CLUSTER_HADOOP_CONF_DIR}/mapred-site.xml"
fi
# hive-site.xml is the only output whose template does not follow the
# "<output>.template" naming, so it is generated explicitly.
generate_config postgresql-hive-site.xml.template hive-site.xml
# The remaining outputs are each produced from "<output>.template".
for GENERATED_FILE in \
  log4j.properties \
  hive-log4j.properties \
  hbase-site.xml \
  authz-policy.ini \
  sentry-site.xml; do
  generate_config "${GENERATED_FILE}.template" "${GENERATED_FILE}"
done
# The HBase JAAS files are generated for kerberized setups only.
if [[ -n "${IMPALA_KERBERIZE}" ]]; then
  for GENERATED_FILE in hbase-jaas-server.conf hbase-jaas-client.conf; do
    generate_config "${GENERATED_FILE}.template" "${GENERATED_FILE}"
  done
fi
# Return to the directory that was current before the earlier pushd.
popd
echo "Completed config generation"
# Creates a symlink in TARGET_DIR to all subdirectories under SOURCE_DIR.
# Only the immediate subdirectories are linked; plain files and deeper levels are
# ignored. Existing links of the same name are replaced (ln -f).
#
# Arguments: $1 - SOURCE_DIR, directory whose subdirectories are linked
#            $2 - TARGET_DIR, directory that receives the symlinks
# Outputs:   a notice on stdout when SOURCE_DIR is not a directory (nothing is linked)
function symlink_subdirs {
  # Locals, so the caller's variables are not clobbered.
  local SOURCE_DIR="$1"
  local TARGET_DIR="$2"
  if [[ -d "${SOURCE_DIR}" ]]; then
    # Quoted so that directories containing whitespace reach find/ln as one argument.
    find "${SOURCE_DIR}/" -maxdepth 1 -mindepth 1 -type d \
      -exec ln -f -s {} "${TARGET_DIR}" \;
  else
    echo "No auxiliary tests found at: ${SOURCE_DIR}"
  fi
}
# The Impala test framework supports running additional tests outside of the main repo.
# This is an optional feature that can be enabled by setting the IMPALA_AUX_* environment
# variables to valid locations.
# Arguments are quoted so that an empty or whitespace-containing location stays a
# single argument instead of shifting the source/target pair.
echo "Searching for auxiliary tests, workloads, and datasets (if any exist)."
symlink_subdirs "${IMPALA_AUX_WORKLOAD_DIR}" "${IMPALA_WORKLOAD_DIR}"
symlink_subdirs "${IMPALA_AUX_DATASET_DIR}" "${IMPALA_DATASET_DIR}"
symlink_subdirs "${IMPALA_AUX_TEST_HOME}/tests" "${IMPALA_HOME}/tests"