forked from geetha-hortonworks/hive-testbench
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtpcds-setup-sandbox.sh
executable file
·43 lines (36 loc) · 1.34 KB
/
tpcds-setup-sandbox.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
# Precondition check: the TPC-DS data generator jar must already be built.
# The path is relative, so the script has to be started from the directory
# that contains tpcds-gen/.
if [[ ! -f tpcds-gen/target/tpcds-gen-1.0-SNAPSHOT.jar ]]; then
  # Diagnostics go to stderr so they are not mixed into captured stdout.
  echo "Build the data generator with build.sh first" >&2
  exit 1
fi
# Trace every command and abort on the first unhandled failure.
set -x
set -e

# Tables in the TPC-DS schema.
LIST="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page inventory store_sales store_returns web_sales web_returns web_site catalog_sales catalog_returns"

# Positional arguments:
#   $1  SCALE  scale factor; handed to the generator via -s and used as the
#              suffix of the database names.
#   $2  DIR    base directory of the generated data, which is stored under
#              ${DIR}/${SCALE}. An empty value is still accepted and makes
#              the data path resolve to /${SCALE}.
SCALE=$1
DIR=$2
if [[ -z "${SCALE}" ]]; then
  # Without a scale factor every database name and data path below would be
  # malformed, so stop before touching the cluster.
  echo "Usage: ${0##*/} <scale> <dir>" >&2
  exit 1
fi

# Values substituted into the DDL scripts through "hive -d".
BUCKETS=13
RETURN_BUCKETS=1
SPLIT=16

# Parallel arrays: element i of each one describes the same output format.
STORE_CLAUSES=( "orc" )
FILE_FORMATS=( "orc" )
SERDES=( "org.apache.hadoop.hive.ql.io.orc.OrcSerde" )

# Generate the raw data only when ${DIR}/${SCALE} cannot be listed yet.
# "hadoop fs" is used instead of the deprecated "hadoop dfs" spelling.
if ! hadoop fs -ls "${DIR}/${SCALE}"; then
  # The subshell keeps the directory change local. The jar is named explicitly
  # (the same file the build check requires) because a target/*.jar glob would
  # pass any additional jars as stray arguments.
  (cd tpcds-gen && hadoop jar target/tpcds-gen-1.0-SNAPSHOT.jar -d "${DIR}/${SCALE}/" -s "${SCALE}")
fi

# Under set -e this aborts the script if the data directory is still missing.
hadoop fs -ls "${DIR}/${SCALE}"
# Generate the text/flat tables. These will later be converted to ORCFile.
# One DDL script per table is run with the database name and the table's data
# location passed in as Hive variables.
# LIST is intentionally unquoted: it is a whitespace-separated list of names.
for t in ${LIST}; do
  hive -i settings/load.sql -f "ddl/text/${t}.sql" \
    -d "DB=tpcds_text_${SCALE}" \
    -d "LOCATION=${DIR}/${SCALE}/${t}"
done
# Generate a flat (unpartitioned) schema in ORCFile format.
# STORE_CLAUSES, FILE_FORMATS and SERDES are parallel arrays, so iterate over
# the array indices directly instead of maintaining a separate counter.
for i in "${!STORE_CLAUSES[@]}"; do
  # LIST is intentionally unquoted: it is a whitespace-separated list of names.
  for t in ${LIST}; do
    hive -i settings/load.sql -f "ddl/bin_flat/${t}.sql" \
      -d "DB=tpcds_bin_flat_${FILE_FORMATS[$i]}_${SCALE}" \
      -d "SOURCE=tpcds_text_${SCALE}" \
      -d "BUCKETS=${BUCKETS}" \
      -d "FILE=${STORE_CLAUSES[$i]}" \
      -d "SERDE=${SERDES[$i]}" \
      -d "SPLIT=${SPLIT}"
  done
done