
Commit 1250589: First commit (parent 1ccb20b)

26 files changed: +1740 -1 lines

.gitlab-ci.yml (+122 lines)

@@ -0,0 +1,122 @@
# List of all stages
stages:
  - build
  - deploy

# Build the Docker images
image_build:
  stage: build
  image: docker:latest
  script:
    - export VERSION=$(echo $CI_COMMIT_REF_NAME | sed 's,.*/,,g')
    - |
      if [ "$VERSION" == "master" ] ; then
        export VERSION=latest
      fi
    - docker info
    - docker login -u ${DOCKER_REGISTRY_USER} -p ${DOCKER_REGISTRY_PASSWORD}
    # From docker:dind, install the latest kubectl plus envsubst, jq and curl
    - cd 'kubernetes-deploy'
    - docker build -t ${DOCKER_REGISTRY_USER}/kubernetes-deploy:latest .
    - docker push ${DOCKER_REGISTRY_USER}/kubernetes-deploy:latest
    # From ubuntu 16.04, install Hadoop
    - cd ../'docker-hadoop'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-hadoop:2.7.2 .
    - docker push ${DOCKER_REGISTRY_USER}/docker-hadoop:2.7.2
    # From docker-hadoop:2.7.2, create the image for the namenode
    - cd ../'docker-hadoop-namenode'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-hadoop-namenode:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/docker-hadoop-namenode:${VERSION}
    # From docker-hadoop:2.7.2, create the image for the datanodes
    - cd ../'docker-hadoop-datanode'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-hadoop-datanode:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/docker-hadoop-datanode:${VERSION}
    # From ubuntu 16.04, install Python, R and Spark for the workers
    - cd ../'docker-python-r-spark'
    - docker build -t ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries:${VERSION}
    # From kubernetes-spark-libraries, install RStudio for the master
    # Set version
    - cd ../'docker-rstudio-zeppelin'
    - sed -i "s/:latest/:${VERSION}/g" Dockerfile
    - sed -i "s/default.svc.cluster.local/${CI_PROJECT_NAME}.svc.cluster.local/g" Dockerfile
    - docker build -t ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries-rstudio:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries-rstudio:${VERSION}
    # Proxy controller for the Spark UI
    - cd ../'docker-spark-ui'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-spark-ui:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/docker-spark-ui:${VERSION}

# Deploy in Kubernetes
deploy:
  variables:
    # Kubernetes and Spark variables for resource management (memory and cpu)
    SPARK_MASTER_REQUESTS_CPU: 1
    SPARK_MASTER_REQUESTS_MEMORY: '8G'
    SPARK_MASTER_LIMITS_CPU: 1
    SPARK_MASTER_LIMITS_MEMORY: '10G'
    SPARK_WORKER_REQUESTS_CPU: 1
    SPARK_WORKER_REQUESTS_MEMORY: '2G'
    SPARK_WORKER_LIMITS_CPU: 1
    SPARK_WORKER_LIMITS_MEMORY: '3G'
    #SPARK_CORES_MAX: 2
    SPARK_EXECUTOR_CORES: 1
    SPARK_EXECUTOR_MEMORY: '2G'
    SPARK_DRIVER_CORES: 1
    SPARK_DRIVER_MEMORY: '3G'
    SPARK_DRIVER_MAXRESULTSIZE: '1G'
  image: angelsevillacamins/kubernetes-deploy:latest
  stage: deploy
  script:
    - export VERSION=$(echo $CI_COMMIT_REF_NAME | sed 's,.*/,,g')
    - |
      if [ "$VERSION" == "master" ] ; then
        export VERSION=latest
      fi
    - echo "${KUBE_CA_PEM}" > kube_ca.pem
    - kubectl config set-cluster default-cluster --server=https://<IP-nuc01>:6443 --certificate-authority="$(pwd)/kube_ca.pem"
    - kubectl config set-credentials default-admin --token=${KUBE_TOKEN}
    - kubectl config set-context default-system --cluster=default-cluster --user=default-admin --namespace=${CI_PROJECT_NAME}
    - kubectl config use-context default-system
    - kubectl cluster-info || true
    - kubectl delete cm,deploy,svc,statefulsets,rc,ds --all || true
    # Secrets
    - kubectl delete secret rstudio-password rstudio-user zeppelin-password zeppelin-user || true
    - kubectl create secret generic rstudio-password --from-literal=password=${RSTUDIO_PASSWORD} || true
    - kubectl create secret generic rstudio-user --from-literal=user=${RSTUDIO_USER} || true
    - kubectl create secret generic zeppelin-password --from-literal=password=${ZEPPELIN_PASSWORD} || true
    - kubectl create secret generic zeppelin-user --from-literal=user=${ZEPPELIN_USER} || true
    # Replace Kubernetes and Spark variables for resource management (memory and cpu)
    - envsubst < "spark-rstudio.yaml.template" > "spark-rstudio.yaml"
    # Set version and imagePullPolicy
    - sed -i "s/:latest/:${VERSION}/g" spark-rstudio.yaml hadoop-namenode.yaml hadoop-datanode.yaml
    - |
      if [ "$VERSION" != "latest" ] && [ "$VERSION" != "develop" ]; then
        sed -i 's/imagePullPolicy: "Always"/imagePullPolicy: "IfNotPresent"/g' spark-rstudio.yaml hadoop-namenode.yaml hadoop-datanode.yaml
      fi
    # Hadoop HDFS labels: RUN ONLY ONCE
    #- kubectl label nodes nuc01 hdfs-namenode-selector=hdfs-namenode-0 || true
    #- kubectl label node nuc01 hdfs-datanode-exclude=yes || true
    # Persistent volume: RUN ONLY ONCE
    #- kubectl create -f persist-pv.yaml
    - kubectl create -f hadoop-namenode.yaml
    - sed -i "s/default.svc.cluster.local/${CI_PROJECT_NAME}.svc.cluster.local/g" hadoop-datanode.yaml
    - kubectl create -f hadoop-datanode.yaml
    # Spark StatefulSets
    - kubectl create -f spark-rstudio.yaml
    # Wait until spark-master-0 is running
    - while [[ $(kubectl get pod spark-master-0 -o go-template --template "{{.status.phase}}") != "Running" ]]; do sleep 10; echo "Waiting for spark-master-0"; done;
    - kubectl port-forward spark-master-0 8181:8181 &
    # Update the Zeppelin interpreters with Spark variables and others
    - cd 'docker-rstudio-zeppelin'
    - while [ "$ZEPPELIN_STATUS" != "SUCCESS" ]; do ZEPPELIN_STATUS=`curl http://127.0.0.1:8181 -k -s -f -o /dev/null && echo "SUCCESS" || echo "ERROR"`; sleep 10; echo "Waiting for Zeppelin"; done;
    - curl -s --data "userName=${ZEPPELIN_USER}&password=${ZEPPELIN_PASSWORD}" -c cookies.txt -X POST http://127.0.0.1:8181/api/login
    - SPARK_INTERPRETER_ID=`curl -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting | jq -r '.body[] | select(.name == "spark") | .id'`
    # Configure the Zeppelin Spark interpreter
    - envsubst < "spark_interpreter.json.template" > "spark_interpreter.json"
    - curl -X PUT -d "@spark_interpreter.json" -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting/${SPARK_INTERPRETER_ID}
    # Configure the Zeppelin File interpreter
    - FILE_INTERPRETER_ID=`curl -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting | jq -r '.body[] | select(.name == "file") | .id'`
    - sed -i "s/default.svc.cluster.local/${CI_PROJECT_NAME}.svc.cluster.local/g" file_interpreter.json
    - curl -X PUT -d "@file_interpreter.json" -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting/${FILE_INTERPRETER_ID}
    # Configure the Zeppelin Python interpreter
    - PYTHON_INTERPRETER_ID=`curl -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting | jq -r '.body[] | select(.name == "python") | .id'`
    - curl -X PUT -d "@python_interpreter.json" -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting/${PYTHON_INTERPRETER_ID}
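
A note on the VERSION derivation that both jobs repeat: the sed expression strips everything up to the last slash in the branch name, and master is special-cased to latest. A minimal sketch of the mapping, with assumed branch names:

    #!/bin/sh
    # Sketch of the VERSION derivation; the example refs are assumptions.
    for ref in master develop feature/spark-ui release/1.2.0; do
      version=$(echo "$ref" | sed 's,.*/,,g')      # keep the text after the last '/'
      [ "$version" = "master" ] && version=latest  # master builds are tagged 'latest'
      echo "$ref -> $version"
    done
    # master -> latest, develop -> develop,
    # feature/spark-ui -> spark-ui, release/1.2.0 -> 1.2.0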

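The four secrets created in the deploy job have to be wired into the pods somewhere in the manifests. The fragment below is a hypothetical consumption pattern, not taken from the actual spark-rstudio.yaml, but it shows how a container would pick up the rstudio credentials under the key names used in the kubectl create secret calls (user and password):

    # Hypothetical container env block consuming the rstudio secrets:
    env:
      - name: RSTUDIO_USER
        valueFrom:
          secretKeyRef:
            name: rstudio-user
            key: user
      - name: RSTUDIO_PASSWORD
        valueFrom:
          secretKeyRef:
            name: rstudio-password
            key: password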
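The envsubst step is what turns the resource variables under deploy/variables into a concrete manifest. The template fragment below is hypothetical (spark-rstudio.yaml.template is not part of this file), but the mechanism is exactly this:

    # Hypothetical template fragment; the ${...} placeholders match the job's variables.
    cat > demo.yaml.template <<'EOF'
    resources:
      requests:
        cpu: ${SPARK_MASTER_REQUESTS_CPU}
        memory: ${SPARK_MASTER_REQUESTS_MEMORY}
      limits:
        cpu: ${SPARK_MASTER_LIMITS_CPU}
        memory: ${SPARK_MASTER_LIMITS_MEMORY}
    EOF
    export SPARK_MASTER_REQUESTS_CPU=1 SPARK_MASTER_REQUESTS_MEMORY=8G
    export SPARK_MASTER_LIMITS_CPU=1 SPARK_MASTER_LIMITS_MEMORY=10G
    envsubst < demo.yaml.template   # prints the fragment with the values filled in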
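After the three interpreter PUTs, the same endpoint can be read back to confirm the settings took effect. A sketch, assuming the port-forward from the job is still active, cookies.txt still holds a valid session, and the response shape matches what the job's own jq filters already rely on:

    # Read back the Spark interpreter settings to verify the update.
    curl -s -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting \
      | jq '.body[] | select(.name == "spark") | .properties'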