# List of all stages
stages:
  - build
  - deploy
# Build the Docker images
image_build:
  stage: build
  image: docker:latest
  script:
    - export VERSION=$(echo $CI_COMMIT_REF_NAME | sed 's,.*/,,g')
    - |
      if [ "$VERSION" == "master" ]; then
        export VERSION=latest
      fi
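    # For illustration: the sed above strips everything up to the last '/', so a
    # ref like 'feature/my-branch' yields VERSION=my-branch, and builds from
    # master are mapped to the 'latest' tag by the block above.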
    - docker info
    - docker login -u "${DOCKER_REGISTRY_USER}" -p "${DOCKER_REGISTRY_PASSWORD}"
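    # DOCKER_REGISTRY_USER and DOCKER_REGISTRY_PASSWORD are expected to be defined
    # as CI/CD variables in the project settings; with no registry host given,
    # the login targets Docker Hub.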
    # From docker:dind, install the latest version of kubectl, envsubst, jq and curl
    - cd 'kubernetes-deploy'
    - docker build -t ${DOCKER_REGISTRY_USER}/kubernetes-deploy:latest .
    - docker push ${DOCKER_REGISTRY_USER}/kubernetes-deploy:latest
    # From Ubuntu 16.04, install Hadoop
    - cd ../'docker-hadoop'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-hadoop:2.7.2 .
    - docker push ${DOCKER_REGISTRY_USER}/docker-hadoop:2.7.2
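    # Note that the Hadoop base image is tagged with the Hadoop version (2.7.2)
    # rather than ${VERSION}, so the namenode and datanode builds below always
    # start from the same base.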
    # From docker-hadoop:2.7.2, create the image for the namenode
    - cd ../'docker-hadoop-namenode'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-hadoop-namenode:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/docker-hadoop-namenode:${VERSION}
    # From docker-hadoop:2.7.2, create the image for the datanodes
    - cd ../'docker-hadoop-datanode'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-hadoop-datanode:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/docker-hadoop-datanode:${VERSION}
    # From Ubuntu 16.04, install Python, R and Spark for the workers
    - cd ../'docker-python-r-spark'
    - docker build -t ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries:${VERSION}
    # From kubernetes-spark-libraries, install RStudio for the master
    # Set the version and the cluster-local DNS domain in the Dockerfile
    - cd ../'docker-rstudio-zeppelin'
    - sed -i "s/:latest/:${VERSION}/g" Dockerfile
    - sed -i "s/default.svc.cluster.local/${CI_PROJECT_NAME}.svc.cluster.local/g" Dockerfile
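    # For illustration (assuming the Dockerfile's FROM line references
    # kubernetes-spark-libraries:latest): the first sed pins the base image to
    # this build's tag, and the second points service DNS names at this
    # project's namespace instead of 'default'.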
    - docker build -t ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries-rstudio:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/kubernetes-spark-libraries-rstudio:${VERSION}
    # Proxy controller for the Spark UI
    - cd ../'docker-spark-ui'
    - docker build -t ${DOCKER_REGISTRY_USER}/docker-spark-ui:${VERSION} .
    - docker push ${DOCKER_REGISTRY_USER}/docker-spark-ui:${VERSION}
# Deploy in Kubernetes
deploy:
  variables:
    # Kubernetes and Spark variables for resource management (memory and CPU)
    SPARK_MASTER_REQUESTS_CPU: 1
    SPARK_MASTER_REQUESTS_MEMORY: '8G'
    SPARK_MASTER_LIMITS_CPU: 1
    SPARK_MASTER_LIMITS_MEMORY: '10G'
    SPARK_WORKER_REQUESTS_CPU: 1
    SPARK_WORKER_REQUESTS_MEMORY: '2G'
    SPARK_WORKER_LIMITS_CPU: 1
    SPARK_WORKER_LIMITS_MEMORY: '3G'
    # SPARK_CORES_MAX: 2
    SPARK_EXECUTOR_CORES: 1
    SPARK_EXECUTOR_MEMORY: '2G'
    SPARK_DRIVER_CORES: 1
    SPARK_DRIVER_MEMORY: '3G'
    SPARK_DRIVER_MAXRESULTSIZE: '1G'
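    # These values are substituted into spark-rstudio.yaml.template by the
    # envsubst call in the script below.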
  image: angelsevillacamins/kubernetes-deploy:latest
  stage: deploy
  script:
    - export VERSION=$(echo $CI_COMMIT_REF_NAME | sed 's,.*/,,g')
    - |
      if [ "$VERSION" == "master" ]; then
        export VERSION=latest
      fi
    - echo "${KUBE_CA_PEM}" > kube_ca.pem
    - kubectl config set-cluster default-cluster --server=https://<IP-nuc01>:6443 --certificate-authority="$(pwd)/kube_ca.pem"
    - kubectl config set-credentials default-admin --token=${KUBE_TOKEN}
    - kubectl config set-context default-system --cluster=default-cluster --user=default-admin --namespace=${CI_PROJECT_NAME}
    - kubectl config use-context default-system
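    # The context above authenticates against the cluster with the CA certificate
    # from KUBE_CA_PEM and the token from KUBE_TOKEN, scoped to a namespace named
    # after the project.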
    - kubectl cluster-info || true
    - kubectl delete cm,deploy,svc,statefulsets,rc,ds --all || true
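    # Tear down any resources from a previous deployment; '|| true' keeps the job
    # from failing on a first run when there is nothing to delete.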
    # Secrets
    - kubectl delete secret rstudio-password rstudio-user zeppelin-password zeppelin-user || true
    - kubectl create secret generic rstudio-password --from-literal=password=${RSTUDIO_PASSWORD} || true
    - kubectl create secret generic rstudio-user --from-literal=user=${RSTUDIO_USER} || true
    - kubectl create secret generic zeppelin-password --from-literal=password=${ZEPPELIN_PASSWORD} || true
    - kubectl create secret generic zeppelin-user --from-literal=user=${ZEPPELIN_USER} || true
    # Replace Kubernetes and Spark variables for resource management (memory and CPU)
    - envsubst < "spark-rstudio.yaml.template" > "spark-rstudio.yaml"
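    # For illustration (assumed template content): a template line such as
    #   memory: ${SPARK_MASTER_REQUESTS_MEMORY}
    # would be rendered as 'memory: 8G' in the generated spark-rstudio.yaml.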
    # Set the version and imagePullPolicy
    - sed -i "s/:latest/:${VERSION}/g" spark-rstudio.yaml hadoop-namenode.yaml hadoop-datanode.yaml
    - |
      if [ "$VERSION" != "latest" ] && [ "$VERSION" != "develop" ]; then
        sed -i 's/imagePullPolicy: "Always"/imagePullPolicy: "IfNotPresent"/g' spark-rstudio.yaml hadoop-namenode.yaml hadoop-datanode.yaml
      fi
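    # Rationale: 'latest' and 'develop' are moving tags and must always be pulled,
    # whereas other version tags are treated as immutable and can reuse a cached
    # image on the node.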
    # Hadoop HDFS (RUN ONLY ONCE)
    # - kubectl label nodes nuc01 hdfs-namenode-selector=hdfs-namenode-0 || true
    # - kubectl label node nuc01 hdfs-datanode-exclude=yes || true
    # Persistent volume (RUN ONLY ONCE)
    # - kubectl create -f persist-pv.yaml
    - kubectl create -f hadoop-namenode.yaml
    - sed -i "s/default.svc.cluster.local/${CI_PROJECT_NAME}.svc.cluster.local/g" hadoop-datanode.yaml
    - kubectl create -f hadoop-datanode.yaml
    # Spark StatefulSets
    - kubectl create -f spark-rstudio.yaml
    # Wait until spark-master-0 is running
    - while [[ $(kubectl get pod spark-master-0 -o go-template --template "{{.status.phase}}") != "Running" ]]; do sleep 10; echo "Waiting for spark-master-0"; done;
    - kubectl port-forward spark-master-0 8181:8181 &
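    # The trailing '&' runs port-forward in the background so that the curl calls
    # below can reach Zeppelin on 127.0.0.1:8181 while the job continues.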
    # Update the Zeppelin interpreters with the Spark variables and others
    - cd 'docker-rstudio-zeppelin'
    - while [ "$ZEPPELIN_STATUS" != "SUCCESS" ]; do ZEPPELIN_STATUS=`curl http://127.0.0.1:8181 -k -s -f -o /dev/null && echo "SUCCESS" || echo "ERROR"`; sleep 10; echo "Waiting for Zeppelin"; done;
    - curl -s --data "userName=${ZEPPELIN_USER}&password=${ZEPPELIN_PASSWORD}" -c cookies.txt -X POST http://127.0.0.1:8181/api/login
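    # The login call above stores the Zeppelin session cookie in cookies.txt,
    # which is replayed with -b on every interpreter API request below.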
    - SPARK_INTERPRETER_ID=`curl -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting | jq -r '.body[] | select(.name == "spark") | .id'`
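    # The interpreter listing is JSON of the form {"body": [{"id": ..., "name": ...}, ...]},
    # so jq selects the entry named "spark" and extracts its id.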
    # Configure the Zeppelin Spark interpreter
    - envsubst < "spark_interpreter.json.template" > "spark_interpreter.json"
    - curl -X PUT -d "@spark_interpreter.json" -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting/${SPARK_INTERPRETER_ID}
    # Configure the Zeppelin file interpreter
    - FILE_INTERPRETER_ID=`curl -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting | jq -r '.body[] | select(.name == "file") | .id'`
    - sed -i "s/default.svc.cluster.local/${CI_PROJECT_NAME}.svc.cluster.local/g" file_interpreter.json
    - curl -X PUT -d "@file_interpreter.json" -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting/${FILE_INTERPRETER_ID}
    # Configure the Zeppelin Python interpreter
    - PYTHON_INTERPRETER_ID=`curl -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting | jq -r '.body[] | select(.name == "python") | .id'`
    - curl -X PUT -d "@python_interpreter.json" -b cookies.txt http://127.0.0.1:8181/api/interpreter/setting/${PYTHON_INTERPRETER_ID}