
Commit 41a0424

airflow-cluster moved to extra-quickstarters
1 parent 3105060 commit 41a0424

92 files changed, +5541 -0 lines


airflow-cluster/Jenkinsfile

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
def odsNamespace = env.ODS_NAMESPACE ?: 'ods'
def odsGitRef = env.ODS_GIT_REF ?: 'master'
def odsImageTag = env.ODS_IMAGE_TAG ?: 'latest'

library("ods-jenkins-shared-library@${odsGitRef}")

// Resolve the Docker registry from the Jenkins node environment.
def dockerRegistry
node {
  dockerRegistry = env.DOCKER_REGISTRY
}

odsQuickstarterPipeline(
  imageStreamTag: "${odsNamespace}/jenkins-agent-airflow:${odsImageTag}",
) { context ->

  odsQuickstarterStageCopyFiles(context)

  // Create the OpenShift resources for the Airflow cluster via the
  // quickstarter's setup script.
  stage('Setup OpenShift resources') {
    sh "cd ${context.sourceDir} ; sh ./custom-create-components.sh \
      -p ${context.projectId} -c ${context.componentId} -b ${context.gitUrlHttp.split('/' + context.projectId)[0]} \
      -r ${dockerRegistry} -gr ${odsGitRef} \
      -oa 'your-openshift-apihost' -oc 'https://your-openshift-console' "
  }

  odsQuickstarterStageRenderJenkinsfile(context)

  odsQuickstarterStageRenderSonarProperties(context)
}

airflow-cluster/Jenkinsfile.template

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
// See https://www.opendevstack.org/ods-documentation/ for usage and customization.

@Library('ods-jenkins-shared-library@@ods_git_ref@') _

odsComponentPipeline(
  imageStreamTag: '@ods_namespace@/jenkins-agent-airflow:@ods_image_tag@',
  componentId: 'airflow-worker',
  testResults: 'artifacts',
  branchToEnvironmentMapping: [
    'master': 'dev',
    // 'release/': 'test'
  ]
) { context ->
  stageDAGTest(context)
  stageUnitTest(context)
  stageBuild(context)
  odsComponentStageScanWithSonar(context)
  odsComponentStageBuildOpenShiftImage(context, [buildTimeoutMinutes: 25])
  stageTagImage(context)
  stagePublishDAGs(context)
}

def stageDAGTest(def context) {
  stage('DAG Integrity Tests') {
    sh 'sh test_dag_integrity.sh'
  }
}

def stageUnitTest(def context) {
  stage('Unit Tests') {
    sh "pip install -i ${context.nexusHostWithBasicAuth}/repository/pypi-all/simple --trusted-host ${context.nexusHostWithoutScheme} -r src/requirements.txt --user"
    sh 'sh test_all.sh'
  }
}

def stageBuild(def context) {
  stage('Build') {
    sh 'sh build.sh'
  }
}

def stageTagImage(def context) {
  stage('Tag build') {
    if (!context.environment) {
      println("Skipping for empty environment ...")
      return
    }
    sh(
      script: "oc -n ${context.targetProject} tag ${context.componentId}:${context.tagversion} ${context.componentId}:latest",
      label: "Update latest tag of is/${context.componentId} to ${context.tagversion}"
    )
  }
}

def stagePublishDAGs(def context) {
  stage('Publish DAGs') {
    // Find the most recently started webserver and scheduler pods. The columns of
    // `oc get pods` are NAME, READY, STATUS, ..., so [0] is the pod name and [2] its status.
    def airflow_webserver_info = sh(returnStdout: true, script: "oc get pods --sort-by=.status.startTime --no-headers -l component=airflow-webserver -n ${context.targetProject} | tail -n1").trim().split(/\s+/)
    def airflow_scheduler_info = sh(returnStdout: true, script: "oc get pods --sort-by=.status.startTime --no-headers -l component=airflow-scheduler -n ${context.targetProject} | tail -n1").trim().split(/\s+/)

    if (airflow_webserver_info[2] != "Running" || airflow_scheduler_info[2] != "Running") {
      error("Airflow cluster is not running or does not exist")
    }

    // Sync the DAGs and their dependencies into the running webserver and scheduler pods.
    sh "oc rsync --no-perms=true --delete=true --exclude=.keep --exclude=__pycache__ src/dags ${airflow_webserver_info[0]}:/opt/app-root/src/airflow/ -n ${context.targetProject}"
    sh "oc rsync --no-perms=true --delete=true --exclude=.keep --exclude=__pycache__ src/dags ${airflow_scheduler_info[0]}:/opt/app-root/src/airflow/ -n ${context.targetProject}"
    sh "oc rsync --no-perms=true --delete=true --exclude=.keep --exclude=__pycache__ src/dag_deps ${airflow_webserver_info[0]}:/opt/app-root/src/airflow/ -n ${context.targetProject}"
    sh "oc rsync --no-perms=true --delete=true --exclude=.keep --exclude=__pycache__ src/dag_deps ${airflow_scheduler_info[0]}:/opt/app-root/src/airflow/ -n ${context.targetProject}"
  }
}

airflow-cluster/README.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
# AirFlow OpenShift Cluster Boilerplate

Documentation is located in our [official documentation](https://www.opendevstack.org/ods-documentation/ods-quickstarters/latest/index.html).

Please update documentation in the [antora page directory](https://github.com/opendevstack/ods-quickstarters/tree/master/docs/modules/ROOT/pages).

airflow-cluster/base-images/README.md

Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
# Airflow base images

Images used in [ods-quickstarters/airflow-cluster](https://github.com/opendevstack/ods-quickstarters/tree/master/airflow-cluster)

## Contents

1. [Airflow](airflow) for OpenDevStack
2. [Elasticsearch](elasticsearch) for OpenDevStack
airflow-cluster/base-images/airflow/Airflow Architecture Diagram.png

(binary image, 27.7 KB)

airflow-cluster/base-images/airflow/Dockerfile

Lines changed: 138 additions & 0 deletions
@@ -0,0 +1,138 @@
# RedHat image should be specified in the build config
# FROM registry.access.redhat.com/rhscl/python-36-rhel7

# Centos is used for enabling local builds
FROM centos/python-36-centos7

ARG AIRFLOW_VERSION=1.10.3
ARG FILE_BEATS_VERSION=7.0.0

LABEL maintainer="Hugo Wruck Schneider <hugo.wruck_schneider@boehringer-ingelheim.com>"

ENV SUMMARY="OpenDevStack Provided Airflow ${AIRFLOW_VERSION} Base Image compatible with OpenShift"
ENV DESCRIPTION="OpenDevStack Provided Airflow ${AIRFLOW_VERSION} Base Image compatible with OpenShift. This image \
should be used for running airflow's webserver and scheduler and as base image for building the worker's image \
\
Packages being used: \
- Airflow ${AIRFLOW_VERSION}\
- FileBeat ${FILE_BEATS_VERSION}\
"
LABEL "name"="openshift3/airflow" \
      "summary"=$SUMMARY \
      "description"=$DESCRIPTION \
      "version"=$AIRFLOW_VERSION \
      "io.openshift.tags"="python36,python,airflow,airflow1102" \
      "io.k8s.description"=$DESCRIPTION \
      "io.openshift.expose-services"="8080:http" \
      "io.k8s.display-name"="Airflow ${AIRFLOW_VERSION}" \
      "com.redhat.component"="airflow-rhel7-docker"

ARG PYTHON_DEPS="Flask==1.0.0 tzlocal==1.5.1 urllib3==1.25.3 thrift==0.11.0 tabulate==0.8.3 six==1.12.0 PyYAML==5.1.2 \
pytzdata==2019.2 python-dateutil==2.8.0 Pygments==2.4.2 pyasn1-modules==0.2.6 psutil==5.6.3"

ARG AIRFLOW_DEPS=""

ENV AIRFLOW_HOME=$HOME/airflow
ENV SLUGIFY_USES_TEXT_UNIDECODE yes

ARG NEXUS_USERNAME
ARG NEXUS_PASSWORD
ARG NEXUS_URL

# Define en_US.
ENV LANGUAGE en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LC_CTYPE en_US.UTF-8
ENV LC_MESSAGES en_US.UTF-8

USER root

ADD https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-${FILE_BEATS_VERSION}-x86_64.rpm ${HOME}

RUN buildDeps=' \
        freetds-devel \
        krb5-devel \
        cyrus-sasl-devel \
        openssl-devel \
        libffi-devel \
        postgresql-devel \
        git \
    ' \
    && pipDeps="\
        pytz==2019.2 \
        pyOpenSSL==19.0.0 \
        paramiko==2.6.0 \
        sshtunnel==0.1.5 \
        ndg-httpsclient==0.5.1 \
        pyasn1==0.4.6 \
        requests-oauthlib==1.1.0 \
        apache-airflow[crypto,postgres,hive,jdbc,mysql,ssh,kubernetes,elasticsearch${AIRFLOW_DEPS:+,}${AIRFLOW_DEPS}]==${AIRFLOW_VERSION} \
    " \
    && rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm \
    && yum install --assumeyes \
        $buildDeps \
        freetds \
        mariadb-libs \
        curl \
        rsync \
        nmap-ncat \
    && localedef -c -f UTF-8 -i en_US en_US.UTF-8 \
    # ${NEXUS_URL:8} drops the first 8 characters ("https://") so that basic-auth
    # credentials can be embedded into the index URL
    && if [[ ! -z ${NEXUS_URL} ]]; \
        then pip install -U -i https://${NEXUS_USERNAME}:${NEXUS_PASSWORD}@${NEXUS_URL:8}/repository/pypi-all/simple pip setuptools wheel \
        && pip install -i https://${NEXUS_USERNAME}:${NEXUS_PASSWORD}@${NEXUS_URL:8}/repository/pypi-all/simple $pipDeps; \
        else pip install -U pip setuptools wheel \
        && pip install $pipDeps; \
    fi \
    && yum localinstall --assumeyes ${HOME}/filebeat-${FILE_BEATS_VERSION}-x86_64.rpm \
    && yum clean all \
    && rm -rf /var/cache/yum

COPY config/filebeat.yml /etc/filebeat/filebeat.yml

### Log Stash - END

COPY scripts/entrypoint.sh /entrypoint.sh
COPY config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg

RUN mkdir -p ${AIRFLOW_HOME}/dags \
    && mkdir -p ${AIRFLOW_HOME}/plugins \
    && mkdir -p ${AIRFLOW_HOME}/dag_deps

COPY dist/oauth ${HOME}/oauth
COPY dist/openshift_plugin ${HOME}/openshift_plugin

# Install the bundled OAuth backend and OpenShift plugin, and make all runtime
# directories group-writable so the image runs under OpenShift's arbitrary UIDs.
RUN pip install ${HOME}/oauth \
    && CFLAGS=-std=c99 pip install ${HOME}/openshift_plugin \
    && chgrp -R 0 ${AIRFLOW_HOME} \
    && chmod -R g=u ${AIRFLOW_HOME} \
    && chmod +x /entrypoint.sh \
    && chmod g+w /etc/passwd \
    && mkdir -p /var/lib/filebeat \
    && chgrp -R 0 /var/lib/filebeat \
    && chmod -R g=u /var/lib/filebeat \
    && mkdir -p /var/log/filebeat \
    && chgrp -R 0 /var/log/filebeat \
    && chmod -R g=u /var/log/filebeat \
    && chmod g+r /etc/filebeat/filebeat.yml \
    && chgrp -R 0 /etc/pki/ca-trust \
    && chmod -R g=u /etc/pki/ca-trust

RUN if [ -n "${PYTHON_DEPS}" ]; then \
        if [[ ! -z ${NEXUS_URL} ]]; \
        then pip install -i https://${NEXUS_USERNAME}:${NEXUS_PASSWORD}@${NEXUS_URL:8}/repository/pypi-all/simple -U ${PYTHON_DEPS}; \
        else pip install -U ${PYTHON_DEPS}; \
        fi; \
    fi

USER 1001

ENV PYTHONPATH="${AIRFLOW_HOME}/dag_deps:${PYTHONPATH}"

# Only the port for the webserver (8080) is exposed. Exposing the ports for flower
# and for the http server on the worker is not advisable, but if needed this can be changed.
EXPOSE 8080

WORKDIR ${AIRFLOW_HOME}
ENTRYPOINT ["/entrypoint.sh"]
airflow-cluster/base-images/airflow/README.md

Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
# AirFlow OpenDevStack Shared Image

This image provides AirFlow 1.10.3 for OpenShift.

## Setup

The AirFlow setup that this image provides is based on the KubernetesExecutor and will
start worker pods on demand. It is also set up to use an ElasticSearch instance as the log
repository for all workers, as illustrated below:

![Airflow Architecture](Airflow Architecture Diagram.png?raw=true "Airflow Architecture")

To set up the whole infrastructure, the Airflow quickstarter should be used.
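As a rough sketch of what this means in `airflow.cfg` terms (all values are placeholders; key names follow the Airflow 1.10.x documentation and changed slightly across 1.10.x releases):

```ini
[core]
# Workers are started as pods on demand
executor = KubernetesExecutor
# Task logs are read back from ElasticSearch instead of the local disk
remote_logging = True

[elasticsearch]
# 1.10.3 key name; later 1.10.x releases rename it to plain `host`
elasticsearch_host = http://your-elasticsearch:9200

[kubernetes]
# Image used for the on-demand worker pods
worker_container_repository = your-registry/your-project/airflow-worker
worker_container_tag = latest
namespace = your-project
```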
## Contents

The image contains all dependencies and Airflow extras to run Airflow 1.10.3 in this setup.
It also includes FileBeat 7.0.0 to send all log files to ElasticSearch.

### Generic OAuth backend

A generic OAuth backend is included in this image to enable Airflow to
authenticate against any OAuth server, including the one from OpenShift.

### OpenShift Plugin

An OpenShift plugin was added to address compatibility and security issues
between the KubernetesExecutor and OpenShift.

This plugin includes two views:

* one for inspecting the pods that are and/or were part of the cluster
* a second one to sync DAG information from the worker image back to
  the webserver and scheduler

## Security

### Authentication

Authentication is enabled and uses OpenShift OAuth to authenticate
users in the webserver.
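For orientation, enabling an authentication backend in Airflow 1.10.x uses the standard `[webserver]` options shown below; the `auth_backend` module path here is only a placeholder, since the concrete backend name is defined by the bundled `airflow_oauth` package.

```ini
[webserver]
authenticate = True
# Placeholder module path: the actual OAuth backend ships with the
# bundled airflow_oauth package
auth_backend = airflow_oauth.contrib.auth.backends.oauth_auth
```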
## Configuration

No Airflow configuration options were changed; they can be used as documented
in the [Airflow Documentation](https://airflow.apache.org/project.html).

Besides the standard Airflow configuration, the OAuth backend and the OpenShift plugin
introduce a small set of configuration options.

### OAuth configuration

The configuration section is `oauth` and must have the following keys:

```ini
[oauth]
# base_url always contains the value of the OpenShift API url
base_url = https://your.openshift.api.url

# client_id must have the service account name which serves as OAuth client
client_id = system:serviceaccount:your-namespace:your-service-account-name

# oauth_callback_route should not change unless you know what you are doing
oauth_callback_route = /oauth2callback

# authorize_url is the authorization API of OpenShift
authorize_url = https://your.openshift.api.url/oauth/authorize

# access_token_url is the token API of OpenShift
access_token_url = https://your.openshift.api.url/oauth/token

# access_token_method is the HTTP method used for calling the token API
access_token_method = POST

# user_info_url is the user information API of OpenShift
user_info_url = https://your.openshift.api.url/apis/user.openshift.io/v1/users/~

# username_key and email_key are paths inside the reply of user_info_url where
# the username and email of the user can be found
username_key = metadata.name
email_key = metadata.name

# oauth_permission_backend is the OAuth authorization backend
oauth_permission_backend = airflow_oauth.contrib.auth.backends.openshift_permission_backend
```

NOTE: if SSL verification fails when calling the OAuth callback, users can import the host
certificate chain using `AIRFLOW_HOSTS_TO_TRUST` or set the
environment variable `AIRFLOW__HTTP_CLIENT__INSECURE` to `true`.
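As a small illustration (hypothetical hostname), the trust list follows the `hostname:port` format described in the environment variable table below:

```sh
# Import the certificate chain of the OpenShift API host at container start
export AIRFLOW_HOSTS_TO_TRUST="api.your-openshift.example.com:443"
```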
### OpenShift Plugin Settings

The section `openshift_plugin` supports the plugin and must have the following keys:

```ini
[openshift_plugin]
# OpenShift roles of a user which will allow access to Airflow
access_roles = role1,role2
# OpenShift roles of a user which will allow superuser access to Airflow
superuser_roles = role1
# Base OpenShift console url for building the links to Airflow resources
openshift_console_url = https://localhost
```
## Environment Variables

A set of environment variables is included to allow an easy way of using ConfigMaps
and Secrets. They are:

| Name | Type | Description |
|------|------|-------------|
| **AIRFLOW_COMMAND** | String (Required) | Airflow command that should be executed, or empty for a custom command. It defines whether the container behaves as the webserver, scheduler and so on. The available options are: `webserver`, `worker`, `scheduler`, `flower` or `version` |
| AIRFLOW_HOSTS_TO_TRUST | String | A semicolon-separated list of hosts to be trusted, in the format `hostname1:port1;hostname2:port2;...`. These hosts will have their certificate chains automatically trusted |
| POSTGRES_HOST | String (Required) | PostgreSQL host to which Airflow should connect |
| POSTGRES_PORT | String | PostgreSQL port to which Airflow should connect. Default: 5432 |
| POSTGRES_USER | String (Required) | PostgreSQL username which Airflow should use |
| POSTGRES_PASSWORD | String (Required) | PostgreSQL password which Airflow should use |
| POSTGRES_DATABASE | String | Database name that should be used. Default: airflow |
| START_FILE_BEAT | 1 or 0 | 0 if the FileBeat service should not be started. Default: 1 (started) |
| ELASTICSEARCH_URL | URL | ElasticSearch URL to which Airflow should connect |
| ELASTICSEARCH_USERNAME | String | ElasticSearch username which Airflow should use |
| ELASTICSEARCH_PASSWORD | String | ElasticSearch password which Airflow should use |
Since multiple deployment configs will use the same configuration values, apart from
`AIRFLOW_COMMAND`, it is advisable to create a ConfigMap and mount it into each
Airflow deployment config, as sketched below.
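A minimal sketch of that approach with plain `oc` commands, assuming hypothetical resource names (`airflow-config`, `airflow-webserver`, `airflow-scheduler`) and placeholder values:

```sh
# Collect the shared settings in one ConfigMap (placeholder values)
oc create configmap airflow-config \
  --from-literal=POSTGRES_HOST=airflow-postgresql \
  --from-literal=POSTGRES_USER=airflow \
  --from-literal=POSTGRES_DATABASE=airflow \
  --from-literal=ELASTICSEARCH_URL=http://elasticsearch:9200 \
  -n your-project

# Expose the ConfigMap as environment variables in each deployment config ...
oc set env dc/airflow-webserver --from=configmap/airflow-config -n your-project
oc set env dc/airflow-scheduler --from=configmap/airflow-config -n your-project

# ... and set only AIRFLOW_COMMAND individually
oc set env dc/airflow-webserver AIRFLOW_COMMAND=webserver -n your-project
oc set env dc/airflow-scheduler AIRFLOW_COMMAND=scheduler -n your-project
```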
**All Airflow configuration, including OpenShift plugin configuration,
can be done using environment variables**, as documented in
https://airflow.apache.org/howto/set-config.html.
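For example (placeholder values), the documented `AIRFLOW__{SECTION}__{KEY}` naming scheme maps environment variables onto `airflow.cfg` entries:

```sh
# Equivalent to setting [openshift_plugin] access_roles = role1,role2 in airflow.cfg
export AIRFLOW__OPENSHIFT_PLUGIN__ACCESS_ROLES=role1,role2

# Equivalent to setting [webserver] base_url in airflow.cfg
export AIRFLOW__WEBSERVER__BASE_URL=https://airflow.example.com
```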
