Update to Apache Airflow 1.9
Airflow 1.9 has been released, so we would like to update the images.
Fokko Driesprong committed Jan 3, 2018
1 parent ea9a036 commit c8f0361
Showing 6 changed files with 79 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -21,3 +21,6 @@ Session.vim

# sftp configuration file
sftp-config.json

# Python
__pycache__
4 changes: 2 additions & 2 deletions Dockerfile
@@ -12,7 +12,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux

# Airflow
ARG AIRFLOW_VERSION=1.8.2
ARG AIRFLOW_VERSION=1.9.0
ARG AIRFLOW_HOME=/usr/local/airflow

# Define en_US.
@@ -57,7 +57,7 @@ RUN set -ex \
&& pip install ndg-httpsclient \
&& pip install pyasn1 \
&& pip install apache-airflow[crypto,celery,postgres,hive,jdbc]==$AIRFLOW_VERSION \
&& pip install celery[redis]==3.1.17 \
&& pip install celery[redis]==4.0.2 \
&& apt-get purge --auto-remove -yqq $buildDeps \
&& apt-get clean \
&& rm -rf \
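As a quick local check of the Dockerfile bump, the image can be rebuilt with the new build argument. A minimal sketch, assuming it is run from the repository root; the tag is only an example:

    docker build --rm --build-arg AIRFLOW_VERSION=1.9.0 -t puckel/docker-airflow .

The --build-arg flag overrides the ARG already declared in the Dockerfile, so omitting it now builds 1.9.0 by default.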
2 changes: 1 addition & 1 deletion circle.yml
@@ -12,4 +12,4 @@ test:
pre:
- sleep 5
override:
- docker run puckel/docker-airflow version |grep '1.8.2'
- docker run puckel/docker-airflow version |grep '1.9.0'
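The same smoke test that CI runs can be reproduced locally against the image built above (a sketch, assuming the local tag puckel/docker-airflow):

    docker run puckel/docker-airflow version | grep '1.9.0'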
78 changes: 68 additions & 10 deletions config/airflow.cfg
@@ -12,18 +12,26 @@ dags_folder = /usr/local/airflow/dags
base_log_folder = /usr/local/airflow/logs

# Airflow can store logs remotely in AWS S3 or Google Cloud Storage. Users
# must supply a remote location URL (starting with either 's3://...' or
# 'gs://...') and an Airflow connection id that provides access to the storage
# must supply an Airflow connection id that provides access to the storage
# location.
remote_base_log_folder =
remote_log_conn_id =
# Use server-side encryption for logs stored in S3
encrypt_s3_logs = False
# DEPRECATED option for remote log storage, use remote_base_log_folder instead!
s3_log_folder =

# Logging level
logging_level = INFO

# Logging class
# Specify the class that will specify the logging configuration
# This class has to be on the python classpath
# logging_config_class = my.path.default_local_settings.LOGGING_CONFIG
logging_config_class =

# Log format
log_format = [%%(asctime)s] {{%%(filename)s:%%(lineno)d}} %%(levelname)s - %%(message)s
simple_log_format = %%(asctime)s %%(levelname)s - %%(message)s

# The executor class that airflow should use. Choices include
# SequentialExecutor, LocalExecutor, CeleryExecutor
# SequentialExecutor, LocalExecutor, CeleryExecutor, DaskExecutor
executor = CeleryExecutor

# The SqlAlchemy connection string to the metadata database.
@@ -89,6 +97,18 @@ security =
# values at runtime)
unit_test_mode = False

# Name of handler to read task instance logs.
# Default to use file task handler.
task_log_reader = file.task

# Whether to enable pickling for xcom (note that this is insecure and allows for
# RCE exploits). This will be deprecated in Airflow 2.0 (be forced to False).
enable_xcom_pickling = True

# When a task is killed forcefully, this is the amount of time in seconds that
# it has to cleanup after it is sent a SIGTERM, before it is SIGKILLED
killed_task_cleanup_time = 60

[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
@@ -168,6 +188,10 @@ filter_by_owner = False
# in order to user the ldapgroup mode.
owner_mode = user

# Default DAG view. Valid values are:
# tree, graph, duration, gantt, landing_times
dag_default_view = tree

# Default DAG orientation. Valid values are:
# LR (Left->Right), TB (Top->Bottom), RL (Right->Left), BT (Bottom->Top)
dag_orientation = LR
Expand All @@ -184,6 +208,9 @@ log_fetch_timeout_sec = 5
# DAGs by default
hide_paused_dags_by_default = False

# Consistent page size across all listing views in the UI
page_size = 100

[email]
email_backend = airflow.utils.email.send_email_smtp

@@ -198,7 +225,7 @@ smtp_ssl = False
# smtp_user = airflow
# smtp_password = airflow
smtp_port = 25
smtp_mail_from = airflow@airflow.com
smtp_mail_from = airflow@example.com

[celery]
# This section only applies if you are using the CeleryExecutor in
@@ -238,6 +265,19 @@ flower_port = 5555
# Default queue that tasks get assigned to and that worker listen on.
default_queue = default

# Import path for celery configuration options
celery_config_options = airflow.config_templates.default_celery.DEFAULT_CELERY_CONFIG

# No SSL
celery_ssl_active = False

[dask]
# This section only applies if you are using the DaskExecutor in
# [core] section above

# The IP address and port of the Dask cluster's scheduler.
cluster_address = 127.0.0.1:8786

[scheduler]
# Task instances listen for external kill signal (when you clear tasks
# from the CLI or the UI), this defines the frequency at which they should
@@ -276,19 +316,37 @@ scheduler_zombie_task_threshold = 300
# DAG definition (catchup)
catchup_by_default = True

# This changes the batch size of queries in the scheduling main loop.
# This depends on query length limits and how long you are willing to hold locks.
# 0 for no limit
max_tis_per_query = 0

# Statsd (https://github.com/etsy/statsd) integration settings
statsd_on = False
statsd_host = localhost
statsd_port = 8125
statsd_prefix = airflow

# The scheduler can run multiple threads in parallel to schedule dags.
# This defines how many threads will run. However airflow will never
# use more threads than the amount of cpu cores available.
# This defines how many threads will run.
max_threads = 2

authenticate = False

[ldap]
# set this to ldaps://<your.ldap.server>:<port>
uri =
user_filter = objectClass=*
user_name_attr = uid
group_member_attr = memberOf
superuser_filter =
data_profiler_filter =
bind_user = cn=Manager,dc=example,dc=com
bind_password = insecure
basedn = dc=example,dc=com
cacert = /etc/ca/ldap_ca.crt
search_scope = LEVEL

[mesos]
# Mesos master address which MesosExecutor will connect to.
master = localhost:5050
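The refreshed airflow.cfg also documents the new DaskExecutor. This repository keeps using the CeleryExecutor, but for reference a minimal sketch of switching to Dask could look as follows (the address is just the default shown above and assumes a dask-scheduler is reachable there):

    [core]
    executor = DaskExecutor

    [dask]
    # Address of the Dask cluster's scheduler
    cluster_address = 127.0.0.1:8786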
8 changes: 4 additions & 4 deletions docker-compose-CeleryExecutor.yml
@@ -16,7 +16,7 @@ services:
# - ./pgdata:/var/lib/postgresql/data/pgdata

webserver:
image: puckel/docker-airflow:1.8.2
image: puckel/docker-airflow:1.9.0
restart: always
depends_on:
- postgres
Expand All @@ -41,7 +41,7 @@ services:
retries: 3

flower:
image: puckel/docker-airflow:1.8.2
image: puckel/docker-airflow:1.9.0
restart: always
depends_on:
- redis
Expand All @@ -53,7 +53,7 @@ services:
command: flower

scheduler:
image: puckel/docker-airflow:1.8.2
image: puckel/docker-airflow:1.9.0
restart: always
depends_on:
- webserver
Expand All @@ -70,7 +70,7 @@ services:
command: scheduler

worker:
image: puckel/docker-airflow:1.8.2
image: puckel/docker-airflow:1.9.0
restart: always
depends_on:
- scheduler
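To pick up the retagged 1.9.0 images, the Celery stack can be refreshed with standard docker-compose commands (a sketch; the file name comes from this repository):

    docker-compose -f docker-compose-CeleryExecutor.yml pull
    docker-compose -f docker-compose-CeleryExecutor.yml up -d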
2 changes: 1 addition & 1 deletion docker-compose-LocalExecutor.yml
@@ -8,7 +8,7 @@ services:
- POSTGRES_DB=airflow

webserver:
image: puckel/docker-airflow:1.8.2
image: puckel/docker-airflow:1.9.0
restart: always
depends_on:
- postgres
