From 0adbf25f76666ac339fe37ad798b980f41e35678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Thu, 3 Dec 2020 21:45:31 +0100 Subject: [PATCH 1/2] Don't run the pipeline on a regular schedule Not specifying a `schedule_interval` means that Airflow assumes a daily schedule. This means that if the scheduler sees that a DAG is not paused and behind schedule, the DAG will be started automatically. Since we only want to start our DAG manually, we have to set `schedule_interval` to `None`. --- src/egon/data/airflow/dags/pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/egon/data/airflow/dags/pipeline.py b/src/egon/data/airflow/dags/pipeline.py index 602ece806..c927557ef 100644 --- a/src/egon/data/airflow/dags/pipeline.py +++ b/src/egon/data/airflow/dags/pipeline.py @@ -10,6 +10,7 @@ "egon-data-processing-pipeline", description="The eGo^N data processing DAG.", default_args={"start_date": days_ago(1)}, + schedule_interval=None, ) as pipeline: setup = PythonOperator(task_id="initdb", python_callable=initdb) From 9d8d01fefb45c6712d9ba7294a3d19bf150ab4b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20G=C3=BCnther?= Date: Thu, 3 Dec 2020 21:21:20 +0100 Subject: [PATCH 2/2] Switch DAGs "On" by default The on/off button toggling the pause state is easy to miss, which leads to a lot of confusion and even bug reports. This commit changes the configuration to switch DAGs "On" by default, both on a global level, as well as on the individual "egon-data-processing-pipeline". The latter is redundant, but kept in the code as an example, because the parameter was surprisingly hard to find. This fixes #33 and ticks off an item in issue #65. 
--- src/egon/data/airflow/airflow.cfg | 2 +- src/egon/data/airflow/dags/pipeline.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/egon/data/airflow/airflow.cfg b/src/egon/data/airflow/airflow.cfg index 6cabc477e..699a68f58 100644 --- a/src/egon/data/airflow/airflow.cfg +++ b/src/egon/data/airflow/airflow.cfg @@ -125,7 +125,7 @@ parallelism = 32 dag_concurrency = 16 # Are DAGs paused by default at creation -dags_are_paused_at_creation = True +dags_are_paused_at_creation = False # The maximum number of active DAG runs per DAG max_active_runs_per_dag = 16 diff --git a/src/egon/data/airflow/dags/pipeline.py b/src/egon/data/airflow/dags/pipeline.py index c927557ef..1ba44231d 100644 --- a/src/egon/data/airflow/dags/pipeline.py +++ b/src/egon/data/airflow/dags/pipeline.py @@ -10,6 +10,7 @@ "egon-data-processing-pipeline", description="The eGo^N data processing DAG.", default_args={"start_date": days_ago(1)}, + is_paused_upon_creation=False, schedule_interval=None, ) as pipeline: setup = PythonOperator(task_id="initdb", python_callable=initdb)