From d93724434b83246f995df80c84c31f4d43967d55 Mon Sep 17 00:00:00 2001
From: Dyakov Roman
Date: Wed, 9 Aug 2023 23:35:33 +0300
Subject: [PATCH] Starting (#4)

* Initial commit

* Black + isort

* Update docker-compose.yaml

* Some Dockerfile changes + test addition

* Test changes

* Fixing

* Tests addition

* Changes to Dockerfile

* Update Dockerfile

* Start.sh addition

* Add local run

* Postgres addition

* Update Dockerfile

---------

Co-authored-by: Stanislav Roslavtsev
---
Reviewer note: line structure restored; changes relative to the original patch:
  - start.sh: clone guards rewritten as `if` blocks; the scheduler is started
    in the background so it runs alongside the webserver; comments translated.
  - docker-compose.yaml: bind mount now points at ./start_local.sh.
  - Dockerfile: octal COPY --chmod, exec-form CMD, curl --fail in HEALTHCHECK.
  - dags.py: placeholder-free f-strings turned into plain strings; docs added.
The `index` blob ids below are carried over from the original patch and were
not recomputed.

 Dockerfile          | 19 +++++++++++++++++++
 dags.py             | 38 ++++++++++++++++++++++++++++++++++++++
 docker-compose.yaml | 24 ++++++++++++++++++++++++
 requirements.txt    | 12 ++++++++++++
 start.sh            | 25 +++++++++++++++++++++++++
 start_local.sh      |  8 ++++++++
 6 files changed, 126 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 dags.py
 create mode 100644 docker-compose.yaml
 create mode 100644 requirements.txt
 create mode 100755 start.sh
 create mode 100755 start_local.sh

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..cadda1f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11
+ENV AIRFLOW__CORE__LOAD_EXAMPLES=false
+ENV DEBIAN_FRONTEND=noninteractive
+ENV AIRFLOW__CORE__EXECUTOR=LocalExecutor
+
+# Linux block
+COPY requirements.txt .
+RUN apt-get update \
+    && apt-get -yq install \
+        postgresql \
+        postgresql-contrib \
+    && python3 -m pip install --no-cache-dir -r ./requirements.txt
+
+# Run block
+COPY --chmod=755 ./start.sh /start.sh
+# Exec form: start.sh runs as PID 1 and receives stop signals directly
+CMD ["/start.sh"]
+# --fail makes curl exit non-zero on HTTP error responses
+HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD [ "curl", "--fail", "http://localhost:8080" ]
diff --git a/dags.py b/dags.py
new file mode 100644
index 0000000..79e868a
--- /dev/null
+++ b/dags.py
@@ -0,0 +1,38 @@
+"""Infrastructure DAGs that refresh the DAG repositories cloned by start.sh."""
+
+from datetime import datetime, timedelta
+
+from airflow import DAG
+from airflow.operators.bash import BashOperator
+
+# Every 5 minutes: hard-reset the dwh-pipelines checkout and sync its submodules.
+with DAG(
+    dag_id="git_pull_pipelines",
+    start_date=datetime(2022, 1, 1),
+    schedule_interval=timedelta(minutes=5),
+    catchup=False,
+    tags=["infra"],
+    default_args={"owner": "infra", "retries": 3, "retry_delay": timedelta(minutes=5)},
+) as dag:
+    BashOperator(
+        task_id="git_update",
+        bash_command="cd /root/airflow/dags/dwh-pipelines "
+        "&& git reset --hard"
+        "&& git submodule update --init --recursive ",
+    )
+
+# Every 5 minutes: fetch/pull the airflow_test checkout and sync its submodules.
+with DAG(
+    dag_id="git_pull_tests",
+    start_date=datetime(2022, 1, 1),
+    schedule_interval=timedelta(minutes=5),
+    catchup=False,
+    tags=["infra"],
+    default_args={"owner": "infra", "retries": 3, "retry_delay": timedelta(minutes=5)},
+) as test_dag:
+    BashOperator(
+        task_id="git_update",
+        bash_command="cd /root/airflow/dags/airflow_test "
+        "&& git fetch && git pull "
+        "&& git submodule update --init --recursive ",
+    )
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 0000000..43e9f95
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,24 @@
+version: '3.8'
+
+services:
+  airflow:
+    build: .
+    ports:
+      - 8080:8080
+    deploy:
+      restart_policy:
+        condition: on-failure
+    environment:
+      - AIRFLOW_ENV=DEV
+      - AIRFLOW__DATABASE__SQL_ALCHEMY_CONN=postgresql+psycopg2://postgres@postgres:5432/postgres
+    volumes:
+      # start.sh sources /start_inc.sh when present; mount the local-run hook there
+      - ./start_local.sh:/start_inc.sh
+
+  postgres:
+    image: postgres:latest
+    restart: always
+    environment:
+      POSTGRES_HOST_AUTH_METHOD: trust
+    ports:
+      - 5432:5432
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..de542c2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,12 @@
+requests
+pandas
+beautifulsoup4
+gunicorn
+cryptography
+celery
+flower
+psycopg2_binary
+apache-airflow
+apache-airflow-providers-common-sql
+apache-airflow-providers-postgres
+google-api-python-client
diff --git a/start.sh b/start.sh
new file mode 100755
index 0000000..41d825c
--- /dev/null
+++ b/start.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# If a DAG folder does not exist yet, create it and clone the DAG repository into it.
+# An explicit `if` is required: `A || B && C` groups as `(A || B) && C`, which would
+# re-run `git clone` (and fail) whenever the folder is already present.
+if [[ ! -d /root/airflow/dags/dwh-pipelines ]]; then
+    mkdir -p /root/airflow/dags/dwh-pipelines \
+        && git clone --recurse-submodules -b main https://github.com/profcomff/dwh-pipelines.git /root/airflow/dags/dwh-pipelines
+fi
+
+if [[ ! -d /root/airflow/dags/airflow_test ]]; then
+    mkdir -p /root/airflow/dags/airflow_test \
+        && git clone --recurse-submodules -b main https://github.com/Men-of-Honest-Fate/airflow_test.git /root/airflow/dags/airflow_test
+fi
+
+# Initialize the DB or run migrations to upgrade it
+airflow db init
+
+# If the extension file exists, source it as well
+[ -f /start_inc.sh ] && source /start_inc.sh
+
+# `airflow webserver` stays in the foreground, so `webserver && scheduler` would never
+# start the scheduler; run the scheduler in the background instead.
+airflow scheduler &
+exec airflow webserver
diff --git a/start_local.sh b/start_local.sh
new file mode 100755
index 0000000..f236303
--- /dev/null
+++ b/start_local.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+airflow users create \
+    --username admin \
+    --firstname Local \
+    --lastname User \
+    --role Admin \
+    --email user@localhost