Skip to content

Commit

Permalink
Adding scaffold
Browse files Browse the repository at this point in the history
  • Loading branch information
James Bristow committed Mar 10, 2024
1 parent dab2bdb commit 9afbaca
Show file tree
Hide file tree
Showing 19 changed files with 6,559 additions and 2 deletions.
165 changes: 165 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# NOTE: unlike .gitignore, .dockerignore patterns are matched relative to the
# root of the build context. A bare "__pycache__/" or "*.pyc" only matches at
# the top level, so patterns meant to apply at any depth carry a "**/" prefix.

# Project-specific, top-level entries
.git
.github
mlruns
outputs
bento

# Local environment file: contains credentials and is not needed in the image
.env

# Byte-compiled / optimized / DLL files
**/__pycache__/
**/*.py[cod]
**/*$py.class

# C extensions
**/*.so

# Distribution / packaging
**/.Python
**/build/
**/develop-eggs/
**/dist/
**/downloads/
**/eggs/
**/.eggs/
**/lib/
**/lib64/
**/parts/
**/sdist/
**/var/
**/wheels/
share/python-wheels/
**/*.egg-info/
**/.installed.cfg
**/*.egg
**/MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
**/*.manifest
**/*.spec

# Installer logs
**/pip-log.txt
**/pip-delete-this-directory.txt

# Unit test / coverage reports
**/htmlcov/
**/.tox/
**/.nox/
**/.coverage
**/.coverage.*
**/.cache
**/nosetests.xml
**/coverage.xml
**/*.cover
**/*.py,cover
**/.hypothesis/
**/.pytest_cache/
**/cover/

# Translations
**/*.mo
**/*.pot

# Django stuff:
**/*.log
**/local_settings.py
**/db.sqlite3
**/db.sqlite3-journal

# Flask stuff:
**/instance/
**/.webassets-cache

# Scrapy stuff:
**/.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
**/.pybuilder/
**/target/

# Jupyter Notebook
**/.ipynb_checkpoints

# IPython
**/profile_default/
**/ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
# (poetry.lock must stay in the build context: the Dockerfile copies it.)
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
**/.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
**/__pypackages__/

# Celery stuff
**/celerybeat-schedule
**/celerybeat.pid

# SageMath parsed files
**/*.sage.py

# Environments
**/.venv
**/env/
**/venv/
**/ENV/
**/env.bak/
**/venv.bak/

# Spyder project settings
**/.spyderproject
**/.spyproject

# Rope project settings
**/.ropeproject

# mkdocs documentation
/site

# mypy
**/.mypy_cache/
**/.dmypy.json
**/dmypy.json

# Pyre type checker
**/.pyre/

# pytype static type analyzer
**/.pytype/

# Cython debug symbols
**/cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
37 changes: 37 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Project
# PROJECT_NAME is reused below as the database name (DB_NAME) and as the image
# name inside GITHUB_CONTAINER_REPO.
PROJECT_NAME=time-series-airflow-kafka-spark

# Airflow
# NOTE(review): the *_TAG values in this file look like container image tags,
# presumably consumed by a compose file or build script not shown here - confirm.
AIRFLOW_TAG=2.8.2-python3.10

# Spark
SPARK_TAG=3.5

# Kafka
KAFKA_TAG=6.2.0

# Python
# The Dockerfile declares ARG PYTHON_TAG and uses it in `FROM python:${PYTHON_TAG}`.
PYTHON_TAG=3.10.13-bullseye

# BentoML
# Both paths live under /workspace, which is the Dockerfile's WORKDIR.
BENTOML_HOME=/workspace/bento
BENTOML_CONFIG=/workspace/bentoml_configuration.yaml

# Postgres
# NOTE(review): DB_USER / DB_PASSWORD are plain-text values in a file that is
# tracked in git (this commit removes .env from .gitignore). Treat them as
# local-development placeholders only; do not reuse them for a real deployment.
POSTGRES_TAG=15.3-bullseye
DB_NAME=$PROJECT_NAME
DB_USER=user
DB_PASSWORD=password

# Adminer
ADMINER_TAG=4.7.9-standalone

# MLFlow
# Connection URI assembled from the Postgres values above; it targets host
# "postgres" on port 5432.
MLFLOW_BACKEND_STORE_URI=postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}

# Minio
MINIO_TAG=RELEASE.2023-05-27T05-56-19Z
MINIO_MC_TAG=RELEASE.2023-05-30T22-41-38Z

# Container
# Full image reference (registry/owner/name:version) on ghcr.io.
GITHUB_CONTAINER_REPO=ghcr.io/jbris/${PROJECT_NAME}:1.0.0
29 changes: 29 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Manually triggered workflow: builds the image with scripts/build.sh, logs in
# to ghcr.io, then pushes with scripts/push.sh.
name: Docker Build

on:
  workflow_dispatch: {}

# Once a `permissions` block is present, every scope not listed is set to
# `none`. `contents: read` is therefore listed explicitly so actions/checkout
# can still fetch the repository (required if the repository is private).
permissions:
  actions: write
  contents: read
  packages: write

jobs:
  build-server:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Docker build
        run: bash ./scripts/build.sh
      # Login happens after the build and before the push, which is the only
      # step here that needs registry credentials.
      - name: Login to GitHub Package Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GHCR_TOKEN }}
      - name: Docker push
        run: bash ./scripts/push.sh
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
env/
venv/
Expand Down Expand Up @@ -158,3 +157,7 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

mlruns
outputs
bento
10 changes: 10 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
    "editor.formatOnSave": false,
    "python.linting.lintOnSave": true,
    "[python]": {
        "editor.defaultFormatter": "ms-python.black-formatter",
        "editor.insertSpaces": true,
        "editor.tabSize": 4,
        "editor.formatOnSave": true
    }
}
33 changes: 33 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Tag of the official python image used for every stage. The default mirrors
# PYTHON_TAG in the project's .env so a plain `docker build .` still resolves
# to a valid base image; override with --build-arg PYTHON_TAG=<tag>.
ARG PYTHON_TAG=3.10.13-bullseye

# --- base: OS packages shared by the builder and runtime stages --------------
FROM python:${PYTHON_TAG} AS base

WORKDIR /workspace

# update + install + cleanup happen in a single layer so the apt lists never
# persist in the image. --no-install-recommends keeps optional packages out.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        libpq-dev \
        openjdk-17-jdk \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /tmp/downloaded_packages

# --- builder: resolves Python dependencies into /workspace/.venv -------------
FROM base AS builder

# The RUN below pipes curl into python3; with the default /bin/sh a failed
# download would be masked by the exit status of the last pipeline command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# POETRY_VIRTUALENVS_IN_PROJECT places the virtualenv at ./.venv, i.e.
# /workspace/.venv, which is the path the runtime stage copies.
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache \
    PATH="/root/.local/bin:$PATH"

# Only the dependency manifests are copied, so this layer and the install
# below stay cached until pyproject.toml or poetry.lock change.
COPY pyproject.toml poetry.lock ./

# Install Poetry, install the locked dependencies (not the project itself),
# then remove the cache and Poetry again within the same layer.
# -f makes curl exit non-zero on an HTTP error instead of piping an error page.
RUN curl -fsSL https://install.python-poetry.org | python3 - \
    && poetry install --no-root \
    && rm -rf "$POETRY_CACHE_DIR" \
    && curl -fsSL https://install.python-poetry.org | python3 - --uninstall

# --- runtime: base image plus the prebuilt virtualenv ------------------------
# NOTE(review): no USER is set, so containers run as root - confirm whether
# that is intended for this image.
FROM base AS runtime

# Putting the virtualenv's bin directory first on PATH makes its interpreter
# and console scripts the defaults.
ENV VIRTUAL_ENV=/workspace/.venv \
    PATH="/workspace/.venv/bin:$PATH"

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# time-series-airflow-kafka-spark
# TAKS

A simple demonstration of a Time Series-Airflow-Kafka-Spark (TAKS) stack.
3 changes: 3 additions & 0 deletions bentoml_configuration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# BentoML configuration (schema version 1).
version: 1
api_server:
  # `workers` must be nested under `api_server`; as a top-level key it would
  # not configure the API server.
  workers: 4
Loading

0 comments on commit 9afbaca

Please sign in to comment.