192 changes: 129 additions & 63 deletions affirm/docker-compose-bootcamp.yml
@@ -1,14 +1,9 @@
# Chronon Bootcamp Docker Compose - Minimal Setup for Learning
# This setup includes only the essential services needed for GroupBy development

services:

# MinIO for S3-compatible storage (ESSENTIAL for Iceberg)
minio:
image: minio/minio:latest
ports:
- "9000:9000" # API
- "9001:9001" # Console
- "9000:9000"
- "9001:9001"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin
@@ -21,7 +16,6 @@ services:
timeout: 20s
retries: 3
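
The catalog wiring further down assumes a chronon bucket already exists in MinIO (the warehouse path is s3a://chronon/warehouse). A minimal sketch for creating it from the host with boto3 — an assumption, since nothing in this compose file installs it:

    import boto3

    # Point boto3 at the MinIO API port published above; the "chronon" bucket
    # backs the s3a://chronon/warehouse path used by the Iceberg catalog below.
    s3 = boto3.client(
        "s3",
        endpoint_url="http://localhost:9000",
        aws_access_key_id="minioadmin",
        aws_secret_access_key="minioadmin",
        region_name="us-east-1",
    )
    s3.create_bucket(Bucket="chronon")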

# MongoDB - Chronon's built-in KV store (ESSENTIAL for online serving)
mongodb:
image: mongo:latest
ports:
@@ -37,71 +31,124 @@ services:
timeout: 10s
retries: 3
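
A quick way to confirm the KV store is reachable before testing online serving — a sketch with pymongo (an assumption, not part of this setup), using the admin/admin credentials that CHRONON_ONLINE_ARGS passes further down:

    from pymongo import MongoClient

    # Credentials mirror CHRONON_ONLINE_ARGS (-Zuser=admin -Zpassword=admin).
    client = MongoClient("mongodb://admin:admin@localhost:27017/admin")
    print(client.admin.command("ping"))  # {'ok': 1.0} once MongoDB is healthy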

# Spark Master (ESSENTIAL for GroupBy computation)
polaris:
image: apache/polaris:latest
ports:
- "8181:8181"
environment:
AWS_REGION: us-east-1
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
depends_on:
minio:
condition: service_started
healthcheck:
test: ["CMD-SHELL", "curl -f http://localhost:8182/q/health || exit 1"]
interval: 10s
timeout: 5s
retries: 10
start_period: 20s
volumes:
- polaris_data:/var/polaris
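
Note the healthcheck probes Polaris's management port 8182, which this file does not publish; only 8181 (the catalog API) is mapped. A sketch of the same liveness probe from the host, assuming you also map 8182 or exec into the container:

    import requests

    # Same endpoint the compose healthcheck hits inside the container.
    r = requests.get("http://localhost:8182/q/health", timeout=5)
    r.raise_for_status()
    print(r.status_code)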

spark-master:
image: bitnami/spark:3.5.0
image: apache/spark:3.5.2
ports:
- "8080:8080" # Spark UI
- "7077:7077" # Spark Master
- "8080:8080"
- "7077:7077"
environment:
- SPARK_MODE=master
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
SPARK_MASTER_HOST: spark-master
SPARK_MASTER_PORT: "7077"
SPARK_MASTER_WEBUI_PORT: "8080"
SPARK_DAEMON_JAVA_OPTS: -Dspark.deploy.recoveryMode=FILESYSTEM -Dspark.deploy.recoveryDirectory=/tmp/spark-recovery
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
AWS_REGION: us-east-1
command:
[
"/opt/spark/bin/spark-class","org.apache.spark.deploy.master.Master",
"--host","spark-master","--port","7077","--webui-port","8080"
]
volumes:
- spark_events:/opt/bitnami/spark/spark-events
- spark_events:/opt/spark/spark-events
depends_on:
polaris:
condition: service_healthy
healthcheck:
test: ["CMD-SHELL", "ps -ef | grep -q '[o]rg.apache.spark.deploy.master.Master' || exit 1; wget -qO- http://127.0.0.1:8080 >/dev/null || true"]
interval: 5s
timeout: 5s
retries: 24
start_period: 5s
restart: unless-stopped

# Spark Worker (ESSENTIAL for GroupBy computation)
spark-worker:
image: bitnami/spark:3.5.0
image: apache/spark:3.5.2
depends_on:
- spark-master
- minio
spark-master:
condition: service_healthy
minio:
condition: service_started
command:
[
"/opt/spark/bin/spark-class","org.apache.spark.deploy.worker.Worker",
"spark://spark-master:7077",
"--cores","2","--memory","2G",
"--webui-port","8081"
]
environment:
- SPARK_MODE=worker
- SPARK_MASTER_URL=spark://spark-master:7077
- SPARK_WORKER_MEMORY=2G
- SPARK_WORKER_CORES=2
- SPARK_RPC_AUTHENTICATION_ENABLED=no
- SPARK_RPC_ENCRYPTION_ENABLED=no
- SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no
- SPARK_SSL_ENABLED=no
volumes: []
scale: 2
SPARK_WORKER_DIR: /opt/spark/work-dir
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
AWS_REGION: us-east-1
volumes:
- spark_events:/opt/spark/spark-events
restart: unless-stopped

# Chronon Main Container (ESSENTIAL for GroupBy execution)
chronon-main:
image: ezvz/chronon
command: bash -c "spark-shell -i scripts/data-loader.scala && tail -f /dev/null"
ports:
- "4040:4040" # Spark UI
- "4041:4041" # Spark UI (backup)
- "4040:4040"
- "4041:4041"
environment:
- USER=root
- SPARK_SUBMIT_PATH=spark-submit
- PYTHONPATH=/srv/chronon
- SPARK_VERSION=3.5.2
- JOB_MODE=spark://spark-master:7077
- PARALLELISM=4
- EXECUTOR_MEMORY=2G
- EXECUTOR_CORES=2
- DRIVER_MEMORY=1G
- CHRONON_LOG_TABLE=default.chronon_log_table
- CHRONON_ONLINE_CLASS=ai.chronon.quickstart.online.ChrononMongoOnlineImpl
- CHRONON_ONLINE_ARGS=-Zuser=admin -Zpassword=admin -Zhost=mongodb -Zport=27017 -Zdatabase=admin
# Iceberg Configuration with Spark's built-in Hive catalog
- SPARK_SQL_EXTENSIONS=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
- SPARK_SQL_CATALOG_SPARK_CATALOG=org.apache.iceberg.spark.SparkSessionCatalog
- SPARK_SQL_CATALOG_SPARK_CATALOG_TYPE=hive
- SPARK_SQL_CATALOG_SPARK_CATALOG_WAREHOUSE=s3a://chronon/warehouse
- SPARK_CHRONON_TABLE_WRITE_FORMAT=iceberg
- SPARK_CHRONON_TABLE_READ_FORMAT=iceberg
# S3 Configuration
- AWS_ACCESS_KEY_ID=minioadmin
- AWS_SECRET_ACCESS_KEY=minioadmin
- S3_ENDPOINT=http://minio:9000
- S3_PATH_STYLE_ACCESS=true
USER: root
SPARK_SUBMIT_PATH: spark-submit
PYTHONPATH: /srv/chronon
SPARK_VERSION: "3.5.2"
JOB_MODE: spark://spark-master:7077
PARALLELISM: "4"
EXECUTOR_MEMORY: 2G
EXECUTOR_CORES: "2"
DRIVER_MEMORY: 1G
CHRONON_LOG_TABLE: default.chronon_log_table
CHRONON_ONLINE_CLASS: ai.chronon.quickstart.online.ChrononMongoOnlineImpl
CHRONON_ONLINE_ARGS: -Zuser=admin -Zpassword=admin -Zhost=mongodb -Zport=27017 -Zdatabase=admin
SPARK_CHRONON_TABLE_WRITE_FORMAT: iceberg
SPARK_CHRONON_TABLE_READ_FORMAT: iceberg
AWS_ACCESS_KEY_ID: minioadmin
AWS_SECRET_ACCESS_KEY: minioadmin
AWS_REGION: us-east-1
S3_ENDPOINT: http://minio:9000
S3_PATH_STYLE_ACCESS: "true"

# === Added Iceberg + Polaris catalog wiring ===
PYSPARK_SUBMIT_ARGS: >
--packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.0,org.apache.hadoop:hadoop-aws:3.3.6
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
--conf spark.sql.defaultCatalog=spark_catalog
--conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog
--conf spark.sql.catalog.spark_catalog.type=rest
--conf spark.sql.catalog.spark_catalog.uri=http://polaris:8181/api/catalog
--conf spark.sql.catalog.spark_catalog.warehouse=s3a://chronon/warehouse
--conf spark.sql.catalog.spark_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO
--conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
--conf spark.hadoop.fs.s3a.path.style.access=true
--conf spark.hadoop.fs.s3a.access.key=minioadmin
--conf spark.hadoop.fs.s3a.secret.key=minioadmin
--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
--conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider
pyspark-shell
volumes:
- ../api/py/test/sample:/srv/chronon
- ./scripts:/srv/scripts
@@ -110,16 +157,34 @@ services:
- spark-master
- minio
- mongodb
- polaris
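
With the PYSPARK_SUBMIT_ARGS wiring above, a short PySpark session inside chronon-main can smoke-test the Iceberg-over-Polaris path. A sketch, with an illustrative namespace/table name and assuming Polaris accepts the catalog requests as configured here:

    from pyspark.sql import SparkSession

    # Run inside chronon-main (e.g. docker compose exec chronon-main pyspark);
    # PYSPARK_SUBMIT_ARGS supplies the Iceberg runtime and catalog confs.
    spark = SparkSession.builder.appName("iceberg-smoke-test").getOrCreate()

    spark.sql("CREATE NAMESPACE IF NOT EXISTS smoke")
    spark.sql("CREATE TABLE IF NOT EXISTS smoke.t (id BIGINT) USING iceberg")
    spark.sql("INSERT INTO smoke.t VALUES (1)")  # files land under s3a://chronon/warehouse
    spark.sql("SELECT * FROM smoke.t").show()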

# Jupyter Notebook (ESSENTIAL for data exploration and verification)
jupyter:
image: jupyter/pyspark-notebook:latest
ports:
- "8888:8888"
environment:
- JUPYTER_ENABLE_LAB=yes
- SPARK_MASTER=spark://spark-master:7077
- JUPYTER_TOKEN=chronon-dev
JUPYTER_ENABLE_LAB: "yes"
SPARK_MASTER: spark://spark-master:7077
JUPYTER_TOKEN: chronon-dev

# === Add Iceberg runtime + catalog config ===
PYSPARK_SUBMIT_ARGS: >
--packages org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.10.0,org.apache.hadoop:hadoop-aws:3.3.6
--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
--conf spark.sql.defaultCatalog=spark_catalog
--conf spark.sql.catalog.spark_catalog=org.apache.iceberg.spark.SparkCatalog
--conf spark.sql.catalog.spark_catalog.type=rest
--conf spark.sql.catalog.spark_catalog.uri=http://polaris:8181/api/catalog
--conf spark.sql.catalog.spark_catalog.warehouse=s3a://chronon/warehouse
--conf spark.sql.catalog.spark_catalog.io-impl=org.apache.iceberg.aws.s3.S3FileIO
--conf spark.hadoop.fs.s3a.endpoint=http://minio:9000
--conf spark.hadoop.fs.s3a.path.style.access=true
--conf spark.hadoop.fs.s3a.access.key=minioadmin
--conf spark.hadoop.fs.s3a.secret.key=minioadmin
--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem
--conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider
pyspark-shell
volumes:
- ../api/py:/home/jovyan/work/chronon-api
- ../api/py/test/sample:/home/jovyan/work/sample-data
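
Since PYSPARK_SUBMIT_ARGS is set on the container, a notebook only needs a plain SparkSession to pick up the Iceberg packages and the Polaris catalog. A sketch for a first cell:

    import os
    from pyspark.sql import SparkSession

    # getOrCreate() inherits PYSPARK_SUBMIT_ARGS; the master URL comes from
    # the SPARK_MASTER variable defined above.
    spark = (
        SparkSession.builder
        .appName("bootcamp-notebook")
        .master(os.environ["SPARK_MASTER"])
        .getOrCreate()
    )
    spark.sql("SHOW NAMESPACES").show()  # namespaces served by the Polaris REST catalog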
@@ -131,3 +196,4 @@ volumes:
minio_data:
mongodb_data:
spark_events:
polaris_data: