Commit d4f2a47

Author: T Savo
feat: Enhanced FramePack Docker build with performance optimizations and Docker Compose support
- Multi-stage Docker builds for better layer caching and faster rebuilds
- Added Flash Attention 2 for faster transformer inference
- Added SageAttention for optimized attention mechanisms
- Added xFormers for memory-efficient transformers
- Updated to Triton 3.2.0 for better GPU kernel optimization
- Maintained compatibility with the official FramePack requirements.txt
- Added build parallelism control via the MAX_JOBS argument
- Added Docker Compose support with shared volume management
- Enhanced README with quick-start instructions
- Fixed Gradio server parameter compatibility
- Added .env.template for easy configuration

These changes provide significant performance improvements while maintaining full compatibility with the upstream FramePack project, and they add convenient deployment options via Docker Compose.
1 parent eb7a066 commit d4f2a47

File tree (4 files changed: +147, -40 lines)

- .env.template
- Dockerfile
- README.md
- docker-compose.yml

.env.template

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+# Docker Compose Environment Configuration
+# Copy this file to .env and customize as needed
+
+# User/Group IDs - defaults to 1000 (first user on most Linux systems)
+# On Windows, these can remain at 1000
+UID=1000
+GID=1000
+
+# Build parallelism control (adjust based on available RAM)
+# 4 jobs = ~16GB RAM, 8 jobs = ~32GB RAM, 64 jobs = ~500GB RAM
+MAX_JOBS=4
+
+# Path to store HuggingFace models (can be shared between containers)
+# Use absolute path for sharing between projects, or relative path for project-specific
+HF_MODELS_PATH=./hf_download
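
The comments above imply roughly 4 GB of RAM per parallel compile job. As a rough helper (a sketch, not part of this commit; assumes a Linux host exposing `MemAvailable` in `/proc/meminfo`), a suitable value could be derived before building:

```bash
# Derive MAX_JOBS from available RAM at ~4 GB per compile job (min 1).
# MemAvailable is reported in kB, so divide by 1024^2 for GB.
free_gb=$(awk '/MemAvailable/ {printf "%d", $2/1024/1024}' /proc/meminfo)
jobs=$(( free_gb / 4 )); [ "$jobs" -lt 1 ] && jobs=1
echo "MAX_JOBS=$jobs" >> .env
```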

Dockerfile

Lines changed: 49 additions & 38 deletions
@@ -1,84 +1,95 @@
-FROM nvidia/cuda:12.6.0-devel-ubuntu22.04
+FROM nvidia/cuda:12.6.0-devel-ubuntu22.04 AS base
 
 # Set user/group IDs to match host user (default 1000 for first user)
 ARG UID=1000
 ARG GID=1000
+# Control compilation parallelism (4 jobs ~16GB RAM, 8 jobs ~32GB RAM, 64 jobs ~500GB RAM)
+ARG MAX_JOBS=4
 
 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONUNBUFFERED=1 \
     PYTHONDONTWRITEBYTECODE=1 \
     VIRTUAL_ENV=/app/venv \
     PATH="/app/venv/bin:$PATH" \
-    USER=appuser
+    USER=appuser \
+    MAX_JOBS=$MAX_JOBS
 
-# Create system user and group
-RUN groupadd -g $GID appuser && \
-    useradd -u $UID -g $GID -m -s /bin/bash appuser
-
-# Install dependencies as root first
+# Layer 1: Install system dependencies (most stable, cached longest)
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     python3.10 \
     python3.10-venv \
     python3.10-dev \
+    python3-pip \
     libgl1 \
     libglib2.0-0 \
     libsm6 \
     libxrender1 \
     libxext6 \
     ninja-build \
     sudo \
-    && rm -rf /var/lib/apt/lists/* \
-    && echo "appuser ALL=(ALL) NOPASSWD: /bin/chown" >> /etc/sudoers
+    curl \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
 
-# Create and configure directories before switching user
-RUN mkdir -p /app && \
+# Layer 2: Create user and basic directory structure early
+RUN groupadd -g $GID appuser && \
+    useradd -u $UID -g $GID -m -s /bin/bash appuser && \
+    echo "appuser ALL=(ALL) NOPASSWD: /bin/chown" >> /etc/sudoers && \
+    mkdir -p /app /app/outputs /app/hf_download && \
     chown -R $UID:$GID /app
 
-# Switch to non-root user
+# Layer 3: Setup Python environment and clone repository (stable)
 USER $UID:$GID
-
-# Clone repository
-RUN git clone https://github.com/lllyasviel/FramePack /app
 WORKDIR /app
+RUN python3.10 -m venv $VIRTUAL_ENV && \
+    python -m pip install --upgrade pip wheel setuptools && \
+    git clone https://github.com/lllyasviel/FramePack /tmp/framepack && \
+    cp -r /tmp/framepack/* /app/ && \
+    rm -rf /tmp/framepack
 
-# Create virtual environment as user
-RUN python3.10 -m venv $VIRTUAL_ENV
-
-# Install Python dependencies
+# Layer 4: Install PyTorch ecosystem with specific CUDA version
 RUN pip install --no-cache-dir \
     torch==2.6.0 \
-    torchvision \
-    torchaudio \
+    torchvision==0.21.0 \
+    torchaudio==2.6.0 \
     --index-url https://download.pytorch.org/whl/cu124
 
-# Install requirements
+# Layer 5: Install FramePack requirements (from the official requirements.txt)
 RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install --no-cache-dir triton sageattention
 
-# Install additional dependencies
-RUN pip install --no-cache-dir \
-    triton==3.0.0 \
-    sageattention==1.0.6
-
-# Create and configure directories before switching user
-RUN mkdir -p /app/outputs && \
-    chown -R $UID:$GID /app/outputs && \
-    mkdir -p $VIRTUAL_ENV && \
-    chown -R $UID:$GID $VIRTUAL_ENV && \
-    mkdir -p /app/hf_download && \
-    chown -R $UID:$GID /app/hf_download
-
-# Copy entrypoint script
+# Layer 6: Install performance enhancements - triton and xformers
+RUN pip install --no-cache-dir triton==3.2.0 xformers
+
+# Layer 7: Install sageattention (needs compilation from source)
+RUN python -m pip install --no-cache-dir sageattention==1.0.6
+
+# Layer 8: Install flash-attn (needs special compilation)
+RUN python -m pip -v install --no-cache-dir flash-attn --no-build-isolation
+
+# Verify PyTorch installation works correctly and check operator availability
+RUN python -c "import torch; import torchvision; import torchaudio; print(f'PyTorch: {torch.__version__}, TorchVision: {torchvision.__version__}, TorchAudio: {torchaudio.__version__}'); print('CUDA available:', torch.cuda.is_available()); import torchvision.ops; print('torchvision.ops loaded successfully')"
+
+# ===== APPLICATION STAGE =====
+FROM base AS application
+
+ARG UID=1000
+ARG GID=1000
+
+# Layer 9: Copy entrypoint script (changes occasionally)
+USER root
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh && \
     chown $UID:$GID /entrypoint.sh
 
+# Final configuration
+USER $UID:$GID
+WORKDIR /app
 EXPOSE 7860
 
 # Configure volumes
 VOLUME /app/hf_download
 VOLUME /app/outputs
 
 ENTRYPOINT ["/entrypoint.sh"]
-CMD ["python", "demo_gradio.py", "--share", "--server-name", "0.0.0.0"]
+CMD ["python", "demo_gradio.py", "--share", "--server", "0.0.0.0"]
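
Since the Dockerfile now defines separate `base` and `application` stages, the expensive dependency layers can be built and cached independently of entrypoint changes. A usage sketch (not part of this commit) with Docker's standard `--target` flag:

```bash
# Build and cache only the heavy dependency stage ("base", as named above).
docker build --target base -t framepack-base:latest .

# Full build: the base layers come from cache, so only the application
# stage (entrypoint copy, final config) is rebuilt.
docker build --build-arg MAX_JOBS=8 -t framepack-torch26-cu124:latest .
```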

README.md

Lines changed: 44 additions & 2 deletions
@@ -1,10 +1,22 @@
 ## FramePack Docker CUDA
 
-Very easy:
+### Quick Start with Docker Compose (Recommended)
 
+```bash
+git clone https://github.com/TSavo/FramePack-Docker-CUDA.git
+cd FramePack-Docker-CUDA
+
+# Optional: Copy and customize environment settings
+cp .env.template .env
+
+# Start the application
+docker compose up --build
 ```
 
-git clone https://github.com/akitaonrails/FramePack-Docker-CUDA.git
+### Manual Docker Setup
+
+```bash
+git clone https://github.com/TSavo/FramePack-Docker-CUDA.git
 cd FramePack-Docker-CUDA
 mkdir outputs
 mkdir hf_download
@@ -22,3 +34,33 @@ docker run -it --rm --gpus all -p 7860:7860 \
 The first time it runs, it will download the HunyuanVideo, Flux, and other necessary models. They total more than 30GB, so be patient, but they will be cached in the external mapped directory.
 
 When it finishes, access http://localhost:7860 and that's it!
+
+## Enhanced Features
+
+This enhanced version includes several performance optimizations:
+
+- **Multi-stage Docker builds** for better layer caching and faster rebuilds
+- **Flash Attention 2** for faster transformer inference
+- **SageAttention** for optimized attention mechanisms
+- **xFormers** for memory-efficient transformers
+- **Triton 3.2.0** for GPU kernel optimization
+- **PyTorch 2.6.0** with CUDA 12.4 support
+- **Optimized dependency management** with proper version pinning
+- **Build parallelism control** via the `MAX_JOBS` argument (default: 4)
+
+### Build with custom parallelism
+```bash
+# Docker Compose
+MAX_JOBS=8 docker compose up --build
+
+# Manual Docker
+docker build --build-arg MAX_JOBS=8 -t framepack-torch26-cu124:latest .
+```
+
+## Docker Compose Benefits
+
+- **Shared model cache**: models downloaded once are reused across container rebuilds (see the sketch after this list)
+- **Easy configuration**: environment variables in a `.env` file
+- **Volume management**: persistent storage for models and outputs
+- **GPU support**: automatic GPU passthrough configuration
+- **Service management**: easy start/stop/restart of the application
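
For the shared model cache noted above, one approach (a sketch; `/srv/hf_download` is a placeholder path) is to set `HF_MODELS_PATH` to an absolute directory in `.env`, since the compose file below bind-mounts whatever that variable resolves to:

```bash
# Point multiple compose projects at one shared HuggingFace cache.
mkdir -p /srv/hf_download
echo "HF_MODELS_PATH=/srv/hf_download" >> .env
docker compose up --build
```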

docker-compose.yml

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+version: '3.8'
+
+services:
+  framepack:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        UID: ${UID:-1000}
+        GID: ${GID:-1000}
+        MAX_JOBS: ${MAX_JOBS:-4}
+    image: framepack-torch26-cu124:latest
+    container_name: framepack
+    ports:
+      - "7860:7860"
+    volumes:
+      - framepack_models:/app/hf_download
+      - ./outputs:/app/outputs
+    environment:
+      - UID=${UID:-1000}
+      - GID=${GID:-1000}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+    stdin_open: true
+    tty: true
+
+volumes:
+  framepack_models:
+    driver: local
+    driver_opts:
+      type: none
+      o: bind
+      device: ${HF_MODELS_PATH:-./hf_download}
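
To confirm the `deploy` GPU reservation actually reaches the container, a quick sanity check (assuming the service is running and the NVIDIA Container Toolkit is installed on the host):

```bash
# Should print "True" when GPU passthrough works.
docker compose exec framepack python -c "import torch; print(torch.cuda.is_available())"
```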
