Commit d4f2a47

Author: T Savo
feat: Enhanced FramePack Docker build with performance optimizations and Docker Compose support
- Multi-stage Docker builds for better layer caching and faster rebuilds
- Added Flash Attention 2 for faster transformer inference
- Added SageAttention for optimized attention mechanisms
- Added xFormers for memory-efficient transformers
- Updated to Triton 3.2.0 for better GPU kernel optimization
- Maintained compatibility with the official FramePack requirements.txt
- Added build parallelism control via the MAX_JOBS argument
- Added Docker Compose support with shared volume management
- Enhanced README with quick-start instructions
- Fixed Gradio server parameter compatibility
- Added .env.template for easy configuration

These changes provide significant performance improvements while maintaining full compatibility with the upstream FramePack project, and they add convenient deployment options via Docker Compose.
1 parent eb7a066 commit d4f2a47

File tree (4 files changed: +147, -40 lines)

- .env.template
- Dockerfile
- README.md
- docker-compose.yml

.env.template

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+# Docker Compose Environment Configuration
+# Copy this file to .env and customize as needed
+
+# User/Group IDs - defaults to 1000 (first user on most Linux systems)
+# On Windows, these can remain at 1000
+UID=1000
+GID=1000
+
+# Build parallelism control (adjust based on available RAM)
+# 4 jobs = ~16GB RAM, 8 jobs = ~32GB RAM, 64 jobs = ~500GB RAM
+MAX_JOBS=4
+
+# Path to store HuggingFace models (can be shared between containers)
+# Use absolute path for sharing between projects, or relative path for project-specific
+HF_MODELS_PATH=./hf_download
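
The comments above imply roughly 4 GB of RAM per parallel compile job. As a rough helper (a sketch, not part of this commit; assumes a Linux host exposing `MemAvailable` in `/proc/meminfo`), a suitable value could be derived before building:

```bash
# Derive MAX_JOBS from available RAM at ~4 GB per compile job (min 1).
# MemAvailable is reported in kB, so divide by 1024^2 for GB.
free_gb=$(awk '/MemAvailable/ {printf "%d", $2/1024/1024}' /proc/meminfo)
jobs=$(( free_gb / 4 )); [ "$jobs" -lt 1 ] && jobs=1
echo "MAX_JOBS=$jobs" >> .env
```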

Dockerfile

Lines changed: 49 additions & 38 deletions
@@ -1,84 +1,95 @@
-FROM nvidia/cuda:12.6.0-devel-ubuntu22.04
+FROM nvidia/cuda:12.6.0-devel-ubuntu22.04 AS base
 
 # Set user/group IDs to match host user (default 1000 for first user)
 ARG UID=1000
 ARG GID=1000
+# Control compilation parallelism (4 jobs ~16GB RAM, 8 jobs ~32GB RAM, 64 jobs ~500GB RAM)
+ARG MAX_JOBS=4
 
 ENV DEBIAN_FRONTEND=noninteractive \
     PYTHONUNBUFFERED=1 \
     PYTHONDONTWRITEBYTECODE=1 \
     VIRTUAL_ENV=/app/venv \
     PATH="/app/venv/bin:$PATH" \
-    USER=appuser
+    USER=appuser \
+    MAX_JOBS=$MAX_JOBS
 
-# Create system user and group
-RUN groupadd -g $GID appuser && \
-    useradd -u $UID -g $GID -m -s /bin/bash appuser
-
-# Install dependencies as root first
+# Layer 1: Install system dependencies (most stable, cached longest)
 RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     python3.10 \
     python3.10-venv \
     python3.10-dev \
+    python3-pip \
     libgl1 \
     libglib2.0-0 \
     libsm6 \
     libxrender1 \
     libxext6 \
     ninja-build \
     sudo \
-    && rm -rf /var/lib/apt/lists/* \
-    && echo "appuser ALL=(ALL) NOPASSWD: /bin/chown" >> /etc/sudoers
+    curl \
+    wget \
+    && rm -rf /var/lib/apt/lists/*
 
-# Create and configure directories before switching user
-RUN mkdir -p /app && \
+# Layer 2: Create user and basic directory structure early
+RUN groupadd -g $GID appuser && \
+    useradd -u $UID -g $GID -m -s /bin/bash appuser && \
+    echo "appuser ALL=(ALL) NOPASSWD: /bin/chown" >> /etc/sudoers && \
+    mkdir -p /app /app/outputs /app/hf_download && \
     chown -R $UID:$GID /app
 
-# Switch to non-root user
+# Layer 3: Setup Python environment and clone repository (stable)
 USER $UID:$GID
-
-# Clone repository
-RUN git clone https://github.com/lllyasviel/FramePack /app
 WORKDIR /app
+RUN python3.10 -m venv $VIRTUAL_ENV && \
+    python -m pip install --upgrade pip wheel setuptools && \
+    git clone https://github.com/lllyasviel/FramePack /tmp/framepack && \
+    cp -r /tmp/framepack/* /app/ && \
+    rm -rf /tmp/framepack
 
-# Create virtual environment as user
-RUN python3.10 -m venv $VIRTUAL_ENV
-
-# Install Python dependencies
+# Layer 4: Install PyTorch ecosystem with specific CUDA version
 RUN pip install --no-cache-dir \
     torch==2.6.0 \
-    torchvision \
-    torchaudio \
+    torchvision==0.21.0 \
+    torchaudio==2.6.0 \
     --index-url https://download.pytorch.org/whl/cu124
 
-# Install requirements
+# Layer 5: Install FramePack requirements (from the official requirements.txt)
 RUN pip install --no-cache-dir -r requirements.txt
-RUN pip install --no-cache-dir triton sageattention
 
-# Install additional dependencies
-RUN pip install --no-cache-dir \
-    triton==3.0.0 \
-    sageattention==1.0.6
-
-# Create and configure directories before switching user
-RUN mkdir -p /app/outputs && \
-    chown -R $UID:$GID /app/outputs && \
-    mkdir -p $VIRTUAL_ENV && \
-    chown -R $UID:$GID $VIRTUAL_ENV && \
-    mkdir -p /app/hf_download && \
-    chown -R $UID:$GID /app/hf_download
-
-# Copy entrypoint script
+# Layer 6: Install performance enhancements - triton and xformers
+RUN pip install --no-cache-dir triton==3.2.0 xformers
+
+# Layer 7: Install sageattention (needs compilation from source)
+RUN python -m pip install --no-cache-dir sageattention==1.0.6
+
+# Layer 8: Install flash-attn (needs special compilation)
+RUN python -m pip -v install --no-cache-dir flash-attn --no-build-isolation
+
+# Verify PyTorch installation works correctly and check operator availability
+RUN python -c "import torch; import torchvision; import torchaudio; print(f'PyTorch: {torch.__version__}, TorchVision: {torchvision.__version__}, TorchAudio: {torchaudio.__version__}'); print('CUDA available:', torch.cuda.is_available()); import torchvision.ops; print('torchvision.ops loaded successfully')"
+
+# ===== APPLICATION STAGE =====
+FROM base AS application
+
+ARG UID=1000
+ARG GID=1000
+
+# Layer 9: Copy entrypoint script (changes occasionally)
+USER root
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh && \
     chown $UID:$GID /entrypoint.sh
 
+# Final configuration
+USER $UID:$GID
+WORKDIR /app
 EXPOSE 7860
 
 # Configure volumes
 VOLUME /app/hf_download
 VOLUME /app/outputs
 
 ENTRYPOINT ["/entrypoint.sh"]
-CMD ["python", "demo_gradio.py", "--share", "--server-name", "0.0.0.0"]
+CMD ["python", "demo_gradio.py", "--share", "--server", "0.0.0.0"]
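
Since the Dockerfile now defines separate `base` and `application` stages, the expensive dependency layers can be built and cached independently of entrypoint changes. A usage sketch (not part of this commit) with Docker's standard `--target` flag:

```bash
# Build and cache only the heavy dependency stage ("base", as named above).
docker build --target base -t framepack-base:latest .

# Full build: the base layers come from cache, so only the application
# stage (entrypoint copy, final config) is rebuilt.
docker build --build-arg MAX_JOBS=8 -t framepack-torch26-cu124:latest .
```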

README.md

Lines changed: 44 additions & 2 deletions
@@ -1,10 +1,22 @@
 ## FramePack Docker CUDA
 
-Very easy:
+### Quick Start with Docker Compose (Recommended)
 
+```bash
+git clone https://github.com/TSavo/FramePack-Docker-CUDA.git
+cd FramePack-Docker-CUDA
+
+# Optional: Copy and customize environment settings
+cp .env.template .env
+
+# Start the application
+docker compose up --build
 ```
 
-git clone https://github.com/akitaonrails/FramePack-Docker-CUDA.git
+### Manual Docker Setup
+
+```bash
+git clone https://github.com/TSavo/FramePack-Docker-CUDA.git
 cd FramePack-Docker-CUDA
 mkdir outputs
 mkdir hf_download
@@ -22,3 +34,33 @@ docker run -it --rm --gpus all -p 7860:7860 \
 The first time it runs, it will download the HunyuanVideo, Flux, and other necessary models. They total more than 30GB, so be patient, but they will be cached in the external mapped directory.
 
 When it finishes, access http://localhost:7860 and that's it!
+
+## Enhanced Features
+
+This enhanced version includes several performance optimizations:
+
+- **Multi-stage Docker builds** for better layer caching and faster rebuilds
+- **Flash Attention 2** for faster transformer inference
+- **SageAttention** for optimized attention mechanisms
+- **xFormers** for memory-efficient transformers
+- **Triton 3.2.0** for GPU kernel optimization
+- **PyTorch 2.6.0** with CUDA 12.4 support
+- **Optimized dependency management** with proper version pinning
+- **Build parallelism control** via the `MAX_JOBS` argument (default: 4)
+
+### Build with custom parallelism
+```bash
+# Docker Compose
+MAX_JOBS=8 docker compose up --build
+
+# Manual Docker
+docker build --build-arg MAX_JOBS=8 -t framepack-torch26-cu124:latest .
+```
+
+## Docker Compose Benefits
+
+- **Shared model cache**: models downloaded once are reused across container rebuilds (see the sketch after this list)
+- **Easy configuration**: environment variables in a `.env` file
+- **Volume management**: persistent storage for models and outputs
+- **GPU support**: automatic GPU passthrough configuration
+- **Service management**: easy start/stop/restart of the application
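
For the shared model cache noted above, one approach (a sketch; `/srv/hf_download` is a placeholder path) is to set `HF_MODELS_PATH` to an absolute directory in `.env`, since the compose file below bind-mounts whatever that variable resolves to:

```bash
# Point multiple compose projects at one shared HuggingFace cache.
mkdir -p /srv/hf_download
echo "HF_MODELS_PATH=/srv/hf_download" >> .env
docker compose up --build
```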

docker-compose.yml

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+version: '3.8'
+
+services:
+  framepack:
+    build:
+      context: .
+      dockerfile: Dockerfile
+      args:
+        UID: ${UID:-1000}
+        GID: ${GID:-1000}
+        MAX_JOBS: ${MAX_JOBS:-4}
+    image: framepack-torch26-cu124:latest
+    container_name: framepack
+    ports:
+      - "7860:7860"
+    volumes:
+      - framepack_models:/app/hf_download
+      - ./outputs:/app/outputs
+    environment:
+      - UID=${UID:-1000}
+      - GID=${GID:-1000}
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+    stdin_open: true
+    tty: true
+
+volumes:
+  framepack_models:
+    driver: local
+    driver_opts:
+      type: none
+      o: bind
+      device: ${HF_MODELS_PATH:-./hf_download}
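
To confirm the `deploy` GPU reservation actually reaches the container, a quick sanity check (assuming the service is running and the NVIDIA Container Toolkit is installed on the host):

```bash
# Should print "True" when GPU passthrough works.
docker compose exec framepack python -c "import torch; print(torch.cuda.is_available())"
```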
