Skip to content

Commit

Permalink
chore: split nvidia and amd
Browse files Browse the repository at this point in the history
  • Loading branch information
jonafeucht committed Nov 7, 2024
1 parent 1be375a commit f6c8319
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 49 deletions.
66 changes: 66 additions & 0 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Builds every container image variant listed in the matrix and pushes it to
# the GitHub Container Registry on each push to the "opencl" branch.
name: OpenCL Dev CI/CD

on:
  push:
    branches:
      - "opencl"

env:
  REGISTRY: "ghcr.io"
  REGISTRY_USER: ${{ github.actor }}
  REGISTRY_SECRET: ${{ secrets.GITHUB_TOKEN }}
  IMAGE_NAME: ${{ github.repository }}

permissions:
  contents: read
  packages: write

jobs:
  build_and_publish:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # One entry per image variant:
        #   docker_file - Dockerfile to build
        #   label       - tag suffix pushed as <registry>/<image>:<label>
        #   platforms   - target platforms for that Dockerfile
        # NOTE(review): this "Dev" workflow publishes "latest*" tags while
        # publish.yml publishes "dev*" tags - confirm this is not swapped.
        include:
          - docker_file: Dockerfile
            label: "latest"
            platforms: linux/amd64,linux/arm64
          - docker_file: Dockerfile.cuda
            label: "latest-cuda"
            platforms: linux/amd64,linux/arm64
          # Dockerfile.opencl.nvidia installs a linux_x86_64-only wheel
          # (pytorch_ocl), so it cannot be built for arm64.
          - docker_file: Dockerfile.opencl.nvidia
            label: "latest-opencl-nvidia"
            platforms: linux/amd64

    steps:
      - uses: actions/checkout@v4

      # Registers QEMU emulators so the non-native linux/arm64 builds can
      # execute their RUN steps on the amd64 runner.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # NOTE(review): the branch and sha tags generated here are identical
      # for every matrix entry, so the variants overwrite each other on
      # those tags; only the explicit ":<label>" tag below is unique.
      - name: Meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=ref,event=tag
            type=sha,prefix=sha-
          # The labels input takes plain key=value lines; the
          # "type=raw,value=..." syntax is only understood by "tags".
          labels: |
            image-type=${{ matrix.label }}

      - uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ env.REGISTRY_USER }}
          password: ${{ env.REGISTRY_SECRET }}

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ${{ matrix.docker_file }}
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
            ${{ steps.meta.outputs.tags }}
            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.label }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: ${{ matrix.platforms }}
11 changes: 7 additions & 4 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,12 @@ jobs:
matrix:
include:
- docker_file: Dockerfile
label: "latest"
label: "dev"
- docker_file: Dockerfile.cuda
label: "latest-cuda"
label: "dev-cuda"
- docker_file: Dockerfile.opencl.nvidia
label: "dev-opencl-nvidia"

steps:
- uses: actions/checkout@v4

Expand Down Expand Up @@ -58,6 +61,6 @@ jobs:
push: ${{ github.event_name != 'pull_request' }}
tags: |
${{ steps.meta.outputs.tags }}
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest${{ matrix.label =='latest-cuda' && '-cuda' || '' }}
# labels: ${{ steps.meta.outputs.labels }}
${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.label }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64,linux/arm64
33 changes: 0 additions & 33 deletions Dockerfile.opencl

This file was deleted.

65 changes: 65 additions & 0 deletions Dockerfile.opencl.amd
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Image that runs the FastAPI app (main.py) with the AMD OpenCL runtime and
# the pytorch_ocl OpenCL backend for PyTorch.
FROM ubuntu:22.04

ARG DEBIAN_FRONTEND=noninteractive

WORKDIR /app

# Base tooling needed to add third-party apt repositories and fetch files.
RUN apt-get update && apt-get install -y \
    wget \
    software-properties-common \
    apt-transport-https \
    ca-certificates \
    curl \
    gpg \
    && rm -rf /var/lib/apt/lists/*

# Add AMD GPU repository and install drivers.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg && \
    echo 'deb [arch=amd64] https://repo.radeon.com/amdgpu/6.2.3/ubuntu jammy main' | tee /etc/apt/sources.list.d/amdgpu.list && \
    apt-get update && \
    apt-get install -y amdgpu-install && \
    amdgpu-install --usecase=graphics,opencl --opencl=rocr,legacy --accept-eula --no-32 -y && \
    rm -rf /var/lib/apt/lists/*

# Install OpenCL dependencies
RUN apt-get update && apt-get install -y \
    ocl-icd-opencl-dev \
    opencl-headers \
    ocl-icd-libopencl1 \
    clinfo \
    && rm -rf /var/lib/apt/lists/*

# Configure OpenCL vendors: register the AMD library with the ICD loader.
RUN mkdir -p /etc/OpenCL/vendors && \
    echo "libamdocl64.so" > /etc/OpenCL/vendors/amdocl64.icd

# Install Python 3.12 (the pytorch_ocl wheel below is a cp312 build).
RUN add-apt-repository ppa:deadsnakes/ppa -y && \
    apt-get update && \
    apt-get install -y \
    python3.12 \
    python3.12-venv \
    python3.12-dev \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Set up Python virtual environment and put it first on PATH.
RUN python3.12 -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
RUN pip install --upgrade pip

# Add render and video groups
RUN groupadd -f render && groupadd -f video

# Set environment variables for OpenCL
ENV OCL_ICD_VENDORS=/etc/OpenCL/vendors

# Copy only the requirements first so the dependency layers stay cached
# when application sources change.
COPY requirements.txt /app/requirements.txt
RUN /app/venv/bin/pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu

# Install OpenCL backend for PyTorch
RUN wget https://github.com/artyom-beilis/pytorch_dlprim/releases/download/0.2.0/pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
    /app/venv/bin/pip install pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
    rm pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl

# Application sources go in last: a source-only change then rebuilds just
# this layer instead of every driver and dependency layer above
# (same ordering as Dockerfile.opencl.nvidia).
COPY . /app

CMD ["/app/venv/bin/python", "-m", "fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
28 changes: 28 additions & 0 deletions Dockerfile.opencl.nvidia
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Image that runs the FastAPI app (main.py) with the NVIDIA OpenCL ICD and
# the pytorch_ocl OpenCL backend for PyTorch.
FROM python:3.12.7

ARG DEBIAN_FRONTEND=noninteractive

# Read by the NVIDIA container runtime: expose all GPUs with the compute
# and utility driver capabilities. Uses the key=value ENV form; the
# whitespace-separated form is legacy syntax.
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility

WORKDIR /app

# System packages (OpenCL loader, headers, clinfo, build tooling) plus the
# virtual environment. apt-get is used instead of apt because apt does not
# provide a stable command-line interface for scripts; the apt lists are
# removed in the same layer to keep them out of the image.
RUN apt-get update && \
    apt-get dist-upgrade -y && \
    apt-get install -y python3 python3-full python3-pip python3-venv git wget ocl-icd-opencl-dev opencl-clhpp-headers opencl-c-headers opencl-headers ocl-icd-libopencl1 clinfo && \
    rm -rf /var/lib/apt/lists/* && \
    python3 -m venv /app/venv && \
    /app/venv/bin/pip install --upgrade pip

# Configure OpenCL ICD loaders: register the NVIDIA library with the loader.
RUN mkdir -p /etc/OpenCL/vendors && \
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

# Requirements are copied on their own so the dependency layers stay cached
# when only application sources change.
COPY requirements.txt /app/requirements.txt
RUN /app/venv/bin/pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu

# Install OpenCL backend for PyTorch (cp312, linux_x86_64-only wheel).
RUN wget https://github.com/artyom-beilis/pytorch_dlprim/releases/download/0.2.0/pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
    /app/venv/bin/pip install pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
    rm pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl

COPY . /app

CMD ["/app/venv/bin/python", "-m", "fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,11 @@ Only tested with consumer grade hardware and only on Linux based systems.

#### NVIDIA GPU (CUDA & OpenCL)
- GTX 950
- GTX 960
- RTX 2070
- RTX 3060 Ti

#### AMD GPU (OpenCL)
- Radeon RX 570 4GB
- Radeon RX 580 8GB
- Radeon RX 6600 XT 8GB
- RX 580 8GB
- RX 6600 XT

#### Intel GPU (OpenCL NEO)
- None
Expand Down
10 changes: 6 additions & 4 deletions docker-compose.dev.opencl.amd.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
services:
opencl_dev:
opencl_amd_dev:
build:
context: .
dockerfile: ./Dockerfile.opencl
dockerfile: ./Dockerfile.opencl.amd
ports:
- "8200:8000"
environment:
Expand All @@ -16,10 +16,12 @@ services:
devices:
- /dev/kfd
- /dev/dri
- /dev/dri/renderD128 # First GPU
# - /dev/dri/renderD129 # Second GPU
security_opt:
- seccomp:unconfined
group_add:
- "39"
- "109"
volumes:
- ./models:/root/.cache/huggingface/hub:rw
- /tmp/.X11-unix:/tmp/.X11-unix
ipc: host
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
services:
opencl_dev:
opencl_cuda_dev:
build:
context: .
dockerfile: ./Dockerfile.opencl
dockerfile: ./Dockerfile.opencl.nvidia
ports:
- "8200:8000"
environment:
Expand Down
2 changes: 1 addition & 1 deletion src/shared/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,4 @@ def clear_cache():
torch.ocl.empty_cache()
except ModuleNotFoundError as err:
print(err)
torch.cuda.empty_cache()
torch.cuda.empty_cache()

0 comments on commit f6c8319

Please sign in to comment.