chore: split nvidia and amd

doppeltilde · Nov 7, 2024 · f6c8319 · f6c8319
1 parent 1be375a
commit f6c8319
Show file tree

Hide file tree

Showing 9 changed files with 177 additions and 49 deletions.
diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
@@ -0,0 +1,81 @@
+# Builds the project's container images on every push to the `opencl` branch
+# and publishes them to the GitHub Container Registry (ghcr.io).
+name: OpenCL Dev CI/CD
+
+on:
+  push:
+    branches:
+      - "opencl"
+
+env:
+  REGISTRY: "ghcr.io"
+  REGISTRY_USER: ${{ github.actor }}
+  REGISTRY_SECRET: ${{ secrets.GITHUB_TOKEN }}
+  IMAGE_NAME: ${{ github.repository }}
+
+# `packages: write` lets GITHUB_TOKEN push images to ghcr.io.
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  build_and_publish:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        # One entry per image flavour: Dockerfile, tag, and target platforms.
+        # NOTE(review): this dev workflow publishes `latest*` tags while
+        # publish.yml publishes `dev*` tags - confirm the labels are not swapped.
+        include:
+          - docker_file: Dockerfile
+            label: "latest"
+            platforms: linux/amd64,linux/arm64
+          - docker_file: Dockerfile.cuda
+            label: "latest-cuda"
+            platforms: linux/amd64,linux/arm64
+          # Dockerfile.opencl.nvidia installs a linux_x86_64-only wheel, so
+          # this image can only be built for amd64.
+          - docker_file: Dockerfile.opencl.nvidia
+            label: "latest-opencl-nvidia"
+            platforms: linux/amd64
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          # NOTE(review): none of these tags include matrix.label, so all
+          # matrix jobs push to the same branch/sha tags - confirm intended.
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=ref,event=tag
+            type=sha,prefix=sha-
+          # Custom labels are plain key=value lines; the `type=raw,value=...`
+          # form is only understood by the `tags` input.
+          labels: |
+            image-type=${{ matrix.label }}
+
+      - uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ env.REGISTRY_USER }}
+          password: ${{ env.REGISTRY_SECRET }}
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: ${{ matrix.docker_file }}
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: |
+            ${{ steps.meta.outputs.tags }}
+            ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.label }}
+          labels: ${{ steps.meta.outputs.labels }}
+          platforms: ${{ matrix.platforms }}
diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -22,9 +22,12 @@ jobs:
           matrix:
             include:
               - docker_file: Dockerfile
-                label: "latest"
+                label: "dev"
               - docker_file: Dockerfile.cuda
-                label: "latest-cuda"
+                label: "dev-cuda"
+              - docker_file: Dockerfile.opencl.nvidia
+                label: "dev-opencl-nvidia"
+
         steps:
             - uses: actions/checkout@v4
 
@@ -58,6 +61,6 @@ jobs:
                 push: ${{ github.event_name != 'pull_request' }}
                 tags: |
                   ${{ steps.meta.outputs.tags }}
-                  ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest${{ matrix.label =='latest-cuda' && '-cuda' || '' }}
-                # labels: ${{ steps.meta.outputs.labels }}
+                  ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ matrix.label }}
+                labels: ${{ steps.meta.outputs.labels }}
                 platforms: linux/amd64,linux/arm64
diff --git a/Dockerfile.opencl b/Dockerfile.opencl
diff --git a/Dockerfile.opencl.amd b/Dockerfile.opencl.amd
@@ -0,0 +1,74 @@
+# OpenCL image for AMD GPUs: Ubuntu 22.04 + amdgpu/ROCm OpenCL + Python 3.12.
+FROM ubuntu:22.04
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /app
+
+# Base tooling needed to add third-party apt repositories.
+RUN apt-get update && apt-get install -y \
+    wget \
+    software-properties-common \
+    apt-transport-https \
+    ca-certificates \
+    curl \
+    gpg \
+    && rm -rf /var/lib/apt/lists/*
+
+# Add AMD GPU repository and install drivers
+RUN wget -q -O - https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm.gpg && \
+    echo 'deb [arch=amd64] https://repo.radeon.com/amdgpu/6.2.3/ubuntu jammy main' | tee /etc/apt/sources.list.d/amdgpu.list && \
+    apt-get update && \
+    apt-get install -y amdgpu-install && \
+    amdgpu-install --usecase=graphics,opencl --opencl=rocr,legacy --accept-eula --no-32 -y && \
+    rm -rf /var/lib/apt/lists/*
+
+# Install OpenCL dependencies
+RUN apt-get update && apt-get install -y \
+    ocl-icd-opencl-dev \
+    opencl-headers \
+    ocl-icd-libopencl1 \
+    clinfo \
+    && rm -rf /var/lib/apt/lists/*
+
+# Configure OpenCL vendors
+RUN mkdir -p /etc/OpenCL/vendors && \
+    echo "libamdocl64.so" > /etc/OpenCL/vendors/amdocl64.icd
+
+# Install Python 3.12
+RUN add-apt-repository ppa:deadsnakes/ppa -y && \
+    apt-get update && \
+    apt-get install -y \
+    python3.12 \
+    python3.12-venv \
+    python3.12-dev \
+    python3-pip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set up Python virtual environment
+RUN python3.12 -m venv /app/venv
+ENV PATH="/app/venv/bin:$PATH"
+RUN pip install --upgrade pip
+
+# Add render and video groups
+RUN groupadd -f render && groupadd -f video
+
+# Set environment variables for OpenCL
+ENV OCL_ICD_VENDORS=/etc/OpenCL/vendors
+
+# Install Python dependencies before copying the sources so this layer stays
+# cached until requirements.txt itself changes.
+COPY requirements.txt /app/requirements.txt
+RUN /app/venv/bin/pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# Install OpenCL backend for PyTorch
+RUN wget https://github.com/artyom-beilis/pytorch_dlprim/releases/download/0.2.0/pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
+    /app/venv/bin/pip install pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
+    rm pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl
+
+# Copy the application last: source edits then only rebuild this layer instead
+# of re-running every driver and dependency install above.
+COPY . /app
+
+# Serve main.py with the FastAPI CLI on port 8000, all interfaces.
+CMD ["/app/venv/bin/python", "-m", "fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/Dockerfile.opencl.nvidia b/Dockerfile.opencl.nvidia
@@ -0,0 +1,38 @@
+# OpenCL image for NVIDIA GPUs, built on the official Python 3.12 image.
+FROM python:3.12.7
+
+# Settings read by the NVIDIA container runtime: expose all GPUs with the
+# compute and utility driver capabilities.
+ENV NVIDIA_VISIBLE_DEVICES=all
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+WORKDIR /app
+
+# OpenCL ICD loader, headers and clinfo, plus git/wget. The base image already
+# provides Python 3.12 with venv and pip, so no Debian python3 packages are
+# installed. apt-get is used because apt's CLI is not meant for scripts.
+RUN apt-get update && \
+    apt-get dist-upgrade -y && \
+    apt-get install -y git wget ocl-icd-opencl-dev opencl-clhpp-headers opencl-c-headers opencl-headers ocl-icd-libopencl1 clinfo && \
+    rm -rf /var/lib/apt/lists/* && \
+    python3 -m venv /app/venv && \
+    /app/venv/bin/pip install --upgrade pip
+
+# Configure OpenCL ICD loaders
+RUN mkdir -p /etc/OpenCL/vendors && \
+    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+
+# Install Python dependencies before copying the sources so this layer stays
+# cached until requirements.txt changes.
+COPY requirements.txt /app/requirements.txt
+RUN /app/venv/bin/pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+# Install OpenCL backend for PyTorch
+RUN wget https://github.com/artyom-beilis/pytorch_dlprim/releases/download/0.2.0/pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
+    /app/venv/bin/pip install pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl && \
+    rm pytorch_ocl-0.2.0+torch2.5-cp312-none-linux_x86_64.whl
+
+COPY . /app
+
+# Serve main.py with the FastAPI CLI on port 8000, all interfaces.
+CMD ["/app/venv/bin/python", "-m", "fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
@@ -86,14 +86,11 @@ Only tested with consumer grade hardware and only on Linux based systems.
 
 #### NVIDIA GPU (CUDA & OpenCL)
 - GTX 950
-- GTX 960
-- RTX 2070
 - RTX 3060 Ti
 
 #### AMD GPU (OpenCL)
-- Radeon RX 570 4GB
-- Radeon RX 580 8GB
-- Radeon RX 6600 XT 8GB
+- RX 580 8GB
+- RX 6600 XT
 
 #### Intel GPU (OpenCL NEO)
 - None

diff --git a/docker-compose.dev.opencl.amd.yml b/docker-compose.dev.opencl.amd.yml
@@ -1,8 +1,8 @@
 services:
-  opencl_dev:
+  opencl_amd_dev:
     build:
       context: .
-      dockerfile: ./Dockerfile.opencl
+      dockerfile: ./Dockerfile.opencl.amd
     ports:
       - "8200:8000"
     environment:
@@ -16,10 +16,12 @@ services:
     devices:
       - /dev/kfd
       - /dev/dri
-      - /dev/dri/renderD128 # First GPU
-      # - /dev/dri/renderD129  # Second GPU
     security_opt:
       - seccomp:unconfined
+    group_add:
+      - "39"
+      - "109"
     volumes:
       - ./models:/root/.cache/huggingface/hub:rw
+      - /tmp/.X11-unix:/tmp/.X11-unix
     ipc: host
diff --git a/docker-compose.dev.opencl.cuda.yml → docker-compose.dev.opencl.nvidia.yml b/docker-compose.dev.opencl.cuda.yml → docker-compose.dev.opencl.nvidia.yml
@@ -1,8 +1,8 @@
 services:
-  opencl_dev:
+  opencl_cuda_dev:
     build:
       context: .
-      dockerfile: ./Dockerfile.opencl
+      dockerfile: ./Dockerfile.opencl.nvidia
     ports:
       - "8200:8000"
     environment:

diff --git a/src/shared/shared.py b/src/shared/shared.py
@@ -59,4 +59,4 @@ def clear_cache():
         torch.ocl.empty_cache()
     except ModuleNotFoundError as err:
         print(err)
-    torch.cuda.empty_cache()
+        torch.cuda.empty_cache()