feat: cuda support
jonafeucht committed Jun 11, 2024
1 parent 0470ff4 commit cba3b68
Showing 4 changed files with 72 additions and 1 deletion.
7 changes: 7 additions & 0 deletions .github/workflows/publish.yml
@@ -18,6 +18,13 @@ permissions:
jobs:
  build_and_publish:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        dockerfile: [Dockerfile, Dockerfile.cuda]
        include:
          - dockerfile: Dockerfile
          - dockerfile: Dockerfile.cuda
            tag-suffix: -cuda
    steps:
      - uses: actions/checkout@v4
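The remaining steps of the job are collapsed in this view. As a hedged sketch only, a matrix like this is typically consumed by a later build step along these lines (the step name and action below are assumptions, not part of this commit):

```yml
      # Hypothetical continuation: build each matrix entry and tag it,
      # appending the optional suffix (empty for the plain Dockerfile).
      - name: Build and push image
        uses: docker/build-push-action@v5
        with:
          file: ${{ matrix.dockerfile }}
          push: true
          tags: ghcr.io/doppeltilde/natural_language_processing:latest${{ matrix['tag-suffix'] }}
```

Because the first `include` entry defines no `tag-suffix`, the expression expands to an empty string there, yielding the plain `latest` tag.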

6 changes: 6 additions & 0 deletions Dockerfile.gpu
@@ -0,0 +1,6 @@
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
WORKDIR /app
COPY . /app
RUN pip install --upgrade pip
RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121
CMD ["fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"]
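As a usage sketch, this image could be built and run like so (the image name is an arbitrary example; GPU passthrough assumes the NVIDIA Container Toolkit is installed on the host):

```sh
# Build the CUDA image from this Dockerfile.
docker build -f Dockerfile.gpu -t natural_language_processing:cuda .

# Expose all host GPUs to the container and publish the API port.
docker run --rm --gpus all -p 8000:8000 natural_language_processing:cuda
```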
35 changes: 35 additions & 0 deletions README.md
@@ -8,6 +8,8 @@
## Installation

- For ease of use, it's recommended to use the provided [docker-compose.yml](https://github.com/doppeltilde/natural_language_processing/blob/main/docker-compose.yml).

**CPU Support:** Use the `latest` tag for the images.
```yml
services:
  natural_language_processing:
@@ -28,6 +30,39 @@ volumes:
  models:
```
**NVIDIA GPU Support:** Use the `latest-cuda` tag for the images.
```yml
services:
  natural_language_processing_cuda:
    image: ghcr.io/doppeltilde/natural_language_processing:latest-cuda
    ports:
      - "8000:8000"
    volumes:
      - models:/root/.cache/huggingface/hub:rw
    environment:
      - DEFAULT_SUMMARIZATION_MODEL_NAME
      - DEFAULT_TRANSLATION_MODEL_NAME
      - ACCESS_TOKEN
      - DEFAULT_SCORE
      - USE_API_KEYS
      - API_KEYS
    restart: unless-stopped
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]

volumes:
  models:
```
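To try the GPU service from this compose file, something like the following should work (hedged: this assumes the NVIDIA Container Toolkit is set up on the host, and that `nvidia-smi` is available in the CUDA base image):

```sh
docker compose up -d natural_language_processing_cuda

# Confirm the container actually sees the GPU.
docker compose exec natural_language_processing_cuda nvidia-smi
```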

---

### Environment Variables

- Create a `.env` file and set the preferred values.
```sh
DEFAULT_SUMMARIZATION_MODEL_NAME=Falconsai/text_summarization
```
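The service presumably reads these variables at startup; below is a minimal sketch of that pattern. The `load_settings` helper and every default other than `DEFAULT_SUMMARIZATION_MODEL_NAME` are assumptions for illustration, not the project's actual code:

```python
import os


def load_settings() -> dict:
    """Read service configuration from the environment, with fallbacks.

    Only the DEFAULT_SUMMARIZATION_MODEL_NAME default is taken from the
    README; the other keys and their handling are assumed.
    """
    return {
        "summarization_model": os.environ.get(
            "DEFAULT_SUMMARIZATION_MODEL_NAME", "Falconsai/text_summarization"
        ),
        # Interpret the flag case-insensitively ("true"/"True" enable it).
        "use_api_keys": os.environ.get("USE_API_KEYS", "false").lower() == "true",
        # API_KEYS is assumed to be a comma-separated list.
        "api_keys": [k for k in os.environ.get("API_KEYS", "").split(",") if k],
    }


settings = load_settings()
print(settings["summarization_model"])
```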
25 changes: 24 additions & 1 deletion docker-compose.yml
@@ -6,12 +6,35 @@ services:
     volumes:
       - models:/root/.cache/huggingface/hub:rw
     environment:
-      - DEFAULT_MODEL_NAME
+      - DEFAULT_SUMMARIZATION_MODEL_NAME
+      - DEFAULT_TRANSLATION_MODEL_NAME
       - ACCESS_TOKEN
       - DEFAULT_SCORE
       - USE_API_KEYS
       - API_KEYS
     restart: unless-stopped
+
+  natural_language_processing_cuda:
+    image: ghcr.io/doppeltilde/natural_language_processing:latest-cuda
+    ports:
+      - "8000:8000"
+    volumes:
+      - models:/root/.cache/huggingface/hub:rw
+    environment:
+      - DEFAULT_SUMMARIZATION_MODEL_NAME
+      - DEFAULT_TRANSLATION_MODEL_NAME
+      - ACCESS_TOKEN
+      - DEFAULT_SCORE
+      - USE_API_KEYS
+      - API_KEYS
+    restart: unless-stopped
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]

 volumes:
   models:
