From cba3b68bc7eeef7720252f5758561501223a8c2c Mon Sep 17 00:00:00 2001 From: "Jona T. Feucht" <14951074+jonafeucht@users.noreply.github.com> Date: Tue, 11 Jun 2024 08:17:42 +0200 Subject: [PATCH] feat: cuda support --- .github/workflows/publish.yml | 7 +++++++ Dockerfile.gpu | 6 ++++++ README.md | 35 +++++++++++++++++++++++++++++++++++ docker-compose.yml | 25 ++++++++++++++++++++++++- 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.gpu diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2910d7f..be37b94 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -18,6 +18,13 @@ permissions: jobs: build_and_publish: runs-on: ubuntu-latest + strategy: + matrix: + dockerfile: [Dockerfile, Dockerfile.gpu] + include: + - dockerfile: Dockerfile + - dockerfile: Dockerfile.gpu + tag-suffix: -cuda steps: - uses: actions/checkout@v4 diff --git a/Dockerfile.gpu b/Dockerfile.gpu new file mode 100644 index 0000000..da937cd --- /dev/null +++ b/Dockerfile.gpu @@ -0,0 +1,6 @@ +FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 +WORKDIR /app +COPY . /app +RUN apt-get update && apt-get install -y --no-install-recommends python3-pip && rm -rf /var/lib/apt/lists/* +RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 +CMD ["fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/README.md b/README.md index a4648fa..58778fa 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ ## Installation - For ease of use it's recommended to use the provided [docker-compose.yml](https://github.com/doppeltilde/natural_language_processing/blob/main/docker-compose.yml). + +**CPU Support:** Use the `latest` tag for the images. ```yml services: natural_language_processing: @@ -28,6 +30,39 @@ volumes: models: ``` +**NVIDIA GPU Support:** Use the `latest-cuda` tag for the images.
+```yml +services: + natural_language_processing_cuda: + image: ghcr.io/doppeltilde/natural_language_processing:latest-cuda + ports: + - "8000:8000" + volumes: + - models:/root/.cache/huggingface/hub:rw + environment: + - DEFAULT_SUMMARIZATION_MODEL_NAME + - DEFAULT_TRANSLATION_MODEL_NAME + - ACCESS_TOKEN + - DEFAULT_SCORE + - USE_API_KEYS + - API_KEYS + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [ gpu ] + +volumes: + models: +``` + +--- + +### Environment Variables + - Create a `.env` file and set the preferred values. ```sh DEFAULT_SUMMARIZATION_MODEL_NAME=Falconsai/text_summarization diff --git a/docker-compose.yml b/docker-compose.yml index bb920ac..479017e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,12 +6,35 @@ services: volumes: - models:/root/.cache/huggingface/hub:rw environment: - - DEFAULT_MODEL_NAME + - DEFAULT_SUMMARIZATION_MODEL_NAME + - DEFAULT_TRANSLATION_MODEL_NAME - ACCESS_TOKEN - DEFAULT_SCORE - USE_API_KEYS - API_KEYS restart: unless-stopped + natural_language_processing_cuda: + image: ghcr.io/doppeltilde/natural_language_processing:latest-cuda + ports: + - "8000:8000" + volumes: + - models:/root/.cache/huggingface/hub:rw + environment: + - DEFAULT_SUMMARIZATION_MODEL_NAME + - DEFAULT_TRANSLATION_MODEL_NAME + - ACCESS_TOKEN + - DEFAULT_SCORE + - USE_API_KEYS + - API_KEYS + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [ gpu ] + volumes: models: