From cba3b68bc7eeef7720252f5758561501223a8c2c Mon Sep 17 00:00:00 2001 From: "Jona T. Feucht" <14951074+jonafeucht@users.noreply.github.com> Date: Tue, 11 Jun 2024 08:17:42 +0200 Subject: [PATCH] feat: cuda support --- .github/workflows/publish.yml | 7 +++++++ Dockerfile.gpu | 6 ++++++ README.md | 35 +++++++++++++++++++++++++++++++++++ docker-compose.yml | 25 ++++++++++++++++++++++++- 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 Dockerfile.gpu diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2910d7f..be37b94 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -18,6 +18,13 @@ permissions: jobs: build_and_publish: runs-on: ubuntu-latest + strategy: + matrix: + dockerfile: [Dockerfile, Dockerfile.gpu] + include: + - dockerfile: Dockerfile + - dockerfile: Dockerfile.gpu + tag-suffix: -cuda steps: - uses: actions/checkout@v4 diff --git a/Dockerfile.gpu b/Dockerfile.gpu new file mode 100644 index 0000000..da937cd --- /dev/null +++ b/Dockerfile.gpu @@ -0,0 +1,6 @@ +FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04 +WORKDIR /app +COPY . /app +RUN apt-get update && apt-get install -y --no-install-recommends python3-pip && rm -rf /var/lib/apt/lists/* +RUN pip install --no-cache-dir -r requirements.txt --extra-index-url https://download.pytorch.org/whl/cu121 +CMD ["fastapi", "run", "main.py", "--proxy-headers", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/README.md b/README.md index a4648fa..58778fa 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ ## Installation - For ease of use it's recommended to use the provided [docker-compose.yml](https://github.com/doppeltilde/natural_language_processing/blob/main/docker-compose.yml). + +**CPU Support:** Use the `latest` tag for the images. ```yml services: natural_language_processing: @@ -28,6 +30,39 @@ volumes: models: ``` +**NVIDIA GPU Support:** Use the `latest-cuda` tag for the images.
+```yml +services: + natural_language_processing_cuda: + image: ghcr.io/doppeltilde/natural_language_processing:latest-cuda + ports: + - "8000:8000" + volumes: + - models:/root/.cache/huggingface/hub:rw + environment: + - DEFAULT_SUMMARIZATION_MODEL_NAME + - DEFAULT_TRANSLATION_MODEL_NAME + - ACCESS_TOKEN + - DEFAULT_SCORE + - USE_API_KEYS + - API_KEYS + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [ gpu ] + +volumes: + models: +``` + +--- + +### Environment Variables + - Create a `.env` file and set the preferred values. ```sh DEFAULT_SUMMARIZATION_MODEL_NAME=Falconsai/text_summarization diff --git a/docker-compose.yml b/docker-compose.yml index bb920ac..479017e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,12 +6,35 @@ services: volumes: - models:/root/.cache/huggingface/hub:rw environment: - - DEFAULT_MODEL_NAME + - DEFAULT_SUMMARIZATION_MODEL_NAME + - DEFAULT_TRANSLATION_MODEL_NAME - ACCESS_TOKEN - DEFAULT_SCORE - USE_API_KEYS - API_KEYS restart: unless-stopped + natural_language_processing_cuda: + image: ghcr.io/doppeltilde/natural_language_processing:latest-cuda + ports: + - "8000:8000" + volumes: + - models:/root/.cache/huggingface/hub:rw + environment: + - DEFAULT_SUMMARIZATION_MODEL_NAME + - DEFAULT_TRANSLATION_MODEL_NAME + - ACCESS_TOKEN + - DEFAULT_SCORE + - USE_API_KEYS + - API_KEYS + restart: unless-stopped + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [ gpu ] + volumes: models: