More ROCm support #3401

Open · wants to merge 10 commits into base: master
14 changes: 13 additions & 1 deletion .github/workflows/ci_gpu.yml
@@ -17,6 +17,13 @@ concurrency:
jobs:
ci-gpu:
runs-on: [self-hosted, ci-gpu]
runs-on:
- self-hosted
- ci-gpu
- ${{ matrix.gpu-type }}
strategy:
matrix:
gpu-type: [cuda, rocm]
steps:
- name: Clean up previous run
run: |
@@ -41,9 +48,14 @@ jobs:
uses: actions/checkout@v3
with:
submodules: recursive
- name: Install dependencies
- name: Install dependencies for CUDA
if: matrix.gpu-type == 'cuda'
run: |
python ts_scripts/install_dependencies.py --environment=dev --cuda=cu121
- name: Install dependencies for ROCm
if: matrix.gpu-type == 'rocm'
run: |
python ts_scripts/install_dependencies.py --environment=dev --rocm=rocm6.2
- name: Torchserve Sanity
uses: nick-fields/retry@v3
with:
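Editorial note on the hunk above: the `strategy.matrix` block fans the single `ci-gpu` job out into one job per `gpu-type`, and the `${{ matrix.gpu-type }}` entry in `runs-on` routes each job to a self-hosted runner carrying the matching label. A hedged Python sketch of that expansion (illustrative only, not GitHub Actions internals):

```python
from itertools import product

# The matrix as declared in ci_gpu.yml.
matrix = {"gpu-type": ["cuda", "rocm"]}

# Every combination of matrix values becomes its own job, each with its
# own set of runner labels.
jobs = [
    dict(zip(matrix.keys(), combo)) for combo in product(*matrix.values())
]
runner_labels = [
    ["self-hosted", "ci-gpu", job["gpu-type"]] for job in jobs
]
print(runner_labels)
# → [['self-hosted', 'ci-gpu', 'cuda'], ['self-hosted', 'ci-gpu', 'rocm']]
```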
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
@@ -67,10 +67,10 @@ If you plan to develop with TorchServe and change some source code, you must ins
Use the optional `--rocm` or `--cuda` flag with `install_dependencies.py` for installing accelerator specific dependencies.

Possible values are
- rocm: `rocm61`, `rocm60`
- rocm: `rocm6.3`, `rocm6.2`, `rocm6.1`, `rocm6.0`
**@jakki-amd** (Contributor) · Mar 20, 2025

nit: I think it would be more consistent to follow the same naming convention as the CUDA flags, i.e. `rocm61` instead of `rocm6.1`, since CUDA flags are also given as `cu111`, not `cu11.1`.

**@glen-amd** (Author) · Mar 20, 2025

I specifically checked the naming conventions of both CUDA and ROCm, confirmed internally with some AMDers, and then decided to use the form `rocm6.3` instead of `rocm63`.

- cuda: `cu111`, `cu102`, `cu101`, `cu92`

For example `python ./ts_scripts/install_dependencies.py --environment=dev --rocm=rocm61`
For example `python ./ts_scripts/install_dependencies.py --environment=dev --rocm=rocm6.2`
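Editorial note: the accepted values above are exact strings, so a near-miss such as `--rocm=rocm62` should fail fast rather than fall through to a CPU install. A minimal validation sketch (hypothetical helper; the real `install_dependencies.py` logic may differ):

```python
# Hypothetical validator mirroring the documented flag values.
VALID_ROCM = {"rocm6.3", "rocm6.2", "rocm6.1", "rocm6.0"}
VALID_CUDA = {"cu111", "cu102", "cu101", "cu92"}

def accelerator_flag(rocm=None, cuda=None):
    """Return the normalized accelerator flag, or None for a CPU-only install."""
    if rocm and cuda:
        raise ValueError("--rocm and --cuda are mutually exclusive")
    if rocm is not None and rocm not in VALID_ROCM:
        raise ValueError(f"unsupported ROCm version: {rocm!r}")
    if cuda is not None and cuda not in VALID_CUDA:
        raise ValueError(f"unsupported CUDA version: {cuda!r}")
    return rocm or cuda

print(accelerator_flag(rocm="rocm6.2"))  # → rocm6.2
```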

#### For Windows

8 changes: 4 additions & 4 deletions README.md
@@ -29,7 +29,7 @@ curl http://127.0.0.1:8080/predictions/bert -T input.txt
python ./ts_scripts/install_dependencies.py

# Include dependencies for accelerator support with the relevant optional flags
python ./ts_scripts/install_dependencies.py --rocm=rocm61
python ./ts_scripts/install_dependencies.py --rocm=rocm6.2
python ./ts_scripts/install_dependencies.py --cuda=cu121

# Latest release
@@ -45,8 +45,8 @@ pip install torchserve-nightly torch-model-archiver-nightly torch-workflow-archi
# Install dependencies
python ./ts_scripts/install_dependencies.py

# Include depeendencies for accelerator support with the relevant optional flags
python ./ts_scripts/install_dependencies.py --rocm=rocm61
# Include dependencies for accelerator support with the relevant optional flags
python ./ts_scripts/install_dependencies.py --rocm=rocm6.2
python ./ts_scripts/install_dependencies.py --cuda=cu121

# Latest release
@@ -106,7 +106,7 @@ curl -X POST -d '{"model":"meta-llama/Meta-Llama-3-8B-Instruct", "prompt":"Hello
Refer to [LLM deployment](docs/llm_deployment.md) for details and other methods.

## ⚡ Why TorchServe
* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](docs/nvidia_mps.md)
* Write once, run anywhere, on-prem, on-cloud, supports inference on CPUs, GPUs, AWS Inf1/Inf2/Trn1, Google Cloud TPUs, [Nvidia MPS](docs/hardware_support/nvidia_mps.md)
* [Model Management API](docs/management_api.md): multi model management with optimized worker to model allocation
* [Inference API](docs/inference_api.md): REST and gRPC support for batched inference
* [TorchServe Workflows](examples/Workflows/README.md): deploy complex DAGs with multiple interdependent models
8 changes: 8 additions & 0 deletions cpp/src/backends/handler/handler_factory.hh
@@ -18,13 +18,21 @@ class HandlerFactory {
const std::string& handler_class_name) {
auto it = handlers_.find(handler_class_name);
if (it == handlers_.end()) {
// XXX:
// Why not use the default ctor of `std::shared_ptr` directly?
// What are the benefits of using this `std::shared_ptr(nullptr_t)`?
return std::shared_ptr<BaseHandler>(nullptr);
} else {
return it->second();
}
};

private:
// XXX:
// 1) What are the benefits of using a function (ctor) pointer as the value
// instead of using a `shared_ptr` instance directly?
// 2) Whenever we want to add a new pair to `handlers_`, we'll have to
// change the definition here.
std::map<std::string, std::shared_ptr<BaseHandler> (*)()> handlers_ = {
{"TorchScriptHandler", []() -> std::shared_ptr<BaseHandler> {
return std::make_shared<TorchScriptHandler>();
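Editorial note on the XXX questions in the hunk above: keeping a zero-argument factory as the map value, rather than a prebuilt instance, defers construction until a handler is actually requested and lets each lookup return a fresh object; returning an empty `shared_ptr` on a miss gives callers a uniform "not found" signal. A hedged Python analogue of the pattern (illustrative only, not the C++ backend's actual behavior):

```python
class TorchScriptHandler:
    def load_model(self, request):
        return f"loaded {request}"

# Values are factories, not instances: nothing is constructed until lookup,
# and each call yields a fresh handler.
HANDLERS = {
    "TorchScriptHandler": lambda: TorchScriptHandler(),
}

def create_handler(name):
    factory = HANDLERS.get(name)
    # None mirrors the empty shared_ptr returned for an unknown name.
    return factory() if factory is not None else None

print(create_handler("TorchScriptHandler").load_model("model.pt"))
# → loaded model.pt
```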
1 change: 1 addition & 0 deletions cpp/src/backends/handler/torch_scripted_handler.hh
@@ -4,6 +4,7 @@
namespace torchserve {

class TorchScriptHandler : public BaseHandler {
public:
std::pair<std::shared_ptr<void>, std::shared_ptr<torch::Device>> LoadModel(
std::shared_ptr<LoadModelRequest>& load_model_request) override;
};
62 changes: 55 additions & 7 deletions docker/Dockerfile
@@ -37,12 +37,12 @@ ARG BRANCH_NAME
ARG REPO_URL=https://github.com/pytorch/serve.git
ENV PYTHONUNBUFFERED TRUE

RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
@@ -55,6 +55,13 @@ RUN --mount=type=cache,id=apt-dev,target=/var/cache/apt \
git \
&& rm -rf /var/lib/apt/lists/*

RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
if [ "$USE_ROCM_VERSION" ]; then \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
&& rm -rf /var/lib/apt/lists/* ; \
fi

# Make the virtual environment and "activating" it by adding it first to the path.
# From here on the python$PYTHON_VERSION interpreter is used and the packages
# are installed in /home/venv which is what we need for the "runtime-image"
@@ -67,6 +74,7 @@ RUN python -m pip install -U pip setuptools
RUN export USE_CUDA=1

ARG USE_CUDA_VERSION=""
ARG USE_ROCM_VERSION=""

COPY ./ serve

@@ -90,6 +98,14 @@ RUN \
else \
python ./ts_scripts/install_dependencies.py;\
fi; \
elif echo "${BASE_IMAGE}" | grep -q "rocm/"; then \
# Install ROCm version specific binary when ROCm version is specified as a build arg
if [ "$USE_ROCM_VERSION" ]; then \
python ./ts_scripts/install_dependencies.py --rocm $USE_ROCM_VERSION;\
# Install the binary with the latest CPU image on a ROCm base image
else \
python ./ts_scripts/install_dependencies.py; \
fi; \
# Install the CPU binary
else \
python ./ts_scripts/install_dependencies.py; \
@@ -111,13 +127,14 @@ FROM ${BASE_IMAGE} AS production-image
# Re-state ARG PYTHON_VERSION to make it active in this build-stage (uses default define at the top)
ARG PYTHON_VERSION
ENV PYTHONUNBUFFERED TRUE
ARG USE_ROCM_VERSION

RUN --mount=type=cache,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository ppa:deadsnakes/ppa -y && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
python$PYTHON_VERSION \
python3-distutils \
@@ -130,13 +147,25 @@ RUN --mount=type=cache,target=/var/cache/apt \
&& rm -rf /var/lib/apt/lists/* \
&& cd /tmp

RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
if [ "$USE_ROCM_VERSION" ]; then \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
&& rm -rf /var/lib/apt/lists/* ; \
fi

RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp

COPY --chown=model-server --from=compile-image /home/venv /home/venv
COPY --from=compile-image /usr/local/bin/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh
ENV PATH="/home/venv/bin:$PATH"

RUN \
if [ "$USE_ROCM_VERSION" ]; then \
python -m pip install /opt/rocm/share/amd_smi; \
fi

RUN chmod +x /usr/local/bin/dockerd-entrypoint.sh \
&& chown -R model-server /home/model-server

@@ -157,13 +186,14 @@ FROM ${BASE_IMAGE} AS ci-image
ARG PYTHON_VERSION
ARG BRANCH_NAME
ENV PYTHONUNBUFFERED TRUE
ARG USE_ROCM_VERSION

RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
add-apt-repository -y ppa:deadsnakes/ppa && \
apt remove python-pip python3-pip && \
apt remove -y python-pip python3-pip && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
python$PYTHON_VERSION \
python3-distutils \
@@ -183,13 +213,24 @@ RUN --mount=type=cache,target=/var/cache/apt \
&& rm -rf /var/lib/apt/lists/* \
&& cd /tmp

RUN --mount=type=cache,sharing=locked,id=apt-dev,target=/var/cache/apt \
if [ "$USE_ROCM_VERSION" ]; then \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y rocm-dev amd-smi-lib \
&& rm -rf /var/lib/apt/lists/* ; \
fi

COPY --from=compile-image /home/venv /home/venv

ENV PATH="/home/venv/bin:$PATH"

RUN python -m pip install --no-cache-dir -r https://raw.githubusercontent.com/pytorch/serve/$BRANCH_NAME/requirements/developer.txt

RUN \
if [ "$USE_ROCM_VERSION" ]; then \
python -m pip install /opt/rocm/share/amd_smi; \
fi

RUN mkdir /home/serve
ENV TS_RUN_IN_DOCKER True

@@ -203,11 +244,12 @@ ARG PYTHON_VERSION
ARG BRANCH_NAME
ARG BUILD_FROM_SRC
ARG LOCAL_CHANGES
ARG USE_ROCM_VERSION
ARG BUILD_WITH_IPEX
ARG IPEX_VERSION=1.11.0
ARG IPEX_URL=https://software.intel.com/ipex-whl-stable
ENV PYTHONUNBUFFERED TRUE
RUN --mount=type=cache,target=/var/cache/apt \
RUN --mount=type=cache,sharing=locked,target=/var/cache/apt \
apt-get update && \
apt-get upgrade -y && \
apt-get install software-properties-common -y && \
@@ -243,10 +285,16 @@ RUN \

COPY --from=compile-image /home/venv /home/venv
ENV PATH="/home/venv/bin:$PATH"

RUN \
if [ "$USE_ROCM_VERSION" ]; then \
python -m pip install /opt/rocm/share/amd_smi; \
fi

WORKDIR "serve"
RUN python -m pip install -U pip setuptools \
&& python -m pip install --no-cache-dir -r requirements/developer.txt \
&& python ts_scripts/install_from_src.py \
&& python ts_scripts/install_from_src.py --environment=dev \
**Collaborator**

What's the motivation for this change?

**@glen-amd** (Author)

@jakki-amd - can you please explain the change here?

Besides, I just found that the default "production" may be wrong - it should be "prod", shouldn't it?

`default="production",`

**@jakki-amd** (Contributor)

Regarding the motivation (I did this work a long time ago, so apologies if I don't remember all the details): the last section of this file builds a development image, so I think the Docker image should have all the dependencies installed that development requires. That is why I added the `--environment=dev` flag.

We already get some of the development dependencies from line 296's `pip install --no-cache-dir -r requirements/developer.txt`, but `install_from_src.py` contains code that installs additional dependencies for development installs. If we ever added development dependencies to `install_from_src.py` that are not in `requirements/developer.txt`, we would have to remember to mirror them in this Dockerfile, so I find it safer to simply install everything relevant to development work.

Regarding `default="production"`, it certainly does seem that `dev` and `prod` are the valid environment flags.
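Editorial note: the `default="production"` concern is easy to reproduce in isolation, because argparse only enforces `choices` for values supplied on the command line; a default outside the `choices` list passes through silently (sketch; the flag name is taken from the thread, not from the actual script):

```python
import argparse

parser = argparse.ArgumentParser()
# choices is only checked for values given on the command line;
# the default bypasses that validation entirely.
parser.add_argument("--environment", choices=["prod", "dev"], default="production")

args = parser.parse_args([])
print(args.environment)  # → production
```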

&& useradd -m model-server \
&& mkdir -p /home/model-server/tmp \
&& cp docker/dockerd-entrypoint.sh /usr/local/bin/dockerd-entrypoint.sh \
2 changes: 2 additions & 0 deletions docker/Dockerfile.cpp
@@ -19,6 +19,7 @@ ARG CMAKE_VERSION=3.26.4
ARG GCC_VERSION=9
ARG BRANCH_NAME="master"
ARG USE_CUDA_VERSION=""
ARG USE_ROCM_VERSION=""

FROM ${BASE_IMAGE} AS cpp-dev-image
ARG BASE_IMAGE
@@ -28,6 +29,7 @@ ARG GCC_VERSION
ARG BRANCH_NAME
ARG REPO_URL=https://github.com/pytorch/serve.git
ARG USE_CUDA_VERSION
ARG USE_ROCM_VERSION
ARG DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED TRUE
ENV TZ=Etc/UTC
6 changes: 4 additions & 2 deletions docker/Dockerfile.dev
@@ -19,6 +19,7 @@ ARG BRANCH_NAME=master
ARG REPO_URL=https://github.com/pytorch/serve.git
ARG MACHINE_TYPE=cpu
ARG CUDA_VERSION
ARG ROCM_VERSION

ARG BUILD_WITH_IPEX
ARG IPEX_VERSION=1.11.0
@@ -62,15 +63,16 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python$PYTHON_
FROM compile-image AS dev-image
ARG MACHINE_TYPE=cpu
ARG CUDA_VERSION
RUN if [ "$MACHINE_TYPE" = "gpu" ]; then export USE_CUDA=1; fi \
ARG ROCM_VERSION
RUN if [ "$MACHINE_TYPE" = "gpu" ]; then if [ -n "$ROCM_VERSION" ]; then export USE_ROCM=1; else export USE_CUDA=1; fi fi \
&& git clone $REPO_URL \
&& cd serve \
&& git checkout ${BRANCH_NAME} \
&& python$PYTHON_VERSION -m venv /home/venv
ENV PATH="/home/venv/bin:$PATH"
WORKDIR serve
RUN python -m pip install -U pip setuptools \
&& if [ -z "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev; else python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; fi \
&& if [ -n "$CUDA_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev --cuda $CUDA_VERSION; elif [ -n "$ROCM_VERSION" ]; then python ts_scripts/install_dependencies.py --environment=dev --rocm $ROCM_VERSION; else python ts_scripts/install_dependencies.py --environment=dev; fi \
&& if [ "$BUILD_WITH_IPEX" = "true" ]; then python -m pip install --no-cache-dir intel_extension_for_pytorch==${IPEX_VERSION} -f ${IPEX_URL}; fi \
&& python ts_scripts/install_from_src.py \
&& useradd -m model-server \