21 changes: 21 additions & 0 deletions examples/backends/tritonserver/Dockerfile
@@ -0,0 +1,21 @@
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
ARG TRITON_SERVER_IMAGE="nvcr.io/nvidia/tritonserver:25.01-py3"

FROM ${TRITON_SERVER_IMAGE} AS triton_source

FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base

COPY --from=triton_source /opt/tritonserver /opt/tritonserver
COPY --from=triton_source /usr/local/dcgm /usr/local/dcgm
COPY --from=triton_source /lib/x86_64-linux-gnu/libdcgm*.so* /lib/x86_64-linux-gnu/
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/opt/tritonserver/lib:/opt/tritonserver/backends:/usr/local/dcgm/lib64:$LD_LIBRARY_PATH
ENV PATH=/opt/tritonserver/bin:$PATH

COPY --chown=dynamo: src/ /workspace/src/
COPY --chown=dynamo: model_repo/ /workspace/model_repo/

WORKDIR /workspace
USER dynamo

RUN uv pip install --no-cache "tritonclient[grpc]"
RUN uv pip install /opt/tritonserver/python/triton*.whl
104 changes: 104 additions & 0 deletions examples/backends/tritonserver/Makefile
@@ -0,0 +1,104 @@
# Makefile for Triton Server installation
# Builds Triton Server and copies artifacts only when necessary

CURRENT_DIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
TMP_SERVER := /tmp/server
BUILD_INSTALL := $(TMP_SERVER)/build/install

# Target directories
WHEELHOUSE_DIR := $(CURRENT_DIR)/wheelhouse
LIB_DIR := $(CURRENT_DIR)/lib
BIN_DIR := $(CURRENT_DIR)/bin
BACKEND_DIR := $(CURRENT_DIR)/backends
# Target files
TRITON_LIB := $(LIB_DIR)/libtritonserver.so
TRITON_BIN := $(BIN_DIR)/tritonserver
TRITON_WHEEL := $(WHEELHOUSE_DIR)/.wheel_marker

# Main target - build everything
.PHONY: all
all: $(TRITON_LIB) $(TRITON_BIN) $(TRITON_WHEEL) $(BACKEND_DIR)
@echo "Triton Server installation complete."
@echo "Add to your environment:"
@echo " export LD_LIBRARY_PATH=$(LIB_DIR):$(BACKEND_DIR):\$$LD_LIBRARY_PATH"
@echo " export PATH=$(BIN_DIR):\$$PATH"

# Clone the repository
$(TMP_SERVER):
@echo "Cloning Triton Server repository..."
cd /tmp && git clone https://github.com/triton-inference-server/server.git

# Build Triton Server
$(BUILD_INSTALL)/lib/libtritonserver.so $(BUILD_INSTALL)/bin/tritonserver: $(TMP_SERVER)
@echo "Building Triton Server (this may take a while)..."
cd $(TMP_SERVER) && \
uv venv .venv && \
. .venv/bin/activate && \
uv pip install distro requests && \
python3 build.py \
--enable-logging \
--enable-stats \
--enable-metrics \
--endpoint=http \
--backend=identity

# Copy library
$(TRITON_LIB): $(BUILD_INSTALL)/lib/libtritonserver.so
@echo "Copying libtriton.so..."
@mkdir -p $(LIB_DIR)
cp $(BUILD_INSTALL)/lib/libtritonserver.so $(LIB_DIR)/

# Copy binary
$(TRITON_BIN): $(BUILD_INSTALL)/bin/tritonserver
@echo "Copying tritonserver binary..."
@mkdir -p $(BIN_DIR)
cp $(BUILD_INSTALL)/bin/tritonserver $(BIN_DIR)/

# Copy backends
# The backends directory is created as a side effect of the main build, so
# depend on the built library rather than on a path that has no rule of its own.
$(BACKEND_DIR): $(BUILD_INSTALL)/lib/libtritonserver.so
	@echo "Copying backends..."
	@mkdir -p $(BACKEND_DIR)
	cp -r $(BUILD_INSTALL)/backends/* $(BACKEND_DIR)/

# Copy wheels
$(TRITON_WHEEL): $(BUILD_INSTALL)/lib/libtritonserver.so
@echo "Copying Python wheels..."
@mkdir -p $(WHEELHOUSE_DIR)
cp $(BUILD_INSTALL)/python/*.whl $(WHEELHOUSE_DIR)/
@touch $(TRITON_WHEEL)
@echo "Triton Server wheel built successfully."
@ls -al $(WHEELHOUSE_DIR)

# Clean installed artifacts (keeps the build)
.PHONY: clean
clean:
@echo "Cleaning installed artifacts..."
rm -rf $(WHEELHOUSE_DIR) $(LIB_DIR) $(BIN_DIR)

# Full clean (removes everything including the cloned repo and build)
.PHONY: distclean
distclean: clean
@echo "Cleaning build directory..."
rm -rf $(TMP_SERVER)

# Show what would be built
.PHONY: status
status:
@echo "Installation status:"
@echo " Repository: $(if $(wildcard $(TMP_SERVER)),✓ cloned,✗ not cloned)"
@echo " Built: $(if $(wildcard $(BUILD_INSTALL)/lib/libtritonserver.so),✓ yes,✗ no)"
@echo " Library: $(if $(wildcard $(TRITON_LIB)),✓ installed,✗ not installed)"
@echo " Binary: $(if $(wildcard $(TRITON_BIN)),✓ installed,✗ not installed)"
@echo " Wheels: $(if $(wildcard $(TRITON_WHEEL)),✓ installed,✗ not installed)"

.PHONY: help
help:
@echo "Triton Server Installation Makefile"
@echo ""
@echo "Targets:"
@echo " all - Build and install Triton Server (default)"
@echo " clean - Remove installed artifacts (keeps build cache)"
@echo " distclean - Remove everything including build cache"
@echo " status - Show installation status"
@echo " help - Show this help message"

239 changes: 239 additions & 0 deletions examples/backends/tritonserver/README.md
@@ -0,0 +1,239 @@
# Triton Server Backend for Dynamo

> **⚠️ Work in Progress / Proof of Concept**
>
> This example demonstrates integrating NVIDIA Triton Inference Server as a backend for Dynamo.
> It is currently a proof-of-concept and may require additional work for production use.

## Overview

This example shows how to run Triton Server models through Dynamo's distributed runtime, exposing them via the KServe gRPC protocol. The integration allows Triton models to benefit from Dynamo's service discovery, routing, and infrastructure.

**Architecture:**

```
┌─────────────────┐ ┌─────────────────┐ ┌─────────────────────────────┐
│ Triton Client │────▶│ Dynamo Frontend│────▶│ Dynamo Worker │
│ (KServe gRPC) │ │ (port 8787) │ │ ┌───────────────────────┐ │
└─────────────────┘ └─────────────────┘ │ │ Triton Server │ │
│ │ │ (Python bindings) │ │
▼ │ └───────────────────────┘ │
┌─────────────────┐ └─────────────────────────────┘
│ etcd + NATS │
│ (discovery/msg) │
└─────────────────┘
```

## Prerequisites

- NVIDIA GPU with CUDA support
- `etcd` and `nats-server` running (locally or in containers)
- For local development: Python 3.10+ with Dynamo installed
- For container deployment: Docker with NVIDIA Container Toolkit

## Quick Start

### Option 1: Local Development

This requires Dynamo to be installed locally.

```bash
# Start etcd and nats (if not already running)
etcd &
nats-server -js &

# From the dynamo repo root
cd examples/backends/tritonserver

# Build Triton Server (first time only, ~30 minutes)
make all

# Install Python dependencies
pip install wheelhouse/tritonserver-*.whl
pip install "tritonclient[grpc]"

# Launch the server
./launch/agg.sh

# In another terminal, test with the client
python launch/client.py
```

### Option 2: Container Deployment

Run the Triton worker in a container with external etcd/nats.

#### Step 1: Start etcd and NATS

```bash
# From the dynamo repo root
docker compose -f deploy/docker-compose.yml up -d
```

#### Step 2: Build Container Images

From the Dynamo repository root:

```bash
# Build the base Dynamo image
./container/build.sh

# Build the Triton worker image
cd examples/backends/tritonserver
docker build -t dynamo-triton:latest .
```
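
The Dockerfile's `DYNAMO_BASE_IMAGE` and `TRITON_SERVER_IMAGE` build arguments can be overridden on the command line instead of editing the file; the tags below are simply the Dockerfile defaults:

```bash
docker build \
  --build-arg DYNAMO_BASE_IMAGE=dynamo:latest-none \
  --build-arg TRITON_SERVER_IMAGE=nvcr.io/nvidia/tritonserver:25.01-py3 \
  -t dynamo-triton:latest .
```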

#### Step 3: Run the Container

```bash
docker run --rm -it --gpus all --network host \
  dynamo-triton:latest \
  ./launch/agg.sh
```

#### Step 4: Test the Deployment

```bash
# Install client dependencies
pip install "tritonclient[grpc]"

# Test with the client
python launch/client.py --host 127.0.0.1 --port 8787
```
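
To issue a request without the bundled client, a minimal `tritonclient` call looks like the sketch below. The tensor names (`INPUT0`/`OUTPUT0`), shape, and dtype are assumptions for illustration; check `model_repo/identity/config.pbtxt` for the actual values.

```python
# Minimal KServe gRPC sketch against the Dynamo frontend on port 8787.
# Tensor names, shape, and dtype are assumptions -- see config.pbtxt.
import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient(url="127.0.0.1:8787")

data = np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np.float32)
infer_input = grpcclient.InferInput("INPUT0", list(data.shape), "FP32")
infer_input.set_data_from_numpy(data)

result = client.infer(model_name="identity", inputs=[infer_input])
print(result.as_numpy("OUTPUT0"))  # the identity model echoes the input back
```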

## Directory Structure

```
tritonserver/
├── launch/
│   ├── agg.sh              # Launch script (frontend + worker)
│   └── client.py           # Test client (KServe gRPC)
├── src/
│   └── tritonworker.py     # Main Dynamo worker implementation
├── model_repo/
│   └── identity/           # Sample identity model
│       ├── config.pbtxt
│       └── 1/
├── backends/               # Triton backends (built by `make all`)
├── lib/                    # Triton libraries (built by `make all`)
├── wheelhouse/             # Python wheels (built by `make all`)
├── Dockerfile              # Triton worker container
└── Makefile                # Build Triton from source
```

## Configuration

### Launch Script Options

```bash
./launch/agg.sh --help

Options:
--model-name <name> Model name to load (default: identity)
--model-repository <path> Path to model repository
--backend-directory <path> Path to Triton backends
--log-verbose <level> Triton log verbosity 0-6 (default: 1)
```
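
For example, to serve the bundled identity model with more verbose Triton logging (the paths shown are illustrative):

```bash
./launch/agg.sh \
  --model-name identity \
  --model-repository "$(pwd)/model_repo" \
  --log-verbose 3
```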

### Environment Variables

| Variable | Description | Default |
|----------|-------------|---------|
| `ETCD_ENDPOINTS` | etcd connection URL | `http://localhost:2379` |
| `NATS_SERVER` | NATS connection URL | `nats://localhost:4222` |
| `DYN_LOG` | Log level (debug, info, warn, error) | `info` |
| `DYN_HTTP_PORT` | Frontend HTTP port | `8000` |
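
For example, to point a locally launched worker at explicit endpoints with debug logging (apart from `DYN_LOG`, these values just restate the defaults):

```bash
export ETCD_ENDPOINTS=http://localhost:2379
export NATS_SERVER=nats://localhost:4222
export DYN_LOG=debug
./launch/agg.sh
```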

## Adding Your Own Models

1. Create a model directory in `model_repo/`:

```text
model_repo/
└── my_model/
    ├── config.pbtxt
    └── 1/
        └── model.plan   # or other model file
```

2. Define the model config (`config.pbtxt`):

```protobuf
name: "my_model"
backend: "tensorrt" # or onnxruntime, python, etc.
max_batch_size: 8

input [
  {
    name: "input"
    data_type: TYPE_FP32
    dims: [3, 224, 224]
  }
]
output [
  {
    name: "output"
    data_type: TYPE_FP32
    dims: [1000]
  }
]
```

3. Launch with your model:

```bash
./launch/agg.sh --model-name my_model
```

## Known Limitations

- **Single model**: Currently loads one model at a time
- **Identity backend only**: The Makefile builds the identity backend by default; other backends require modifying the build configuration
- **No batching optimization**: Dynamo's request batching is not yet integrated with Triton's dynamic batching
- **Limited error handling**: Error propagation between Triton and Dynamo could be improved

## Building Triton from Source

Required for local development. The Makefile builds Triton Server and the identity backend.

```bash
cd examples/backends/tritonserver

# Build Triton Server (~30 minutes, clones and builds from source)
make all

# Check build status
make status

# This produces:
# lib/libtritonserver.so - Core library
# bin/tritonserver - Server binary
# backends/identity/ - Identity backend
# wheelhouse/*.whl - Python bindings

# Clean up build artifacts
make clean # Remove installed artifacts
make distclean # Remove everything including build cache
```

To add other backends (TensorRT, ONNX, Python, etc.), edit the Makefile's `build.py` invocation to include additional `--backend=<name>` flags.
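
For example, the `build.py` call in the Makefile's build rule might become the following (the extra backend names are illustrative; consult the Triton build documentation for the prerequisites each backend adds):

```bash
python3 build.py \
    --enable-logging \
    --enable-stats \
    --enable-metrics \
    --endpoint=http \
    --backend=identity \
    --backend=onnxruntime \
    --backend=python
```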

## Troubleshooting

### "Model not found" error

- Verify the model exists in `model_repo/<model_name>/`
- Check that `config.pbtxt` is valid
- Ensure the backend is available in `backends/`

### Worker fails to start

- Check that `LD_LIBRARY_PATH` includes the Triton libraries (see the export lines below)
- Verify GPU is available: `nvidia-smi`
- Increase log verbosity: `--log-verbose 6`
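
`make all` prints the exact export lines for your checkout; with the default layout they look like this, assuming you are in `examples/backends/tritonserver`:

```bash
export LD_LIBRARY_PATH=$(pwd)/lib:$(pwd)/backends:$LD_LIBRARY_PATH
export PATH=$(pwd)/bin:$PATH
```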

## Related Documentation

- [Dynamo Backend Guide](../../../docs/development/backend-guide.md)
- [Triton Inference Server](https://github.com/triton-inference-server/server)
- [KServe Protocol](https://kserve.github.io/website/latest/modelserving/data_plane/v2_protocol/)