### Description

The /app directory is missing from the nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3 and 24.07-trtllm-python-py3 containers, although it is present in 25.03-trtllm-python-py3.

### System Info

**Environment**

If applicable, please include the following:
- CPU architecture (e.g., x86_64, aarch64): x86_64
- CPU/Host memory size (if known):
- GPU properties
  - GPU name (e.g., NVIDIA H100, NVIDIA A100, NVIDIA L40S): DGX H200
  - GPU memory size (if known): 143771MiB
  - Clock frequencies used (if applicable):
- Libraries
  - TensorRT-LLM backend branch or tag (e.g., main, v0.7.1):
  - TensorRT-LLM backend commit (if known):
  - Versions of TensorRT, AMMO, CUDA, cuBLAS, etc. used:
  - Container used (if running TensorRT-LLM backend in a container): nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3 and nvcr.io/nvidia/tritonserver:24.07-trtllm-python-py3 (no others tested)
- NVIDIA driver version: 565.57.01
- OS (Ubuntu 22.04, CentOS 7, Windows 10): RHEL 9.4
### Who can help?

### Information

- The official example scripts
- My own modified scripts

### Tasks

- An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)
- My own task or dataset (give details below)
### Reproduction

Starting the container with the following deployment:

```yaml
# vllm-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: trt-llama
  namespace: tsr
spec:
  replicas: 1
  selector:
    matchLabels:
      app: trt-llama
  template:
    metadata:
      labels:
        app: trt-llama
    spec:
      containers:
        - name: trt-container
          image: nvcr.io/nvidia/tritonserver:24.12-trtllm-python-py3
          command: ["/bin/bash", "-c", "sleep infinity"]
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: tsr-pvc
```
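For completeness, a minimal sketch of how the pod is applied and entered (standard `kubectl` commands; the label selector matches the deployment above):

```bash
# Apply the deployment and open a shell in the resulting pod.
kubectl apply -f vllm-deployment.yaml
POD=$(kubectl get pods -n tsr -l app=trt-llama -o jsonpath='{.items[0].metadata.name}')
kubectl exec -n tsr -it "$POD" -- /bin/bash
```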
Inside the container:

```
root@trt-llama-74cc8596c7-45fd6:/opt/tritonserver# cd /app
bash: cd: /app: No such file or directory
```
Trying to find the files expected to be in /app:

```
root@trt-llama-74cc8596c7-6zcv2:/# find . -name "*tensorrt_llm*"
./usr/local/lib/python3.12/dist-packages/modelopt/torch/export/__pycache__/tensorrt_llm_type.cpython-312.pyc
./usr/local/lib/python3.12/dist-packages/modelopt/torch/export/__pycache__/tensorrt_llm_utils.cpython-312.pyc
./usr/local/lib/python3.12/dist-packages/modelopt/torch/export/tensorrt_llm_type.py
./usr/local/lib/python3.12/dist-packages/modelopt/torch/export/tensorrt_llm_utils.py
./usr/local/lib/python3.12/dist-packages/tensorrt_llm
./usr/local/lib/python3.12/dist-packages/tensorrt_llm/libs/libnvinfer_plugin_tensorrt_llm.so
./usr/local/lib/python3.12/dist-packages/tensorrt_llm/libs/libtensorrt_llm.so
./usr/local/lib/python3.12/dist-packages/tensorrt_llm/libs/libtensorrt_llm_nvrtc_wrapper.so
./usr/local/lib/python3.12/dist-packages/tensorrt_llm/libs/libtensorrt_llm_ucx_wrapper.so
./usr/local/lib/python3.12/dist-packages/tensorrt_llm-0.16.0.dist-info
```
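The dist-info entry suggests the 24.12 image ships the TensorRT-LLM 0.16.0 wheel even though /app is absent. A quick sketch to confirm the installed wheel version from inside the container (assuming the stock `python3`/`pip3` of the image):

```bash
# Confirm which tensorrt_llm wheel the container actually ships.
python3 -c "import tensorrt_llm; print(tensorrt_llm.__version__)"
pip3 show tensorrt_llm | head -n 2
```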
```
root@trt-llama-74cc8596c7-6zcv2:/# find . -name "*examples*"
./opt/hpcx/sharp/share/sharp/examples
./opt/hpcx/ucx/share/ucx/examples
./usr/local/lib/python3.12/dist-packages/torch/_export/db/examples
./usr/local/lib/python3.12/dist-packages/torch/testing/_internal/distributed/rpc/examples
./usr/local/lib/python3.12/dist-packages/torch/utils/benchmark/examples
./usr/local/lib/python3.12/dist-packages/accelerate/test_utils/__pycache__/examples.cpython-312.pyc
./usr/local/lib/python3.12/dist-packages/accelerate/test_utils/examples.py
./usr/local/lib/python3.12/dist-packages/numpy/core/tests/examples
./usr/local/lib/python3.12/dist-packages/numpy/random/_examples
./usr/local/lib/python3.12/dist-packages/pulp/tests/__pycache__/test_examples.cpython-312.pyc
./usr/local/lib/python3.12/dist-packages/pulp/tests/test_examples.py
./usr/local/lib/python3.12/dist-packages/pyarrow/tests/__pycache__/pandas_examples.cpython-312.pyc
./usr/local/lib/python3.12/dist-packages/pyarrow/tests/__pycache__/wsgi_examples.cpython-312.pyc
./usr/local/lib/python3.12/dist-packages/pyarrow/tests/pandas_examples.py
./usr/local/lib/python3.12/dist-packages/pyarrow/tests/wsgi_examples.py
./usr/local/lib/python3.12/dist-packages/scipy/linalg/tests/_cython_examples
./usr/local/lib/python3.12/dist-packages/scipy/optimize/tests/_cython_examples
./usr/local/lib/python3.12/dist-packages/scipy/special/tests/_cython_examples
./usr/local/lib/python3.12/dist-packages/sympy/parsing/autolev/test-examples
./usr/share/doc/apt/examples
./usr/share/doc/libp11-kit0/examples
./usr/share/doc/libpam-modules/examples
./usr/share/doc/mawk/examples
./usr/share/doc/mount/examples
./usr/share/doc/passwd/examples
./usr/share/doc/procps/examples
./usr/share/doc/sed/examples
./usr/share/doc/util-linux/examples
./usr/share/doc/comerr-dev/examples
./usr/share/doc/libarchive-dev/examples
./usr/share/doc/libzstd-dev/examples
./usr/share/doc/nettle-dev/examples
./usr/share/doc/gperf/examples
./usr/share/doc/liblzma-dev/examples
./usr/share/doc/liblzma-dev/examples_old
./usr/share/doc/libstemmer0d/examples
./usr/share/doc/polkitd/examples
./usr/share/doc/sgml-base/examples
./usr/share/doc/xml-core/examples
./usr/share/doc/libexpat1-dev/examples
./usr/share/doc/zlib1g-dev/examples
./usr/share/doc/git/contrib/examples
./usr/share/doc/liberror-perl/examples
./usr/share/doc/nano/examples
./usr/share/doc/adduser/examples
./usr/share/doc/adduser/examples/adduser.local.conf.examples
./usr/share/doc/apt-utils/examples
./usr/share/doc/ca-certificates/examples
./usr/share/doc/gnupg/examples
./usr/share/doc/gpg-agent/examples
./usr/share/doc/gpgconf/examples
./usr/share/doc/libjansson4/examples
./usr/share/doc/libreadline8t64/examples
./usr/share/doc/rsync/examples
./usr/share/vim/vim91/macros/urm/examples
```
### Expected behavior

Expected to find the /app directory. It cannot be found in either container above, but it is present in the nvcr.io/nvidia/tritonserver:25.03-trtllm-python-py3 container:

```
root@trt-llama-5b84f4b768-5vn9x:/opt/tritonserver# ls /app
all_models  client  examples  scripts  tools
```
### Actual behavior

```
root@trt-llama-74cc8596c7-45fd6:/opt/tritonserver# cd /app
bash: cd: /app: No such file or directory
```
### Additional notes

The /app directory is present in nvcr.io/nvidia/tritonserver:25.03-trtllm-python-py3, which I start in exactly the same way:

```yaml
# vllm-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: trt-llama
  namespace: tsr
spec:
  replicas: 1
  selector:
    matchLabels:
      app: trt-llama
  template:
    metadata:
      labels:
        app: trt-llama
    spec:
      containers:
        - name: trt-container
          image: nvcr.io/nvidia/tritonserver:25.03-trtllm-python-py3
          command: ["/bin/bash", "-c", "sleep infinity"]
          ports:
            - containerPort: 8000
              name: http
            - containerPort: 8001
              name: grpc
            - containerPort: 8002
              name: metrics
          resources:
            limits:
              nvidia.com/gpu: 1
          volumeMounts:
            - name: model-storage
              mountPath: /models
      volumes:
        - name: model-storage
          persistentVolumeClaim:
            claimName: tsr-pvc
```
It doesn't work with the other two containers (24.12 and 24.07).
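As a possible workaround (my assumption, not an official instruction): at least the `all_models`, `scripts`, and `tools` directories that 25.03 ships under /app appear to come from the triton-inference-server/tensorrtllm_backend repository, so checking out the tag matching the installed wheel might recover them:

```bash
# Hypothetical workaround: fetch the backend repo at the tag matching
# the tensorrt_llm 0.16.0 wheel found above. The v0.16.0 tag name is
# an assumption inferred from the wheel version.
git clone -b v0.16.0 --single-branch \
    https://github.com/triton-inference-server/tensorrtllm_backend.git /app
ls /app   # expect all_models, scripts, tools, among others
```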