vllm-project · ApostaC · May 16, 2025 · May 19, 2025 · May 19, 2025 · May 20, 2025
diff --git a/tests/v1/kv_connector/cpu_kv_integration/__init__.py b/tests/v1/kv_connector/cpu_kv_integration/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# Empty init file to mark directory as Python package
diff --git a/tests/v1/kv_connector/cpu_kv_integration/online_test.sh b/tests/v1/kv_connector/cpu_kv_integration/online_test.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Start one side of a CPUConnector prefiller/decoder pair with `vllm serve`.
+# Usage: online_test.sh <prefiller | decoder> [model]
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+if [[ $# -lt 1 ]]; then
+    echo "Usage: $0 <prefiller | decoder> [model]" >&2
+    exit 1
+fi
+
+# The model argument is optional; use the default when only a role is given.
+if [[ $# -eq 1 ]]; then
+    echo "Using default model: meta-llama/Llama-3.1-8B-Instruct"
+    MODEL="meta-llama/Llama-3.1-8B-Instruct"
+else
+    echo "Using model: $2"
+    MODEL="$2"
+fi
+
+if [[ $1 == "prefiller" ]]; then
+    # Prefiller (kv_producer) listens on port 8100 and is pinned to GPU 0
+    #UCX_TLS=cuda_ipc,cuda_copy,tcp \
+        VLLM_ENABLE_V1_MULTIPROCESSING=1 \
+        VLLM_WORKER_MULTIPROC_METHOD=spawn \
+        CUDA_VISIBLE_DEVICES=0 \
+        vllm serve "$MODEL" \
+        --port 8100 \
+        --disable-log-requests \
+        --enforce-eager \
+        --kv-transfer-config \
+        '{"kv_connector":"CPUConnector","kv_role":"kv_producer","kv_connector_extra_config": {"host": "localhost", "port": "54321", "size": 40}}'
+
+elif [[ $1 == "decoder" ]]; then
+    # Decoder (kv_consumer) listens on port 8200 and is pinned to GPU 1
+    #UCX_TLS=cuda_ipc,cuda_copy,tcp \
+        VLLM_ENABLE_V1_MULTIPROCESSING=1 \
+        VLLM_WORKER_MULTIPROC_METHOD=spawn \
+        CUDA_VISIBLE_DEVICES=1 \
+        vllm serve "$MODEL" \
+        --port 8200 \
+        --disable-log-requests \
+        --enforce-eager \
+        --kv-transfer-config \
+        '{"kv_connector":"CPUConnector","kv_role":"kv_consumer","kv_connector_extra_config": {"host": "localhost", "port": "54321", "size": 40}}'
+
+
+else
+    echo "Invalid role: $1" >&2
+    echo "Should be either prefiller, decoder" >&2
+    exit 1
+fi
diff --git a/tests/v1/kv_connector/cpu_kv_integration/output.txt b/tests/v1/kv_connector/cpu_kv_integration/output.txt
diff --git a/tests/v1/kv_connector/cpu_kv_integration/output_decode.txt b/tests/v1/kv_connector/cpu_kv_integration/output_decode.txt
@@ -0,0 +1,4 @@
+ Hi Hi Hi Hi Hello, my name is [Your Name] and I am a [Your
+Hi Hi The capital of France is Paris. The capital of France is Paris. The
+Hello Hello Hello Your name is not in the list. Please check your email for
+ow How The capital of China is Beijing. Beijing is a city in northern China.
diff --git a/tests/v1/kv_connector/cpu_kv_integration/run_nsys.sh b/tests/v1/kv_connector/cpu_kv_integration/run_nsys.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+if [[ $1 == "decoder" ]]; then
+echo "Running decoder"
+CUDA_VISIBLE_DEVICES=7 nsys profile \
+        --trace=cuda,nvtx,osrt \
+	--gpu-metrics-devices=cuda-visible \
+	--python-sampling=true \
+	--trace-fork-before-exec=true \
+        --output=decoder \
+        --force-overwrite=true \
+        python3 toy_decode.py
+
+else
+echo "Running prefiller"
+CUDA_VISIBLE_DEVICES=6 nsys profile \
+        --trace=cuda,nvtx,osrt \
+	--gpu-metrics-devices=cuda-visible \
+	--python-sampling=true \
+	--trace-fork-before-exec=true \
+        --output=prefiller \
+        --force-overwrite=true \
+        python3 toy_example.py
+fi
diff --git a/tests/v1/kv_connector/cpu_kv_integration/temptest.py b/tests/v1/kv_connector/cpu_kv_integration/temptest.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: Apache-2.0
+from vllm.distributed.kv_transfer.kv_connector.v1.nixl_cpu_utils import (
+    NixlKVSender)
+
+sender = NixlKVSender(1024 * 1024 * 1024)
+
+sender.close()
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# SPDX-License-Identifier: Apache-2.0
		# Empty init file to mark directory as Python package