InftyAI · InftyAI-Agent · Mar 18, 2025 · Mar 18, 2025
diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
@@ -14,6 +14,42 @@ spec:
     - vllm.entrypoints.openai.api_server
   image: vllm/vllm-openai
   version: v0.7.3
+  # Graceful shutdown: the preStop hook polls vLLM's metrics endpoint and only
+  # returns once no requests are running or waiting, or once the endpoint has
+  # been unreachable for several polls in a row.
+  lifecycle:
+    preStop:
+      exec:
+        command:
+          - /bin/sh
+          - -c
+          - |
+            # Consecutive failed metric fetches; reset after every success.
+            FAILURES=0
+            while true; do
+              # Fetch once per poll so both gauges come from the same snapshot.
+              if METRICS=$(curl -sf --max-time 5 http://localhost:8000/metrics); then
+                FAILURES=0
+                # Sum all samples of each gauge (skipping "# HELP"/"# TYPE" lines) so
+                # several label sets or a "0" vs "0.0" rendering cannot block the exit.
+                RUNNING=$(printf '%s\n' "$METRICS" | awk '/^vllm:num_requests_running/ { sum += $2 } END { print sum + 0 }')
+                WAITING=$(printf '%s\n' "$METRICS" | awk '/^vllm:num_requests_waiting/ { sum += $2 } END { print sum + 0 }')
+                if [ "$RUNNING" = "0" ] && [ "$WAITING" = "0" ]; then
+                  echo "Terminating: No active or waiting requests, safe to terminate" >> /proc/1/fd/1
+                  exit 0
+                fi
+                echo "Terminating: Running: $RUNNING, Waiting: $WAITING" >> /proc/1/fd/1
+              else
+                # A server that is already gone has nothing left to drain; stop
+                # waiting after 3 straight failures instead of looping forever.
+                FAILURES=$((FAILURES + 1))
+                if [ "$FAILURES" -ge 3 ]; then
+                  echo "Terminating: Metrics endpoint unreachable, giving up waiting" >> /proc/1/fd/1
+                  exit 0
+                fi
+              fi
+              sleep 5
+            done
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
   recommendedConfigs: