[V0][Metrics] Remove unimplemented vllm:tokens_total

markmc · markmc · commit 28f79644ec64 · 2025-03-03T06:02:45.000-05:00
It looks like vllm-project#4464 intended to add this alongside the vllm:iteration_tokens_total histogram, but never actually hooked it up, so it would never have appeared in /metrics. Since it's clearly not critical to anyone, let's just remove it until we hear of a need for it. Signed-off-by: Mark McLoughlin <markmc@redhat.com>
diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
@@ -115,10 +115,6 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
             name="vllm:generation_tokens_total",
             documentation="Number of generation tokens processed.",
             labelnames=labelnames)
-        self.counter_tokens = self._counter_cls(
-            name="vllm:tokens_total",
-            documentation="Number of prefill plus generation tokens processed.",
-            labelnames=labelnames)
         buckets = [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8096]
         if not vllm_config.model_config.enforce_eager:
             buckets = vllm_config.compilation_config.\