Skip to content

Commit c8525f0

Browse files
authored
[V0][Metrics] Deprecate some questionable request time metrics (#14135)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
1 parent 5db6b2c commit c8525f0

File tree

1 file changed

+17
-6
lines changed

1 file changed

+17
-6
lines changed

vllm/engine/metrics.py

Lines changed: 17 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -197,24 +197,35 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
197197
"Histogram of time spent in DECODE phase for request.",
198198
labelnames=labelnames,
199199
buckets=request_latency_buckets)
200+
# Deprecated in 0.8 - duplicates vllm:request_queue_time_seconds:
201+
# TODO: in 0.9, only enable if show_hidden_metrics=True
200202
self.histogram_time_in_queue_request = self._histogram_cls(
201203
name="vllm:time_in_queue_requests",
202-
documentation=
203-
"Histogram of time the request spent in the queue in seconds.",
204+
documentation=(
205+
"Histogram of time the request spent in the queue in seconds. "
206+
"DEPRECATED: use vllm:request_queue_time_seconds instead."),
204207
labelnames=labelnames,
205208
buckets=request_latency_buckets)
209+
210+
# Deprecated in 0.8 - use prefill/decode/inference time metrics
211+
# TODO: in 0.9, only enable if show_hidden_metrics=True
206212
self.histogram_model_forward_time_request = self._histogram_cls(
207213
name="vllm:model_forward_time_milliseconds",
208-
documentation=
209-
"Histogram of time spent in the model forward pass in ms.",
214+
documentation=(
215+
"Histogram of time spent in the model forward pass in ms. "
216+
"DEPRECATED: use prefill/decode/inference time metrics instead."
217+
),
210218
labelnames=labelnames,
211219
buckets=build_1_2_3_5_8_buckets(3000))
212220
self.histogram_model_execute_time_request = self._histogram_cls(
213221
name="vllm:model_execute_time_milliseconds",
214-
documentation=
215-
"Histogram of time spent in the model execute function in ms.",
222+
documentation=(
223+
"Histogram of time spent in the model execute function in ms. "
224+
"DEPRECATED: use prefill/decode/inference time metrics instead."
225+
),
216226
labelnames=labelnames,
217227
buckets=build_1_2_3_5_8_buckets(3000))
228+
218229
# Metadata
219230
self.histogram_num_prompt_tokens_request = self._histogram_cls(
220231
name="vllm:request_prompt_tokens",

0 commit comments

Comments
 (0)