@@ -197,24 +197,35 @@ def __init__(self, labelnames: List[str], vllm_config: VllmConfig):
197
197
"Histogram of time spent in DECODE phase for request." ,
198
198
labelnames = labelnames ,
199
199
buckets = request_latency_buckets )
200
+ # Deprecated in 0.8 - duplicates vllm:request_queue_time_seconds:
201
+ # TODO: in 0.9, only enable if show_hidden_metrics=True
200
202
self .histogram_time_in_queue_request = self ._histogram_cls (
201
203
name = "vllm:time_in_queue_requests" ,
202
- documentation =
203
- "Histogram of time the request spent in the queue in seconds." ,
204
+ documentation = (
205
+ "Histogram of time the request spent in the queue in seconds. "
206
+ "DEPRECATED: use vllm:request_queue_time_seconds instead." ),
204
207
labelnames = labelnames ,
205
208
buckets = request_latency_buckets )
209
+
210
+ # Deprecated in 0.8 - use prefill/decode/inference time metrics
211
+ # TODO: in 0.9, only enable if show_hidden_metrics=True
206
212
self .histogram_model_forward_time_request = self ._histogram_cls (
207
213
name = "vllm:model_forward_time_milliseconds" ,
208
- documentation =
209
- "Histogram of time spent in the model forward pass in ms." ,
214
+ documentation = (
215
+ "Histogram of time spent in the model forward pass in ms. "
216
+ "DEPRECATED: use prefill/decode/inference time metrics instead."
217
+ ),
210
218
labelnames = labelnames ,
211
219
buckets = build_1_2_3_5_8_buckets (3000 ))
212
220
self .histogram_model_execute_time_request = self ._histogram_cls (
213
221
name = "vllm:model_execute_time_milliseconds" ,
214
- documentation =
215
- "Histogram of time spent in the model execute function in ms." ,
222
+ documentation = (
223
+ "Histogram of time spent in the model execute function in ms. "
224
+ "DEPRECATED: use prefill/decode/inference time metrics instead."
225
+ ),
216
226
labelnames = labelnames ,
217
227
buckets = build_1_2_3_5_8_buckets (3000 ))
228
+
218
229
# Metadata
219
230
self .histogram_num_prompt_tokens_request = self ._histogram_cls (
220
231
name = "vllm:request_prompt_tokens" ,
0 commit comments