Skip to content

Commit 322ecef

Browse files
yuz207
authored and committed
feat(metrics): add NWOR metrics logging and Prometheus counters
- Log NWOR (Number of Words Or Rejected) stats including mode, committed, rejected, fallback, and reason in LoggingStatLogger.
- Introduce Prometheus counters and a gauge for tracking NWOR committed tokens, rejected tokens, fallbacks, and activation state in PrometheusStatLogger.
- Increment NWOR counters and update the gauge based on scheduler stats during metric logging.

This enhancement improves observability of NWOR behavior in the engine metrics.
1 parent 4ef4c62 commit 322ecef

File tree

1 file changed

+67
-0
lines changed

1 file changed

+67
-0
lines changed

vllm/v1/metrics/loggers.py

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,24 @@ def log(self):
165165
*log_args,
166166
)
167167

168+
if scheduler_stats.nwor_stats:
169+
nwor = scheduler_stats.nwor_stats
170+
mode = nwor.get("mode", "stage")
171+
committed = nwor.get("committed", 0)
172+
rejected = nwor.get("rejected", 0)
173+
fallback = nwor.get("fallback", 0)
174+
reason = nwor.get("reason")
175+
extra = f", reason={reason}" if reason else ""
176+
log_fn(
177+
"Engine %03d: NWOR mode=%s committed=%s rejected=%s fallback=%s%s",
178+
self.engine_index,
179+
mode,
180+
committed,
181+
rejected,
182+
fallback,
183+
extra,
184+
)
185+
168186
self.spec_decoding_logging.log(log_fn=log_fn)
169187
self.kv_connector_logging.log(log_fn=log_fn)
170188

@@ -339,6 +357,44 @@ def __init__(
339357
counter_mm_cache_hits, engine_indexes, model_name
340358
)
341359

360+
self.counter_nwor_committed = make_per_engine(
361+
self._counter_cls(
362+
name="vllm:nwor_committed_tokens",
363+
documentation="Number of tokens committed via NWOR in this engine.",
364+
labelnames=labelnames,
365+
),
366+
engine_indexes,
367+
model_name,
368+
)
369+
self.counter_nwor_rejected = make_per_engine(
370+
self._counter_cls(
371+
name="vllm:nwor_rejected_tokens",
372+
documentation="Number of draft tokens rejected by NWOR.",
373+
labelnames=labelnames,
374+
),
375+
engine_indexes,
376+
model_name,
377+
)
378+
self.counter_nwor_fallbacks = make_per_engine(
379+
self._counter_cls(
380+
name="vllm:nwor_fallbacks",
381+
documentation="Number of NWOR fallbacks triggered.",
382+
labelnames=labelnames,
383+
),
384+
engine_indexes,
385+
model_name,
386+
)
387+
self.gauge_nwor_enabled = make_per_engine(
388+
self._gauge_cls(
389+
name="vllm:nwor_enabled",
390+
documentation="Whether NWOR is active for this engine (1=yes, 0=no).",
391+
multiprocess_mode="mostrecent",
392+
labelnames=labelnames,
393+
),
394+
engine_indexes,
395+
model_name,
396+
)
397+
342398
#
343399
# Counters
344400
#
@@ -744,6 +800,17 @@ def record(
744800
scheduler_stats.spec_decoding_stats, engine_idx
745801
)
746802

803+
if scheduler_stats.nwor_stats is not None:
804+
nwor = scheduler_stats.nwor_stats
805+
committed = int(nwor.get("committed", 0))
806+
rejected = int(nwor.get("rejected", 0))
807+
fallback = int(nwor.get("fallback", 0))
808+
mode = nwor.get("mode", "stage")
809+
self.counter_nwor_committed[engine_idx].inc(committed)
810+
self.counter_nwor_rejected[engine_idx].inc(rejected)
811+
self.counter_nwor_fallbacks[engine_idx].inc(fallback)
812+
self.gauge_nwor_enabled[engine_idx].set(1 if mode == "stage" else 0)
813+
747814
if mm_cache_stats is not None:
748815
self.counter_mm_cache_queries[engine_idx].inc(mm_cache_stats.queries)
749816
self.counter_mm_cache_hits[engine_idx].inc(mm_cache_stats.hits)

0 commit comments

Comments
 (0)