Skip to content

Commit d7b7f4b

Browse files
committed
modify to keep the original comment info
Signed-off-by: sitloboi2012 <huyvo6812@gmail.com>
1 parent beaaa62 commit d7b7f4b

File tree

1 file changed

+10
-6
lines changed

1 file changed

+10
-6
lines changed

src/vllm_router/request_stats.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,17 @@ def __init__(self, sliding_window_size: float):
8787
"""
8888
self.sliding_window_size = sliding_window_size
8989

90-
# Monitors for calculating QPS and TTFT
90+
# Finished requests for each serving engine
91+
# The elements in the deque should be sorted by 'complete' time
9192
self.qps_monitors: Dict[str, MovingAverageMonitor] = {}
9293
self.ttft_monitors: Dict[str, MovingAverageMonitor] = {}
9394

94-
# Record initial request start time: (engine_url, request_id) -> timestamp
95+
# The time when each request arrives: (engine_url, request_id) -> timestamp
9596
self.request_start_time: Dict[Tuple[str, str], float] = {}
9697
# Record time when first token is received: (engine_url, request_id) -> timestamp
9798
self.first_token_time: Dict[Tuple[str, str], float] = {}
9899

99-
# Counters for requests in different stages
100+
# Number of requests in different stages (from the start of the router)
100101
self.in_prefill_requests: Dict[str, int] = {}
101102
self.in_decoding_requests: Dict[str, int] = {}
102103
self.finished_requests: Dict[str, int] = {}
@@ -195,13 +196,16 @@ def on_request_swapped(self, engine_url: str, request_id: str, timestamp: float)
195196

196197
def get_request_stats(self, current_time: float) -> Dict[str, RequestStats]:
197198
"""
198-
Get the request statistics from the monitor.
199+
Get the request statistics for each serving engine
199200
200201
Args:
201-
current_time: The current timestamp
202+
current_time: The current timestamp in seconds
202203
203204
Returns:
204-
A dictionary mapping engine URLs to RequestStats objects
205+
A dictionary where the key is the serving engine URL and the value
206+
is the request statistics for that engine.
207+
The TTFT and inter-token latency will be -1 if there are no requests
208+
finished in the sliding window.
205209
"""
206210
ret = {}
207211
urls = set(self.in_prefill_requests.keys()).union(

0 commit comments

Comments
 (0)