@@ -201,14 +201,21 @@ class PoolingRequestOutput(Generic[_O]):
201201 request_id (str): A unique identifier for the pooling request.
202202 outputs (PoolingOutput): The pooling results for the given input.
203203 prompt_token_ids (list[int]): A list of token IDs used in the prompt.
204+ num_cached_tokens (int): The number of tokens that hit the prefix cache.
204205 finished (bool): A flag indicating whether the pooling is completed.
205206 """
206207
207208 def __init__ (
208- self , request_id : str , outputs : _O , prompt_token_ids : list [int ], finished : bool
209+ self ,
210+ request_id : str ,
211+ outputs : _O ,
212+ prompt_token_ids : list [int ],
213+ num_cached_tokens : int ,
214+ finished : bool ,
209215 ):
210216 self .request_id = request_id
211217 self .prompt_token_ids = prompt_token_ids
218+ self .num_cached_tokens = num_cached_tokens
212219 self .finished = finished
213220 self .outputs = outputs
214221
@@ -217,6 +224,7 @@ def __repr__(self):
217224 f"{ type (self ).__name__ } (request_id={ self .request_id !r} , "
218225 f"outputs={ self .outputs !r} , "
219226 f"prompt_token_ids={ self .prompt_token_ids } , "
227+ f"num_cached_tokens={ self .num_cached_tokens } , "
220228 f"finished={ self .finished } )"
221229 )
222230
@@ -255,6 +263,7 @@ def from_base(request_output: PoolingRequestOutput):
255263 request_id = request_output .request_id ,
256264 outputs = EmbeddingOutput .from_base (request_output .outputs ),
257265 prompt_token_ids = request_output .prompt_token_ids ,
266+ num_cached_tokens = request_output .num_cached_tokens ,
258267 finished = request_output .finished ,
259268 )
260269
@@ -294,6 +303,7 @@ def from_base(request_output: PoolingRequestOutput):
294303 request_id = request_output .request_id ,
295304 outputs = ClassificationOutput .from_base (request_output .outputs ),
296305 prompt_token_ids = request_output .prompt_token_ids ,
306+ num_cached_tokens = request_output .num_cached_tokens ,
297307 finished = request_output .finished ,
298308 )
299309
@@ -330,5 +340,6 @@ def from_base(request_output: PoolingRequestOutput):
330340 request_id = request_output .request_id ,
331341 outputs = ScoringOutput .from_base (request_output .outputs ),
332342 prompt_token_ids = request_output .prompt_token_ids ,
343+ num_cached_tokens = request_output .num_cached_tokens ,
333344 finished = request_output .finished ,
334345 )
0 commit comments