 implementations.
 """

-import uuid
-from typing import Any, Literal, Optional
+from __future__ import annotations
+
+from typing import Literal

 from pydantic import Field

+from guidellm.data import (
+    GenerationRequest,
+    GenerationRequestArguments,
+    GenerationRequestTimings,
+)
 from guidellm.scheduler import (
-    MeasuredRequestTimings,
     SchedulerMessagingPydanticRegistry,
 )
 from guidellm.utils import StandardBaseModel

 __all__ = [
     "GenerationRequest",
+    "GenerationRequestArguments",
     "GenerationRequestTimings",
     "GenerationResponse",
+    "GenerationTokenStats",
 ]


 @SchedulerMessagingPydanticRegistry.register()
-class GenerationRequest(StandardBaseModel):
-    """Request model for backend generation operations."""
+class GenerationTokenStats(StandardBaseModel):
+    """Token statistics for generation requests and responses."""

-    request_id: str = Field(
-        default_factory=lambda: str(uuid.uuid4()),
-        description="Unique identifier for the request.",
-    )
-    request_type: Literal["text_completions", "chat_completions"] = Field(
-        default="text_completions",
-        description=(
-            "Type of request. 'text_completions' uses backend.text_completions(), "
-            "'chat_completions' uses backend.chat_completions()."
-        ),
-    )
-    content: Any = Field(
-        description=(
-            "Request content. For text_completions: string or list of strings. "
-            "For chat_completions: string, list of messages, or raw content "
-            "(set raw_content=True in params)."
-        )
-    )
-    params: dict[str, Any] = Field(
-        default_factory=dict,
-        description=(
-            "Additional parameters passed to backend methods. "
-            "Common: max_tokens, temperature, stream."
-        ),
+    request: int | None = Field(
+        default=None, description="Number of tokens in the original request."
     )
-    stats: dict[Literal["prompt_tokens"], int] = Field(
-        default_factory=dict,
-        description="Request statistics including prompt token count.",
-    )
-    constraints: dict[Literal["output_tokens"], int] = Field(
-        default_factory=dict,
-        description="Request constraints such as maximum output tokens.",
+    response: int | None = Field(
+        default=None, description="Number of tokens in the generated response."
     )

+    def value(
+        self, preference: Literal["request", "response"] | None = None
+    ) -> int | None:
+        if preference == "request":
+            return self.request
+        if preference == "response":
+            return self.response
+        return self.response if self.response is not None else self.request
+

 @SchedulerMessagingPydanticRegistry.register()
 class GenerationResponse(StandardBaseModel):
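
Review note: the fallback order in `GenerationTokenStats.value()` is the heart of this refactor, so here is a minimal standalone sketch of it. `TokenStatsSketch` is a hypothetical stand-in (it drops the `StandardBaseModel`/pydantic dependency so the snippet runs on its own); only the `value()` body mirrors the diff.

```python
from typing import Literal, Optional


class TokenStatsSketch:
    """Hypothetical stand-in for GenerationTokenStats; value() mirrors the diff."""

    def __init__(
        self, request: Optional[int] = None, response: Optional[int] = None
    ) -> None:
        self.request = request    # token count from the request side
        self.response = response  # token count reported by the backend

    def value(
        self, preference: Optional[Literal["request", "response"]] = None
    ) -> Optional[int]:
        # An explicit preference returns that side verbatim, even if it is None.
        if preference == "request":
            return self.request
        if preference == "response":
            return self.response
        # No preference: the backend-reported response count wins when available.
        return self.response if self.response is not None else self.request


stats = TokenStatsSketch(request=128, response=131)
assert stats.value() == 131                      # response wins by default
assert stats.value(preference="request") == 128  # explicit side, no fallback
assert TokenStatsSketch(request=128).value(preference="response") is None
```

Note that an explicit `preference` returns that side verbatim, even when it is `None`, rather than falling back to the other side; the removed `preferred_prompt_tokens`/`preferred_output_tokens` helpers in the hunk below did fall back via `or`.
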
@@ -70,87 +59,32 @@ class GenerationResponse(StandardBaseModel):
     request_id: str = Field(
         description="Unique identifier matching the original GenerationRequest."
     )
-    request_args: dict[str, Any] = Field(
+    request_args: GenerationRequestArguments = Field(
         description="Arguments passed to the backend for this request."
     )
-    value: Optional[str] = Field(
+    text: str | None = Field(
         default=None,
-        description="Complete generated text content. None for streaming responses.",
-    )
-    delta: Optional[str] = Field(
-        default=None, description="Incremental text content for streaming responses."
+        description="The generated response text.",
     )
     iterations: int = Field(
         default=0, description="Number of generation iterations completed."
     )
-    request_prompt_tokens: Optional[int] = Field(
-        default=None, description="Token count from the original request prompt."
-    )
-    request_output_tokens: Optional[int] = Field(
-        default=None,
-        description="Expected output token count from the original request.",
-    )
-    response_prompt_tokens: Optional[int] = Field(
-        default=None, description="Actual prompt token count reported by the backend."
+
+    prompt_stats: GenerationTokenStats = Field(
+        default_factory=GenerationTokenStats,
+        description="Token statistics from the prompt.",
     )
-    response_output_tokens: Optional[int] = Field(
-        default=None, description="Actual output token count reported by the backend."
+    output_stats: GenerationTokenStats = Field(
+        default_factory=GenerationTokenStats,
+        description="Token statistics from the generated output.",
     )

-    @property
-    def prompt_tokens(self) -> Optional[int]:
-        """
-        :return: The number of prompt tokens used in the request
-            (response_prompt_tokens if available, otherwise request_prompt_tokens).
-        """
-        return self.response_prompt_tokens or self.request_prompt_tokens
-
-    @property
-    def output_tokens(self) -> Optional[int]:
-        """
-        :return: The number of output tokens generated in the response
-            (response_output_tokens if available, otherwise request_output_tokens).
-        """
-        return self.response_output_tokens or self.request_output_tokens
-
-    @property
-    def total_tokens(self) -> Optional[int]:
-        """
-        :return: The total number of tokens used in the request and response.
-            Sum of prompt_tokens and output_tokens.
-        """
-        if self.prompt_tokens is None or self.output_tokens is None:
-            return None
-        return self.prompt_tokens + self.output_tokens
-
-    def preferred_prompt_tokens(
-        self, preferred_source: Literal["request", "response"]
-    ) -> Optional[int]:
-        if preferred_source == "request":
-            return self.request_prompt_tokens or self.response_prompt_tokens
-        else:
-            return self.response_prompt_tokens or self.request_prompt_tokens
-
-    def preferred_output_tokens(
-        self, preferred_source: Literal["request", "response"]
-    ) -> Optional[int]:
-        if preferred_source == "request":
-            return self.request_output_tokens or self.response_output_tokens
-        else:
-            return self.response_output_tokens or self.request_output_tokens
-
-
-@SchedulerMessagingPydanticRegistry.register()
-@MeasuredRequestTimings.register("generation_request_timings")
-class GenerationRequestTimings(MeasuredRequestTimings):
-    """Timing model for tracking generation request lifecycle events."""
+    def total_tokens(
+        self, preference: Literal["request", "response"] | None = None
+    ) -> int | None:
+        prompt_tokens = self.prompt_stats.value(preference=preference)
+        output_tokens = self.output_stats.value(preference=preference)

-    timings_type: Literal["generation_request_timings"] = "generation_request_timings"
-    first_iteration: Optional[float] = Field(
-        default=None,
-        description="Unix timestamp when the first generation iteration began.",
-    )
-    last_iteration: Optional[float] = Field(
-        default=None,
-        description="Unix timestamp when the last generation iteration completed.",
-    )
+        if prompt_tokens is None and output_tokens is None:
+            return None
+        return (prompt_tokens or 0) + (output_tokens or 0)
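
One behavioral change worth flagging in `total_tokens`: the removed property returned `None` whenever either count was missing, while the new method returns `None` only when both are missing and otherwise treats a missing side as zero. A minimal sketch of the new semantics as a pure function (`total_tokens_sketch` is an illustrative name, not part of the codebase):

```python
from typing import Optional


def total_tokens_sketch(
    prompt_tokens: Optional[int], output_tokens: Optional[int]
) -> Optional[int]:
    # Mirrors the new GenerationResponse.total_tokens(): None only when
    # neither side has a count; otherwise a missing side counts as zero.
    if prompt_tokens is None and output_tokens is None:
        return None
    return (prompt_tokens or 0) + (output_tokens or 0)


assert total_tokens_sketch(None, None) is None  # nothing reported at all
assert total_tokens_sketch(128, None) == 128    # the old property returned None here
assert total_tokens_sketch(128, 42) == 170
```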