@@ -17,6 +17,8 @@ const (
17
17
generationSuccess = "$ld:ai:generation:success"
18
18
generationError = "$ld:ai:generation:error"
19
19
//nolint:gosec
20
+ timeToFirstToken = "$ld:ai:tokens:ttf"
21
+ //nolint:gosec
20
22
tokenTotal = "$ld:ai:tokens:total"
21
23
//nolint:gosec
22
24
tokenInput = "$ld:ai:tokens:input"
@@ -43,11 +45,8 @@ func (t TokenUsage) Set() bool {
43
45
type Metrics struct {
44
46
// Latency is the latency of the request.
45
47
Latency time.Duration
46
- }
47
-
48
- // Set returns true if the latency is non-zero.
49
- func (m Metrics ) Set () bool {
50
- return m .Latency != 0
48
+ // TimeToFirstToken is the time to the first token of the streamed response. TrackRequest only reports it when it is non-zero.
49
+ TimeToFirstToken time.Duration
51
50
}
52
51
53
52
// ProviderResponse represents the response from a model provider for a specific request.
@@ -203,6 +202,11 @@ func (t *Tracker) TrackError() error {
203
202
return err
204
203
}
205
204
205
+ // TrackTimeToFirstToken tracks the time to the first token of the streamed response. The duration is reported under the timeToFirstToken metric key in whole milliseconds, and the error from the underlying TrackMetric call is returned unchanged.
206
+ func (t * Tracker ) TrackTimeToFirstToken (dur time.Duration ) error {
207
+ return t .events .TrackMetric (timeToFirstToken , t .context , float64 (dur .Milliseconds ()), t .trackData )
208
+ }
209
+
206
210
// TrackUsage tracks the token usage for a model evaluation.
207
211
func (t * Tracker ) TrackUsage (usage TokenUsage ) error {
208
212
var failed bool
@@ -270,7 +274,7 @@ func (t *Tracker) TrackRequest(task func(c *Config) (ProviderResponse, error)) (
270
274
t .logWarning ("error tracking success metric for request: %v" , err )
271
275
}
272
276
273
- if usage .Metrics .Set () {
277
+ if usage .Metrics .Latency != 0 {
274
278
if err := t .TrackDuration (usage .Metrics .Latency ); err != nil {
275
279
t .logWarning ("error tracking duration metric (user provided) for request: %v" , err )
276
280
}
@@ -280,6 +284,12 @@ func (t *Tracker) TrackRequest(task func(c *Config) (ProviderResponse, error)) (
280
284
}
281
285
}
282
286
287
+ if usage .Metrics .TimeToFirstToken != 0 {
288
+ if err := t .TrackTimeToFirstToken (usage .Metrics .TimeToFirstToken ); err != nil {
289
+ t .logWarning ("error tracking time to first token metric for request: %v" , err )
290
+ }
291
+ }
292
+
283
293
if usage .Usage .Set () {
284
294
// TrackUsage logs errors.
285
295
_ = t .TrackUsage (usage .Usage )
0 commit comments