Skip to content

Commit

Permalink
Add stages to the firecracker_stage_duration_usec metric
Browse files Browse the repository at this point in the history
 I added the `pull_image` and `remote` stages, and I split `init` into `create` and `unpause`.
  • Loading branch information
vanja-p committed Feb 3, 2025
1 parent 5919181 commit 1357dd6
Showing 1 changed file with 15 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ type FirecrackerContainer struct {
//
// This can be used to understand the total time it takes to execute a task,
// including VM startup time
currentTaskInitTimeUsec int64
currentTaskInitTimeUsec time.Time

executorConfig *ExecutorConfig

Expand Down Expand Up @@ -1780,12 +1780,12 @@ func (c *FirecrackerContainer) Create(ctx context.Context, actionWorkingDir stri
createTime := time.Since(start)
log.CtxDebugf(ctx, "Create took %s", createTime)

c.observeStageDuration("init", createTime.Microseconds())
c.observeStageDuration("create", createTime)
return err
}

func (c *FirecrackerContainer) create(ctx context.Context) error {
c.currentTaskInitTimeUsec = time.Now().UnixMicro()
c.currentTaskInitTimeUsec = time.Now()
c.rmOnce = &sync.Once{}
c.rmErr = nil

Expand Down Expand Up @@ -1876,7 +1876,7 @@ func (c *FirecrackerContainer) create(ctx context.Context) error {
return nil
}

func (c *FirecrackerContainer) SendExecRequestToGuest(ctx context.Context, conn *grpc.ClientConn, cmd *repb.Command, workDir string, stdio *interfaces.Stdio) (_ *interfaces.CommandResult, healthy bool) {
func (c *FirecrackerContainer) sendExecRequestToGuest(ctx context.Context, conn *grpc.ClientConn, cmd *repb.Command, workDir string, stdio *interfaces.Stdio) (_ *interfaces.CommandResult, healthy bool) {
ctx, span := tracing.StartSpan(ctx)
defer span.End()

Expand Down Expand Up @@ -1954,7 +1954,6 @@ func (c *FirecrackerContainer) SendExecRequestToGuest(ctx context.Context, conn
return res, false
}
}

func (c *FirecrackerContainer) vmExecConn(ctx context.Context) (*grpc.ClientConn, error) {
conn, _, err := c.vmExec.singleflight.Do(ctx, "",
func(ctx context.Context) (*grpc.ClientConn, error) {
Expand Down Expand Up @@ -2072,9 +2071,9 @@ func (c *FirecrackerContainer) Exec(ctx context.Context, cmd *repb.Command, stdi
execDuration := time.Since(start)
log.CtxDebugf(ctx, "Exec took %s", execDuration)

timeSinceContainerInit := time.Since(time.UnixMicro(c.currentTaskInitTimeUsec))
c.observeStageDuration("task_lifecycle", timeSinceContainerInit.Microseconds())
c.observeStageDuration("exec", execDuration.Microseconds())
timeSinceContainerInit := time.Since(c.currentTaskInitTimeUsec)
c.observeStageDuration("task_lifecycle", timeSinceContainerInit)
c.observeStageDuration("exec", execDuration)
}()

if c.fsLayout == nil {
Expand Down Expand Up @@ -2139,7 +2138,7 @@ func (c *FirecrackerContainer) Exec(ctx context.Context, cmd *repb.Command, stdi
return commandutil.ErrorResult(status.InternalErrorf("Firecracker exec failed: failed to dial VM exec port: %s", err))
}

result, vmHealthy := c.SendExecRequestToGuest(ctx, conn, cmd, guestWorkspaceMountDir, stdio)
result, vmHealthy := c.sendExecRequestToGuest(ctx, conn, cmd, guestWorkspaceMountDir, stdio)

ctx, cancel = background.ExtendContextForFinalization(ctx, finalizationTimeout)
defer cancel()
Expand Down Expand Up @@ -2226,6 +2225,7 @@ func (c *FirecrackerContainer) PullImage(ctx context.Context, creds oci.Credenti

start := time.Now()
defer func() {
c.observeStageDuration("pull_image", time.Since(start))
log.CtxDebugf(ctx, "PullImage took %s", time.Since(start))
}()

Expand All @@ -2245,6 +2245,7 @@ func (c *FirecrackerContainer) Remove(ctx context.Context) error {

start := time.Now()
defer func() {
c.observeStageDuration("remove", time.Since(start))
log.CtxDebugf(ctx, "Remove took %s", time.Since(start))
}()

Expand Down Expand Up @@ -2433,7 +2434,7 @@ func (c *FirecrackerContainer) Pause(ctx context.Context) error {
pauseTime := time.Since(start)
log.CtxDebugf(ctx, "Pause took %s", pauseTime)

c.observeStageDuration("pause", pauseTime.Microseconds())
c.observeStageDuration("pause", pauseTime)
return err
}

Expand Down Expand Up @@ -2590,14 +2591,13 @@ func (c *FirecrackerContainer) Unpause(ctx context.Context) error {

unpauseTime := time.Since(start)
log.CtxDebugf(ctx, "Unpause took %s", unpauseTime)

c.observeStageDuration("init", unpauseTime.Microseconds())
c.observeStageDuration("unpause", unpauseTime)
return err
}

func (c *FirecrackerContainer) unpause(ctx context.Context) error {
c.recycled = true
c.currentTaskInitTimeUsec = time.Now().UnixMicro()
c.currentTaskInitTimeUsec = time.Now()

log.CtxInfof(ctx, "Unpausing VM")

Expand Down Expand Up @@ -2679,10 +2679,10 @@ func (c *FirecrackerContainer) parseSegFault(logTail string, cmdResult *interfac
return status.UnavailableErrorf("process hit a segfault:\n%s", logTail)
}

func (c *FirecrackerContainer) observeStageDuration(taskStage string, durationUsec int64) {
func (c *FirecrackerContainer) observeStageDuration(taskStage string, d time.Duration) {
metrics.FirecrackerStageDurationUsec.With(prometheus.Labels{
metrics.Stage: taskStage,
}).Observe(float64(durationUsec))
}).Observe(float64(d.Microseconds()))
}

func (c *FirecrackerContainer) SnapshotDebugString(ctx context.Context) string {
Expand Down

0 comments on commit 1357dd6

Please sign in to comment.