Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pkg/epp/scheduling/framework/plugins/multi/prefix/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,12 @@ func (p *Plugin) PreRequest(ctx context.Context, request *types.LLMRequest, sche
return
}

p.indexer.Add(state.PrefixHashes, ServerID(targetPod.NamespacedName))
// This function is just adding data, it does not need to block other operations.
// TODO: look into making this entire function async, none of this needs to be done in-band
// The PR that introduces this change is meant as a cherrypick, so it was minimally invasive.
go func() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Actually, the entire PreRequest can be done async. But this LGTM as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ack, I'll add a TODO comment so we can figure this out later. I tested with this change, and I'm trying to keep it minimally impactful since I plan to cherry-pick it into the v1 branch.

You're probably right, but just in case.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, PTAL

p.indexer.Add(state.PrefixHashes, ServerID(targetPod.NamespacedName))
}()

total := len(state.PrefixHashes)
matchLen := state.PrefixCacheServers[ServerID(targetPod.NamespacedName)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ func (p *MaxScorePicker) TypedName() plugins.TypedName {

// Pick selects the pod with the maximum score from the list of candidates.
func (p *MaxScorePicker) Pick(ctx context.Context, cycleState *types.CycleState, scoredPods []*types.ScoredPod) *types.ProfileRunResult {
log.FromContext(ctx).V(logutil.DEBUG).Info(fmt.Sprintf("Selecting maximum '%d' pods from %d candidates sorted by max score: %+v", p.maxNumOfEndpoints,
len(scoredPods), scoredPods))
log.FromContext(ctx).V(logutil.DEBUG).Info("Selecting pods from candidates sorted by max score: ", "NumberOfPods", p.maxNumOfEndpoints,
"scoredPodsLength", len(scoredPods), "scoredPods", scoredPods)

// TODO: merge this with the logic in RandomPicker
// Rand package is not safe for concurrent use, so we create a new instance.
Expand Down
2 changes: 1 addition & 1 deletion pkg/epp/scheduling/framework/scheduler_profile.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ func (p *SchedulerProfile) runPickerPlugin(ctx context.Context, cycleState *type
i++
}

loggerDebug.Info("Running picker plugin", "plugin", p.picker.TypedName(), "pods-weighted-score", fmt.Sprint(weightedScorePerPod))
loggerDebug.Info("Running picker plugin", "plugin", p.picker.TypedName(), "pods-weighted-score", weightedScorePerPod)
before := time.Now()
result := p.picker.Pick(ctx, cycleState, scoredPods)
metrics.RecordPluginProcessingLatency(PickerExtensionPoint, p.picker.TypedName().Type, p.picker.TypedName().Name, time.Since(before))
Expand Down