Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions pkg/epp/requestcontrol/director.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ type Director struct {
// 1. Parses request details.
// 2. Calls admitRequest for admission control.
// 3. Calls Scheduler.Schedule if request is approved.
// 4. Calls prepareRequest to populate RequestContext with results and call PreRequest plugins.
// 4. Calls prepareRequest to populate RequestContext with result and call PreRequest plugins.
//
// It always returns the requestContext even in the error case, as the request context is used in error handling.
func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestContext) (*handlers.RequestContext, error) {
Expand Down Expand Up @@ -147,15 +147,15 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
if len(candidatePods) == 0 {
return reqCtx, errutil.Error{Code: errutil.ServiceUnavailable, Msg: "failed to find candidate pods for serving the request"}
}
results, err := d.scheduler.Schedule(ctx, reqCtx.SchedulingRequest, candidatePods)
result, err := d.scheduler.Schedule(ctx, reqCtx.SchedulingRequest, candidatePods)
if err != nil {
return reqCtx, errutil.Error{Code: errutil.InferencePoolResourceExhausted, Msg: fmt.Errorf("failed to find target pod: %w", err).Error()}
}

// --- 4. Prepare Request (Populates RequestContext and calls PreRequest plugins) ---
// Insert target endpoint to instruct Envoy to route requests to the specified target pod and attach the port number.
// Invoke PreRequest registered plugins.
reqCtx, err = d.prepareRequest(ctx, reqCtx, results)
reqCtx, err = d.prepareRequest(ctx, reqCtx, result)
if err != nil {
return reqCtx, err
}
Expand Down Expand Up @@ -231,7 +231,7 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
}

// prepareRequest populates the RequestContext and calls the registered PreRequest plugins
// for allowing plugging customized logic based on the scheduling results.
// so that customized logic can be plugged in based on the scheduling result.
func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestContext, result *schedulingtypes.SchedulingResult) (*handlers.RequestContext, error) {
logger := log.FromContext(ctx)
if result == nil || len(result.ProfileResults) == 0 {
Expand Down Expand Up @@ -320,7 +320,8 @@ func RandomWeightedDraw(logger logr.Logger, model *v1alpha2.InferenceModel, seed
}

func (d *Director) runPreRequestPlugins(ctx context.Context, request *schedulingtypes.LLMRequest, schedulingResult *schedulingtypes.SchedulingResult,
targetPort int) {
targetPort int,
) {
for _, plugin := range d.preRequestPlugins {
log.FromContext(ctx).V(logutil.DEBUG).Info("Running pre-request plugin", "plugin", plugin.TypedName().Type)
before := time.Now()
Expand Down