@@ -71,9 +71,9 @@ type Director struct {
71
71
72
72
// HandleRequest orchestrates the request lifecycle:
73
73
// 1. Parses request details.
74
- // 2. Calls PreDispatch for admission control.
75
- // 3. Calls Dispatch (which calls Scheduler) if request is approved.
76
- // 4. Calls PostDispatch to populate RequestContext with results.
74
+ // 2. Calls admitRequest for admission control.
75
+ // 3. Calls Scheduler.Schedule if request is approved.
76
+ // 4. Calls prepareRequest to populate RequestContext with results and invoke the PreRequest plugins.
77
77
//
78
78
// It always returns the requestContext even in the error case, as the request context is used in error handling.
79
79
func (d * Director ) HandleRequest (ctx context.Context , reqCtx * handlers.RequestContext ) (* handlers.RequestContext , error ) {
@@ -120,38 +120,36 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
120
120
Prompt : prompt ,
121
121
Headers : reqCtx .Request .Headers ,
122
122
}
123
- logger = logger .WithValues (
124
- "model" , reqCtx .Model ,
125
- "resolvedTargetModel" , reqCtx .ResolvedTargetModel ,
126
- "criticality" , requestCriticality ,
127
- )
123
+
124
+ logger = logger .WithValues ("model" , reqCtx .Model , "resolvedTargetModel" , reqCtx .ResolvedTargetModel , "criticality" , requestCriticality )
128
125
ctx = log .IntoContext (ctx , logger )
129
126
logger .V (logutil .DEBUG ).Info ("LLM request assembled" )
130
127
131
- // --- 2. Saturation Check - --
132
- if err := d .PreDispatch (ctx , reqCtx , requestCriticality ); err != nil {
128
+ // --- 2. Admission Control Check ---
129
+ if err := d .admitRequest (ctx , reqCtx , requestCriticality ); err != nil {
133
130
return reqCtx , err
134
131
}
135
132
136
- // --- 3. Dispatch (Calls Scheduler) ---
133
+ // --- 3. Call Scheduler ---
137
134
results , err := d .scheduler .Schedule (ctx , reqCtx .SchedulingRequest )
138
135
if err != nil {
139
136
return reqCtx , errutil.Error {Code : errutil .InferencePoolResourceExhausted , Msg : fmt .Errorf ("failed to find target pod: %w" , err ).Error ()}
140
137
}
141
138
142
- // --- 4. PostDispatch (Populates RequestContext) ---
143
- // Insert target endpoint to instruct Envoy to route requests to the specified target pod.
144
- // Attach the port number .
145
- reqCtx , err = d .PostDispatch (ctx , reqCtx , results )
139
+ // --- 4. Prepare Request (populates RequestContext and calls PreRequest plugins) ---
140
+ // Insert target endpoint to instruct Envoy to route requests to the specified target pod and attach the port number.
141
+ // Invoke the registered PreRequest plugins.
142
+ reqCtx , err = d .prepareRequest (ctx , reqCtx , results )
146
143
if err != nil {
147
144
return reqCtx , err
148
145
}
149
146
150
147
return reqCtx , nil
151
148
}
152
149
153
- // PreDispatch handles admission control before dispatch.
154
- func (d * Director ) PreDispatch (ctx context.Context , reqCtx * handlers.RequestContext , reqCriticality v1alpha2.Criticality ) error {
150
+ // admitRequest handles admission control to decide whether or not to accept the request
151
+ // based on the request criticality and system saturation state.
152
+ func (d * Director ) admitRequest (ctx context.Context , reqCtx * handlers.RequestContext , reqCriticality v1alpha2.Criticality ) error {
155
153
logger := log .FromContext (ctx )
156
154
157
155
if reqCriticality == v1alpha2 .Critical {
@@ -170,8 +168,9 @@ func (d *Director) PreDispatch(ctx context.Context, reqCtx *handlers.RequestCont
170
168
return nil
171
169
}
172
170
173
- // PostDispatch populates the RequestContext based on scheduling results.
174
- func (d * Director ) PostDispatch (ctx context.Context , reqCtx * handlers.RequestContext , result * schedulingtypes.SchedulingResult ) (* handlers.RequestContext , error ) {
171
+ // prepareRequest populates the RequestContext and calls the registered PreRequest plugins
172
+ // so that customized logic can be plugged in based on the scheduling results.
173
+ func (d * Director ) prepareRequest (ctx context.Context , reqCtx * handlers.RequestContext , result * schedulingtypes.SchedulingResult ) (* handlers.RequestContext , error ) {
175
174
logger := log .FromContext (ctx )
176
175
if result == nil || len (result .ProfileResults ) == 0 {
177
176
return reqCtx , errutil.Error {Code : errutil .Internal , Msg : "results must be greater than zero" }
0 commit comments