
Commit a2c19cf

remove Model field from LLMRequest
Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>
1 parent 2b66451 commit a2c19cf

5 files changed: +21 -34 lines changed

pkg/epp/requestcontrol/director.go

Lines changed: 6 additions & 11 deletions

```diff
@@ -79,14 +79,14 @@ func (d *Director) HandleRequest(ctx context.Context, reqCtx *handlers.RequestCo
         if reqCtx.ResolvedTargetModel == "" {
             return reqCtx, errutil.Error{Code: errutil.BadConfiguration, Msg: fmt.Sprintf("error getting target model name for model %v", modelObj.Name)}
         }
+        reqCtx.Request.Body["model"] = reqCtx.ResolvedTargetModel // Update target model in the body.
     }
 
     llmReq := &schedulingtypes.LLMRequest{
-        Model:               reqCtx.Model,
-        ResolvedTargetModel: reqCtx.ResolvedTargetModel,
-        Critical:            modelObj.Spec.Criticality != nil && *modelObj.Spec.Criticality == v1alpha2.Critical,
-        Prompt:              prompt,
-        Headers:             reqCtx.Request.Headers,
+        TargetModel: reqCtx.ResolvedTargetModel,
+        Critical:    modelObj.Spec.Criticality != nil && *modelObj.Spec.Criticality == v1alpha2.Critical,
+        Prompt:      prompt,
+        Headers:     reqCtx.Request.Headers,
     }
     logger.V(logutil.DEBUG).Info("LLM request assembled", "request", llmReq)
     results, err := d.Dispatch(ctx, llmReq)
@@ -129,13 +129,8 @@ func (d *Director) PostDispatch(ctx context.Context, reqCtx *handlers.RequestCon
     }
 
     endpoint := targetPod.Address + ":" + strconv.Itoa(int(pool.Spec.TargetPortNumber))
-    logger.V(logutil.DEFAULT).Info("Request handled",
-        "model", reqCtx.Model, "targetModel", reqCtx.ResolvedTargetModel, "endpoint", targetPod)
+    logger.V(logutil.DEFAULT).Info("Request handled", "model", reqCtx.Model, "targetModel", reqCtx.ResolvedTargetModel, "endpoint", targetPod)
 
-    // Update target models in the body.
-    if reqCtx.Model != reqCtx.ResolvedTargetModel {
-        reqCtx.Request.Body["model"] = reqCtx.ResolvedTargetModel
-    }
     reqCtx.TargetPod = targetPod.NamespacedName.String()
     reqCtx.TargetEndpoint = endpoint
```
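Net effect of these two hunks: the body's "model" field is now rewritten once, in HandleRequest, at the moment the target model is resolved, instead of conditionally in PostDispatch, and the scheduler only ever sees a single TargetModel field. A minimal sketch of the resulting flow; the `Request`/`RequestContext` shapes and the `assembleLLMRequest` helper here are illustrative assumptions, not the repository's real types:

```go
package main

import "fmt"

// Illustrative stand-ins for the real types in the handlers and
// schedulingtypes packages; field names follow the diff, everything
// else is assumed for the sketch.
type Request struct {
	Body    map[string]interface{}
	Headers map[string]string
}

type RequestContext struct {
	Model               string // model name the user sent in the request body
	ResolvedTargetModel string // final model after traffic-split resolution
	Request             *Request
}

type LLMRequest struct {
	TargetModel string
	Critical    bool
	Prompt      string
	Headers     map[string]string
}

// assembleLLMRequest mirrors the post-commit shape of HandleRequest: the
// body's "model" field is rewritten as soon as the target model resolves,
// so PostDispatch no longer needs to touch the body.
func assembleLLMRequest(reqCtx *RequestContext, prompt string, critical bool) (*LLMRequest, error) {
	if reqCtx.ResolvedTargetModel == "" {
		return nil, fmt.Errorf("error getting target model name for model %v", reqCtx.Model)
	}
	reqCtx.Request.Body["model"] = reqCtx.ResolvedTargetModel // update target model in the body

	return &LLMRequest{
		TargetModel: reqCtx.ResolvedTargetModel,
		Critical:    critical,
		Prompt:      prompt,
		Headers:     reqCtx.Request.Headers,
	}, nil
}

func main() {
	reqCtx := &RequestContext{
		Model:               "my-model",
		ResolvedTargetModel: "my-model-v2", // e.g. chosen by a traffic split
		Request:             &Request{Body: map[string]interface{}{"model": "my-model"}},
	}
	llmReq, err := assembleLLMRequest(reqCtx, "hello", true)
	fmt.Println(llmReq, reqCtx.Request.Body["model"], err)
}
```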

pkg/epp/scheduling/plugins/filter/filter_test.go

Lines changed: 1 addition & 2 deletions

```diff
@@ -203,8 +203,7 @@ func TestLoRASoftAffinityDistribution(t *testing.T) {
 
     // Create a test request and pods
     req := &types.LLMRequest{
-        Model:               testAffinityModel,
-        ResolvedTargetModel: testAffinityModel,
+        TargetModel: testAffinityModel,
     }
 
     // Test setup: One affinity pod and one available pod
```

pkg/epp/scheduling/plugins/filter/lora_affinity_filter.go

Lines changed: 2 additions & 2 deletions

```diff
@@ -59,8 +59,8 @@ func (f *LoraAffinityFilter) Filter(ctx *types.SchedulingContext, pods []types.P
 
     // Categorize pods based on affinity and availability
     for _, pod := range pods {
-        _, active := pod.GetMetrics().ActiveModels[ctx.Req.ResolvedTargetModel]
-        _, waiting := pod.GetMetrics().WaitingModels[ctx.Req.ResolvedTargetModel]
+        _, active := pod.GetMetrics().ActiveModels[ctx.Req.TargetModel]
+        _, waiting := pod.GetMetrics().WaitingModels[ctx.Req.TargetModel]
 
         if active || waiting {
             filtered_affinity = append(filtered_affinity, pod)
```
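The filter itself changes only its map key: it now looks up the single TargetModel. For readers unfamiliar with the filter, a self-contained sketch of the categorization step follows; `podMetrics` and the `available` bucket are assumptions (the diff shows only the affinity branch):

```go
package main

import "fmt"

// podMetrics is an illustrative stand-in for what pod.GetMetrics()
// returns in the real filter; only the two model maps matter here.
type podMetrics struct {
	Name          string
	ActiveModels  map[string]int // LoRA adapters currently loaded on the pod
	WaitingModels map[string]int // LoRA adapters queued to be loaded
}

// categorize mirrors the filter's core loop after this commit: a pod has
// LoRA affinity for a request iff the TargetModel is active or waiting on
// it; everything else is merely "available".
func categorize(pods []podMetrics, targetModel string) (affinity, available []podMetrics) {
	for _, pod := range pods {
		_, active := pod.ActiveModels[targetModel]
		_, waiting := pod.WaitingModels[targetModel]
		if active || waiting {
			affinity = append(affinity, pod)
		} else {
			available = append(available, pod)
		}
	}
	return affinity, available
}

func main() {
	pods := []podMetrics{
		{Name: "pod1", ActiveModels: map[string]int{"adapter-a": 1}},
		{Name: "pod2", WaitingModels: map[string]int{"adapter-a": 1}},
		{Name: "pod3"},
	}
	affinity, available := categorize(pods, "adapter-a")
	fmt.Println(len(affinity), len(available)) // 2 1
}
```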

pkg/epp/scheduling/scheduler_test.go

Lines changed: 9 additions & 13 deletions

```diff
@@ -40,19 +40,17 @@ func TestSchedule(t *testing.T) {
         {
             name: "no pods in datastore",
             req: &types.LLMRequest{
-                Model:               "any-model",
-                ResolvedTargetModel: "any-model",
-                Critical:            true,
+                TargetModel: "any-model",
+                Critical:    true,
             },
             input: []*backendmetrics.FakePodMetrics{},
             err:   true,
         },
         {
             name: "critical request",
             req: &types.LLMRequest{
-                Model:               "critical",
-                ResolvedTargetModel: "critical",
-                Critical:            true,
+                TargetModel: "critical",
+                Critical:    true,
             },
             // pod2 will be picked because it has relatively low queue size, with the requested
             // model being active, and has low KV cache.
@@ -114,9 +112,8 @@ func TestSchedule(t *testing.T) {
         {
             name: "sheddable request, accepted",
             req: &types.LLMRequest{
-                Model:               "sheddable",
-                ResolvedTargetModel: "sheddable",
-                Critical:            false,
+                TargetModel: "sheddable",
+                Critical:    false,
             },
             // pod1 will be picked because it has capacity for the sheddable request.
             input: []*backendmetrics.FakePodMetrics{
@@ -177,9 +174,8 @@ func TestSchedule(t *testing.T) {
         {
             name: "sheddable request, dropped",
             req: &types.LLMRequest{
-                Model:               "sheddable",
-                ResolvedTargetModel: "sheddable",
-                Critical:            false,
+                TargetModel: "sheddable",
+                Critical:    false,
             },
             // All pods have higher KV cache than the threshold, so the sheddable request will be
             // dropped.
@@ -356,7 +352,7 @@ func TestSchedulePlugins(t *testing.T) {
             // Initialize the scheduler
             scheduler := NewSchedulerWithConfig(&fakeDataStore{pods: test.input}, &test.config)
 
-            req := &types.LLMRequest{Model: "test-model"}
+            req := &types.LLMRequest{TargetModel: "test-model"}
             got, err := scheduler.Schedule(context.Background(), req)
 
             // Validate error state
```

pkg/epp/scheduling/types/types.go

Lines changed: 3 additions & 6 deletions

```diff
@@ -28,10 +28,8 @@ import (
 
 // LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
 type LLMRequest struct {
-    // Model is the name of the model that the user specified in the request body.
-    Model string
-    // ResolvedTargetModel is the final target model after traffic split.
-    ResolvedTargetModel string
+    // TargetModel is the final target model after traffic split.
+    TargetModel string
     // Critical is a boolean that specifies if a request is critical or not.
     Critical bool
     // Prompt is the prompt that was sent in the request body.
@@ -41,8 +39,7 @@ type LLMRequest struct {
 }
 
 func (r *LLMRequest) String() string {
-    return fmt.Sprintf("Model: %s, ResolvedTargetModel: %s, Critical: %t, PromptLength: %d, Headers: %v",
-        r.Model, r.ResolvedTargetModel, r.Critical, len(r.Prompt), r.Headers)
+    return fmt.Sprintf("TargetModel: %s, Critical: %t, PromptLength: %d, Headers: %v", r.TargetModel, r.Critical, len(r.Prompt), r.Headers)
 }
 
 type Pod interface {
```
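For reference, the full type after this commit, reconstructed from the hunks above; the Headers declaration sits outside the visible diff context, so its exact type is an assumption:

```go
package types

import "fmt"

// LLMRequest is a structured representation of the fields we parse out of
// the LLMRequest body. Reconstructed from the diff; the Headers field is
// not shown in the hunks, so map[string]string is assumed.
type LLMRequest struct {
	// TargetModel is the final target model after traffic split.
	TargetModel string
	// Critical is a boolean that specifies if a request is critical or not.
	Critical bool
	// Prompt is the prompt that was sent in the request body.
	Prompt string
	// Headers holds the request headers (assumed type).
	Headers map[string]string
}

func (r *LLMRequest) String() string {
	return fmt.Sprintf("TargetModel: %s, Critical: %t, PromptLength: %d, Headers: %v",
		r.TargetModel, r.Critical, len(r.Prompt), r.Headers)
}
```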
