Merged
4 changes: 2 additions & 2 deletions pkg/epp/common/config/configloader_test.go
@@ -515,8 +515,8 @@ func (p *testProfileHandler) Pick(ctx context.Context, request *types.LLMRequest
 	return nil
 }
 
-func (p *testProfileHandler) ProcessResults(ctx context.Context, request *types.LLMRequest, profileResults map[string]*types.ProfileRunResult) *types.SchedulingResult {
-	return nil
+func (p *testProfileHandler) ProcessResults(ctx context.Context, request *types.LLMRequest, profileResults map[string]*types.ProfileRunResult) (*types.SchedulingResult, error) {
+	return nil, nil
 }
 
 func registerTestPlugins() {
5 changes: 3 additions & 2 deletions pkg/epp/scheduling/framework/plugins.go
@@ -40,10 +40,11 @@ type ProfileHandler interface {
 	// and the previously executed SchedluderProfile cycles along with their results.
 	Pick(ctx context.Context, request *types.LLMRequest, profiles map[string]*SchedulerProfile, profileResults map[string]*types.ProfileRunResult) map[string]*SchedulerProfile
 
-	// ProcessResults handles the outcome of the profile runs after all profiles ran succuessfully.
+	// ProcessResults handles the outcome of the profile runs after all profiles ran.
 	// It may aggregate results, log test profile outputs, or apply custom logic. It specifies in the SchedulingResult the
 	// key of the primary profile that should be used to get the request selected destination.
-	ProcessResults(ctx context.Context, request *types.LLMRequest, profileResults map[string]*types.ProfileRunResult) *types.SchedulingResult
+	// When a profile run fails, its result in the profileResults map is nil.
Contributor:
Confirming: it will be an entry with a nil value (key + nil), not a missing entry (no key at all)?

Contributor Author:
Right. If, for example, the prefill profile fails, we will have an entry in the map: "prefill" -> nil.

Contributor:
Do we want SchedulingResult to carry an error so that erroring out is explicit?

Contributor Author:
That depends on the plugin implementation. Whoever implements ProfileHandler can decide to filter the failed profiles out of the SchedulingResult, or alternatively leave them as nil to be explicit. Both are possible.

In GIE there is a single profile, so a failure means we cannot schedule the request. In different scenarios, like llm-d PD, that is not always the case (see the description in the PR intro).
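
For illustration only, here is a minimal sketch of what such a multi-profile handler could look like; the pdProfileHandler type, the "prefill"/"decode" profile names, the fallback policy, and the types import path are assumptions made for the sketch and are not part of this PR:

// Sketch only: a hypothetical P/D-style handler that tolerates a failed prefill run.
package example

import (
	"context"
	"errors"

	// assumed import path for the scheduling types package
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
)

type pdProfileHandler struct{} // hypothetical handler, not defined in this PR

func (h *pdProfileHandler) ProcessResults(_ context.Context, _ *types.LLMRequest,
	profileResults map[string]*types.ProfileRunResult) (*types.SchedulingResult, error) {
	if profileResults["decode"] == nil { // decode is mandatory; a nil entry means its run failed
		return nil, errors.New("decode profile failed, cannot schedule the request")
	}
	if profileResults["prefill"] == nil { // prefill is optional; drop the failed entry
		delete(profileResults, "prefill")
	}
	return &types.SchedulingResult{
		ProfileResults:     profileResults,
		PrimaryProfileName: "decode",
	}, nil
}

Under that assumed policy, a nil "prefill" entry is simply dropped, while a nil "decode" entry fails the request, which mirrors the behavior discussed above.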

Contributor:
Sorry, I meant changing ProfileRunResult to include an error field, instead of assuming there is an error when the result is nil.

Contributor Author:
As part of the new scheduling design, one of the first changes made was to remove the error return value from all extensions of a profile: filter, scorer, and picker no longer return an error.

The only case where an error is returned from a profile run is when no pods are left available after the filter phase. See here:

func (p *SchedulerProfile) Run(ctx context.Context, request *types.LLMRequest, cycleState *types.CycleState, podsSnapshot []types.Pod) (*types.ProfileRunResult, error) {
	pods := p.runFilterPlugins(ctx, request, cycleState, podsSnapshot)
	if len(pods) == 0 {
		return nil, errutil.Error{Code: errutil.Internal, Msg: "no pods available for the given request"}
	}
	// if we got here, there is at least one pod to score
	weightedScorePerPod := p.runScorerPlugins(ctx, request, cycleState, pods)
	result := p.runPickerPlugin(ctx, cycleState, weightedScorePerPod)
	p.runPostCyclePlugins(ctx, cycleState, result)
	return result, nil
}

Additionally, note that when that happens, the returned ProfileRunResult is nil.

In the scheduler itself, the code in this PR includes the nil result when an error happens. See here:

for name, profile := range profiles {
	// run the selected profiles and collect results (current code runs all profiles)
	profileRunResult, err := profile.Run(ctx, request, cycleState, podsSnapshot)
	if err != nil {
		loggerDebug.Info("failed to run scheduler profile", "profile", name, "error", err.Error())
	}
	profileRunResults[name] = profileRunResult // if profile failed to run, the run result is nil
}

I feel comfortable leaving it as is, but if you have a strong opinion about adding an explicit error field to ProfileRunResult, we can do that. I just don't think it's necessary, at least not at the moment.

Contributor:
Sounds good

+	ProcessResults(ctx context.Context, request *types.LLMRequest, profileResults map[string]*types.ProfileRunResult) (*types.SchedulingResult, error)
 }
 
 // Filter defines the interface for filtering a list of pods based on context.
@@ -19,6 +19,8 @@ package profile
 import (
 	"context"
 	"encoding/json"
+	"errors"
+	"fmt"
 
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/framework"
@@ -58,15 +60,28 @@ func (h *SingleProfileHandler) Pick(_ context.Context, request *types.LLMRequest
 	return profiles
 }
 
-func (h *SingleProfileHandler) ProcessResults(_ context.Context, _ *types.LLMRequest, profileResults map[string]*types.ProfileRunResult) *types.SchedulingResult {
-	var firstKey string
-	for key := range profileResults {
-		firstKey = key
+// ProcessResults handles the outcome of the profile runs after all profiles ran.
+// It may aggregate results, log test profile outputs, or apply custom logic. It specifies in the SchedulingResult the
+// key of the primary profile that should be used to get the request selected destination.
+// When a profile run fails, its result in the profileResults map is nil.
+func (h *SingleProfileHandler) ProcessResults(_ context.Context, _ *types.LLMRequest,
+	profileResults map[string]*types.ProfileRunResult) (*types.SchedulingResult, error) {
+	if len(profileResults) != 1 {
+		return nil, errors.New("single profile handler is intended to be used with a single profile, failed to process multiple profiles")
+	}
+
+	var singleProfileName string
+	for profileName := range profileResults {
+		singleProfileName = profileName
 		break
 	}
+
+	if profileResults[singleProfileName] == nil { // there was an error while running the profile
Contributor:
IIUC, it seems that you are special casing the first profile, even when there are multiple.
Consider raising the error only when there's a single profile result and it is nil (i.e., special case on if len(profileResults) == 1 { ... })?

Contributor Author (@nirrozenbaum, Jun 19, 2025):
This profile handler is intended to be used with a single profile, as the name SingleProfileHandler suggests. I added validation that it receives exactly one profile result.

+		return nil, fmt.Errorf("failed to run scheduler profile '%s'", singleProfileName)
+	}
+
 	return &types.SchedulingResult{
 		ProfileResults:     profileResults,
-		PrimaryProfileName: firstKey,
-	}
+		PrimaryProfileName: singleProfileName,
+	}, nil
 }
8 changes: 4 additions & 4 deletions pkg/epp/scheduling/scheduler.go
@@ -123,10 +123,10 @@ func (s *Scheduler) Schedule(ctx context.Context, request *types.LLMRequest) (*t
 			// run the selected profiles and collect results (current code runs all profiles)
 			profileRunResult, err := profile.Run(ctx, request, cycleState, podsSnapshot)
 			if err != nil {
-				return nil, fmt.Errorf("failed to run all required scheduling profiles - %w", err)
+				loggerDebug.Info("failed to run scheduler profile", "profile", name, "error", err.Error())
 			}
 
-			profileRunResults[name] = profileRunResult
+			profileRunResults[name] = profileRunResult // if profile failed to run, the run result is nil
 		}
 	}

@@ -135,8 +135,8 @@ func (s *Scheduler) Schedule(ctx context.Context, request *types.LLMRequest) (*t
 	}
 
 	before := time.Now()
-	result := s.profileHandler.ProcessResults(ctx, request, profileRunResults)
+	result, err := s.profileHandler.ProcessResults(ctx, request, profileRunResults)
 	metrics.RecordSchedulerPluginProcessingLatency(framework.ProcessProfilesResultsType, s.profileHandler.Name(), time.Since(before))
 
-	return result, nil
+	return result, err
 }