Skip to content

Commit af4be8f

Browse files
committed
move the conversion from pod metrics to scheduler pod representation one level up
Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>
1 parent 2ae867d commit af4be8f

File tree

5 files changed

+43
-45
lines changed

5 files changed

+43
-45
lines changed

conformance/testing-epp/scheduler_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
func TestSchedule(t *testing.T) {
3232
tests := []struct {
3333
name string
34-
input []backendmetrics.PodMetrics
34+
input []types.Pod
3535
req *types.LLMRequest
3636
wantRes *types.SchedulingResult
3737
err bool
@@ -47,7 +47,7 @@ func TestSchedule(t *testing.T) {
4747
},
4848
{
4949
name: "req header not set",
50-
input: []backendmetrics.PodMetrics{
50+
input: []types.Pod{
5151
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "random-endpoint"}},
5252
},
5353
req: &types.LLMRequest{
@@ -59,7 +59,7 @@ func TestSchedule(t *testing.T) {
5959
},
6060
{
6161
name: "no pods address from the candidate pods matches req header address",
62-
input: []backendmetrics.PodMetrics{
62+
input: []types.Pod{
6363
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "nonmatched-endpoint"}},
6464
},
6565
req: &types.LLMRequest{
@@ -71,7 +71,7 @@ func TestSchedule(t *testing.T) {
7171
},
7272
{
7373
name: "one pod address from the candidate pods matches req header address",
74-
input: []backendmetrics.PodMetrics{
74+
input: []types.Pod{
7575
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "nonmatched-endpoint"}},
7676
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{Address: "matched-endpoint"}},
7777
},
@@ -100,7 +100,7 @@ func TestSchedule(t *testing.T) {
100100
for _, test := range tests {
101101
t.Run(test.name, func(t *testing.T) {
102102
scheduler := NewReqHeaderBasedScheduler()
103-
got, err := scheduler.Schedule(context.Background(), test.req, types.ToSchedulerPodMetrics(test.input))
103+
got, err := scheduler.Schedule(context.Background(), test.req, test.input)
104104
if test.err != (err != nil) {
105105
t.Errorf("Unexpected error, got %v, want %v", err, test.err)
106106
}

pkg/epp/requestcontrol/director.go

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,13 +195,13 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
195195

196196
subsetMap, found := requestMetadata[subsetHintNamespace].(map[string]any)
197197
if !found {
198-
return schedulingtypes.ToSchedulerPodMetrics(d.datastore.PodGetAll())
198+
return d.toSchedulerPodMetrics(d.datastore.PodGetAll())
199199
}
200200

201201
// Check if endpoint key is present in the subset map and ensure there is at least one value
202202
endpointSubsetList, found := subsetMap[subsetHintKey].([]any)
203203
if !found {
204-
return schedulingtypes.ToSchedulerPodMetrics(d.datastore.PodGetAll())
204+
return d.toSchedulerPodMetrics(d.datastore.PodGetAll())
205205
} else if len(endpointSubsetList) == 0 {
206206
loggerTrace.Info("found empty subset filter in request metadata, filtering all pods")
207207
return []schedulingtypes.Pod{}
@@ -227,7 +227,7 @@ func (d *Director) getCandidatePodsForScheduling(ctx context.Context, requestMet
227227

228228
loggerTrace.Info("filtered candidate pods by subset filtering", "podTotalCount", podTotalCount, "filteredCount", len(podFitleredList))
229229

230-
return schedulingtypes.ToSchedulerPodMetrics(podFitleredList)
230+
return d.toSchedulerPodMetrics(podFitleredList)
231231
}
232232

233233
// prepareRequest populates the RequestContext and calls the registered PreRequest plugins
@@ -257,6 +257,14 @@ func (d *Director) prepareRequest(ctx context.Context, reqCtx *handlers.RequestC
257257
return reqCtx, nil
258258
}
259259

260+
func (d *Director) toSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []schedulingtypes.Pod {
261+
pm := make([]schedulingtypes.Pod, 0, len(pods))
262+
for _, pod := range pods {
263+
pm = append(pm, &schedulingtypes.PodMetrics{Pod: pod.GetPod().Clone(), MetricsState: pod.GetMetrics().Clone()})
264+
}
265+
return pm
266+
}
267+
260268
func (d *Director) HandleResponse(ctx context.Context, reqCtx *handlers.RequestContext) (*handlers.RequestContext, error) {
261269
response := &Response{
262270
RequestId: reqCtx.Request.Headers[requtil.RequestIdHeaderKey],

pkg/epp/scheduling/framework/scheduler_profile_test.go

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ import (
2323
"github.com/google/go-cmp/cmp"
2424
"github.com/google/uuid"
2525
k8stypes "k8s.io/apimachinery/pkg/types"
26-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
27-
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" // Import config for thresholds
26+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend" // Import config for thresholds
2827
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
2928
)
3029

@@ -51,7 +50,7 @@ func TestSchedulePlugins(t *testing.T) {
5150
tests := []struct {
5251
name string
5352
profile *SchedulerProfile
54-
input []backendmetrics.PodMetrics
53+
input []types.Pod
5554
wantTargetPod k8stypes.NamespacedName
5655
targetPodScore float64
5756
// Number of expected pods to score (after filter)
@@ -65,10 +64,10 @@ func TestSchedulePlugins(t *testing.T) {
6564
WithScorers(NewWeightedScorer(tp1, 1), NewWeightedScorer(tp2, 1)).
6665
WithPicker(pickerPlugin).
6766
WithPostCyclePlugins(tp1, tp2),
68-
input: []backendmetrics.PodMetrics{
69-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
70-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
71-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
67+
input: []types.Pod{
68+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
69+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
70+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
7271
},
7372
wantTargetPod: k8stypes.NamespacedName{Name: "pod1"},
7473
targetPodScore: 1.1,
@@ -82,10 +81,10 @@ func TestSchedulePlugins(t *testing.T) {
8281
WithScorers(NewWeightedScorer(tp1, 60), NewWeightedScorer(tp2, 40)).
8382
WithPicker(pickerPlugin).
8483
WithPostCyclePlugins(tp1, tp2),
85-
input: []backendmetrics.PodMetrics{
86-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
87-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
88-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
84+
input: []types.Pod{
85+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
86+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
87+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
8988
},
9089
wantTargetPod: k8stypes.NamespacedName{Name: "pod1"},
9190
targetPodScore: 50,
@@ -99,10 +98,10 @@ func TestSchedulePlugins(t *testing.T) {
9998
WithScorers(NewWeightedScorer(tp1, 1), NewWeightedScorer(tp2, 1)).
10099
WithPicker(pickerPlugin).
101100
WithPostCyclePlugins(tp1, tp2),
102-
input: []backendmetrics.PodMetrics{
103-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
104-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
105-
&backendmetrics.FakePodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
101+
input: []types.Pod{
102+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}}},
103+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}}},
104+
&types.PodMetrics{Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}}},
106105
},
107106
numPodsToScore: 0,
108107
err: true, // no available pods to server after filter all
@@ -129,7 +128,7 @@ func TestSchedulePlugins(t *testing.T) {
129128
RequestId: uuid.NewString(),
130129
}
131130
// Run profile cycle
132-
got, err := test.profile.Run(context.Background(), request, types.NewCycleState(), types.ToSchedulerPodMetrics(test.input))
131+
got, err := test.profile.Run(context.Background(), request, types.NewCycleState(), test.input)
133132

134133
// Validate error state
135134
if test.err != (err != nil) {
@@ -142,7 +141,7 @@ func TestSchedulePlugins(t *testing.T) {
142141

143142
// Validate output
144143
wantPod := &types.PodMetrics{
145-
Pod: &backend.Pod{NamespacedName: test.wantTargetPod, Labels: make(map[string]string)},
144+
Pod: &backend.Pod{NamespacedName: test.wantTargetPod},
146145
}
147146
wantRes := &types.ProfileRunResult{
148147
TargetPod: wantPod,

pkg/epp/scheduling/scheduler_test.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func TestSchedule(t *testing.T) {
3333
tests := []struct {
3434
name string
3535
req *types.LLMRequest
36-
input []backendmetrics.PodMetrics
36+
input []types.Pod
3737
wantRes *types.SchedulingResult
3838
err bool
3939
}{
@@ -43,7 +43,7 @@ func TestSchedule(t *testing.T) {
4343
TargetModel: "any-model",
4444
RequestId: uuid.NewString(),
4545
},
46-
input: []backendmetrics.PodMetrics{},
46+
input: []types.Pod{},
4747
wantRes: nil,
4848
err: true,
4949
},
@@ -55,10 +55,10 @@ func TestSchedule(t *testing.T) {
5555
},
5656
// pod2 will be picked because it has relatively low queue size, with the requested
5757
// model being active, and has low KV cache.
58-
input: []backendmetrics.PodMetrics{
59-
&backendmetrics.FakePodMetrics{
58+
input: []types.Pod{
59+
&types.PodMetrics{
6060
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod1"}},
61-
Metrics: &backendmetrics.MetricsState{
61+
MetricsState: &backendmetrics.MetricsState{
6262
WaitingQueueSize: 0,
6363
KVCacheUsagePercent: 0.2,
6464
MaxActiveModels: 2,
@@ -68,9 +68,9 @@ func TestSchedule(t *testing.T) {
6868
},
6969
},
7070
},
71-
&backendmetrics.FakePodMetrics{
71+
&types.PodMetrics{
7272
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
73-
Metrics: &backendmetrics.MetricsState{
73+
MetricsState: &backendmetrics.MetricsState{
7474
WaitingQueueSize: 3,
7575
KVCacheUsagePercent: 0.1,
7676
MaxActiveModels: 2,
@@ -80,9 +80,9 @@ func TestSchedule(t *testing.T) {
8080
},
8181
},
8282
},
83-
&backendmetrics.FakePodMetrics{
83+
&types.PodMetrics{
8484
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod3"}},
85-
Metrics: &backendmetrics.MetricsState{
85+
MetricsState: &backendmetrics.MetricsState{
8686
WaitingQueueSize: 10,
8787
KVCacheUsagePercent: 0.2,
8888
MaxActiveModels: 2,
@@ -97,7 +97,7 @@ func TestSchedule(t *testing.T) {
9797
"default": {
9898
TargetPod: &types.ScoredPod{
9999
Pod: &types.PodMetrics{
100-
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}, Labels: make(map[string]string)},
100+
Pod: &backend.Pod{NamespacedName: k8stypes.NamespacedName{Name: "pod2"}},
101101
MetricsState: &backendmetrics.MetricsState{
102102
WaitingQueueSize: 3,
103103
KVCacheUsagePercent: 0.1,
@@ -106,7 +106,6 @@ func TestSchedule(t *testing.T) {
106106
"foo": 1,
107107
"critical": 1,
108108
},
109-
WaitingModels: map[string]int{},
110109
},
111110
},
112111
},
@@ -120,7 +119,7 @@ func TestSchedule(t *testing.T) {
120119
for _, test := range tests {
121120
t.Run(test.name, func(t *testing.T) {
122121
scheduler := NewScheduler()
123-
got, err := scheduler.Schedule(context.Background(), test.req, types.ToSchedulerPodMetrics(test.input))
122+
got, err := scheduler.Schedule(context.Background(), test.req, test.input)
124123
if test.err != (err != nil) {
125124
t.Errorf("Unexpected error, got %v, want %v", err, test.err)
126125
}

pkg/epp/scheduling/types/types.go

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,6 @@ type PodMetrics struct {
7070
*backendmetrics.MetricsState
7171
}
7272

73-
func ToSchedulerPodMetrics(pods []backendmetrics.PodMetrics) []Pod {
74-
pm := make([]Pod, 0, len(pods))
75-
for _, pod := range pods {
76-
pm = append(pm, &PodMetrics{Pod: pod.GetPod().Clone(), MetricsState: pod.GetMetrics().Clone()})
77-
}
78-
return pm
79-
}
80-
8173
// ProfileRunResult captures the profile run result.
8274
type ProfileRunResult struct {
8375
TargetPod Pod

0 commit comments

Comments
 (0)