Skip to content

Commit 0d00997

Browse files
committed
rebase handling
Signed-off-by: Nir Rozenbaum <nirro@il.ibm.com>
1 parent a42cb79 commit 0d00997

File tree

2 files changed

+11
-16
lines changed

2 files changed

+11
-16
lines changed

pkg/epp/scheduling/plugins/prefix/plugin.go

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -229,7 +229,7 @@ func hashPrompt(ctx *types.SchedulingContext, cacheBlockSize int, maxPrefixBlock
229229
// If the last block is smaller than cacheBlockSize, it will be ignored.
230230
res := make([]BlockHash, 0, 1+len(prompt)/cacheBlockSize)
231231
// Add the model to the first block hash so that different models have different hashes even with the same body.
232-
res = append(res, BlockHash(xxhash.Sum64String(ctx.Req.ResolvedTargetModel)))
232+
res = append(res, BlockHash(xxhash.Sum64String(ctx.Req.TargetModel)))
233233
for i := 0; i+cacheBlockSize <= len(prompt); i += cacheBlockSize {
234234
block := prompt[i : i+cacheBlockSize]
235235
prevBlockHash := res[len(res)-1]

pkg/epp/scheduling/plugins/prefix/plugin_test.go

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,8 @@ func TestPrefixPlugin(t *testing.T) {
2424

2525
// First request.
2626
req1 := &types.LLMRequest{
27-
Model: "test-model1",
28-
ResolvedTargetModel: "test-model1",
29-
Prompt: "aaaaaa",
27+
TargetModel: "test-model1",
28+
Prompt: "aaaaaa",
3029
}
3130
ctx := types.NewSchedulingContext(context.Background(), req1, pods)
3231
plugin.PreSchedule(ctx)
@@ -49,9 +48,8 @@ func TestPrefixPlugin(t *testing.T) {
4948
// Second request doesn't share any prefix with first one. It should be added to the cache but
5049
// the pod score should be 0.
5150
req2 := &types.LLMRequest{
52-
Model: "test-model2",
53-
ResolvedTargetModel: "test-model2",
54-
Prompt: "bbbbbb",
51+
TargetModel: "test-model2",
52+
Prompt: "bbbbbb",
5553
}
5654
ctx = types.NewSchedulingContext(context.Background(), req2, pods)
5755
plugin.PreSchedule(ctx)
@@ -73,9 +71,8 @@ func TestPrefixPlugin(t *testing.T) {
7371

7472
// Third request shares partial prefix with first one.
7573
req3 := &types.LLMRequest{
76-
Model: "test-model1",
77-
ResolvedTargetModel: "test-model1",
78-
Prompt: "aaaabbbb",
74+
TargetModel: "test-model1",
75+
Prompt: "aaaabbbb",
7976
}
8077
ctx = types.NewSchedulingContext(context.Background(), req3, pods)
8178
plugin.PreSchedule(ctx)
@@ -96,9 +93,8 @@ func TestPrefixPlugin(t *testing.T) {
9693

9794
// 4th request is same as req3 except the model is different, still no match.
9895
req4 := &types.LLMRequest{
99-
Model: "test-model-new",
100-
ResolvedTargetModel: "test-model-new",
101-
Prompt: "aaaabbbb",
96+
TargetModel: "test-model-new",
97+
Prompt: "aaaabbbb",
10298
}
10399
ctx = types.NewSchedulingContext(context.Background(), req4, pods)
104100
plugin.PreSchedule(ctx)
@@ -119,9 +115,8 @@ func TestPrefixPlugin(t *testing.T) {
119115

120116
// 5th request shares partial prefix with 3rd one.
121117
req5 := &types.LLMRequest{
122-
Model: "test-model1",
123-
ResolvedTargetModel: "test-model1",
124-
Prompt: "aaaabbbbcccc",
118+
TargetModel: "test-model1",
119+
Prompt: "aaaabbbbcccc",
125120
}
126121
ctx = types.NewSchedulingContext(context.Background(), req5, pods)
127122
plugin.PreSchedule(ctx)

0 commit comments

Comments
 (0)