-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Support resource reservation (code only) #4887
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
68188ac
cb1dba0
9079cd8
24a6afb
a0bdf0a
71b87e9
fa4228b
3f95f14
0c6a59e
7aebf2e
8db0404
d1afec6
a45c8eb
0bcc4f1
40983f0
0a48230
ccddf1a
793be7f
f28c882
44e538e
9824629
3cb4614
2060925
6131224
a7c0abe
d149b27
b9dfbfb
ed14208
271f506
922b12a
d8340af
db6dfb9
f9b4708
6775484
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
|
|
@@ -52,7 +52,8 @@ require ( | |||
| sigs.k8s.io/controller-runtime v0.13.0 | ||||
| sigs.k8s.io/yaml v1.6.0 | ||||
| stathat.com/c/consistent v1.0.0 | ||||
| volcano.sh/apis v1.13.2-0.20251222111824-40fed4793a6f | ||||
| //volcano.sh/apis v1.12.3-0.20250922071132-1aeba4dd588f | ||||
| volcano.sh/apis v0.0.0 | ||||
| ) | ||||
|
|
||||
| require ( | ||||
|
|
@@ -122,7 +123,7 @@ require ( | |||
| github.com/google/cel-go v0.26.0 // indirect | ||||
| github.com/google/gnostic-models v0.7.0 // indirect | ||||
| github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect | ||||
| github.com/google/uuid v1.6.0 // indirect | ||||
| github.com/google/uuid v1.6.0 | ||||
| github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect | ||||
| github.com/hashicorp/errwrap v1.1.0 // indirect | ||||
| github.com/inconshreveable/mousetrap v1.1.0 // indirect | ||||
|
|
@@ -214,3 +215,6 @@ replace ( | |||
| k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.34.1 | ||||
| k8s.io/sample-controller => k8s.io/sample-controller v0.34.1 | ||||
| ) | ||||
|
|
||||
| // replace volcano.sh/apis => ../volcano-apis | ||||
| replace volcano.sh/apis => github.com/LegGasai/apis v0.0.0-20251228060403-bbce8ddfec23 | ||||
|
||||
| replace volcano.sh/apis => github.com/LegGasai/apis v0.0.0-20251228060403-bbce8ddfec23 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,135 @@ | ||
| /* | ||
| Copyright 2019 The Kubernetes Authors. | ||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License. | ||
| */ | ||
|
|
||
| package reserve | ||
|
|
||
| import ( | ||
| "time" | ||
|
|
||
| "k8s.io/klog/v2" | ||
|
|
||
| "volcano.sh/volcano/pkg/scheduler/api" | ||
| "volcano.sh/volcano/pkg/scheduler/framework" | ||
| "volcano.sh/volcano/pkg/scheduler/metrics" | ||
| "volcano.sh/volcano/pkg/scheduler/util" | ||
| ) | ||
|
|
||
| type Action struct{} | ||
|
|
||
| func New() *Action { | ||
| return &Action{} | ||
| } | ||
|
|
||
| func (reserve *Action) Name() string { | ||
| return "reserve" | ||
| } | ||
|
|
||
| func (reserve *Action) Initialize() {} | ||
|
|
||
| func (reserve *Action) Execute(ssn *framework.Session) { | ||
| klog.V(5).Infof("Enter Reserve ...") | ||
| defer klog.V(5).Infof("Leaving Reserve ...") | ||
|
|
||
| for _, job := range ssn.Jobs { | ||
| if !job.IsUseReservation() { | ||
| continue | ||
| } | ||
|
|
||
| if vr := ssn.JobValid(job); vr != nil && !vr.Pass { | ||
| klog.V(3).Infof("Job <%s/%s> Queue <%s> skip allocate, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message) | ||
| continue | ||
| } | ||
|
|
||
| jobs := util.NewPriorityQueue(ssn.JobOrderFn) | ||
| jobs.Push(job) | ||
|
Comment on lines
+56
to
+57
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| klog.V(3).Infof("Attempting to swap reservation for actual tasks in job <%s/%s>", job.Namespace, job.Name) | ||
| stmt := framework.NewStatement(ssn) | ||
|
|
||
| pendingTasks := util.NewPriorityQueue(ssn.TaskOrderFn) | ||
| for _, task := range job.TaskStatusIndex[api.Pending] { | ||
| pendingTasks.Push(task) | ||
| } | ||
|
|
||
| for !pendingTasks.Empty() { | ||
| actualTask := pendingTasks.Pop().(*api.TaskInfo) | ||
|
|
||
| if job.TaskHasFitErrors(job.DefaultSubJobID(), actualTask) { | ||
| klog.V(3).Infof("Task %s with role spec %s has already predicated failed, skip", actualTask.Name, actualTask.TaskRole) | ||
| continue | ||
| } | ||
|
|
||
| if err := ssn.PrePredicateFn(actualTask); err != nil { | ||
| klog.V(3).Infof("PrePredicate for task %s/%s failed for: %v", actualTask.Namespace, actualTask.Name, err) | ||
| fitErrors := api.NewFitErrors() | ||
| for _, ni := range ssn.NodeList { | ||
| fitErrors.SetNodeError(ni.Name, err) | ||
| } | ||
| job.NodesFitErrors[actualTask.UID] = fitErrors | ||
| break | ||
| } | ||
|
|
||
| reservationTask := actualTask.ReservationTaskInfo | ||
| if reservationTask == nil { | ||
| klog.Warningf("Task <%s/%s> wants to use reservation but has no ReservationTaskInfo", actualTask.Namespace, actualTask.Name) | ||
| continue | ||
| } | ||
| reservedNodeName := reservationTask.NodeName | ||
|
|
||
| if reservedNodeName == "" { | ||
| klog.Warningf("Reservation info for task <%s/%s> does not specify a node", actualTask.Namespace, actualTask.Name) | ||
| continue | ||
| } | ||
|
|
||
| reservedNode, found := ssn.Nodes[reservedNodeName] | ||
| if !found { | ||
| klog.Warningf("Reserved node '%s' for task <%s/%s> not found in current session", reservedNodeName, actualTask.Namespace, actualTask.Name) | ||
| continue | ||
| } | ||
|
|
||
| if err := stmt.UnAllocateForReservationTask(reservationTask); err != nil { | ||
| klog.Errorf("Failed to release reservation task %v resources from node %v, err: %v", reservationTask.UID, reservedNode.Name, err) | ||
| continue | ||
| } | ||
|
|
||
| if err := stmt.Allocate(actualTask, reservedNode); err != nil { | ||
| klog.Errorf("Failed to allocate actual task %v to its reserved node %v, err: %v", actualTask.UID, reservedNode.Name, err) | ||
| } else { | ||
| klog.V(3).Infof("Allocated actual task <%s/%s> to node <%s>, effectively replacing the reservation.", actualTask.Namespace, actualTask.Name, reservedNode.Name) | ||
| metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time)) | ||
| metrics.UpdateE2eSchedulingLastTimeByJob(job.Name, string(job.Queue), job.Namespace, time.Now()) | ||
| } | ||
|
|
||
| if ssn.JobReady(job) && !pendingTasks.Empty() { | ||
| jobs.Push(job) | ||
| klog.V(3).Infof("Job <%s/%s> is ready, but still has pending tasks. Pipelining.", job.Namespace, job.Name) | ||
| break | ||
| } | ||
| } | ||
|
|
||
| if ssn.JobReady(job) { | ||
| stmt.Commit() | ||
| } else { | ||
| if !ssn.JobPipelined(job) { | ||
| stmt.Discard() | ||
| } else { | ||
| stmt.Commit() | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func (reserve *Action) UnInitialize() {} | ||
|
Comment on lines
+1
to
+135
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This
replacedirective points to a personal fork. While this is useful for development, it should be removed before merging to avoid breaking the main branch build for other contributors and CI/CD pipelines. Please ensure this is reverted.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
will revert it after api pr has been merged