Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
68188ac
feature: support resource reservation
LegGasai May 27, 2025
cb1dba0
merge
LegGasai May 27, 2025
9079cd8
bugfix: add prePredicate for reservation task to adapt volume bind
LegGasai May 28, 2025
24a6afb
merge
LegGasai May 28, 2025
a0bdf0a
merge
LegGasai Jun 1, 2025
71b87e9
move reservation context to ReservationContext
LegGasai Jun 1, 2025
fa4228b
optimize: remove reservation controller
LegGasai Jun 1, 2025
3f95f14
optimize: add TTL for Reservation
LegGasai Jun 1, 2025
0c6a59e
merge
LegGasai Jun 7, 2025
7aebf2e
Adapt to the change of moving the Reservation CRD from the batch API …
LegGasai Jun 8, 2025
8db0404
optimize: support multi optional reservation node.
LegGasai Jun 8, 2025
d1afec6
Refactor: Use PostBind to update Reservation
LegGasai Jun 22, 2025
a45c8eb
Merge remote-tracking branch 'upstream/master' into support_resource_…
LegGasai Jul 6, 2025
0bcc4f1
Optimize: Asynchronous processing for sync Reservation Status
LegGasai Jul 6, 2025
40983f0
Optimize: add reserve action
LegGasai Jul 21, 2025
0a48230
merge
LegGasai Jul 21, 2025
ccddf1a
merge
LegGasai Jul 21, 2025
793be7f
merge
LegGasai Jul 21, 2025
f28c882
merge
LegGasai Jul 21, 2025
44e538e
merge
LegGasai Jul 21, 2025
9824629
Merge remote-tracking branch 'upstream/master' into support_resource_…
LegGasai Jul 27, 2025
3cb4614
merge
LegGasai Aug 2, 2025
2060925
bugfix: skip update reservation-only pg in api server
LegGasai Aug 10, 2025
6131224
Merge remote-tracking branch 'upstream/master' into support_resource_…
LegGasai Aug 10, 2025
a7c0abe
bugfix: skip deleting reservation if already gc'ed
LegGasai Aug 10, 2025
d149b27
bugfix: fix action order in conf
LegGasai Aug 10, 2025
b9dfbfb
Merge remote-tracking branch 'upstream/master' into support_resource_…
LegGasai Sep 17, 2025
ed14208
chore: add sign off
LegGasai Sep 17, 2025
271f506
merge
LegGasai Oct 7, 2025
922b12a
merge
LegGasai Dec 28, 2025
d8340af
merge upstream and fix reservation bugs
LegGasai Dec 28, 2025
db6dfb9
fix reservation bugs
LegGasai Dec 28, 2025
f9b4708
type
LegGasai Dec 28, 2025
6775484
feat(reservation): add resource reservation support for scheduler(code)
LegGasai Dec 28, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ generate-code:
manifests: controller-gen
go mod vendor
# volcano crd base
$(CONTROLLER_GEN) $(CRD_OPTIONS) paths="./vendor/volcano.sh/apis/pkg/apis/scheduling/v1beta1;./vendor/volcano.sh/apis/pkg/apis/batch/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/bus/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/topology/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/shard/v1alpha1" output:crd:artifacts:config=config/crd/volcano/bases
$(CONTROLLER_GEN) $(CRD_OPTIONS_EXCLUDE_DESCRIPTION) paths="./vendor/volcano.sh/apis/pkg/apis/scheduling/v1beta1;./vendor/volcano.sh/apis/pkg/apis/batch/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/bus/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/nodeinfo/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/topology/v1alpha1;./vendor/volcano.sh/apis/pkg/apis/shard/v1alpha1" output:crd:artifacts:config=config/crd/volcano/bases
# generate volcano job crd yaml without description to avoid yaml size limit when using `kubectl apply`
$(CONTROLLER_GEN) $(CRD_OPTIONS_EXCLUDE_DESCRIPTION) paths="./vendor/volcano.sh/apis/pkg/apis/batch/v1alpha1" output:crd:artifacts:config=config/crd/volcano/bases
# jobflow crd base
Expand Down
8 changes: 6 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ require (
sigs.k8s.io/controller-runtime v0.13.0
sigs.k8s.io/yaml v1.6.0
stathat.com/c/consistent v1.0.0
volcano.sh/apis v1.13.2-0.20251222111824-40fed4793a6f
//volcano.sh/apis v1.12.3-0.20250922071132-1aeba4dd588f
volcano.sh/apis v0.0.0
)

require (
Expand Down Expand Up @@ -122,7 +123,7 @@ require (
github.com/google/cel-go v0.26.0 // indirect
github.com/google/gnostic-models v0.7.0 // indirect
github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/google/uuid v1.6.0
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand Down Expand Up @@ -214,3 +215,6 @@ replace (
k8s.io/sample-cli-plugin => k8s.io/sample-cli-plugin v0.34.1
k8s.io/sample-controller => k8s.io/sample-controller v0.34.1
)

// replace volcano.sh/apis => ../volcano-apis
replace volcano.sh/apis => github.com/LegGasai/apis v0.0.0-20251228060403-bbce8ddfec23
Comment on lines +219 to +220

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

This replace directive points to a personal fork. While this is useful for development, it should be removed before merging to avoid breaking the main branch build for other contributors and CI/CD pipelines. Please ensure this is reverted.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will revert it after the API PR has been merged.

Copy link

Copilot AI Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dependency replaced with personal GitHub fork: The volcano.sh/apis dependency has been replaced with a personal GitHub fork (github.com/LegGasai/apis). This is inappropriate for production code and should use the official volcano.sh/apis repository. Personal forks may not be maintained, could introduce security issues, and make it difficult for other contributors to build the project. Revert to using the official volcano.sh/apis dependency.

Suggested change
replace volcano.sh/apis => github.com/LegGasai/apis v0.0.0-20251228060403-bbce8ddfec23

Copilot uses AI. Check for mistakes.
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8af
github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/LegGasai/apis v0.0.0-20251228060403-bbce8ddfec23 h1:J3GB5pucpcFYOTzoPHb0v+dXDqHgkLDRuoDbDEb8lI8=
github.com/LegGasai/apis v0.0.0-20251228060403-bbce8ddfec23/go.mod h1:CKQbxVt0o4lTKisC0MonoXWruGFC0S8KU+UuzaZ5E7k=
github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
Expand Down Expand Up @@ -496,5 +498,3 @@ sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs=
sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4=
stathat.com/c/consistent v1.0.0 h1:ezyc51EGcRPJUxfHGSgJjWzJdj3NiMU9pNfLNGiXV0c=
stathat.com/c/consistent v1.0.0/go.mod h1:QkzMWzcbB+yQBL2AttO6sgsQS/JSTapcDISJalmCDS0=
volcano.sh/apis v1.13.2-0.20251222111824-40fed4793a6f h1:B3ZI+SximYK1oLOrWX5mah4Zblp8M/Tj/e+VZJWVYik=
volcano.sh/apis v1.13.2-0.20251222111824-40fed4793a6f/go.mod h1:CKQbxVt0o4lTKisC0MonoXWruGFC0S8KU+UuzaZ5E7k=
2 changes: 2 additions & 0 deletions hack/generate-yaml.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ tail -n +2 ${VOLCANO_CRD_DIR}/bases/batch.volcano.sh_cronjobs.yaml > ${HELM_VOLC
tail -n +2 ${VOLCANO_CRD_DIR}/bases/bus.volcano.sh_commands.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/bus.volcano.sh_commands.yaml
tail -n +2 ${VOLCANO_CRD_DIR}/bases/scheduling.volcano.sh_podgroups.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/scheduling.volcano.sh_podgroups.yaml
tail -n +2 ${VOLCANO_CRD_DIR}/bases/scheduling.volcano.sh_queues.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/scheduling.volcano.sh_queues.yaml
tail -n +2 ${VOLCANO_CRD_DIR}/bases/scheduling.volcano.sh_reservations.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/scheduling.volcano.sh_reservations.yaml
tail -n +2 ${VOLCANO_CRD_DIR}/bases/nodeinfo.volcano.sh_numatopologies.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/nodeinfo.volcano.sh_numatopologies.yaml
tail -n +2 ${VOLCANO_CRD_DIR}/bases/topology.volcano.sh_hypernodes.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/topology.volcano.sh_hypernodes.yaml
tail -n +2 ${VOLCANO_CRD_DIR}/bases/shard.volcano.sh_nodeshards.yaml > ${HELM_VOLCANO_CRD_DIR}/bases/shard.volcano.sh_nodeshards.yaml
Expand Down Expand Up @@ -184,6 +185,7 @@ HELM_CMD="${HELM_BIN_DIR}/helm template ${VK_ROOT}/installer/helm/chart/volcano
-s templates/scheduler.yaml \
-s templates/scheduling_v1beta1_podgroup.yaml \
-s templates/scheduling_v1beta1_queue.yaml \
-s templates/scheduling_v1beta1_reservation.yaml \
-s templates/nodeinfo_v1alpha1_numatopologies.yaml \
-s templates/topology_v1alpha1_hypernodes.yaml \
-s templates/shard_v1alpha1_nodeshards.yaml \
Expand Down
1 change: 1 addition & 0 deletions pkg/scheduler/actions/allocate/allocate.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/klog/v2"

"volcano.sh/apis/pkg/apis/scheduling"

"volcano.sh/volcano/pkg/scheduler/api"
"volcano.sh/volcano/pkg/scheduler/conf"
"volcano.sh/volcano/pkg/scheduler/framework"
Expand Down
2 changes: 2 additions & 0 deletions pkg/scheduler/actions/factory.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"volcano.sh/volcano/pkg/scheduler/actions/enqueue"
"volcano.sh/volcano/pkg/scheduler/actions/preempt"
"volcano.sh/volcano/pkg/scheduler/actions/reclaim"
"volcano.sh/volcano/pkg/scheduler/actions/reserve"
"volcano.sh/volcano/pkg/scheduler/actions/shuffle"
"volcano.sh/volcano/pkg/scheduler/framework"
)
Expand All @@ -37,4 +38,5 @@ func init() {
framework.RegisterAction(preempt.New())
framework.RegisterAction(enqueue.New())
framework.RegisterAction(shuffle.New())
framework.RegisterAction(reserve.New())
}
135 changes: 135 additions & 0 deletions pkg/scheduler/actions/reserve/reserve.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package reserve

import (
"time"

"k8s.io/klog/v2"

"volcano.sh/volcano/pkg/scheduler/api"
"volcano.sh/volcano/pkg/scheduler/framework"
"volcano.sh/volcano/pkg/scheduler/metrics"
"volcano.sh/volcano/pkg/scheduler/util"
)

// Action is the "reserve" scheduler action. It carries no state; all of its
// work happens in Execute against the session it is handed.
type Action struct{}

// New returns a fresh, ready-to-use reserve Action.
func New() *Action {
	return new(Action)
}

// Name returns the action's name, "reserve".
func (reserve *Action) Name() string {
	const name = "reserve"
	return name
}

// Initialize does nothing; Action has no fields to set up.
func (reserve *Action) Initialize() {}

func (reserve *Action) Execute(ssn *framework.Session) {
klog.V(5).Infof("Enter Reserve ...")
defer klog.V(5).Infof("Leaving Reserve ...")

for _, job := range ssn.Jobs {
if !job.IsUseReservation() {
continue
}

if vr := ssn.JobValid(job); vr != nil && !vr.Pass {
klog.V(3).Infof("Job <%s/%s> Queue <%s> skip allocate, reason: %v, message %v", job.Namespace, job.Name, job.Queue, vr.Reason, vr.Message)
continue
}

jobs := util.NewPriorityQueue(ssn.JobOrderFn)
jobs.Push(job)
Comment on lines +56 to +57

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The jobs priority queue is initialized here for each job but it's never read from. This appears to be dead code. The jobs.Push(job) on line 117 also pushes to this unused queue. This logic should be removed to improve clarity and avoid confusion.


klog.V(3).Infof("Attempting to swap reservation for actual tasks in job <%s/%s>", job.Namespace, job.Name)
stmt := framework.NewStatement(ssn)

pendingTasks := util.NewPriorityQueue(ssn.TaskOrderFn)
for _, task := range job.TaskStatusIndex[api.Pending] {
pendingTasks.Push(task)
}

for !pendingTasks.Empty() {
actualTask := pendingTasks.Pop().(*api.TaskInfo)

if job.TaskHasFitErrors(job.DefaultSubJobID(), actualTask) {
klog.V(3).Infof("Task %s with role spec %s has already predicated failed, skip", actualTask.Name, actualTask.TaskRole)
continue
}

if err := ssn.PrePredicateFn(actualTask); err != nil {
klog.V(3).Infof("PrePredicate for task %s/%s failed for: %v", actualTask.Namespace, actualTask.Name, err)
fitErrors := api.NewFitErrors()
for _, ni := range ssn.NodeList {
fitErrors.SetNodeError(ni.Name, err)
}
job.NodesFitErrors[actualTask.UID] = fitErrors
break
}

reservationTask := actualTask.ReservationTaskInfo
if reservationTask == nil {
klog.Warningf("Task <%s/%s> wants to use reservation but has no ReservationTaskInfo", actualTask.Namespace, actualTask.Name)
continue
}
reservedNodeName := reservationTask.NodeName

if reservedNodeName == "" {
klog.Warningf("Reservation info for task <%s/%s> does not specify a node", actualTask.Namespace, actualTask.Name)
continue
}

reservedNode, found := ssn.Nodes[reservedNodeName]
if !found {
klog.Warningf("Reserved node '%s' for task <%s/%s> not found in current session", reservedNodeName, actualTask.Namespace, actualTask.Name)
continue
}

if err := stmt.UnAllocateForReservationTask(reservationTask); err != nil {
klog.Errorf("Failed to release reservation task %v resources from node %v, err: %v", reservationTask.UID, reservedNode.Name, err)
continue
}

if err := stmt.Allocate(actualTask, reservedNode); err != nil {
klog.Errorf("Failed to allocate actual task %v to its reserved node %v, err: %v", actualTask.UID, reservedNode.Name, err)
} else {
klog.V(3).Infof("Allocated actual task <%s/%s> to node <%s>, effectively replacing the reservation.", actualTask.Namespace, actualTask.Name, reservedNode.Name)
metrics.UpdateE2eSchedulingDurationByJob(job.Name, string(job.Queue), job.Namespace, metrics.Duration(job.CreationTimestamp.Time))
metrics.UpdateE2eSchedulingLastTimeByJob(job.Name, string(job.Queue), job.Namespace, time.Now())
}

if ssn.JobReady(job) && !pendingTasks.Empty() {
jobs.Push(job)
klog.V(3).Infof("Job <%s/%s> is ready, but still has pending tasks. Pipelining.", job.Namespace, job.Name)
break
}
}

if ssn.JobReady(job) {
stmt.Commit()
} else {
if !ssn.JobPipelined(job) {
stmt.Discard()
} else {
stmt.Commit()
}
}
}
}

// UnInitialize does nothing; Action has no fields to clean up.
func (reserve *Action) UnInitialize() {}
Comment on lines +1 to +135
Copy link

Copilot AI Dec 28, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing test coverage for reserve action: The reserve action in pkg/scheduler/actions/reserve/reserve.go lacks test coverage. Other actions like allocate have comprehensive test files (allocate_test.go with 5185 lines), but there's no reserve_test.go. Given that this action handles critical reservation-to-actual-task swapping logic, it should have thorough test coverage.

Copilot uses AI. Check for mistakes.
56 changes: 56 additions & 0 deletions pkg/scheduler/api/helpers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"math"

v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"

"volcano.sh/volcano/pkg/scheduler/api"
)
Expand Down Expand Up @@ -91,3 +92,58 @@ func Share(l, r float64) float64 {

return share
}

// IsPodSpecMatch reports whether a task's pod spec and a reservation's pod
// spec agree on every field compared here: scheduler name, node selector,
// affinity, tolerations, priority class name, containers and init containers.
// Node selector, affinity and tolerations are compared with semantic deep
// equality; container lists are compared by isContainerListEqual.
//
// Both arguments are dereferenced unconditionally, so neither may be nil.
func IsPodSpecMatch(taskPodSpec, resvPodSpec *v1.PodSpec) bool {
	// The checks run in a fixed order and stop at the first mismatch.
	return taskPodSpec.SchedulerName == resvPodSpec.SchedulerName &&
		apiequality.Semantic.DeepEqual(taskPodSpec.NodeSelector, resvPodSpec.NodeSelector) &&
		apiequality.Semantic.DeepEqual(taskPodSpec.Affinity, resvPodSpec.Affinity) &&
		apiequality.Semantic.DeepEqual(taskPodSpec.Tolerations, resvPodSpec.Tolerations) &&
		taskPodSpec.PriorityClassName == resvPodSpec.PriorityClassName &&
		isContainerListEqual(taskPodSpec.Containers, resvPodSpec.Containers) &&
		isContainerListEqual(taskPodSpec.InitContainers, resvPodSpec.InitContainers)
}

// isContainerListEqual reports whether a and b have the same length and every
// container in b has a same-named container in a with the same image and
// semantically equal resource requests and limits. Containers are paired by
// name, so their order within the slices does not matter.
func isContainerListEqual(a, b []v1.Container) bool {
	if len(a) != len(b) {
		return false
	}

	// Index a by container name. If a name repeats, the last occurrence wins.
	indexByName := make(map[string]int, len(a))
	for i := range a {
		indexByName[a[i].Name] = i
	}

	for i := range b {
		pos, found := indexByName[b[i].Name]
		if !found {
			return false
		}

		got, want := &b[i], &a[pos]
		if got.Image != want.Image ||
			!apiequality.Semantic.DeepEqual(got.Resources.Requests, want.Resources.Requests) ||
			!apiequality.Semantic.DeepEqual(got.Resources.Limits, want.Resources.Limits) {
			return false
		}
	}

	return true
}
Loading
Loading