diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 5f457194e3a8c8..e629686cf06218 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -612,9 +612,8 @@ struct scx_dispatch_q {
 enum scx_ent_flags {
 	SCX_TASK_QUEUED		= 1 << 0, /* on ext runqueue */
 	SCX_TASK_BAL_KEEP	= 1 << 1, /* balance decided to keep current */
-	SCX_TASK_DDSP_PRIQ	= 1 << 2, /* task should be enqueued on priq when directly dispatched */
-	SCX_TASK_RESET_RUNNABLE_AT = 1 << 3, /* runnable_at should be reset */
-	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 4, /* last dequeue was for SLEEP */
+	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
+	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
 
 	SCX_TASK_STATE_SHIFT	= 8, /* bit 8 and 9 are used to carry scx_task_state */
 	SCX_TASK_STATE_BITS	= 2,
@@ -689,7 +688,8 @@ struct sched_ext_entity {
 #ifdef CONFIG_SCHED_CORE
 	u64			core_sched_at;	/* see scx_prio_less() */
 #endif
-	u64			ddsq_id;
+	u64			ddsp_dsq_id;
+	u64			ddsp_enq_flags;
 
 	/* BPF scheduler modifiable fields */
diff --git a/init/init_task.c b/init/init_task.c
index 1e035992a52b9d..54c9244ef9e5d4 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -114,7 +114,8 @@ struct task_struct init_task
 		.ops_state	= ATOMIC_INIT(0),
 		.runnable_at	= INITIAL_JIFFIES,
 		.slice		= SCX_SLICE_DFL,
-		.ddsq_id	= SCX_DSQ_INVALID,
+		.ddsp_dsq_id	= SCX_DSQ_INVALID,
+		.ddsp_enq_flags	= 0,
 	},
 #endif
 	.ptraced	= LIST_HEAD_INIT(init_task.ptraced),
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 781e8a00b6d597..937ef9353c0b3a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4564,7 +4564,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	atomic_long_set(&p->scx.ops_state, 0);
 	p->scx.runnable_at	= INITIAL_JIFFIES;
 	p->scx.slice		= SCX_SLICE_DFL;
-	p->scx.ddsq_id		= SCX_DSQ_INVALID;
+	p->scx.ddsp_dsq_id	= SCX_DSQ_INVALID;
+	p->scx.ddsp_enq_flags	= 0;
 #endif
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2a552efeec6f31..dd09b53254f592 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -681,6 +681,15 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
 	dsq->nr++;
 	p->scx.dsq = dsq;
 
+	/*
+	 * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
+	 * direct dispatch path, but we clear them here because the direct
+	 * dispatch verdict may be overridden on the enqueue path during e.g.
+	 * bypass.
+	 */
+	p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
+	p->scx.ddsp_enq_flags = 0;
+
 	/*
 	 * We're transitioning out of QUEUEING or DISPATCHING. store_release to
 	 * match waiters' load_acquire.
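The ddsp_dsq_id/ddsp_enq_flags pair replaces the single SCX_TASK_DDSP_PRIQ bit, so a direct-dispatch verdict now carries the full set of enqueue flags until the task is actually inserted into a DSQ. As an illustrative sketch that is not part of this patch, an ops.enqueue() implementation can direct-dispatch with vtime ordering as shown below; the SCX_ENQ_DSQ_PRIQ flag implied by scx_bpf_dispatch_vtime() is what gets stored in ddsp_enq_flags and re-applied by direct_dispatch(). SHARED_DSQ is a hypothetical DSQ id assumed to have been created in ops.init().

/*
 * Illustrative sketch only, not part of this patch: an ops.enqueue()
 * that direct-dispatches with vtime ordering. SHARED_DSQ is a
 * hypothetical DSQ id assumed to have been created with
 * scx_bpf_create_dsq() in ops.init().
 */
#include <scx/common.bpf.h>

#define SHARED_DSQ	0

void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
{
	/*
	 * scx_bpf_dispatch_vtime() marks @p for direct dispatch. The
	 * SCX_ENQ_DSQ_PRIQ flag it implies is carried in
	 * p->scx.ddsp_enq_flags (instead of the removed
	 * SCX_TASK_DDSP_PRIQ task flag) and OR'd back in when
	 * direct_dispatch() runs.
	 */
	scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL,
			       p->scx.dsq_vtime, enq_flags);
}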
@@ -833,12 +842,11 @@ static void mark_direct_dispatch(struct task_struct *ddsp_task,
 		return;
 	}
 
-	WARN_ON_ONCE(p->scx.ddsq_id != SCX_DSQ_INVALID);
-	WARN_ON_ONCE(p->scx.flags & SCX_TASK_DDSP_PRIQ);
+	WARN_ON_ONCE(p->scx.ddsp_dsq_id != SCX_DSQ_INVALID);
+	WARN_ON_ONCE(p->scx.ddsp_enq_flags);
 
-	p->scx.ddsq_id = dsq_id;
-	if (enq_flags & SCX_ENQ_DSQ_PRIQ)
-		p->scx.flags |= SCX_TASK_DDSP_PRIQ;
+	p->scx.ddsp_dsq_id = dsq_id;
+	p->scx.ddsp_enq_flags = enq_flags;
 }
 
 static void direct_dispatch(struct task_struct *p, u64 enq_flags)
@@ -847,14 +855,9 @@ static void direct_dispatch(struct task_struct *p, u64 enq_flags)
 
 	touch_core_sched_dispatch(task_rq(p), p);
 
-	if (p->scx.flags & SCX_TASK_DDSP_PRIQ) {
-		enq_flags |= SCX_ENQ_DSQ_PRIQ;
-		p->scx.flags &= ~SCX_TASK_DDSP_PRIQ;
-	}
-
-	dsq = find_dsq_for_dispatch(task_rq(p), p->scx.ddsq_id, p);
-	dispatch_enqueue(dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
-	p->scx.ddsq_id = SCX_DSQ_INVALID;
+	enq_flags |= (p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
+	dsq = find_dsq_for_dispatch(task_rq(p), p->scx.ddsp_dsq_id, p);
+	dispatch_enqueue(dsq, p, enq_flags);
 }
 
 static bool test_rq_online(struct rq *rq)
@@ -874,9 +877,6 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 
 	WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_QUEUED));
 
-	if (p->scx.ddsq_id != SCX_DSQ_INVALID)
-		goto direct;
-
 	/* rq migration */
 	if (sticky_cpu == cpu_of(rq))
 		goto local_norefill;
@@ -896,6 +896,9 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 		goto global;
 	}
 
+	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
+		goto direct;
+
 	/* see %SCX_OPS_ENQ_EXITING */
 	if (!static_branch_unlikely(&scx_ops_enq_exiting) &&
 	    unlikely(p->flags & PF_EXITING))
@@ -922,7 +925,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
 
 	*ddsp_taskp = NULL;
-	if (p->scx.ddsq_id != SCX_DSQ_INVALID)
+	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
 		goto direct;
 
 	/*
@@ -2142,7 +2145,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, &found);
 		if (found) {
 			p->scx.slice = SCX_SLICE_DFL;
-			p->scx.ddsq_id = SCX_DSQ_LOCAL;
+			p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
 		}
 		return cpu;
 	}
@@ -4101,13 +4104,20 @@ static void scx_dispatch_commit(struct task_struct *p, u64 dsq_id, u64 enq_flags
  * @enq_flags: SCX_ENQ_*
  *
  * Dispatch @p into the FIFO queue of the DSQ identified by @dsq_id. It is safe
- * to call this function spuriously. Can be called from ops.enqueue() and
- * ops.dispatch().
+ * to call this function spuriously. Can be called from ops.enqueue(),
+ * ops.select_cpu(), and ops.dispatch().
+ *
+ * When called from ops.select_cpu() or ops.enqueue(), it's for direct dispatch
+ * and @p must match the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be
+ * used to target the local DSQ of a CPU other than the enqueueing one. Use
+ * ops.select_cpu() to be on the target CPU in the first place.
  *
- * When called from ops.enqueue(), it's for direct dispatch and @p must match
- * the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be used to target the
- * local DSQ of a CPU other than the enqueueing one. Use ops.select_cpu() to be
- * on the target CPU in the first place.
+ * When called from ops.select_cpu(), @enq_flags and @dsq_id are stored, and @p
+ * will be directly dispatched to the corresponding dispatch queue after
+ * ops.select_cpu() returns. If @p is dispatched to SCX_DSQ_LOCAL, it will be
+ * dispatched to the local DSQ of the CPU returned by ops.select_cpu().
+ * @enq_flags are OR'd with the enqueue flags on the enqueue path before the
+ * task is dispatched.
  *
  * When called from ops.dispatch(), there are no restrictions on @p or @dsq_id
  * and this function can be called upto ops.dispatch_max_batch times to dispatch
diff --git a/tools/testing/selftests/scx/.gitignore b/tools/testing/selftests/scx/.gitignore
index 991721c50d9ee8..4ae433bb3955d8 100644
--- a/tools/testing/selftests/scx/.gitignore
+++ b/tools/testing/selftests/scx/.gitignore
@@ -9,4 +9,5 @@ select_cpu_dfl_nodispatch
 select_cpu_dispatch
 select_cpu_dispatch_dbl_dsp
 select_cpu_dispatch_bad_dsq
+select_cpu_vtime
 build/
diff --git a/tools/testing/selftests/scx/Makefile b/tools/testing/selftests/scx/Makefile
index ae713d614f252c..8a0b66236ada4e 100644
--- a/tools/testing/selftests/scx/Makefile
+++ b/tools/testing/selftests/scx/Makefile
@@ -158,7 +158,8 @@ c-sched-targets := \
 	select_cpu_dfl_nodispatch \
 	select_cpu_dispatch \
 	select_cpu_dispatch_bad_dsq \
-	select_cpu_dispatch_dbl_dsp
+	select_cpu_dispatch_dbl_dsp \
+	select_cpu_vtime
 
 $(c-sched-targets): %: $(filter-out %.bpf.c,%.c) $(INCLUDE_DIR)/%.bpf.skel.h
 	$(eval sched=$(notdir $@))
diff --git a/tools/testing/selftests/scx/select_cpu_vtime.bpf.c b/tools/testing/selftests/scx/select_cpu_vtime.bpf.c
new file mode 100644
index 00000000000000..b8bdadf3e541b1
--- /dev/null
+++ b/tools/testing/selftests/scx/select_cpu_vtime.bpf.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * A scheduler that validates that enqueue flags are properly stored and
+ * applied at dispatch time when a task is directly dispatched from
+ * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and
+ * making the test a very basic vtime scheduler.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 David Vernet
+ * Copyright (c) 2024 Tejun Heo
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+volatile bool consumed;
+
+static u64 vtime_now;
+
+#define VTIME_DSQ 0
+
+static inline bool vtime_before(u64 a, u64 b)
+{
+	return (s64)(a - b) < 0;
+}
+
+static inline u64 task_vtime(const struct task_struct *p)
+{
+	u64 vtime = p->scx.dsq_vtime;
+
+	if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
+		return vtime_now - SCX_SLICE_DFL;
+	else
+		return vtime;
+}
+
+s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p,
+		   s32 prev_cpu, u64 wake_flags)
+{
+	s32 cpu;
+
+	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
+	if (cpu >= 0)
+		goto ddsp;
+
+	cpu = prev_cpu;
+	scx_bpf_test_and_clear_cpu_idle(cpu);
+ddsp:
+	scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
+	return cpu;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p)
+{
+	if (scx_bpf_consume(VTIME_DSQ))
+		consumed = true;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_running, struct task_struct *p)
+{
+	if (vtime_before(vtime_now, p->scx.dsq_vtime))
+		vtime_now = p->scx.dsq_vtime;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_stopping, struct task_struct *p,
+		    bool runnable)
+{
+	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_enable, struct task_struct *p)
+{
+	p->scx.dsq_vtime = vtime_now;
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init)
+{
+	scx_bpf_switch_all();
+
+	return scx_bpf_create_dsq(VTIME_DSQ, -1);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops select_cpu_vtime_ops = {
+	.select_cpu		= select_cpu_vtime_select_cpu,
+	.dispatch		= select_cpu_vtime_dispatch,
+	.running		= select_cpu_vtime_running,
+	.stopping		= select_cpu_vtime_stopping,
+	.enable			= select_cpu_vtime_enable,
+	.init			= select_cpu_vtime_init,
+	.name			= "select_cpu_vtime",
+	.timeout_ms		= 1000U,
+};
diff --git a/tools/testing/selftests/scx/select_cpu_vtime.c b/tools/testing/selftests/scx/select_cpu_vtime.c
new file mode 100644
index 00000000000000..6f72f0625478c7
--- /dev/null
+++ b/tools/testing/selftests/scx/select_cpu_vtime.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 David Vernet
+ * Copyright (c) 2024 Tejun Heo
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <libgen.h>
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include "select_cpu_vtime.bpf.skel.h"
+#include "scx_test.h"
+
+int main(int argc, char **argv)
+{
+	struct select_cpu_vtime *skel;
+	struct bpf_link *link;
+
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+	skel = select_cpu_vtime__open_and_load();
+	SCX_BUG_ON(!skel, "Failed to open and load skel");
+
+	SCX_ASSERT(!skel->bss->consumed);
+
+	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_vtime_ops);
+	SCX_BUG_ON(!link, "Failed to attach struct_ops");
+
+	sleep(1);
+
+	SCX_ASSERT(skel->bss->consumed);
+
+	bpf_link__destroy(link);
+	select_cpu_vtime__destroy(skel);
+
+	return 0;
+}
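For reference, the simplest use of the behavior documented in the scx_bpf_dispatch() comment above is to commit the dispatch decision from ops.select_cpu() itself, which then skips ops.enqueue() for that wakeup. The sketch below is illustrative only and not part of this series; it assumes <scx/common.bpf.h> and mirrors what the default path in select_task_rq_scx() does by targeting SCX_DSQ_LOCAL.

/*
 * Illustrative sketch, not part of this series: plain FIFO direct
 * dispatch from ops.select_cpu(). The verdict recorded in
 * ddsp_dsq_id/ddsp_enq_flags is applied after the callback returns,
 * sending @p to the local DSQ of the returned CPU and skipping
 * ops.enqueue().
 */
#include <scx/common.bpf.h>

s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	s32 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);

	if (cpu < 0)
		return prev_cpu;

	scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
	return cpu;
}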