diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
index 5f457194e3a8c8..e629686cf06218 100644
--- a/include/linux/sched/ext.h
+++ b/include/linux/sched/ext.h
@@ -612,9 +612,8 @@ struct scx_dispatch_q {
 enum scx_ent_flags {
 	SCX_TASK_QUEUED		= 1 << 0, /* on ext runqueue */
 	SCX_TASK_BAL_KEEP	= 1 << 1, /* balance decided to keep current */
-	SCX_TASK_DDSP_PRIQ	= 1 << 2, /* task should be enqueued on priq when directly dispatched */
-	SCX_TASK_RESET_RUNNABLE_AT = 1 << 3, /* runnable_at should be reset */
-	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 4, /* last dequeue was for SLEEP */
+	SCX_TASK_RESET_RUNNABLE_AT = 1 << 2, /* runnable_at should be reset */
+	SCX_TASK_DEQD_FOR_SLEEP	= 1 << 3, /* last dequeue was for SLEEP */
 
 	SCX_TASK_STATE_SHIFT	= 8, /* bit 8 and 9 are used to carry scx_task_state */
 	SCX_TASK_STATE_BITS	= 2,
@@ -689,7 +688,8 @@ struct sched_ext_entity {
 #ifdef CONFIG_SCHED_CORE
 	u64			core_sched_at;	/* see scx_prio_less() */
 #endif
-	u64			ddsq_id;
+	u64			ddsp_dsq_id;
+	u64			ddsp_enq_flags;
 
 	/* BPF scheduler modifiable fields */
diff --git a/init/init_task.c b/init/init_task.c
index 1e035992a52b9d..54c9244ef9e5d4 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -114,7 +114,8 @@ struct task_struct init_task
 		.ops_state	= ATOMIC_INIT(0),
 		.runnable_at	= INITIAL_JIFFIES,
 		.slice		= SCX_SLICE_DFL,
-		.ddsq_id	= SCX_DSQ_INVALID,
+		.ddsp_dsq_id	= SCX_DSQ_INVALID,
+		.ddsp_enq_flags	= 0,
 	},
 #endif
 	.ptraced	= LIST_HEAD_INIT(init_task.ptraced),
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 781e8a00b6d597..937ef9353c0b3a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4564,7 +4564,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	atomic_long_set(&p->scx.ops_state, 0);
 	p->scx.runnable_at	= INITIAL_JIFFIES;
 	p->scx.slice		= SCX_SLICE_DFL;
-	p->scx.ddsq_id		= SCX_DSQ_INVALID;
+	p->scx.ddsp_dsq_id	= SCX_DSQ_INVALID;
+	p->scx.ddsp_enq_flags	= 0;
 #endif
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2a552efeec6f31..dd09b53254f592 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -681,6 +681,15 @@ static void dispatch_enqueue(struct scx_dispatch_q *dsq, struct task_struct *p,
 	dsq->nr++;
 	p->scx.dsq = dsq;
 
+	/*
+	 * scx.ddsp_dsq_id and scx.ddsp_enq_flags are only relevant on the
+	 * direct dispatch path, but we clear them here because the direct
+	 * dispatch verdict may be overridden on the enqueue path during e.g.
+	 * bypass.
+	 */
+	p->scx.ddsp_dsq_id = SCX_DSQ_INVALID;
+	p->scx.ddsp_enq_flags = 0;
+
 	/*
 	 * We're transitioning out of QUEUEING or DISPATCHING. store_release to
 	 * match waiters' load_acquire.
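The ddsp_dsq_id/ddsp_enq_flags pair replaces the single SCX_TASK_DDSP_PRIQ bit, so a direct-dispatch verdict now carries the full set of enqueue flags until the task is actually inserted into a DSQ. As an illustrative sketch that is not part of this patch, an ops.enqueue() implementation can direct-dispatch with vtime ordering as shown below; the SCX_ENQ_DSQ_PRIQ flag implied by scx_bpf_dispatch_vtime() is what gets stored in ddsp_enq_flags and re-applied by direct_dispatch(). SHARED_DSQ is a hypothetical DSQ id assumed to have been created in ops.init().

/*
 * Illustrative sketch only, not part of this patch: an ops.enqueue()
 * that direct-dispatches with vtime ordering. SHARED_DSQ is a
 * hypothetical DSQ id assumed to have been created with
 * scx_bpf_create_dsq() in ops.init().
 */
#include <scx/common.bpf.h>

#define SHARED_DSQ	0

void BPF_STRUCT_OPS(example_enqueue, struct task_struct *p, u64 enq_flags)
{
	/*
	 * scx_bpf_dispatch_vtime() marks @p for direct dispatch. The
	 * SCX_ENQ_DSQ_PRIQ flag it implies is carried in
	 * p->scx.ddsp_enq_flags (instead of the removed
	 * SCX_TASK_DDSP_PRIQ task flag) and OR'd back in when
	 * direct_dispatch() runs.
	 */
	scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL,
			       p->scx.dsq_vtime, enq_flags);
}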
@@ -833,12 +842,11 @@ static void mark_direct_dispatch(struct task_struct *ddsp_task,
 		return;
 	}
 
-	WARN_ON_ONCE(p->scx.ddsq_id != SCX_DSQ_INVALID);
-	WARN_ON_ONCE(p->scx.flags & SCX_TASK_DDSP_PRIQ);
+	WARN_ON_ONCE(p->scx.ddsp_dsq_id != SCX_DSQ_INVALID);
+	WARN_ON_ONCE(p->scx.ddsp_enq_flags);
 
-	p->scx.ddsq_id = dsq_id;
-	if (enq_flags & SCX_ENQ_DSQ_PRIQ)
-		p->scx.flags |= SCX_TASK_DDSP_PRIQ;
+	p->scx.ddsp_dsq_id = dsq_id;
+	p->scx.ddsp_enq_flags = enq_flags;
 }
 
 static void direct_dispatch(struct task_struct *p, u64 enq_flags)
@@ -847,14 +855,9 @@ static void direct_dispatch(struct task_struct *p, u64 enq_flags)
 
 	touch_core_sched_dispatch(task_rq(p), p);
 
-	if (p->scx.flags & SCX_TASK_DDSP_PRIQ) {
-		enq_flags |= SCX_ENQ_DSQ_PRIQ;
-		p->scx.flags &= ~SCX_TASK_DDSP_PRIQ;
-	}
-
-	dsq = find_dsq_for_dispatch(task_rq(p), p->scx.ddsq_id, p);
-	dispatch_enqueue(dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
-	p->scx.ddsq_id = SCX_DSQ_INVALID;
+	enq_flags |= (p->scx.ddsp_enq_flags | SCX_ENQ_CLEAR_OPSS);
+	dsq = find_dsq_for_dispatch(task_rq(p), p->scx.ddsp_dsq_id, p);
+	dispatch_enqueue(dsq, p, enq_flags);
 }
 
 static bool test_rq_online(struct rq *rq)
@@ -874,9 +877,6 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 
 	WARN_ON_ONCE(!(p->scx.flags & SCX_TASK_QUEUED));
 
-	if (p->scx.ddsq_id != SCX_DSQ_INVALID)
-		goto direct;
-
 	/* rq migration */
 	if (sticky_cpu == cpu_of(rq))
 		goto local_norefill;
@@ -896,6 +896,9 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 		goto global;
 	}
 
+	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
+		goto direct;
+
 	/* see %SCX_OPS_ENQ_EXITING */
 	if (!static_branch_unlikely(&scx_ops_enq_exiting) &&
 	    unlikely(p->flags & PF_EXITING))
@@ -922,7 +925,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	SCX_CALL_OP_TASK(SCX_KF_ENQUEUE, enqueue, p, enq_flags);
 
 	*ddsp_taskp = NULL;
-	if (p->scx.ddsq_id != SCX_DSQ_INVALID)
+	if (p->scx.ddsp_dsq_id != SCX_DSQ_INVALID)
 		goto direct;
 
 	/*
@@ -2142,7 +2145,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, &found);
 		if (found) {
 			p->scx.slice = SCX_SLICE_DFL;
-			p->scx.ddsq_id = SCX_DSQ_LOCAL;
+			p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
 		}
 		return cpu;
 	}
@@ -4101,13 +4104,20 @@ static void scx_dispatch_commit(struct task_struct *p, u64 dsq_id, u64 enq_flags
  * @enq_flags: SCX_ENQ_*
  *
  * Dispatch @p into the FIFO queue of the DSQ identified by @dsq_id. It is safe
- * to call this function spuriously. Can be called from ops.enqueue() and
- * ops.dispatch().
+ * to call this function spuriously. Can be called from ops.enqueue(),
+ * ops.select_cpu(), and ops.dispatch().
+ *
+ * When called from ops.select_cpu() or ops.enqueue(), it's for direct dispatch
+ * and @p must match the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be
+ * used to target the local DSQ of a CPU other than the enqueueing one. Use
+ * ops.select_cpu() to be on the target CPU in the first place.
  *
- * When called from ops.enqueue(), it's for direct dispatch and @p must match
- * the task being enqueued. Also, %SCX_DSQ_LOCAL_ON can't be used to target the
- * local DSQ of a CPU other than the enqueueing one. Use ops.select_cpu() to be
- * on the target CPU in the first place.
+ * When called from ops.select_cpu(), @enq_flags and @dsq_id are stored, and @p
+ * will be directly dispatched to the corresponding dispatch queue after
+ * ops.select_cpu() returns. If @p is dispatched to SCX_DSQ_LOCAL, it will be
+ * dispatched to the local DSQ of the CPU returned by ops.select_cpu().
+ * @enq_flags are OR'd with the enqueue flags on the enqueue path before the
+ * task is dispatched.
  *
  * When called from ops.dispatch(), there are no restrictions on @p or @dsq_id
  * and this function can be called upto ops.dispatch_max_batch times to dispatch
diff --git a/tools/testing/selftests/scx/.gitignore b/tools/testing/selftests/scx/.gitignore
index 991721c50d9ee8..4ae433bb3955d8 100644
--- a/tools/testing/selftests/scx/.gitignore
+++ b/tools/testing/selftests/scx/.gitignore
@@ -9,4 +9,5 @@ select_cpu_dfl_nodispatch
 select_cpu_dispatch
 select_cpu_dispatch_dbl_dsp
 select_cpu_dispatch_bad_dsq
+select_cpu_vtime
 build/
diff --git a/tools/testing/selftests/scx/Makefile b/tools/testing/selftests/scx/Makefile
index ae713d614f252c..8a0b66236ada4e 100644
--- a/tools/testing/selftests/scx/Makefile
+++ b/tools/testing/selftests/scx/Makefile
@@ -158,7 +158,8 @@ c-sched-targets := \
 	select_cpu_dfl_nodispatch \
 	select_cpu_dispatch \
 	select_cpu_dispatch_bad_dsq \
-	select_cpu_dispatch_dbl_dsp
+	select_cpu_dispatch_dbl_dsp \
+	select_cpu_vtime
 
 $(c-sched-targets): %: $(filter-out %.bpf.c,%.c) $(INCLUDE_DIR)/%.bpf.skel.h
 	$(eval sched=$(notdir $@))
diff --git a/tools/testing/selftests/scx/select_cpu_vtime.bpf.c b/tools/testing/selftests/scx/select_cpu_vtime.bpf.c
new file mode 100644
index 00000000000000..b8bdadf3e541b1
--- /dev/null
+++ b/tools/testing/selftests/scx/select_cpu_vtime.bpf.c
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * A scheduler that validates that enqueue flags are properly stored and
+ * applied at dispatch time when a task is directly dispatched from
+ * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and
+ * making the test a very basic vtime scheduler.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 David Vernet
+ * Copyright (c) 2024 Tejun Heo
+ */
+
+#include <scx/common.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+volatile bool consumed;
+
+static u64 vtime_now;
+
+#define VTIME_DSQ 0
+
+static inline bool vtime_before(u64 a, u64 b)
+{
+	return (s64)(a - b) < 0;
+}
+
+static inline u64 task_vtime(const struct task_struct *p)
+{
+	u64 vtime = p->scx.dsq_vtime;
+
+	if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
+		return vtime_now - SCX_SLICE_DFL;
+	else
+		return vtime;
+}
+
+s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p,
+		   s32 prev_cpu, u64 wake_flags)
+{
+	s32 cpu;
+
+	cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);
+	if (cpu >= 0)
+		goto ddsp;
+
+	cpu = prev_cpu;
+	scx_bpf_test_and_clear_cpu_idle(cpu);
+ddsp:
+	scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
+	return cpu;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p)
+{
+	if (scx_bpf_consume(VTIME_DSQ))
+		consumed = true;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_running, struct task_struct *p)
+{
+	if (vtime_before(vtime_now, p->scx.dsq_vtime))
+		vtime_now = p->scx.dsq_vtime;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_stopping, struct task_struct *p,
+		    bool runnable)
+{
+	p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
+}
+
+void BPF_STRUCT_OPS(select_cpu_vtime_enable, struct task_struct *p)
+{
+	p->scx.dsq_vtime = vtime_now;
+}
+
+s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init)
+{
+	scx_bpf_switch_all();
+
+	return scx_bpf_create_dsq(VTIME_DSQ, -1);
+}
+
+SEC(".struct_ops.link")
+struct sched_ext_ops select_cpu_vtime_ops = {
+	.select_cpu		= select_cpu_vtime_select_cpu,
+	.dispatch		= select_cpu_vtime_dispatch,
+	.running		= select_cpu_vtime_running,
+	.stopping		= select_cpu_vtime_stopping,
+	.enable			= select_cpu_vtime_enable,
+	.init			= select_cpu_vtime_init,
+	.name			= "select_cpu_vtime",
+	.timeout_ms		= 1000U,
+};
diff --git a/tools/testing/selftests/scx/select_cpu_vtime.c b/tools/testing/selftests/scx/select_cpu_vtime.c
new file mode 100644
index 00000000000000..6f72f0625478c7
--- /dev/null
+++ b/tools/testing/selftests/scx/select_cpu_vtime.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2024 David Vernet
+ * Copyright (c) 2024 Tejun Heo
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <libgen.h>
+#include <bpf/bpf.h>
+#include <scx/common.h>
+#include "select_cpu_vtime.bpf.skel.h"
+#include "scx_test.h"
+
+int main(int argc, char **argv)
+{
+	struct select_cpu_vtime *skel;
+	struct bpf_link *link;
+
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+	skel = select_cpu_vtime__open_and_load();
+	SCX_BUG_ON(!skel, "Failed to open and load skel");
+
+	SCX_ASSERT(!skel->bss->consumed);
+
+	link = bpf_map__attach_struct_ops(skel->maps.select_cpu_vtime_ops);
+	SCX_BUG_ON(!link, "Failed to attach struct_ops");
+
+	sleep(1);
+
+	SCX_ASSERT(skel->bss->consumed);
+
+	bpf_link__destroy(link);
+	select_cpu_vtime__destroy(skel);
+
+	return 0;
+}
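For reference, the simplest use of the behavior documented in the scx_bpf_dispatch() comment above is to commit the dispatch decision from ops.select_cpu() itself, which then skips ops.enqueue() for that wakeup. The sketch below is illustrative only and not part of this series; it assumes <scx/common.bpf.h> and mirrors what the default path in select_task_rq_scx() does by targeting SCX_DSQ_LOCAL.

/*
 * Illustrative sketch, not part of this series: plain FIFO direct
 * dispatch from ops.select_cpu(). The verdict recorded in
 * ddsp_dsq_id/ddsp_enq_flags is applied after the callback returns,
 * sending @p to the local DSQ of the returned CPU and skipping
 * ops.enqueue().
 */
#include <scx/common.bpf.h>

s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
		   s32 prev_cpu, u64 wake_flags)
{
	s32 cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0);

	if (cpu < 0)
		return prev_cpu;

	scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
	return cpu;
}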