@@ -3155,9 +3155,9 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
 	WRITE_ONCE(scx_switching_all, false);
 
 	/* avoid racing against fork and cgroup changes */
-	cpus_read_lock();
 	percpu_down_write(&scx_fork_rwsem);
 	scx_cgroup_lock();
+	cpus_read_lock();
 
 	spin_lock_irq(&scx_tasks_lock);
 	scx_task_iter_init(&sti);
@@ -3196,9 +3196,9 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
 
 	scx_cgroup_exit();
 
+	cpus_read_unlock();
 	scx_cgroup_unlock();
 	percpu_up_write(&scx_fork_rwsem);
-	cpus_read_unlock();
 
 	if (ei->kind >= SCX_EXIT_ERROR) {
 		printk(KERN_ERR "sched_ext: BPF scheduler \"%s\" errored, disabling\n", scx_ops.name);
@@ -3353,9 +3353,18 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 	atomic_long_set(&scx_nr_rejected, 0);
 
 	/*
-	 * Keep CPUs stable during enable so that the BPF scheduler can track
-	 * online CPUs by watching ->on/offline_cpu() after ->init().
+	 * Lock out forks, cgroup on/offlining and moves before opening the
+	 * floodgate so that they don't wander into the operations prematurely.
+	 *
+	 * Also keep CPUs stable during enable so that the BPF scheduler can
+	 * track online CPUs by watching ->on/offline_cpu() after ->init().
+	 *
+	 * Acquire scx_fork_rwsem and scx_group_rwsem before the hotplug lock.
+	 * cpus_read_lock() is acquired in a ton of places, so let's be a bit
+	 * cautious to avoid possible deadlock.
 	 */
+	percpu_down_write(&scx_fork_rwsem);
+	scx_cgroup_lock();
 	cpus_read_lock();
 
 	scx_switch_all_req = false;
@@ -3399,13 +3408,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 	queue_delayed_work(system_unbound_wq, &scx_watchdog_work,
 			   scx_watchdog_timeout / 2);
 
-	/*
-	 * Lock out forks, cgroup on/offlining and moves before opening the
-	 * floodgate so that they don't wander into the operations prematurely.
-	 */
-	percpu_down_write(&scx_fork_rwsem);
-	scx_cgroup_lock();
-
 	for (i = 0; i < SCX_NR_ONLINE_OPS; i++)
 		if (((void (**)(void))ops)[i])
 			static_branch_enable_cpuslocked(&scx_has_op[i]);
@@ -3431,7 +3433,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 	 */
 	ret = scx_cgroup_init();
 	if (ret)
-		goto err_disable_unlock;
+		goto err_disable;
 
 	static_branch_enable_cpuslocked(&__scx_ops_enabled);
 
@@ -3457,7 +3459,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 			spin_unlock_irq(&scx_tasks_lock);
 			pr_err("sched_ext: ops.init_task() failed (%d) for %s[%d] while loading\n",
 			       ret, p->comm, p->pid);
-			goto err_disable_unlock;
+			goto err_disable;
 		}
 
 		put_task_struct(p);
@@ -3481,7 +3483,7 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 		preempt_enable();
 		spin_unlock_irq(&scx_tasks_lock);
 		ret = -EBUSY;
-		goto err_disable_unlock;
+		goto err_disable;
 	}
 
 	/*
@@ -3515,8 +3517,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 
 	spin_unlock_irq(&scx_tasks_lock);
 	preempt_enable();
-	scx_cgroup_unlock();
-	percpu_up_write(&scx_fork_rwsem);
 
 	if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {
 		ret = -EBUSY;
@@ -3527,6 +3527,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 		static_branch_enable_cpuslocked(&__scx_switched_all);
 
 	cpus_read_unlock();
+	scx_cgroup_unlock();
+	percpu_up_write(&scx_fork_rwsem);
 	mutex_unlock(&scx_ops_enable_mutex);
 
 	scx_cgroup_config_knobs();
@@ -3537,11 +3539,10 @@ static int scx_ops_enable(struct sched_ext_ops *ops)
 	mutex_unlock(&scx_ops_enable_mutex);
 	return ret;
 
-err_disable_unlock:
-	scx_cgroup_unlock();
-	percpu_up_write(&scx_fork_rwsem);
 err_disable:
 	cpus_read_unlock();
+	scx_cgroup_unlock();
+	percpu_up_write(&scx_fork_rwsem);
 	mutex_unlock(&scx_ops_enable_mutex);
 	/* must be fully disabled before returning */
 	scx_ops_disable(SCX_EXIT_ERROR);
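After this change, scx_ops_enable() and scx_ops_disable_workfn() take the three locks in one shared order -- scx_fork_rwsem, then the cgroup lock, then cpus_read_lock() -- and release them in reverse, with the old err_disable_unlock label folded into err_disable so every error path unwinds the same way. The sketch below is a minimal userspace analogy of that discipline, not kernel code: pthread primitives stand in for the percpu rwsem, the cgroup lock and the hotplug lock, and the names fork_rwsem, cgroup_lock, hotplug_lock, enable_path() and disable_path() are made up for illustration.

/*
 * Userspace analogy of the lock ordering established by the patch.
 * Both paths acquire fork -> cgroup -> hotplug and release in reverse,
 * so an ABBA deadlock between them is impossible.  Build with: cc -pthread
 */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t fork_rwsem = PTHREAD_RWLOCK_INITIALIZER;   /* stands in for scx_fork_rwsem */
static pthread_mutex_t cgroup_lock = PTHREAD_MUTEX_INITIALIZER;    /* stands in for scx_cgroup_lock() */
static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER; /* stands in for cpus_read_lock() */

static void *enable_path(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&fork_rwsem);   /* 1. lock out forks */
	pthread_mutex_lock(&cgroup_lock);     /* 2. then cgroup changes */
	pthread_rwlock_rdlock(&hotplug_lock); /* 3. hotplug lock taken last */

	puts("enable: locks held in fork -> cgroup -> hotplug order");

	pthread_rwlock_unlock(&hotplug_lock); /* release in reverse order */
	pthread_mutex_unlock(&cgroup_lock);
	pthread_rwlock_unlock(&fork_rwsem);
	return NULL;
}

static void *disable_path(void *arg)
{
	(void)arg;
	/* same acquisition order as enable_path() */
	pthread_rwlock_wrlock(&fork_rwsem);
	pthread_mutex_lock(&cgroup_lock);
	pthread_rwlock_rdlock(&hotplug_lock);

	puts("disable: same order, reverse release");

	pthread_rwlock_unlock(&hotplug_lock);
	pthread_mutex_unlock(&cgroup_lock);
	pthread_rwlock_unlock(&fork_rwsem);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, enable_path, NULL);
	pthread_create(&b, NULL, disable_path, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}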