
Commit 53795ce

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar.

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Fix migration thread runtime bogosity
  sched,rt: fix isolated CPUs leaving root_task_group indefinitely throttled
  sched,cgroup: Fix up task_groups list
  sched: fix divide by zero at {thread_group,task}_times
  sched, cgroup: Reduce rq->lock hold times for large cgroup hierarchies
2 parents f78602a + 8f61896 commit 53795ce

5 files changed: +70 -19 lines changed


kernel/sched/core.c

+21 -14
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
@@ -7246,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
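
The divide-by-zero fixed here comes from do_div(), whose divisor is only 32 bits wide: on configurations where cputime_t is 64 bits (e.g. with CONFIG_VIRT_CPU_ACCOUNTING), a total whose low 32 bits happen to be zero was truncated to a zero divisor. Below is a minimal userspace sketch of the same scaling logic, with plain C division standing in for the kernel's div_u64()/div64_u64() helpers; the typedef and values are illustrative only.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t cputime_t;	/* assume a 64-bit cputime_t configuration */

/* Same shape as the new scale_utime(): utime * rtime / total in 64 bits. */
static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
	uint64_t temp = rtime;

	temp *= utime;
	if (sizeof(cputime_t) == 4)
		temp /= (uint32_t)total;	/* div_u64(): 32-bit divisor suffices */
	else
		temp /= total;			/* div64_u64(): full 64-bit divisor */

	return (cputime_t)temp;
}

int main(void)
{
	/* total's low 32 bits are zero: the old do_div() path saw a 0 divisor. */
	cputime_t total = 1ULL << 32, utime = 1ULL << 31, rtime = total;

	printf("%llu\n", (unsigned long long)scale_utime(utime, rtime, total));
	return 0;
}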

kernel/sched/fair.c

+9 -2
@@ -3387,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -4293,11 +4301,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 	env.src_rq    = busiest;
 	env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+	update_h_load(env.src_cpu);
 more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	if (!env.loop)
-		update_h_load(env.src_cpu);
 
 	/*
 	 * cur_ld_moved - load moved in current iteration
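
The new rq->h_load_throttle field is a once-per-jiffy rate limit: update_h_load() remembers the jiffy in which it last walked the task-group tree for a runqueue and returns early if asked again within the same jiffy, so repeated balance passes no longer re-walk a large cgroup hierarchy. The pattern, reduced to a userspace sketch with a hand-advanced stand-in for jiffies (names and numbers are illustrative):

#include <stdio.h>

static unsigned long jiffies = 1000;	/* fake clock, advanced by hand below */
static unsigned long h_load_throttle;	/* per-rq timestamp in the real code */
static int walks;			/* counts "expensive" recomputations */

static void update_h_load(void)
{
	unsigned long now = jiffies;

	if (h_load_throttle == now)	/* already refreshed during this jiffy */
		return;
	h_load_throttle = now;

	walks++;			/* stands in for walk_tg_tree() */
}

int main(void)
{
	for (int i = 0; i < 1000; i++) {
		update_h_load();	/* called on every balance attempt */
		if (i % 100 == 99)
			jiffies++;	/* the clock ticks every 100 attempts */
	}
	printf("1000 calls, %d tree walks\n", walks);	/* prints 10 walks */
	return 0;
}

Moving the single update_h_load() call ahead of the more_balance: label also takes the walk out from under double_rq_lock(), which is what reduces rq->lock hold times; the old env.loop check becomes unnecessary.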

kernel/sched/rt.c

+13
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * FIXME: isolated CPUs should really leave the root task group,
+	 * whether they are isolcpus or were isolated via cpusets, lest
+	 * the timer run on a CPU which does not service all runqueues,
+	 * potentially leaving other CPUs indefinitely throttled.  If
+	 * isolation is really required, the user will turn the throttle
+	 * off to kill the perturbations it causes anyway.  Meanwhile,
+	 * this maintains functionality for boot and/or troubleshooting.
+	 */
+	if (rt_b == &root_task_group.rt_bandwidth)
+		span = cpu_online_mask;
+#endif
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
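
The effect of this one-line change can be seen with a toy model: the periodic replenishment handler only unthrottles CPUs inside the span it iterates, so if the timer runs on a CPU whose root-domain span excludes an isolated CPU, that CPU's rt_rq stays throttled forever; iterating the online mask for the root task group covers it again. A hedged userspace sketch, with plain arrays standing in for cpumasks and rt_rq state:

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

/* Stands in for per-CPU rt_rq->rt_throttled state. */
static bool throttled[NR_CPUS] = { true, true, true, true };

/* The period timer replenishes only the CPUs covered by @span. */
static void period_timer(const bool span[NR_CPUS])
{
	for (int i = 0; i < NR_CPUS; i++)
		if (span[i])
			throttled[i] = false;
}

int main(void)
{
	/* The timer CPU's root-domain span excludes isolated CPU 3. */
	bool rd_span[NR_CPUS]     = { true, true, true, false };
	bool online_mask[NR_CPUS] = { true, true, true, true };

	period_timer(rd_span);
	printf("cpu3 throttled with rd span:     %d\n", throttled[3]);	/* 1 */

	period_timer(online_mask);	/* the fix: use cpu_online_mask */
	printf("cpu3 throttled with online mask: %d\n", throttled[3]);	/* 0 */
	return 0;
}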

kernel/sched/sched.h

+6 -2
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
 struct cfs_rq;
 struct rt_rq;
 
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+	unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif

kernel/sched/stop_task.c

+21 -1
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		stop->se.exec_start = rq->clock_task;
 		return stop;
+	}
 
 	return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+	struct task_struct *curr = rq->curr;
+	u64 delta_exec;
+
+	delta_exec = rq->clock_task - curr->se.exec_start;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
+
+	schedstat_set(curr->se.statistics.exec_max,
+			max(curr->se.statistics.exec_max, delta_exec));
+
+	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
+	curr->se.exec_start = rq->clock_task;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 
 static void set_curr_task_stop(struct rq *rq)
 {
+	struct task_struct *stop = rq->stop;
+
+	stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)
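
The "migration thread runtime bogosity" came from the stop class never stamping se.exec_start, so a runtime delta computed against a stale or zero start time could be absurdly large. The patch adopts the usual accounting pattern: stamp the start time when the task is picked or made current, and account a clamped delta when it is put back. A userspace sketch of that pattern (structure and names are illustrative, not kernel API):

#include <stdint.h>
#include <stdio.h>

struct entity {
	uint64_t exec_start;		/* like se.exec_start */
	uint64_t sum_exec_runtime;	/* like se.sum_exec_runtime */
};

static uint64_t clock_task;		/* monotonically advancing task clock, ns */

static void pick(struct entity *se)
{
	se->exec_start = clock_task;	/* stamp the start of this run */
}

static void put(struct entity *se)
{
	int64_t delta = (int64_t)(clock_task - se->exec_start);

	if (delta < 0)			/* clamp, as the patch does */
		delta = 0;
	se->sum_exec_runtime += (uint64_t)delta;
	se->exec_start = clock_task;	/* restamp for the next interval */
}

int main(void)
{
	struct entity se = { 0, 0 };

	clock_task = 1000000;  pick(&se);	/* starts running at t = 1 ms */
	clock_task = 1250000;  put(&se);	/* ran for 0.25 ms */
	printf("runtime: %llu ns\n", (unsigned long long)se.sum_exec_runtime);
	return 0;
}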
