Skip to content

Commit bc7a34b

Browse files
committed
timer: Reduce timer migration overhead if disabled
Eric reported that the timer_migration sysctl is not really nice performance-wise, as it needs to check at every timer insertion whether the feature is enabled or not. Further, the check does not live in the timer code, so we have an extra function call which checks an extra cache line to figure out that it is disabled.

We can do better and store that information in the per cpu (hr)timer bases. I considered using a static key, but that's a nightmare to update from the nohz code, and the timer base cache line is hot anyway when we select a timer base.

The old logic enabled the timer migration unconditionally if CONFIG_NO_HZ was set, even if nohz was disabled on the kernel command line.

With this modification, we start off with migration disabled. The user visible sysctl is still set to enabled. If the kernel switches to NOHZ, migration is enabled, provided the user did not disable it via the sysctl prior to the switch. If nohz=off is on the kernel command line, migration stays disabled no matter what.

Before:
  47.76%  hog       [.] main
  14.84%  [kernel]  [k] _raw_spin_lock_irqsave
   9.55%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.71%  [kernel]  [k] mod_timer
   6.24%  [kernel]  [k] lock_timer_base.isra.38
   3.76%  [kernel]  [k] detach_if_pending
   3.71%  [kernel]  [k] del_timer
   2.50%  [kernel]  [k] internal_add_timer
   1.51%  [kernel]  [k] get_nohz_timer_target
   1.28%  [kernel]  [k] __internal_add_timer
   0.78%  [kernel]  [k] timerfn
   0.48%  [kernel]  [k] wake_up_nohz_cpu

After:
  48.10%  hog       [.] main
  15.25%  [kernel]  [k] _raw_spin_lock_irqsave
   9.76%  [kernel]  [k] _raw_spin_unlock_irqrestore
   6.50%  [kernel]  [k] mod_timer
   6.44%  [kernel]  [k] lock_timer_base.isra.38
   3.87%  [kernel]  [k] detach_if_pending
   3.80%  [kernel]  [k] del_timer
   2.67%  [kernel]  [k] internal_add_timer
   1.33%  [kernel]  [k] __internal_add_timer
   0.73%  [kernel]  [k] timerfn
   0.54%  [kernel]  [k] wake_up_nohz_cpu

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Joonwoo Park <joonwoop@codeaurora.org>
Cc: Wenbo Wang <wenbo.wang@memblaze.com>
Link: http://lkml.kernel.org/r/20150526224512.127050787@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
1 parent c74441a commit bc7a34b

File tree

12 files changed

+133
-60
lines changed

12 files changed

+133
-60
lines changed

include/linux/hrtimer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ enum hrtimer_base_type {
163163
* @cpu: cpu number
164164
* @active_bases: Bitfield to mark bases with active timers
165165
* @clock_was_set_seq: Sequence counter of clock was set events
166+
* @migration_enabled: The migration of hrtimers to other cpus is enabled
166167
* @expires_next: absolute time of the next event which was scheduled
167168
* via clock_set_next_event()
168169
* @next_timer: Pointer to the first expiring timer
@@ -186,6 +187,7 @@ struct hrtimer_cpu_base {
186187
unsigned int cpu;
187188
unsigned int active_bases;
188189
unsigned int clock_was_set_seq;
190+
bool migration_enabled;
189191
#ifdef CONFIG_HIGH_RES_TIMERS
190192
unsigned int in_hrtirq : 1,
191193
hres_active : 1,

include/linux/sched.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -335,14 +335,10 @@ extern int runqueue_is_locked(int cpu);
335335
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
336336
extern void nohz_balance_enter_idle(int cpu);
337337
extern void set_cpu_sd_state_idle(void);
338-
extern int get_nohz_timer_target(int pinned);
338+
extern int get_nohz_timer_target(void);
339339
#else
340340
static inline void nohz_balance_enter_idle(int cpu) { }
341341
static inline void set_cpu_sd_state_idle(void) { }
342-
static inline int get_nohz_timer_target(int pinned)
343-
{
344-
return smp_processor_id();
345-
}
346342
#endif
347343

348344
/*

include/linux/sched/sysctl.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -57,24 +57,12 @@ extern unsigned int sysctl_numa_balancing_scan_size;
5757
extern unsigned int sysctl_sched_migration_cost;
5858
extern unsigned int sysctl_sched_nr_migrate;
5959
extern unsigned int sysctl_sched_time_avg;
60-
extern unsigned int sysctl_timer_migration;
6160
extern unsigned int sysctl_sched_shares_window;
6261

6362
int sched_proc_update_handler(struct ctl_table *table, int write,
6463
void __user *buffer, size_t *length,
6564
loff_t *ppos);
6665
#endif
67-
#ifdef CONFIG_SCHED_DEBUG
68-
static inline unsigned int get_sysctl_timer_migration(void)
69-
{
70-
return sysctl_timer_migration;
71-
}
72-
#else
73-
static inline unsigned int get_sysctl_timer_migration(void)
74-
{
75-
return 1;
76-
}
77-
#endif
7866

7967
/*
8068
* control realtime throttling:

include/linux/timer.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,15 @@ extern void run_local_timers(void);
238238
struct hrtimer;
239239
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
240240

241+
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
242+
#include <linux/sysctl.h>
243+
244+
extern unsigned int sysctl_timer_migration;
245+
int timer_migration_handler(struct ctl_table *table, int write,
246+
void __user *buffer, size_t *lenp,
247+
loff_t *ppos);
248+
#endif
249+
241250
unsigned long __round_jiffies(unsigned long j, int cpu);
242251
unsigned long __round_jiffies_relative(unsigned long j, int cpu);
243252
unsigned long round_jiffies(unsigned long j);

kernel/rcu/tree_plugin.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,8 +1432,6 @@ module_param(rcu_idle_gp_delay, int, 0644);
14321432
static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
14331433
module_param(rcu_idle_lazy_gp_delay, int, 0644);
14341434

1435-
extern int tick_nohz_active;
1436-
14371435
/*
14381436
* Try to advance callbacks for all flavors of RCU on the current CPU, but
14391437
* only if it has been awhile since the last time we did so. Afterwards,

kernel/sched/core.c

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -572,13 +572,12 @@ void resched_cpu(int cpu)
572572
* selecting an idle cpu will add more delays to the timers than intended
573573
* (as that cpu's timer base may not be uptodate wrt jiffies etc).
574574
*/
575-
int get_nohz_timer_target(int pinned)
575+
int get_nohz_timer_target(void)
576576
{
577-
int cpu = smp_processor_id();
578-
int i;
577+
int i, cpu = smp_processor_id();
579578
struct sched_domain *sd;
580579

581-
if (pinned || !get_sysctl_timer_migration() || !idle_cpu(cpu))
580+
if (!idle_cpu(cpu))
582581
return cpu;
583582

584583
rcu_read_lock();
@@ -7050,8 +7049,6 @@ void __init sched_init_smp(void)
70507049
}
70517050
#endif /* CONFIG_SMP */
70527051

7053-
const_debug unsigned int sysctl_timer_migration = 1;
7054-
70557052
int in_sched_functions(unsigned long addr)
70567053
{
70577054
return in_lock_functions(addr) ||

kernel/sysctl.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -349,15 +349,6 @@ static struct ctl_table kern_table[] = {
349349
.mode = 0644,
350350
.proc_handler = proc_dointvec,
351351
},
352-
{
353-
.procname = "timer_migration",
354-
.data = &sysctl_timer_migration,
355-
.maxlen = sizeof(unsigned int),
356-
.mode = 0644,
357-
.proc_handler = proc_dointvec_minmax,
358-
.extra1 = &zero,
359-
.extra2 = &one,
360-
},
361352
#endif /* CONFIG_SMP */
362353
#ifdef CONFIG_NUMA_BALANCING
363354
{
@@ -1132,6 +1123,15 @@ static struct ctl_table kern_table[] = {
11321123
.extra1 = &zero,
11331124
.extra2 = &one,
11341125
},
1126+
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
1127+
{
1128+
.procname = "timer_migration",
1129+
.data = &sysctl_timer_migration,
1130+
.maxlen = sizeof(unsigned int),
1131+
.mode = 0644,
1132+
.proc_handler = timer_migration_handler,
1133+
},
1134+
#endif
11351135
{ }
11361136
};
11371137

kernel/time/hrtimer.c

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -177,21 +177,38 @@ hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
177177
#endif
178178
}
179179

180+
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
181+
static inline
182+
struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
183+
int pinned)
184+
{
185+
if (pinned || !base->migration_enabled)
186+
return this_cpu_ptr(&hrtimer_bases);
187+
return &per_cpu(hrtimer_bases, get_nohz_timer_target());
188+
}
189+
#else
190+
static inline
191+
struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
192+
int pinned)
193+
{
194+
return this_cpu_ptr(&hrtimer_bases);
195+
}
196+
#endif
197+
180198
/*
181199
* Switch the timer base to the current CPU when possible.
182200
*/
183201
static inline struct hrtimer_clock_base *
184202
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
185203
int pinned)
186204
{
205+
struct hrtimer_cpu_base *new_cpu_base, *this_base;
187206
struct hrtimer_clock_base *new_base;
188-
struct hrtimer_cpu_base *new_cpu_base;
189-
int this_cpu = smp_processor_id();
190-
int cpu = get_nohz_timer_target(pinned);
191207
int basenum = base->index;
192208

209+
this_base = this_cpu_ptr(&hrtimer_bases);
210+
new_cpu_base = get_target_base(this_base, pinned);
193211
again:
194-
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
195212
new_base = &new_cpu_base->clock_base[basenum];
196213

197214
if (base != new_base) {
@@ -212,17 +229,19 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
212229
raw_spin_unlock(&base->cpu_base->lock);
213230
raw_spin_lock(&new_base->cpu_base->lock);
214231

215-
if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
216-
cpu = this_cpu;
232+
if (new_cpu_base != this_base &&
233+
hrtimer_check_target(timer, new_base)) {
217234
raw_spin_unlock(&new_base->cpu_base->lock);
218235
raw_spin_lock(&base->cpu_base->lock);
236+
new_cpu_base = this_base;
219237
timer->base = base;
220238
goto again;
221239
}
222240
timer->base = new_base;
223241
} else {
224-
if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
225-
cpu = this_cpu;
242+
if (new_cpu_base != this_base &&
243+
hrtimer_check_target(timer, new_base)) {
244+
new_cpu_base = this_base;
226245
goto again;
227246
}
228247
}

kernel/time/tick-internal.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,4 +149,18 @@ extern void tick_nohz_init(void);
149149
static inline void tick_nohz_init(void) { }
150150
#endif
151151

152+
#ifdef CONFIG_NO_HZ_COMMON
153+
extern unsigned long tick_nohz_active;
154+
#else
155+
#define tick_nohz_active (0)
156+
#endif
157+
158+
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
159+
extern void timers_update_migration(void);
160+
#else
161+
static inline void timers_update_migration(void) { }
162+
#endif
163+
164+
DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
165+
152166
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);

kernel/time/tick-sched.c

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ void __init tick_nohz_init(void)
399399
* NO HZ enabled ?
400400
*/
401401
static int tick_nohz_enabled __read_mostly = 1;
402-
int tick_nohz_active __read_mostly;
402+
unsigned long tick_nohz_active __read_mostly;
403403
/*
404404
* Enable / Disable tickless mode
405405
*/
@@ -956,6 +956,16 @@ static void tick_nohz_handler(struct clock_event_device *dev)
956956
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
957957
}
958958

959+
static inline void tick_nohz_activate(struct tick_sched *ts, int mode)
960+
{
961+
if (!tick_nohz_enabled)
962+
return;
963+
ts->nohz_mode = mode;
964+
/* One update is enough */
965+
if (!test_and_set_bit(0, &tick_nohz_active))
966+
timers_update_migration();
967+
}
968+
959969
/**
960970
* tick_nohz_switch_to_nohz - switch to nohz mode
961971
*/
@@ -970,9 +980,6 @@ static void tick_nohz_switch_to_nohz(void)
970980
if (tick_switch_to_oneshot(tick_nohz_handler))
971981
return;
972982

973-
tick_nohz_active = 1;
974-
ts->nohz_mode = NOHZ_MODE_LOWRES;
975-
976983
/*
977984
* Recycle the hrtimer in ts, so we can share the
978985
* hrtimer_forward with the highres code.
@@ -984,6 +991,7 @@ static void tick_nohz_switch_to_nohz(void)
984991
hrtimer_forward_now(&ts->sched_timer, tick_period);
985992
hrtimer_set_expires(&ts->sched_timer, next);
986993
tick_program_event(next, 1);
994+
tick_nohz_activate(ts, NOHZ_MODE_LOWRES);
987995
}
988996

989997
/*
@@ -1035,6 +1043,7 @@ static inline void tick_nohz_irq_enter(void)
10351043

10361044
static inline void tick_nohz_switch_to_nohz(void) { }
10371045
static inline void tick_nohz_irq_enter(void) { }
1046+
static inline void tick_nohz_activate(struct tick_sched *ts, int mode) { }
10381047

10391048
#endif /* CONFIG_NO_HZ_COMMON */
10401049

@@ -1117,13 +1126,7 @@ void tick_setup_sched_timer(void)
11171126

11181127
hrtimer_forward(&ts->sched_timer, now, tick_period);
11191128
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
1120-
1121-
#ifdef CONFIG_NO_HZ_COMMON
1122-
if (tick_nohz_enabled) {
1123-
ts->nohz_mode = NOHZ_MODE_HIGHRES;
1124-
tick_nohz_active = 1;
1125-
}
1126-
#endif
1129+
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
11271130
}
11281131
#endif /* HIGH_RES_TIMERS */
11291132

0 commit comments

Comments
 (0)