Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler changes from Ingo Molnar:
 "Continued quest to clean up and enhance the cputime code by Frederic
  Weisbecker, in preparation for future tickless kernel features.

  Other than that, smallish changes."

Fix up trivial conflicts due to additions next to each other in arch/{x86/}Kconfig

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  cputime: Make finegrained irqtime accounting generally available
  cputime: Gather time/stats accounting config options into a single menu
  ia64: Reuse system and user vtime accounting functions on task switch
  ia64: Consolidate user vtime accounting
  vtime: Consolidate system/idle context detection
  cputime: Use a proper subsystem naming for vtime related APIs
  sched: cpu_power: enable ARCH_POWER
  sched/nohz: Clean up select_nohz_load_balancer()
  sched: Fix load avg vs. cpu-hotplug
  sched: Remove __ARCH_WANT_INTERRUPTS_ON_CTXSW
  sched: Fix nohz_idle_balance()
  sched: Remove useless code in yield_to()
  sched: Add time unit suffix to sched sysctl knobs
  sched/debug: Limit sd->*_idx range on sysctl
  sched: Remove AFFINE_WAKEUPS feature flag
  s390: Remove leftover account_tick_vtime() header
  cputime: Consolidate vtime handling on context switch
  sched: Move cputime code to its own file
  cputime: Generalize CONFIG_VIRT_CPU_ACCOUNTING
  tile: Remove SD_PREFER_LOCAL leftover
  ...
torvalds committed Oct 1, 2012
2 parents 4cba333 + fdf9c35 commit 0b981cb
Showing 32 changed files with 892 additions and 906 deletions.
10 changes: 0 additions & 10 deletions Documentation/scheduler/sched-arch.txt
@@ -17,16 +17,6 @@ you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
Unlocked context switches introduce only a very minor performance
penalty to the core scheduler implementation in the CONFIG_SMP case.

2. Interrupt status
By default, the switch_to arch function is called with interrupts
disabled. Interrupts may be enabled over the call if it is likely to
introduce a significant interrupt latency by adding the line
`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
unlocked context switches. This define also implies
`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
example.


CPU idle
========
Your cpu_idle routines need to obey the following rules:
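
For context, the only context-switch opt-in that survives this cleanup is the unlocked-switch define mentioned in the documentation above. A hypothetical architecture header would enable it roughly like this (the arch name and include guard are made up for illustration; __ARCH_WANT_INTERRUPTS_ON_CTXSW itself no longer exists after this series):

    /* arch/foo/include/asm/switch_to.h -- illustrative sketch only */
    #ifndef _ASM_FOO_SWITCH_TO_H
    #define _ASM_FOO_SWITCH_TO_H

    /*
     * Ask the core scheduler to run switch_to() without holding the
     * runqueue lock; interrupts now stay disabled across the switch
     * on every architecture.
     */
    #define __ARCH_WANT_UNLOCKED_CTXSW

    #endif /* _ASM_FOO_SWITCH_TO_H */
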
9 changes: 9 additions & 0 deletions arch/Kconfig
@@ -304,4 +304,13 @@ config HAVE_RCU_USER_QS
are already protected inside rcu_irq_enter/rcu_irq_exit() but
preemption or signal handling on irq exit still need to be protected.

config HAVE_VIRT_CPU_ACCOUNTING
bool

config HAVE_IRQ_TIME_ACCOUNTING
bool
help
Archs need to ensure they use a high enough resolution clock to
support irq time accounting and then call enable_sched_clock_irqtime().

source "kernel/gcov/Kconfig"
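
The new HAVE_IRQ_TIME_ACCOUNTING help text is the whole contract: an architecture with a cheap, high-resolution sched_clock() selects the symbol and turns the feature on at boot. A minimal sketch of such a call site, assuming a made-up architecture and init function (only enable_sched_clock_irqtime() is a real kernel API):

    /* arch/foo/kernel/time.c -- illustrative sketch, not part of this diff */
    #include <linux/init.h>
    #include <linux/sched.h>        /* enable_sched_clock_irqtime() */

    void __init foo_time_init(void)
    {
            /* ... register the clocksource backing sched_clock() ... */

            /*
             * Worth doing only when sched_clock() is fast and fine-grained:
             * with CONFIG_IRQ_TIME_ACCOUNTING it is read on every hard and
             * soft irq entry and exit.
             */
            enable_sched_clock_irqtime();
    }
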
12 changes: 1 addition & 11 deletions arch/ia64/Kconfig
@@ -25,6 +25,7 @@ config IA64
select HAVE_GENERIC_HARDIRQS
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
@@ -340,17 +341,6 @@ config FORCE_MAX_ZONEORDER
default "17" if HUGETLB_PAGE
default "11"

config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
default n
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
kernel entry and exit and on transitions within the kernel
between system, softirq and hardirq state, so there is a
small performance impact.
If in doubt, say N here.

config SMP
bool "Symmetric multi-processing support"
select USE_GENERIC_SMP_HELPERS
8 changes: 0 additions & 8 deletions arch/ia64/include/asm/switch_to.h
@@ -30,13 +30,6 @@ extern struct task_struct *ia64_switch_to (void *next_task);
extern void ia64_save_extra (struct task_struct *task);
extern void ia64_load_extra (struct task_struct *task);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING
extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct *next);
# define IA64_ACCOUNT_ON_SWITCH(p,n) ia64_account_on_switch(p,n)
#else
# define IA64_ACCOUNT_ON_SWITCH(p,n)
#endif

#ifdef CONFIG_PERFMON
DECLARE_PER_CPU(unsigned long, pfm_syst_info);
# define PERFMON_IS_SYSWIDE() (__get_cpu_var(pfm_syst_info) & 0x1)
@@ -49,7 +42,6 @@ extern void ia64_account_on_switch (struct task_struct *prev, struct task_struct
|| PERFMON_IS_SYSWIDE())

#define __switch_to(prev,next,last) do { \
IA64_ACCOUNT_ON_SWITCH(prev, next); \
if (IA64_HAS_EXTRA_STATE(prev)) \
ia64_save_extra(prev); \
if (IA64_HAS_EXTRA_STATE(next)) \
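
The per-arch IA64_ACCOUNT_ON_SWITCH() hook can disappear because the core scheduler now drives the accounting itself through the common vtime_task_switch() callback ("cputime: Consolidate vtime handling on context switch"). A simplified sketch of the generic side, which is not shown in this diff and omits most of the real function:

    /* kernel/sched/core.c, finish_task_switch() -- simplified sketch */
    static void finish_task_switch(struct rq *rq, struct task_struct *prev)
    {
            /* ... rq->prev_mm handling, prev->state snapshot, etc. ... */

            /* charge prev's accumulated vtime once, for every architecture */
            vtime_task_switch(prev);
            finish_arch_switch(prev);

            /* ... lock release, mmdrop(), dead-task reaping ... */
    }
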
66 changes: 33 additions & 33 deletions arch/ia64/kernel/time.c
@@ -83,77 +83,77 @@ static struct clocksource *itc_clocksource;

extern cputime_t cycle_to_cputime(u64 cyc);

static void vtime_account_user(struct task_struct *tsk)
{
cputime_t delta_utime;
struct thread_info *ti = task_thread_info(tsk);

if (ti->ac_utime) {
delta_utime = cycle_to_cputime(ti->ac_utime);
account_user_time(tsk, delta_utime, delta_utime);
ti->ac_utime = 0;
}
}

/*
* Called from the context switch with interrupts disabled, to charge all
* accumulated times to the current process, and to prepare accounting on
* the next process.
*/
void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
void vtime_task_switch(struct task_struct *prev)
{
struct thread_info *pi = task_thread_info(prev);
struct thread_info *ni = task_thread_info(next);
cputime_t delta_stime, delta_utime;
__u64 now;
struct thread_info *ni = task_thread_info(current);

now = ia64_get_itc();

delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
if (idle_task(smp_processor_id()) != prev)
account_system_time(prev, 0, delta_stime, delta_stime);
vtime_account_system(prev);
else
account_idle_time(delta_stime);
vtime_account_idle(prev);

if (pi->ac_utime) {
delta_utime = cycle_to_cputime(pi->ac_utime);
account_user_time(prev, delta_utime, delta_utime);
}
vtime_account_user(prev);

pi->ac_stamp = ni->ac_stamp = now;
pi->ac_stamp = ni->ac_stamp;
ni->ac_stime = ni->ac_utime = 0;
}

/*
* Account time for a transition between system, hard irq or soft irq state.
* Note that this function is called with interrupts enabled.
*/
void account_system_vtime(struct task_struct *tsk)
static cputime_t vtime_delta(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
unsigned long flags;
cputime_t delta_stime;
__u64 now;

local_irq_save(flags);

now = ia64_get_itc();

delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
if (irq_count() || idle_task(smp_processor_id()) != tsk)
account_system_time(tsk, 0, delta_stime, delta_stime);
else
account_idle_time(delta_stime);
ti->ac_stime = 0;

ti->ac_stamp = now;

local_irq_restore(flags);
return delta_stime;
}

void vtime_account_system(struct task_struct *tsk)
{
cputime_t delta = vtime_delta(tsk);

account_system_time(tsk, 0, delta, delta);
}

void vtime_account_idle(struct task_struct *tsk)
{
account_idle_time(vtime_delta(tsk));
}
EXPORT_SYMBOL_GPL(account_system_vtime);

/*
* Called from the timer interrupt handler to charge accumulated user time
* to the current process. Must be called with interrupts disabled.
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
struct thread_info *ti = task_thread_info(p);
cputime_t delta_utime;

if (ti->ac_utime) {
delta_utime = cycle_to_cputime(ti->ac_utime);
account_user_time(p, delta_utime, delta_utime);
ti->ac_utime = 0;
}
vtime_account_user(p);
}

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
6 changes: 0 additions & 6 deletions arch/powerpc/include/asm/time.h
@@ -197,12 +197,6 @@ struct cpu_usage {

DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);

#if defined(CONFIG_VIRT_CPU_ACCOUNTING)
#define account_process_vtime(tsk) account_process_tick(tsk, 0)
#else
#define account_process_vtime(tsk) do { } while (0)
#endif

extern void secondary_cpu_time_init(void);

DECLARE_PER_CPU(u64, decrementers_next_tb);
3 changes: 0 additions & 3 deletions arch/powerpc/kernel/process.c
@@ -514,9 +514,6 @@ struct task_struct *__switch_to(struct task_struct *prev,

local_irq_save(flags);

account_system_vtime(current);
account_process_vtime(current);

/*
* We can't take a PMU exception inside _switch() since there is a
* window where the kernel stack SLB and the kernel stack are out
55 changes: 35 additions & 20 deletions arch/powerpc/kernel/time.c
@@ -291,21 +291,20 @@ static inline u64 calculate_stolen_time(u64 stop_tb)
* Account time for a transition between system, hard irq
* or soft irq state.
*/
void account_system_vtime(struct task_struct *tsk)
static u64 vtime_delta(struct task_struct *tsk,
u64 *sys_scaled, u64 *stolen)
{
u64 now, nowscaled, delta, deltascaled;
unsigned long flags;
u64 stolen, udelta, sys_scaled, user_scaled;
u64 now, nowscaled, deltascaled;
u64 udelta, delta, user_scaled;

local_irq_save(flags);
now = mftb();
nowscaled = read_spurr(now);
get_paca()->system_time += now - get_paca()->starttime;
get_paca()->starttime = now;
deltascaled = nowscaled - get_paca()->startspurr;
get_paca()->startspurr = nowscaled;

stolen = calculate_stolen_time(now);
*stolen = calculate_stolen_time(now);

delta = get_paca()->system_time;
get_paca()->system_time = 0;
@@ -322,35 +321,45 @@ void account_system_vtime(struct task_struct *tsk)
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
sys_scaled = delta;
*sys_scaled = delta;
user_scaled = udelta;
if (deltascaled != delta + udelta) {
if (udelta) {
sys_scaled = deltascaled * delta / (delta + udelta);
user_scaled = deltascaled - sys_scaled;
*sys_scaled = deltascaled * delta / (delta + udelta);
user_scaled = deltascaled - *sys_scaled;
} else {
sys_scaled = deltascaled;
*sys_scaled = deltascaled;
}
}
get_paca()->user_time_scaled += user_scaled;

if (in_interrupt() || idle_task(smp_processor_id()) != tsk) {
account_system_time(tsk, 0, delta, sys_scaled);
if (stolen)
account_steal_time(stolen);
} else {
account_idle_time(delta + stolen);
}
local_irq_restore(flags);
return delta;
}

void vtime_account_system(struct task_struct *tsk)
{
u64 delta, sys_scaled, stolen;

delta = vtime_delta(tsk, &sys_scaled, &stolen);
account_system_time(tsk, 0, delta, sys_scaled);
if (stolen)
account_steal_time(stolen);
}

void vtime_account_idle(struct task_struct *tsk)
{
u64 delta, sys_scaled, stolen;

delta = vtime_delta(tsk, &sys_scaled, &stolen);
account_idle_time(delta + stolen);
}
EXPORT_SYMBOL_GPL(account_system_vtime);

/*
* Transfer the user and system times accumulated in the paca
* by the exception entry and exit code to the generic process
* user and system time records.
* Must be called with interrupts disabled.
* Assumes that account_system_vtime() has been called recently
* Assumes that vtime_account() has been called recently
* (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
@@ -366,6 +375,12 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
account_user_time(tsk, utime, utimescaled);
}

void vtime_task_switch(struct task_struct *prev)
{
vtime_account(prev);
account_process_tick(prev, 0);
}

#else /* ! CONFIG_VIRT_CPU_ACCOUNTING */
#define calc_cputime_factors()
#endif
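
With account_system_vtime() gone, the callers in the generic irq entry/exit path use the renamed vtime_account() hook instead; that is what the updated comment above ("Assumes that vtime_account() has been called recently") refers to. Roughly, as a sketch reconstructed from the renamed generic irq-entry macro rather than taken from this hunk, so details may differ:

    /* include/linux/hardirq.h -- sketch of the renamed irq-entry hook */
    #define __irq_enter()                                   \
            do {                                            \
                    vtime_account(current);                 \
                    add_preempt_count(HARDIRQ_OFFSET);      \
                    trace_hardirq_enter();                  \
            } while (0)
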
16 changes: 1 addition & 15 deletions arch/powerpc/platforms/Kconfig.cputype
@@ -1,6 +1,7 @@
config PPC64
bool "64-bit kernel"
default n
select HAVE_VIRT_CPU_ACCOUNTING
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.
@@ -337,21 +338,6 @@ config PPC_MM_SLICES
default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
default n

config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
depends on PPC64
default y
help
Select this option to enable more accurate task and CPU time
accounting. This is done by reading a CPU counter on each
kernel entry and exit and on transitions within the kernel
between system, softirq and hardirq state, so there is a
small performance impact. This also enables accounting of
stolen time on logically-partitioned systems running on
IBM POWER5-based machines.

If in doubt, say Y here.

config PPC_HAVE_PMU_SUPPORT
bool

5 changes: 2 additions & 3 deletions arch/s390/Kconfig
@@ -49,9 +49,6 @@ config GENERIC_LOCKBREAK
config PGSTE
def_bool y if KVM

config VIRT_CPU_ACCOUNTING
def_bool y

config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y

@@ -89,6 +86,8 @@ config S390
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_CMPXCHG_LOCAL
select HAVE_VIRT_CPU_ACCOUNTING
select VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select BUILDTIME_EXTABLE_SORT
select ARCH_INLINE_SPIN_TRYLOCK
3 changes: 3 additions & 0 deletions arch/s390/include/asm/cputime.h
@@ -12,6 +12,9 @@
#include <linux/spinlock.h>
#include <asm/div64.h>


#define __ARCH_HAS_VTIME_ACCOUNT

/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */

typedef unsigned long long __nocast cputime_t;
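
The new __ARCH_HAS_VTIME_ACCOUNT define tells the generic cputime code that s390 supplies its own irq-safe, context-aware vtime_account() and does not want the common fallback. That fallback, added elsewhere in this series ("vtime: Consolidate system/idle context detection"), looks roughly like the following sketch (reconstructed, not part of this hunk):

    /* kernel/sched/cputime.c -- generic fallback, sketch */
    #ifndef __ARCH_HAS_VTIME_ACCOUNT
    void vtime_account(struct task_struct *tsk)
    {
            unsigned long flags;

            local_irq_save(flags);

            /* irq context, or a non-idle task: charge system time */
            if (in_interrupt() || !is_idle_task(tsk))
                    vtime_account_system(tsk);
            else
                    vtime_account_idle(tsk);

            local_irq_restore(flags);
    }
    EXPORT_SYMBOL_GPL(vtime_account);
    #endif /* __ARCH_HAS_VTIME_ACCOUNT */
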
4 changes: 0 additions & 4 deletions arch/s390/include/asm/switch_to.h
@@ -89,12 +89,8 @@ static inline void restore_access_regs(unsigned int *acrs)
prev = __switch_to(prev,next); \
} while (0)

extern void account_vtime(struct task_struct *, struct task_struct *);
extern void account_tick_vtime(struct task_struct *);

#define finish_arch_switch(prev) do { \
set_fs(current->thread.mm_segment); \
account_vtime(prev, current); \
} while (0)

#endif /* __ASM_SWITCH_TO_H */