
Commit faafcba

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle were:
   - Optimized support for Intel "Cluster-on-Die" (CoD) topologies (Dave Hansen)
   - Various sched/idle refinements for better idle handling (Nicolas Pitre, Daniel Lezcano, Chuansheng Liu, Vincent Guittot)
   - sched/numa updates and optimizations (Rik van Riel)
   - sysbench speedup (Vincent Guittot)
   - capacity calculation cleanups/refactoring (Vincent Guittot)
   - Various cleanups to thread group iteration (Oleg Nesterov)
   - Double-rq-lock removal optimization and various refactorings (Kirill Tkhai)
   - various sched/deadline fixes
  ... and lots of other changes"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (72 commits)
  sched/dl: Use dl_bw_of() under rcu_read_lock_sched()
  sched/fair: Delete resched_cpu() from idle_balance()
  sched, time: Fix build error with 64 bit cputime_t on 32 bit systems
  sched: Improve sysbench performance by fixing spurious active migration
  sched/x86: Fix up typo in topology detection
  x86, sched: Add new topology for multi-NUMA-node CPUs
  sched/rt: Use resched_curr() in task_tick_rt()
  sched: Use rq->rd in sched_setaffinity() under RCU read lock
  sched: cleanup: Rename 'out_unlock' to 'out_free_new_mask'
  sched: Use dl_bw_of() under RCU read lock
  sched/fair: Remove duplicate code from can_migrate_task()
  sched, mips, ia64: Remove __ARCH_WANT_UNLOCKED_CTXSW
  sched: print_rq(): Don't use tasklist_lock
  sched: normalize_rt_tasks(): Don't use _irqsave for tasklist_lock, use task_rq_lock()
  sched: Fix the task-group check in tg_has_rt_tasks()
  sched/fair: Leverage the idle state info when choosing the "idlest" cpu
  sched: Let the scheduler see CPU idle states
  sched/deadline: Fix inter- exclusive cpusets migrations
  sched/deadline: Clear dl_entity params when setscheduling to different class
  sched/numa: Kill the wrong/dead TASK_DEAD check in task_numa_fault()
  ...
2 parents 13ead80 + f10e00f commit faafcba

55 files changed, 1075 insertions(+), 552 deletions(-)


Documentation/scheduler/sched-deadline.txt  (+292, -48)

(large diff not shown)

arch/arm/kernel/topology.c  (+2, -2)

@@ -42,7 +42,7 @@
  */
 static DEFINE_PER_CPU(unsigned long, cpu_scale);
 
-unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
+unsigned long arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
         return per_cpu(cpu_scale, cpu);
 }
@@ -166,7 +166,7 @@ static void update_cpu_capacity(unsigned int cpu)
         set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity);
 
         printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n",
-                cpu, arch_scale_freq_capacity(NULL, cpu));
+                cpu, arch_scale_cpu_capacity(NULL, cpu));
 }
 
 #else

arch/cris/arch-v10/drivers/sync_serial.c  (-1)

@@ -1086,7 +1086,6 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
         }
         local_irq_restore(flags);
         schedule();
-        set_current_state(TASK_RUNNING);
         remove_wait_queue(&port->out_wait_q, &wait);
         if (signal_pending(current))
                 return -EINTR;
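
This hunk, and the many similar ones below, drop the same redundant statement: schedule() only returns once the task has been woken and is back in TASK_RUNNING, so setting the state again afterwards is a no-op. A minimal sketch of the open-coded wait pattern these call sites use (the wait queue "wq" and flag "done" are illustrative, not taken from this commit):

        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue(&wq, &wait);
        set_current_state(TASK_INTERRUPTIBLE);  /* must happen before checking the condition */
        if (!done)
                schedule();                     /* returns with the task back in TASK_RUNNING */
        /* no set_current_state(TASK_RUNNING) needed here */
        remove_wait_queue(&wq, &wait);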

arch/cris/arch-v32/drivers/sync_serial.c  (-1)

@@ -1089,7 +1089,6 @@ static ssize_t sync_serial_write(struct file *file, const char *buf,
         }
 
         schedule();
-        set_current_state(TASK_RUNNING);
         remove_wait_queue(&port->out_wait_q, &wait);
 
         if (signal_pending(current))

arch/ia64/include/asm/processor.h  (-1)

@@ -19,7 +19,6 @@
 #include <asm/ptrace.h>
 #include <asm/ustack.h>
 
-#define __ARCH_WANT_UNLOCKED_CTXSW
 #define ARCH_HAS_PREFETCH_SWITCH_STACK
 
 #define IA64_NUM_PHYS_STACK_REG 96

arch/mips/include/asm/processor.h  (-6)

@@ -397,12 +397,6 @@ unsigned long get_wchan(struct task_struct *p);
 #define ARCH_HAS_PREFETCHW
 #define prefetchw(x) __builtin_prefetch((x), 1, 1)
 
-/*
- * See Documentation/scheduler/sched-arch.txt; prevents deadlock on SMP
- * systems.
- */
-#define __ARCH_WANT_UNLOCKED_CTXSW
-
 #endif
 
 #endif /* _ASM_PROCESSOR_H */

arch/powerpc/include/asm/cputime.h  (+2)

@@ -32,6 +32,8 @@ static inline void setup_cputime_one_jiffy(void) { }
 typedef u64 __nocast cputime_t;
 typedef u64 __nocast cputime64_t;
 
+#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
+
 #ifdef __KERNEL__
 
 /*
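
The new cmpxchg_cputime() helper (defined here for powerpc and below for s390) is presumably consumed by generic cputime accounting code together with a fallback for every other architecture; that side of the change is not in this excerpt, but it would look roughly like:

        /* assumed generic fallback, not shown in this excerpt: a plain cmpxchg()
         * is enough wherever cputime_t fits in a natively-atomic word */
        #ifndef cmpxchg_cputime
        #define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
        #endif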

arch/powerpc/mm/fault.c  (+1, -4)

@@ -30,7 +30,6 @@
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
 #include <linux/perf_event.h>
-#include <linux/magic.h>
 #include <linux/ratelimit.h>
 #include <linux/context_tracking.h>
 #include <linux/hugetlb.h>
@@ -521,7 +520,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
         const struct exception_table_entry *entry;
-        unsigned long *stackend;
 
         /* Are we prepared to handle this fault?  */
         if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -550,8 +548,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
         printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
                 regs->nip);
 
-        stackend = end_of_stack(current);
-        if (current != &init_task && *stackend != STACK_END_MAGIC)
+        if (task_stack_end_corrupted(current))
                 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
 
         die("Kernel access of bad area", regs, sig);
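
Here (and in arch/x86/mm/fault.c below) the open-coded end-of-stack check is folded into task_stack_end_corrupted(). Based on the removed lines, the helper amounts to the following (a sketch of the assumed generic definition, not part of this diff):

        /* assumed definition, equivalent to the removed open-coded check */
        #define task_stack_end_corrupted(task) \
                (*(end_of_stack(task)) != STACK_END_MAGIC)

Since the call sites no longer reference STACK_END_MAGIC directly, they no longer need the <linux/magic.h> include.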

arch/s390/include/asm/cputime.h  (+2)

@@ -18,6 +18,8 @@
 typedef unsigned long long __nocast cputime_t;
 typedef unsigned long long __nocast cputime64_t;
 
+#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
+
 static inline unsigned long __div(unsigned long long n, unsigned long base)
 {
 #ifndef CONFIG_64BIT

arch/um/drivers/random.c  (-1)

@@ -79,7 +79,6 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size,
                 set_task_state(current, TASK_INTERRUPTIBLE);
 
                 schedule();
-                set_task_state(current, TASK_RUNNING);
                 remove_wait_queue(&host_read_wait, &wait);
 
                 if (atomic_dec_and_test(&host_sleep_count)) {

arch/x86/kernel/smpboot.c  (+46, -9)

@@ -295,12 +295,20 @@ void smp_store_cpu_info(int id)
         identify_secondary_cpu(c);
 }
 
+static bool
+topology_same_node(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+{
+        int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
+
+        return (cpu_to_node(cpu1) == cpu_to_node(cpu2));
+}
+
 static bool
 topology_sane(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o, const char *name)
 {
         int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
 
-        return !WARN_ONCE(cpu_to_node(cpu1) != cpu_to_node(cpu2),
+        return !WARN_ONCE(!topology_same_node(c, o),
                 "sched: CPU #%d's %s-sibling CPU #%d is not on the same node! "
                 "[node: %d != %d]. Ignoring dependency.\n",
                 cpu1, name, cpu2, cpu_to_node(cpu1), cpu_to_node(cpu2));
@@ -341,17 +349,44 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
         return false;
 }
 
-static bool match_mc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+/*
+ * Unlike the other levels, we do not enforce keeping a
+ * multicore group inside a NUMA node. If this happens, we will
+ * discard the MC level of the topology later.
+ */
+static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 {
-        if (c->phys_proc_id == o->phys_proc_id) {
-                if (cpu_has(c, X86_FEATURE_AMD_DCM))
-                        return true;
-
-                return topology_sane(c, o, "mc");
-        }
+        if (c->phys_proc_id == o->phys_proc_id)
+                return true;
         return false;
 }
 
+static struct sched_domain_topology_level numa_inside_package_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+        { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+        { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+        { NULL, },
+};
+/*
+ * set_sched_topology() sets the topology internal to a CPU. The
+ * NUMA topologies are layered on top of it to build the full
+ * system topology.
+ *
+ * If NUMA nodes are observed to occur within a CPU package, this
+ * function should be called. It forces the sched domain code to
+ * only use the SMT level for the CPU portion of the topology.
+ * This essentially falls back to relying on NUMA information
+ * from the SRAT table to describe the entire system topology
+ * (except for hyperthreads).
+ */
+static void primarily_use_numa_for_topology(void)
+{
+        set_sched_topology(numa_inside_package_topology);
+}
+
 void set_cpu_sibling_map(int cpu)
 {
         bool has_smt = smp_num_siblings > 1;
@@ -388,7 +423,7 @@ void set_cpu_sibling_map(int cpu)
         for_each_cpu(i, cpu_sibling_setup_mask) {
                 o = &cpu_data(i);
 
-                if ((i == cpu) || (has_mp && match_mc(c, o))) {
+                if ((i == cpu) || (has_mp && match_die(c, o))) {
                         link_mask(core, cpu, i);
 
                         /*
@@ -410,6 +445,8 @@ void set_cpu_sibling_map(int cpu)
                         } else if (i != cpu && !c->booted_cores)
                                 c->booted_cores = cpu_data(i).booted_cores;
                 }
+                if (match_die(c, o) && !topology_same_node(c, o))
+                        primarily_use_numa_for_topology();
         }
 }
 
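
For contrast with numa_inside_package_topology above: the scheduler's default topology also carries a package-wide DIE level, which is exactly what a Cluster-on-Die system must drop so that the NUMA/SRAT information can describe everything above the core level. A sketch of what that default roughly looks like (an assumption about the scheduler core, not part of this diff):

        /* assumed default topology in the scheduler core, shown for contrast */
        static struct sched_domain_topology_level default_topology[] = {
        #ifdef CONFIG_SCHED_SMT
                { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
        #endif
        #ifdef CONFIG_SCHED_MC
                { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
        #endif
                { cpu_cpu_mask, SD_INIT_NAME(DIE) },    /* spans the whole package */
                { NULL, },
        };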

arch/x86/mm/fault.c  (+1, -4)

@@ -3,7 +3,6 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/magic.h>                /* STACK_END_MAGIC              */
 #include <linux/sched.h>                /* test_thread_flag(), ...      */
 #include <linux/kdebug.h>               /* oops_begin/end, ...          */
 #include <linux/module.h>               /* search_exception_table       */
@@ -649,7 +648,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
            unsigned long address, int signal, int si_code)
 {
         struct task_struct *tsk = current;
-        unsigned long *stackend;
         unsigned long flags;
         int sig;
 
@@ -709,8 +707,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 
         show_fault_oops(regs, error_code, address);
 
-        stackend = end_of_stack(tsk);
-        if (tsk != &init_task && *stackend != STACK_END_MAGIC)
+        if (task_stack_end_corrupted(tsk))
                 printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
 
         tsk->thread.cr2 = address;

drivers/cpuidle/cpuidle.c  (+8, -7)

@@ -223,8 +223,14 @@ void cpuidle_uninstall_idle_handler(void)
 {
         if (enabled_devices) {
                 initialized = 0;
-                kick_all_cpus_sync();
+                wake_up_all_idle_cpus();
         }
+
+        /*
+         * Make sure external observers (such as the scheduler)
+         * are done looking at pointed idle states.
+         */
+        synchronize_rcu();
 }
 
 /**
@@ -530,11 +536,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register);
 
 #ifdef CONFIG_SMP
 
-static void smp_callback(void *v)
-{
-        /* we already woke the CPU up, nothing more to do */
-}
-
 /*
  * This function gets called when a part of the kernel has a new latency
  * requirement. This means we need to get all processors out of their C-state,
@@ -544,7 +545,7 @@ static void smp_callback(void *v)
 static int cpuidle_latency_notify(struct notifier_block *b,
                 unsigned long l, void *v)
 {
-        smp_call_function(smp_callback, NULL, 1);
+        wake_up_all_idle_cpus();
         return NOTIFY_OK;
 }
 
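
The synchronize_rcu() added above pairs with scheduler-side readers introduced elsewhere in this series ("sched: Let the scheduler see CPU idle states", "sched/fair: Leverage the idle state info when choosing the 'idlest' cpu"). A hedged sketch of such a reader; idle_get_state() and the helper name are assumptions based on those commit titles, not code from this diff:

        /* illustrative reader: peek at a remote CPU's idle state under RCU */
        static unsigned int peek_exit_latency(int cpu)
        {
                struct cpuidle_state *state;
                unsigned int latency = UINT_MAX;

                rcu_read_lock();
                state = idle_get_state(cpu_rq(cpu));    /* NULL unless the CPU is idle */
                if (state)
                        latency = state->exit_latency;
                rcu_read_unlock();

                return latency;
        }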

drivers/gpu/vga/vgaarb.c  (-1)

@@ -400,7 +400,6 @@ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible)
                 }
                 schedule();
                 remove_wait_queue(&vga_wait_queue, &wait);
-                set_current_state(TASK_RUNNING);
         }
         return rc;
 }

drivers/md/dm-bufio.c  (-1)

@@ -720,7 +720,6 @@ static void __wait_for_free_buffer(struct dm_bufio_client *c)
 
         io_schedule();
 
-        set_task_state(current, TASK_RUNNING);
         remove_wait_queue(&c->free_buffer_wait, &wait);
 
         dm_bufio_lock(c);

drivers/parisc/power.c  (-1)

@@ -121,7 +121,6 @@ static int kpowerswd(void *param)
         unsigned long soft_power_reg = (unsigned long) param;
 
         schedule_timeout_interruptible(pwrsw_enabled ? HZ : HZ/POWERSWITCH_POLL_PER_SEC);
-        __set_current_state(TASK_RUNNING);
 
         if (unlikely(!pwrsw_enabled))
                 continue;

drivers/s390/net/claw.c  (-2)

@@ -481,7 +481,6 @@ claw_open(struct net_device *dev)
                 spin_unlock_irqrestore(
                         get_ccwdev_lock(privptr->channel[i].cdev), saveflags);
                 schedule();
-                set_current_state(TASK_RUNNING);
                 remove_wait_queue(&privptr->channel[i].wait, &wait);
                 if(rc != 0)
                         ccw_check_return_code(privptr->channel[i].cdev, rc);
@@ -828,7 +827,6 @@ claw_release(struct net_device *dev)
                 spin_unlock_irqrestore(
                         get_ccwdev_lock(privptr->channel[i].cdev), saveflags);
                 schedule();
-                set_current_state(TASK_RUNNING);
                 remove_wait_queue(&privptr->channel[i].wait, &wait);
                 if (rc != 0) {
                         ccw_check_return_code(privptr->channel[i].cdev, rc);

drivers/scsi/fcoe/fcoe.c  (-1)

@@ -1884,7 +1884,6 @@ static int fcoe_percpu_receive_thread(void *arg)
                         set_current_state(TASK_INTERRUPTIBLE);
                         spin_unlock_bh(&p->fcoe_rx_list.lock);
                         schedule();
-                        set_current_state(TASK_RUNNING);
                         goto retry;
                 }
 

drivers/scsi/qla2xxx/qla_os.c  (-1)

@@ -4875,7 +4875,6 @@ qla2x00_do_dpc(void *data)
                     "DPC handler sleeping.\n");
 
                 schedule();
-                __set_current_state(TASK_RUNNING);
 
                 if (!base_vha->flags.init_done || ha->flags.mbox_busy)
                         goto end_loop;

drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c  (-3)

@@ -3215,7 +3215,6 @@ kiblnd_connd (void *arg)
 
                 schedule_timeout(timeout);
 
-                set_current_state(TASK_RUNNING);
                 remove_wait_queue(&kiblnd_data.kib_connd_waitq, &wait);
                 spin_lock_irqsave(&kiblnd_data.kib_connd_lock, flags);
         }
@@ -3432,7 +3431,6 @@ kiblnd_scheduler(void *arg)
                 busy_loops = 0;
 
                 remove_wait_queue(&sched->ibs_waitq, &wait);
-                set_current_state(TASK_RUNNING);
                 spin_lock_irqsave(&sched->ibs_lock, flags);
         }
 
@@ -3507,7 +3505,6 @@ kiblnd_failover_thread(void *arg)
 
                 rc = schedule_timeout(long_sleep ? cfs_time_seconds(10) :
                                       cfs_time_seconds(1));
-                set_current_state(TASK_RUNNING);
                 remove_wait_queue(&kiblnd_data.kib_failover_waitq, &wait);
                 write_lock_irqsave(glock, flags);
 

drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c  (-1)

@@ -2232,7 +2232,6 @@ ksocknal_connd (void *arg)
                 nloops = 0;
                 schedule_timeout(timeout);
 
-                set_current_state(TASK_RUNNING);
                 remove_wait_queue(&ksocknal_data.ksnd_connd_waitq, &wait);
                 spin_lock_bh(connd_lock);
         }

drivers/staging/lustre/lustre/libcfs/fail.c  (-1)

@@ -131,7 +131,6 @@ int __cfs_fail_timeout_set(__u32 id, __u32 value, int ms, int set)
                        id, ms);
                 set_current_state(TASK_UNINTERRUPTIBLE);
                 schedule_timeout(cfs_time_seconds(ms) / 1000);
-                set_current_state(TASK_RUNNING);
                 CERROR("cfs_fail_timeout id %x awake\n", id);
         }
         return ret;

drivers/tty/bfin_jtag_comm.c  (-1)

@@ -77,7 +77,6 @@ bfin_jc_emudat_manager(void *arg)
                         pr_debug("waiting for readers\n");
                         __set_current_state(TASK_UNINTERRUPTIBLE);
                         schedule();
-                        __set_current_state(TASK_RUNNING);
                         continue;
                 }
 

fs/afs/vlocation.c  (-1)

@@ -130,7 +130,6 @@ static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
                                 /* second+ BUSY - sleep a little bit */
                                 set_current_state(TASK_UNINTERRUPTIBLE);
                                 schedule_timeout(1);
-                                __set_current_state(TASK_RUNNING);
                         }
                         continue;
                 }

fs/jfs/jfs_logmgr.c  (-2)

@@ -1585,7 +1585,6 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
                         set_current_state(TASK_UNINTERRUPTIBLE);
                         LOGGC_UNLOCK(log);
                         schedule();
-                        __set_current_state(TASK_RUNNING);
                         LOGGC_LOCK(log);
                         remove_wait_queue(&target->gcwait, &__wait);
                 }
@@ -2359,7 +2358,6 @@ int jfsIOWait(void *arg)
                         set_current_state(TASK_INTERRUPTIBLE);
                         spin_unlock_irq(&log_redrive_lock);
                         schedule();
-                        __set_current_state(TASK_RUNNING);
                 }
         } while (!kthread_should_stop());
 
