Skip to content

Commit

Permalink
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/tip/tip

Pull x86 mm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - Continued work to add support for 5-level paging provided by future
     Intel CPUs. In particular we switch the x86 GUP code to the generic
     implementation. (Kirill A. Shutemov)

   - Continued work to add PCID CPU support to native kernels as well.
     In this round most of the focus is on reworking/refreshing the TLB
     flush infrastructure for the upcoming PCID changes. (Andy
     Lutomirski)"

* 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (34 commits)
  x86/mm: Delete a big outdated comment about TLB flushing
  x86/mm: Don't reenter flush_tlb_func_common()
  x86/KASLR: Fix detection 32/64 bit bootloaders for 5-level paging
  x86/ftrace: Exclude functions in head64.c from function-tracing
  x86/mmap, ASLR: Do not treat unlimited-stack tasks as legacy mmap
  x86/mm: Remove reset_lazy_tlbstate()
  x86/ldt: Simplify the LDT switching logic
  x86/boot/64: Put __startup_64() into .head.text
  x86/mm: Add support for 5-level paging for KASLR
  x86/mm: Make kernel_physical_mapping_init() support 5-level paging
  x86/mm: Add sync_global_pgds() for configuration with 5-level paging
  x86/boot/64: Add support of additional page table level during early boot
  x86/boot/64: Rename init_level4_pgt and early_level4_pgt
  x86/boot/64: Rewrite startup_64() in C
  x86/boot/compressed: Enable 5-level paging during decompression stage
  x86/boot/efi: Define __KERNEL32_CS GDT on 64-bit configurations
  x86/boot/efi: Fix __KERNEL_CS definition of GDT entry on 64-bit configurations
  x86/boot/efi: Cleanup initialization of GDT entries
  x86/asm: Fix comment in return_from_SYSCALL_64()
  x86/mm/gup: Switch GUP to the generic get_user_page_fast() implementation
  ...
  • Loading branch information
torvalds committed Jul 3, 2017
2 parents 9bc088a + 8781fb7 commit 7a69f9c
Show file tree
Hide file tree
Showing 62 changed files with 1,083 additions and 1,220 deletions.
2 changes: 1 addition & 1 deletion arch/arm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -1638,7 +1638,7 @@ config ARCH_SELECT_MEMORY_MODEL
config HAVE_ARCH_PFN_VALID
def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM

config HAVE_GENERIC_RCU_GUP
config HAVE_GENERIC_GUP
def_bool y
depends on ARM_LPAE

Expand Down
2 changes: 1 addition & 1 deletion arch/arm64/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ config GENERIC_CALIBRATE_DELAY
config ZONE_DMA
def_bool y

config HAVE_GENERIC_RCU_GUP
config HAVE_GENERIC_GUP
def_bool y

config ARCH_DMA_ADDR_T_64BIT
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ config PPC
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_RCU_GUP
select HAVE_GENERIC_GUP
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
Expand Down
5 changes: 4 additions & 1 deletion arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ config X86
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select BUILDTIME_EXTABLE_SORT
Expand Down Expand Up @@ -2793,6 +2793,9 @@ config X86_DMA_REMAP
bool
depends on STA2X11

config HAVE_GENERIC_GUP
def_bool y

source "net/Kconfig"

source "drivers/Kconfig"
Expand Down
73 changes: 51 additions & 22 deletions arch/x86/boot/compressed/eboot.c
Original file line number Diff line number Diff line change
Expand Up @@ -1046,9 +1046,31 @@ struct boot_params *efi_main(struct efi_config *c,
memset((char *)gdt->address, 0x0, gdt->size);
desc = (struct desc_struct *)gdt->address;

/* The first GDT is a dummy and the second is unused. */
desc += 2;
/* The first GDT is a dummy. */
desc++;

if (IS_ENABLED(CONFIG_X86_64)) {
/* __KERNEL32_CS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
desc->type = SEG_TYPE_CODE | SEG_TYPE_EXEC_READ;
desc->s = DESC_TYPE_CODE_DATA;
desc->dpl = 0;
desc->p = 1;
desc->limit = 0xf;
desc->avl = 0;
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
desc++;
} else {
/* Second entry is unused on 32-bit */
desc++;
}

/* __KERNEL_CS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
Expand All @@ -1058,12 +1080,18 @@ struct boot_params *efi_main(struct efi_config *c,
desc->p = 1;
desc->limit = 0xf;
desc->avl = 0;
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
if (IS_ENABLED(CONFIG_X86_64)) {
desc->l = 1;
desc->d = 0;
} else {
desc->l = 0;
desc->d = SEG_OP_SIZE_32BIT;
}
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;

desc++;

/* __KERNEL_DS */
desc->limit0 = 0xffff;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
Expand All @@ -1077,24 +1105,25 @@ struct boot_params *efi_main(struct efi_config *c,
desc->d = SEG_OP_SIZE_32BIT;
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;

#ifdef CONFIG_X86_64
/* Task segment value */
desc++;
desc->limit0 = 0x0000;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
desc->type = SEG_TYPE_TSS;
desc->s = 0;
desc->dpl = 0;
desc->p = 1;
desc->limit = 0x0;
desc->avl = 0;
desc->l = 0;
desc->d = 0;
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
#endif /* CONFIG_X86_64 */

if (IS_ENABLED(CONFIG_X86_64)) {
/* Task segment value */
desc->limit0 = 0x0000;
desc->base0 = 0x0000;
desc->base1 = 0x0000;
desc->type = SEG_TYPE_TSS;
desc->s = 0;
desc->dpl = 0;
desc->p = 1;
desc->limit = 0x0;
desc->avl = 0;
desc->l = 0;
desc->d = 0;
desc->g = SEG_GRANULARITY_4KB;
desc->base2 = 0x00;
desc++;
}

asm volatile("cli");
asm volatile ("lgdt %0" : : "m" (*gdt));
Expand Down
86 changes: 85 additions & 1 deletion arch/x86/boot/compressed/head_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,48 @@ preferred_addr:
/* Set up the stack */
leaq boot_stack_end(%rbx), %rsp

#ifdef CONFIG_X86_5LEVEL
/* Check if 5-level paging has already enabled */
movq %cr4, %rax
testl $X86_CR4_LA57, %eax
jnz lvl5

/*
* At this point we are in long mode with 4-level paging enabled,
* but we want to enable 5-level paging.
*
* The problem is that we cannot do it directly. Setting LA57 in
* long mode would trigger #GP. So we need to switch off long mode
* first.
*
* NOTE: This is not going to work if bootloader put us above 4G
* limit.
*
* The first step is go into compatibility mode.
*/

/* Clear additional page table */
leaq lvl5_pgtable(%rbx), %rdi
xorq %rax, %rax
movq $(PAGE_SIZE/8), %rcx
rep stosq

/*
* Setup current CR3 as the first and only entry in a new top level
* page table.
*/
movq %cr3, %rdi
leaq 0x7 (%rdi), %rax
movq %rax, lvl5_pgtable(%rbx)

/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
pushq $__KERNEL32_CS
leaq compatible_mode(%rip), %rax
pushq %rax
lretq
lvl5:
#endif

/* Zero EFLAGS */
pushq $0
popfq
Expand Down Expand Up @@ -429,6 +471,44 @@ relocated:
jmp *%rax

.code32
#ifdef CONFIG_X86_5LEVEL
compatible_mode:
/* Setup data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %ss

/* Disable paging */
movl %cr0, %eax
btrl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0

/* Point CR3 to 5-level paging */
leal lvl5_pgtable(%ebx), %eax
movl %eax, %cr3

/* Enable PAE and LA57 mode */
movl %cr4, %eax
orl $(X86_CR4_PAE | X86_CR4_LA57), %eax
movl %eax, %cr4

/* Calculate address we are running at */
call 1f
1: popl %edi
subl $1b, %edi

/* Prepare stack for far return to Long Mode */
pushl $__KERNEL_CS
leal lvl5(%edi), %eax
push %eax

/* Enable paging back */
movl $(X86_CR0_PG | X86_CR0_PE), %eax
movl %eax, %cr0

lret
#endif

no_longmode:
/* This isn't an x86-64 CPU so hang */
1:
Expand All @@ -442,7 +522,7 @@ gdt:
.word gdt_end - gdt
.long gdt
.word 0
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x00cf9a000000ffff /* __KERNEL32_CS */
.quad 0x00af9a000000ffff /* __KERNEL_CS */
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
Expand Down Expand Up @@ -486,3 +566,7 @@ boot_stack_end:
.balign 4096
pgtable:
.fill BOOT_PGT_SIZE, 1, 0
#ifdef CONFIG_X86_5LEVEL
lvl5_pgtable:
.fill PAGE_SIZE, 1, 0
#endif
18 changes: 12 additions & 6 deletions arch/x86/boot/compressed/pagetable.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ static void *alloc_pgt_page(void *context)
static struct alloc_pgt_data pgt_data;

/* The top level page table entry pointer. */
static unsigned long level4p;
static unsigned long top_level_pgt;

/*
* Mapping information structure passed to kernel_ident_mapping_init().
Expand Down Expand Up @@ -91,9 +91,15 @@ void initialize_identity_maps(void)
* If we came here via startup_32(), cr3 will be _pgtable already
* and we must append to the existing area instead of entirely
* overwriting it.
*
* With 5-level paging, we use '_pgtable' to allocate the p4d page table,
* the top-level page table is allocated separately.
*
* p4d_offset(top_level_pgt, 0) would cover both the 4- and 5-level
* cases. On 4-level paging it's equal to 'top_level_pgt'.
*/
level4p = read_cr3();
if (level4p == (unsigned long)_pgtable) {
top_level_pgt = read_cr3_pa();
if (p4d_offset((pgd_t *)top_level_pgt, 0) == (p4d_t *)_pgtable) {
debug_putstr("booted via startup_32()\n");
pgt_data.pgt_buf = _pgtable + BOOT_INIT_PGT_SIZE;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE - BOOT_INIT_PGT_SIZE;
Expand All @@ -103,7 +109,7 @@ void initialize_identity_maps(void)
pgt_data.pgt_buf = _pgtable;
pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
memset(pgt_data.pgt_buf, 0, pgt_data.pgt_buf_size);
level4p = (unsigned long)alloc_pgt_page(&pgt_data);
top_level_pgt = (unsigned long)alloc_pgt_page(&pgt_data);
}
}

Expand All @@ -123,7 +129,7 @@ void add_identity_map(unsigned long start, unsigned long size)
return;

/* Build the mapping. */
kernel_ident_mapping_init(&mapping_info, (pgd_t *)level4p,
kernel_ident_mapping_init(&mapping_info, (pgd_t *)top_level_pgt,
start, end);
}

Expand All @@ -134,5 +140,5 @@ void add_identity_map(unsigned long start, unsigned long size)
*/
void finalize_identity_maps(void)
{
write_cr3(level4p);
write_cr3(top_level_pgt);
}
3 changes: 2 additions & 1 deletion arch/x86/entry/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ return_from_SYSCALL_64:
* If width of "canonical tail" ever becomes variable, this will need
* to be updated to remain correct on both old and new CPUs.
*
* Change top 16 bits to be the sign-extension of 47th bit
* Change top bits to match most significant bit (47th or 56th bit
* depending on paging mode) in the address.
*/
shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
Expand Down
5 changes: 2 additions & 3 deletions arch/x86/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2111,8 +2111,7 @@ static int x86_pmu_event_init(struct perf_event *event)

static void refresh_pce(void *ignored)
{
if (current->active_mm)
load_mm_cr4(current->active_mm);
load_mm_cr4(this_cpu_read(cpu_tlbstate.loaded_mm));
}

static void x86_pmu_event_mapped(struct perf_event *event)
Expand Down Expand Up @@ -2344,7 +2343,7 @@ static unsigned long get_segment_base(unsigned int segment)

/* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
if (!ldt || idx > ldt->size)
if (!ldt || idx > ldt->nr_entries)
return 0;

desc = &ldt->entries[idx];
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/efi.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ struct efi_scratch {
__kernel_fpu_begin(); \
\
if (efi_scratch.use_pgd) { \
efi_scratch.prev_cr3 = read_cr3(); \
efi_scratch.prev_cr3 = __read_cr3(); \
write_cr3((unsigned long)efi_scratch.efi_pgt); \
__flush_tlb_all(); \
} \
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/hardirq.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
unsigned int irq_tlb_count;
#endif
unsigned int irq_tlb_count;
#ifdef CONFIG_X86_THERMAL_VECTOR
unsigned int irq_thermal_count;
#endif
Expand Down
6 changes: 0 additions & 6 deletions arch/x86/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,6 @@ typedef struct {
#endif
} mm_context_t;

#ifdef CONFIG_SMP
void leave_mm(int cpu);
#else
static inline void leave_mm(int cpu)
{
}
#endif

#endif /* _ASM_X86_MMU_H */
Loading

0 comments on commit 7a69f9c

Please sign in to comment.