
Commit 6ebcb06

tlendacky authored and Ingo Molnar committed
x86/mm: Add support to encrypt the kernel in-place
Add support to encrypt the kernel in-place. This is done by creating new
page mappings for the kernel - a decrypted write-protected mapping and an
encrypted mapping. The kernel is encrypted by copying it through a
temporary buffer.

Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Toshimitsu Kani <toshi.kani@hpe.com>
Cc: kasan-dev@googlegroups.com
Cc: kvm@vger.kernel.org
Cc: linux-arch@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-efi@vger.kernel.org
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/c039bf9412ef95e1e6bf4fdf8facab95e00c717b.1500319216.git.thomas.lendacky@amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent db51699 commit 6ebcb06
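Editor's note: the core idea in the message above - transforming a region
in place by copying it through an intermediate buffer - can be illustrated
with a small standalone sketch. Everything below is a toy analogy, not
kernel code: XOR stands in for the transparent AES that the SME hardware
applies, and the 16-byte chunk is arbitrary (the patch uses a 2MB,
PMD_PAGE_SIZE, copy buffer). In the real patch the reads go through the
decrypted mapping and the writes through the encrypted mapping of the same
physical pages, so the buffered copy itself performs the transformation.

/* toy_sme_copy.c - in-place transformation via a bounce buffer */
#include <stdio.h>
#include <string.h>

#define CHUNK 16	/* stand-in for the patch's 2MB copy buffer */

static void encrypt_in_place(unsigned char *region, size_t len,
			     unsigned char key)
{
	unsigned char buf[CHUNK];
	size_t off, i, n;

	for (off = 0; off < len; off += CHUNK) {
		n = (len - off < CHUNK) ? len - off : CHUNK;

		memcpy(buf, region + off, n);	/* read: "decrypted" view */
		for (i = 0; i < n; i++)
			buf[i] ^= key;		/* XOR models the hardware AES */
		memcpy(region + off, buf, n);	/* write back: "encrypted" view */
	}
}

int main(void)
{
	unsigned char data[] = "kernel text and data";
	size_t len = sizeof(data) - 1;

	encrypt_in_place(data, len, 0x5a);	/* "encrypt" */
	encrypt_in_place(data, len, 0x5a);	/* XOR twice restores the data */
	printf("%s\n", data);			/* prints the original string */
	return 0;
}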

File tree

4 files changed, +466 -0 lines changed

arch/x86/include/asm/mem_encrypt.h

Lines changed: 6 additions & 0 deletions
@@ -21,6 +21,12 @@
 
 extern unsigned long sme_me_mask;
 
+void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr,
+			 unsigned long decrypted_kernel_vaddr,
+			 unsigned long kernel_len,
+			 unsigned long encryption_wa,
+			 unsigned long encryption_pgd);
+
 void __init sme_early_encrypt(resource_size_t paddr,
 			      unsigned long size);
 void __init sme_early_decrypt(resource_size_t paddr,

arch/x86/mm/Makefile

Lines changed: 1 addition & 0 deletions
@@ -40,3 +40,4 @@ obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
 obj-$(CONFIG_RANDOMIZE_MEMORY)	+= kaslr.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o

arch/x86/mm/mem_encrypt.c

Lines changed: 310 additions & 0 deletions
@@ -21,6 +21,8 @@
 #include <asm/setup.h>
 #include <asm/bootparam.h>
 #include <asm/set_memory.h>
+#include <asm/cacheflush.h>
+#include <asm/sections.h>
 
 /*
  * Since SME related variables are set early in the boot process they must
@@ -199,8 +201,316 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size)
 	set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT);
 }
 
+static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
+				 unsigned long end)
+{
+	unsigned long pgd_start, pgd_end, pgd_size;
+	pgd_t *pgd_p;
+
+	pgd_start = start & PGDIR_MASK;
+	pgd_end = end & PGDIR_MASK;
+
+	pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1);
+	pgd_size *= sizeof(pgd_t);
+
+	pgd_p = pgd_base + pgd_index(start);
+
+	memset(pgd_p, 0, pgd_size);
+}
+
+#define PGD_FLAGS	_KERNPG_TABLE_NOENC
+#define P4D_FLAGS	_KERNPG_TABLE_NOENC
+#define PUD_FLAGS	_KERNPG_TABLE_NOENC
+#define PMD_FLAGS	(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
+
+static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
+				     unsigned long vaddr, pmdval_t pmd_val)
+{
+	pgd_t *pgd_p;
+	p4d_t *p4d_p;
+	pud_t *pud_p;
+	pmd_t *pmd_p;
+
+	pgd_p = pgd_base + pgd_index(vaddr);
+	if (native_pgd_val(*pgd_p)) {
+		if (IS_ENABLED(CONFIG_X86_5LEVEL))
+			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+		else
+			pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pgd_t pgd;
+
+		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+			p4d_p = pgtable_area;
+			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
+			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
+
+			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
+		} else {
+			pud_p = pgtable_area;
+			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
+		}
+		native_set_pgd(pgd_p, pgd);
+	}
+
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d_p += p4d_index(vaddr);
+		if (native_p4d_val(*p4d_p)) {
+			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
+		} else {
+			p4d_t p4d;
+
+			pud_p = pgtable_area;
+			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
+			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
+
+			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
+			native_set_p4d(p4d_p, p4d);
+		}
+	}
+
+	pud_p += pud_index(vaddr);
+	if (native_pud_val(*pud_p)) {
+		if (native_pud_val(*pud_p) & _PAGE_PSE)
+			goto out;
+
+		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
+	} else {
+		pud_t pud;
+
+		pmd_p = pgtable_area;
+		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
+		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
+
+		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
+		native_set_pud(pud_p, pud);
+	}
+
+	pmd_p += pmd_index(vaddr);
+	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
+		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+
+out:
+	return pgtable_area;
+}
+
+static unsigned long __init sme_pgtable_calc(unsigned long len)
+{
+	unsigned long p4d_size, pud_size, pmd_size;
+	unsigned long total;
+
+	/*
+	 * Perform a relatively simplistic calculation of the pagetable
+	 * entries that are needed. Those mappings will be covered by 2MB
+	 * PMD entries so we can conservatively calculate the required
+	 * number of P4D, PUD and PMD structures needed to perform the
+	 * mappings. Incrementing the count for each covers the case where
+	 * the addresses cross entries.
+	 */
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+		pud_size = (ALIGN(len, P4D_SIZE) / P4D_SIZE) + 1;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	} else {
+		p4d_size = 0;
+		pud_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	}
+	pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
+	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+	total = p4d_size + pud_size + pmd_size;
+
+	/*
+	 * Now calculate the added pagetable structures needed to populate
+	 * the new pagetables.
+	 */
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		p4d_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+		p4d_size *= sizeof(p4d_t) * PTRS_PER_P4D;
+		pud_size = ALIGN(total, P4D_SIZE) / P4D_SIZE;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	} else {
+		p4d_size = 0;
+		pud_size = ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE;
+		pud_size *= sizeof(pud_t) * PTRS_PER_PUD;
+	}
+	pmd_size = ALIGN(total, PUD_SIZE) / PUD_SIZE;
+	pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+
+	total += p4d_size + pud_size + pmd_size;
+
+	return total;
+}
+
 void __init sme_encrypt_kernel(void)
 {
+	unsigned long workarea_start, workarea_end, workarea_len;
+	unsigned long execute_start, execute_end, execute_len;
+	unsigned long kernel_start, kernel_end, kernel_len;
+	unsigned long pgtable_area_len;
+	unsigned long paddr, pmd_flags;
+	unsigned long decrypted_base;
+	void *pgtable_area;
+	pgd_t *pgd;
+
+	if (!sme_active())
+		return;
+
+	/*
+	 * Prepare for encrypting the kernel by building new pagetables with
+	 * the necessary attributes needed to encrypt the kernel in place.
+	 *
+	 * One range of virtual addresses will map the memory occupied
+	 * by the kernel as encrypted.
+	 *
+	 * Another range of virtual addresses will map the memory occupied
+	 * by the kernel as decrypted and write-protected.
+	 *
+	 * The use of the write-protect attribute will prevent any of the
+	 * memory from being cached.
+	 */
+
+	/* Physical addresses give us the identity mapped virtual addresses */
+	kernel_start = __pa_symbol(_text);
+	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+	kernel_len = kernel_end - kernel_start;
+
+	/* Set the encryption workarea to be immediately after the kernel */
+	workarea_start = kernel_end;
+
+	/*
+	 * Calculate required number of workarea bytes needed:
+	 *   executable encryption area size:
+	 *     stack page (PAGE_SIZE)
+	 *     encryption routine page (PAGE_SIZE)
+	 *     intermediate copy buffer (PMD_PAGE_SIZE)
+	 *   pagetable structures for the encryption of the kernel
+	 *   pagetable structures for workarea (in case not currently mapped)
+	 */
+	execute_start = workarea_start;
+	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+	execute_len = execute_end - execute_start;
+
+	/*
+	 * One PGD for both encrypted and decrypted mappings and a set of
+	 * PUDs and PMDs for each of the encrypted and decrypted mappings.
+	 */
+	pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD;
+	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
+
+	/* PUDs and PMDs needed in the current pagetables for the workarea */
+	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);
+
+	/*
+	 * The total workarea includes the executable encryption area and
+	 * the pagetable area.
+	 */
+	workarea_len = execute_len + pgtable_area_len;
+	workarea_end = workarea_start + workarea_len;
+
+	/*
+	 * Set the address to the start of where newly created pagetable
+	 * structures (PGDs, PUDs and PMDs) will be allocated. New pagetable
+	 * structures are created when the workarea is added to the current
+	 * pagetables and when the new encrypted and decrypted kernel
+	 * mappings are populated.
+	 */
+	pgtable_area = (void *)execute_end;
+
+	/*
+	 * Make sure the current pagetable structure has entries for
+	 * addressing the workarea.
+	 */
+	pgd = (pgd_t *)native_read_cr3_pa();
+	paddr = workarea_start;
+	while (paddr < workarea_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr,
+						paddr + PMD_FLAGS);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/* Flush the TLB - no globals so cr3 is enough */
+	native_write_cr3(__native_read_cr3());
+
+	/*
+	 * A new pagetable structure is being built to allow for the kernel
+	 * to be encrypted. It starts with an empty PGD that will then be
+	 * populated with new PUDs and PMDs as the encrypted and decrypted
+	 * kernel mappings are created.
+	 */
+	pgd = pgtable_area;
+	memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD);
+	pgtable_area += sizeof(*pgd) * PTRS_PER_PGD;
+
+	/* Add encrypted kernel (identity) mappings */
+	pmd_flags = PMD_FLAGS | _PAGE_ENC;
+	paddr = kernel_start;
+	while (paddr < kernel_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr,
+						paddr + pmd_flags);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/*
+	 * A different PGD index/entry must be used to get different
+	 * pagetable entries for the decrypted mapping. Choose the next
+	 * PGD index and convert it to a virtual address to be used as
+	 * the base of the mapping.
+	 */
+	decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1);
+	decrypted_base <<= PGDIR_SHIFT;
+
+	/* Add decrypted, write-protected kernel (non-identity) mappings */
+	pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT);
+	paddr = kernel_start;
+	while (paddr < kernel_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr + decrypted_base,
+						paddr + pmd_flags);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/* Add decrypted workarea mappings to both kernel mappings */
+	paddr = workarea_start;
+	while (paddr < workarea_end) {
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr,
+						paddr + PMD_FLAGS);
+
+		pgtable_area = sme_populate_pgd(pgd, pgtable_area,
+						paddr + decrypted_base,
+						paddr + PMD_FLAGS);
+
+		paddr += PMD_PAGE_SIZE;
+	}
+
+	/* Perform the encryption */
+	sme_encrypt_execute(kernel_start, kernel_start + decrypted_base,
+			    kernel_len, workarea_start, (unsigned long)pgd);
+
+	/*
+	 * At this point we are running encrypted. Remove the mappings for
+	 * the decrypted areas - all that is needed for this is to remove
+	 * the PGD entry/entries.
+	 */
+	sme_clear_pgd(pgd, kernel_start + decrypted_base,
+		      kernel_end + decrypted_base);
+
+	sme_clear_pgd(pgd, workarea_start + decrypted_base,
+		      workarea_end + decrypted_base);
+
+	/* Flush the TLB - no globals so cr3 is enough */
+	native_write_cr3(__native_read_cr3());
 }
 
 void __init sme_enable(void)
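
Editor's note: to make the sizing and layout arithmetic above concrete,
here is a small standalone sketch (not kernel code) that reproduces the
4-level branch of sme_pgtable_calc() and the decrypted_base computation.
The paging constants are the standard x86-64 4-level values; the kernel
placement and size are invented for illustration.

/* sme_sizing.c - walk-through of the workarea arithmetic above */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))
#define PAGE_SIZE	4096ULL
#define PMD_PAGE_SIZE	(2ULL << 20)		/* 2 MiB large pages */
#define PUD_SIZE	(1ULL << 30)		/* 1 GiB per PUD entry */
#define PGDIR_SHIFT	39
#define PGDIR_SIZE	(1ULL << PGDIR_SHIFT)	/* 512 GiB per PGD entry */
#define PTRS_PER_PGD	512
#define TBL		(512 * 8ULL)		/* one page of 8-byte entries */

/* 4-level (non-5LEVEL) branch of sme_pgtable_calc() */
static unsigned long long sme_pgtable_calc(unsigned long long len)
{
	unsigned long long pud, pmd, total;

	pud = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE + 1) * TBL;
	pmd = (ALIGN(len, PUD_SIZE) / PUD_SIZE + 1) * TBL;
	total = pud + pmd;

	/* structures needed to map the new structures themselves */
	pud = (ALIGN(total, PGDIR_SIZE) / PGDIR_SIZE) * TBL;
	pmd = (ALIGN(total, PUD_SIZE) / PUD_SIZE) * TBL;

	return total + pud + pmd;
}

int main(void)
{
	/* hypothetical placement: 62 MiB kernel image at 16 MiB physical */
	unsigned long long kernel_start = 16ULL << 20;
	unsigned long long kernel_end = kernel_start + (62ULL << 20);
	unsigned long long execute_len = 2 * PAGE_SIZE + PMD_PAGE_SIZE;
	unsigned long long execute_end = kernel_end + execute_len;
	unsigned long long pgtable_area_len, workarea_end, decrypted_base;

	/* one PGD page plus tables for the encrypted and decrypted mappings */
	pgtable_area_len = PAGE_SIZE;
	pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2;
	/* plus tables to map the workarea in the current pagetables */
	pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len);

	workarea_end = kernel_end + execute_len + pgtable_area_len;

	/* next PGD slot after the workarea, turned into a virtual address */
	decrypted_base = ((workarea_end >> PGDIR_SHIFT) + 1) & (PTRS_PER_PGD - 1);
	decrypted_base <<= PGDIR_SHIFT;

	printf("pagetable area: %llu KiB\n", pgtable_area_len >> 10); /* 76 KiB */
	printf("decrypted_base: %llu GiB\n", decrypted_base >> 30);   /* 512 GiB */
	return 0;
}

For a kernel span of this size everything fits in a few pagetable pages,
and because the identity-mapped kernel sits under PGD slot 0, the
decrypted alias lands at the 512 GiB boundary (PGD slot 1).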
