kernel: mmu: shrink and align struct z_page_frame
The struct z_page_frame is marked __packed to avoid extra padding, as
such padding may represent significant memory waste when many page
frames are used. However, this is a bad strategy.

The code contained this somewhat dubious comment and code in
free_page_frame_list_put():

	/* The structure is packed, which ensures that this is true */
	void *node = pf;
	sys_slist_append(&free_page_frame_list, node);

This is bad for many reasons:

- type checking is completely bypassed;

- if the sys_snode_t node member is no longer located at the front of
  struct z_page_frame then the code will still compile and possibly run
  but be broken with memory corruption as a likely outcome;

- the sys_slist_append() code is completely unaware of the packed
  attribute, which breaks architectures with alignment restrictions
  (see the sketch below).
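
Two of these points can be made concrete with a minimal standalone
sketch (illustrative only, not Zephyr code; all names here are
hypothetical):

    #include <stddef.h>

    struct list_node { struct list_node *next; };

    struct frame {
        unsigned char flags;    /* any member placed before the node... */
        struct list_node node;  /* ...moves the node away from offset 0 */
    };

    #define MY_CONTAINER_OF(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    void example(struct frame *pf)
    {
        /* Fragile: only correct while 'node' sits at offset 0, and the
         * compiler cannot flag the mistake once it no longer does.
         */
        struct list_node *bad = (struct list_node *)pf;

        /* Robust: correct regardless of member layout. */
        struct frame *back = MY_CONTAINER_OF(&pf->node, struct frame, node);

        (void)bad; (void)back;
    }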

Let's improve code efficiency as well as memory usage by removing the
packed attribute and manually packing the flags into the unused virtual
address bits. This way the page frame array remains naturally aligned,
data access becomes optimal, and the actual array size gets even
smaller.
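
The fix relies on page alignment: a virtual address aligned to
CONFIG_MMU_PAGE_SIZE has log2(page size) low-order bits that are always
zero, so any flag whose bit value is below the page size can share the
same word. A minimal sketch of the idea (stand-in names, not the actual
kernel code):

    #include <stdint.h>

    #define PAGE_SIZE  4096u  /* stand-in for CONFIG_MMU_PAGE_SIZE */
    #define FLAGS_MASK ((uintptr_t)PAGE_SIZE - 1)

    /* va must be page aligned; all flag bits must be below PAGE_SIZE */
    static inline uintptr_t pack(void *va, uintptr_t flags)
    {
        return ((uintptr_t)va & ~FLAGS_MASK) | (flags & FLAGS_MASK);
    }

    static inline void *unpack_va(uintptr_t word)
    {
        return (void *)(word & ~FLAGS_MASK);
    }

    static inline uintptr_t unpack_flags(uintptr_t word)
    {
        return word & FLAGS_MASK;
    }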

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Nicolas Pitre authored and nashif committed May 13, 2024
1 parent 5730597 commit e9a47d9
Showing 3 changed files with 85 additions and 54 deletions.
4 changes: 4 additions & 0 deletions doc/kernel/memory_management/demand_paging.rst
@@ -53,6 +53,10 @@ Page Frame
addresses. For every page frame, a ``struct z_page_frame`` is instantiated to
store metadata. Flags for each page frame:

* ``Z_PAGE_FRAME_FREE`` indicates a page frame is unused and on the list of
free page frames. When this flag is set, none of the other flags are
meaningful and they must not be modified.

* ``Z_PAGE_FRAME_PINNED`` indicates a page frame is pinned in memory
and should never be paged out.

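
The Z_PAGE_FRAME_FREE invariant documented above implies a checking
order: test the free bit first, and only interpret other bits when it
is clear. A hedged sketch using the accessors this commit introduces
(the inspect() caller is hypothetical):

    void inspect(struct z_page_frame *pf)
    {
        if (z_page_frame_is_free(pf)) {
            /* On the free list: no other flag bit is meaningful. */
            return;
        }
        if (z_page_frame_is_mapped(pf) && !z_page_frame_is_pinned(pf)) {
            /* Only now is it safe to reason about mapped/pinned state. */
        }
    }
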
98 changes: 59 additions & 39 deletions kernel/include/mmu.h
@@ -9,7 +9,7 @@
#ifdef CONFIG_MMU

#include <stdint.h>
#include <zephyr/sys/slist.h>
#include <zephyr/sys/sflist.h>
#include <zephyr/sys/__assert.h>
#include <zephyr/sys/util.h>
#include <zephyr/kernel/mm.h>
@@ -64,31 +64,38 @@

/*
* z_page_frame flags bits
*
* Requirements:
* - Z_PAGE_FRAME_FREE must be one of the possible sfnode flag bits
* - All bit values must be lower than CONFIG_MMU_PAGE_SIZE
*/

/** This page contains critical kernel data and will never be swapped */
#define Z_PAGE_FRAME_PINNED BIT(0)
/** This physical page is free and part of the free list */
#define Z_PAGE_FRAME_FREE BIT(0)

/** This physical page is reserved by hardware; we will never use it */
#define Z_PAGE_FRAME_RESERVED BIT(1)

/** This page contains critical kernel data and will never be swapped */
#define Z_PAGE_FRAME_PINNED BIT(2)

/**
* This physical page is mapped to some virtual memory address
*
* Currently, we just support one mapping per page frame. If a page frame
* is mapped to multiple virtual pages then it must be pinned.
*/
#define Z_PAGE_FRAME_MAPPED BIT(2)
#define Z_PAGE_FRAME_MAPPED BIT(3)

/**
* This page frame is currently involved in a page-in/out operation
*/
#define Z_PAGE_FRAME_BUSY BIT(3)
#define Z_PAGE_FRAME_BUSY BIT(4)

/**
* This page frame has a clean copy in the backing store
*/
#define Z_PAGE_FRAME_BACKED BIT(4)
#define Z_PAGE_FRAME_BACKED BIT(5)

/**
* Data structure for physical page frames
@@ -98,78 +105,89 @@
*/
struct z_page_frame {
union {
/* If mapped, virtual address this page is mapped to */
void *addr;

/* If unmapped and available, free pages list membership. */
sys_snode_t node;
/*
* If mapped, Z_PAGE_FRAME_* flags and virtual address
* this page is mapped to.
*/
uintptr_t va_and_flags;

/*
* If unmapped and available, free pages list membership
* with the Z_PAGE_FRAME_FREE flag.
*/
sys_sfnode_t node;
};

/* Z_PAGE_FRAME_* flags */
uint8_t flags;

/* TODO: Backing store and eviction algorithms may both need to
* introduce custom members for accounting purposes. Come up with
* a layer of abstraction for this. They may also want additional
* flags bits which shouldn't clobber each other. At all costs
* the total size of struct z_page_frame must be minimized.
/* Backing store and eviction algorithms may both need to
* require additional per-frame custom data for accounting purposes.
* They should declare their own array with indices matching
* z_page_frames[] ones whenever possible.
* They may also want additional flags bits that could be stored here
* and they shouldn't clobber each other. At all costs the total
* size of struct z_page_frame must be minimized.
*/
};

/* On Xtensa we can't pack this struct because of the memory alignment.
*/
#ifdef CONFIG_XTENSA
} __aligned(4);
#else
} __packed;
#endif /* CONFIG_XTENSA */
/* Note: this must be false for the other flag bits to be valid */
static inline bool z_page_frame_is_free(struct z_page_frame *pf)
{
return (pf->va_and_flags & Z_PAGE_FRAME_FREE) != 0U;
}

static inline bool z_page_frame_is_pinned(struct z_page_frame *pf)
{
return (pf->flags & Z_PAGE_FRAME_PINNED) != 0U;
return (pf->va_and_flags & Z_PAGE_FRAME_PINNED) != 0U;
}

static inline bool z_page_frame_is_reserved(struct z_page_frame *pf)
{
return (pf->flags & Z_PAGE_FRAME_RESERVED) != 0U;
return (pf->va_and_flags & Z_PAGE_FRAME_RESERVED) != 0U;
}

static inline bool z_page_frame_is_mapped(struct z_page_frame *pf)
{
return (pf->flags & Z_PAGE_FRAME_MAPPED) != 0U;
return (pf->va_and_flags & Z_PAGE_FRAME_MAPPED) != 0U;
}

static inline bool z_page_frame_is_busy(struct z_page_frame *pf)
{
return (pf->flags & Z_PAGE_FRAME_BUSY) != 0U;
return (pf->va_and_flags & Z_PAGE_FRAME_BUSY) != 0U;
}

static inline bool z_page_frame_is_backed(struct z_page_frame *pf)
{
return (pf->flags & Z_PAGE_FRAME_BACKED) != 0U;
return (pf->va_and_flags & Z_PAGE_FRAME_BACKED) != 0U;
}

static inline bool z_page_frame_is_evictable(struct z_page_frame *pf)
{
return (!z_page_frame_is_reserved(pf) && z_page_frame_is_mapped(pf) &&
!z_page_frame_is_pinned(pf) && !z_page_frame_is_busy(pf));
return (!z_page_frame_is_free(pf) &&
!z_page_frame_is_reserved(pf) &&
z_page_frame_is_mapped(pf) &&
!z_page_frame_is_pinned(pf) &&
!z_page_frame_is_busy(pf));
}

/* If true, page is not being used for anything, is not reserved, is a member
* of some free pages list, isn't busy, and may be mapped in memory
/* If true, page is not being used for anything, is not reserved, is not
* a member of some free pages list, isn't busy, and is ready to be mapped
* in memory
*/
static inline bool z_page_frame_is_available(struct z_page_frame *page)
{
return page->flags == 0U;
return page->va_and_flags == 0U;
}

static inline void z_page_frame_set(struct z_page_frame *pf, uint8_t flags)
{
pf->flags |= flags;
pf->va_and_flags |= flags;
}

static inline void z_page_frame_clear(struct z_page_frame *pf, uint8_t flags)
{
pf->flags &= ~flags;
/* ensure bit inversion to follow is done on the proper type width */
uintptr_t wide_flags = flags;

pf->va_and_flags &= ~wide_flags;
}

static inline void z_assert_phys_aligned(uintptr_t phys)
@@ -190,7 +208,9 @@ static inline uintptr_t z_page_frame_to_phys(struct z_page_frame *pf)
/* Presumes there is but one mapping in the virtual address space */
static inline void *z_page_frame_to_virt(struct z_page_frame *pf)
{
return pf->addr;
uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;

return (void *)(pf->va_and_flags & ~flags_mask);
}

static inline bool z_is_page_frame(uintptr_t phys)
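
The switch from slist to sflist is what makes Z_PAGE_FRAME_FREE work: a
sys_sfnode_t keeps a small number of user flag bits in the otherwise
unused low bits of its link pointer, so free-list membership and the
FREE marker share storage. A sketch of that round trip, assuming only
the sflist calls already used in this diff (the demo_* names are
hypothetical):

    #include <zephyr/sys/sflist.h>
    #include <zephyr/sys/util.h>   /* CONTAINER_OF */

    struct demo_frame {
        sys_sfnode_t node;
    };

    static sys_sflist_t demo_list;

    void demo_init(void)
    {
        sys_sflist_init(&demo_list);
    }

    void demo_put(struct demo_frame *f)
    {
        /* Store a flag value (e.g. Z_PAGE_FRAME_FREE) in the node itself */
        sys_sfnode_init(&f->node, 0x1);
        sys_sflist_append(&demo_list, &f->node);
    }

    struct demo_frame *demo_get(void)
    {
        sys_sfnode_t *n = sys_sflist_get(&demo_list);

        return (n == NULL) ? NULL : CONTAINER_OF(n, struct demo_frame, node);
    }
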
37 changes: 22 additions & 15 deletions kernel/mmu.c
@@ -81,7 +81,10 @@ static bool page_frames_initialized;
/* LCOV_EXCL_START */
static void page_frame_dump(struct z_page_frame *pf)
{
if (z_page_frame_is_reserved(pf)) {
if (z_page_frame_is_free(pf)) {
COLOR(GREY);
printk("-");
} else if (z_page_frame_is_reserved(pf)) {
COLOR(CYAN);
printk("R");
} else if (z_page_frame_is_busy(pf)) {
@@ -381,7 +384,7 @@ static void *virt_region_alloc(size_t size, size_t align)
* This implies in the future there may be multiple slists managing physical
* pages. Each page frame will still just have one snode link.
*/
static sys_slist_t free_page_frame_list;
static sys_sflist_t free_page_frame_list;

/* Number of unused and available free page frames.
* This information may go stale immediately.
@@ -395,15 +398,16 @@ static size_t z_free_page_count;
/* Get an unused page frame. don't care which one, or NULL if there are none */
static struct z_page_frame *free_page_frame_list_get(void)
{
sys_snode_t *node;
sys_sfnode_t *node;
struct z_page_frame *pf = NULL;

node = sys_slist_get(&free_page_frame_list);
node = sys_sflist_get(&free_page_frame_list);
if (node != NULL) {
z_free_page_count--;
pf = CONTAINER_OF(node, struct z_page_frame, node);
PF_ASSERT(pf, z_page_frame_is_available(pf),
"unavailable but somehow on free list");
PF_ASSERT(pf, z_page_frame_is_free(pf),
"on free list but not free");
pf->va_and_flags = 0;
}

return pf;
@@ -414,21 +418,20 @@
{
PF_ASSERT(pf, z_page_frame_is_available(pf),
"unavailable page put on free list");
/* The structure is packed, which ensures that this is true */
void *node = pf;

sys_slist_append(&free_page_frame_list, node);
sys_sfnode_init(&pf->node, Z_PAGE_FRAME_FREE);
sys_sflist_append(&free_page_frame_list, &pf->node);
z_free_page_count++;
}

static void free_page_frame_list_init(void)
{
sys_slist_init(&free_page_frame_list);
sys_sflist_init(&free_page_frame_list);
}

static void page_frame_free_locked(struct z_page_frame *pf)
{
pf->flags = 0;
pf->va_and_flags = 0;
free_page_frame_list_put(pf);
}

@@ -441,6 +444,8 @@ static void page_frame_free_locked(struct z_page_frame *pf)
*/
static void frame_mapped_set(struct z_page_frame *pf, void *addr)
{
PF_ASSERT(pf, !z_page_frame_is_free(pf),
"attempted to map a page frame on the free list");
PF_ASSERT(pf, !z_page_frame_is_reserved(pf),
"attempted to map a reserved page frame");

@@ -453,9 +458,11 @@ static void frame_mapped_set(struct z_page_frame *pf, void *addr)
"non-pinned and already mapped to %p",
z_page_frame_to_virt(pf));

z_page_frame_set(pf, Z_PAGE_FRAME_MAPPED);
pf->addr = UINT_TO_POINTER(POINTER_TO_UINT(addr)
& ~(CONFIG_MMU_PAGE_SIZE - 1));
uintptr_t flags_mask = CONFIG_MMU_PAGE_SIZE - 1;
uintptr_t va = (uintptr_t)addr & ~flags_mask;

pf->va_and_flags &= flags_mask;
pf->va_and_flags |= va | Z_PAGE_FRAME_MAPPED;
}

/* LCOV_EXCL_START */
@@ -535,7 +542,7 @@ static int map_anon_page(void *addr, uint32_t flags)
if (dirty) {
do_backing_store_page_out(location);
}
pf->flags = 0;
pf->va_and_flags = 0;
#else
return -ENOMEM;
#endif /* CONFIG_DEMAND_PAGING */
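
Putting the pieces together with concrete numbers: with 4 KiB pages,
mapping a frame at virtual address 0x80041000 stores 0x80041008 in
va_and_flags (the address ORed with Z_PAGE_FRAME_MAPPED, BIT(3) = 8),
and z_page_frame_to_virt() masks the low bits back off. A standalone
sketch of that arithmetic (stand-in constants, not kernel code):

    #include <assert.h>
    #include <stdint.h>

    #define MMU_PAGE_SIZE     4096u      /* stand-in for CONFIG_MMU_PAGE_SIZE */
    #define PAGE_FRAME_MAPPED (1u << 3)  /* mirrors Z_PAGE_FRAME_MAPPED */

    int main(void)
    {
        uintptr_t flags_mask = MMU_PAGE_SIZE - 1;
        uintptr_t va = 0x80041000u;                       /* page aligned */
        uintptr_t va_and_flags = va | PAGE_FRAME_MAPPED;  /* 0x80041008 */

        assert((va_and_flags & ~flags_mask) == va);
        assert((va_and_flags & PAGE_FRAME_MAPPED) != 0);
        return 0;
    }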
