23 changes: 2 additions & 21 deletions lib/scatterlist.c
@@ -150,31 +150,12 @@ EXPORT_SYMBOL(sg_init_one);
*/
static struct scatterlist *sg_kmalloc(unsigned int nents, gfp_t gfp_mask)
{
if (nents == SG_MAX_SINGLE_ALLOC) {
/*
* Kmemleak doesn't track page allocations as they are not
* commonly used (in a raw form) for kernel data structures.
* As we chain together a list of pages and then a normal
* kmalloc (tracked by kmemleak), in order to for that last
* allocation not to become decoupled (and thus a
* false-positive) we need to inform kmemleak of all the
* intermediate allocations.
*/
void *ptr = (void *) __get_free_page(gfp_mask);
kmemleak_alloc(ptr, PAGE_SIZE, 1, gfp_mask);
return ptr;
} else
return kmalloc_array(nents, sizeof(struct scatterlist),
gfp_mask);
return kmalloc_array(nents, sizeof(struct scatterlist), gfp_mask);
}

static void sg_kfree(struct scatterlist *sg, unsigned int nents)
{
if (nents == SG_MAX_SINGLE_ALLOC) {
kmemleak_free(sg);
free_page((unsigned long) sg);
} else
kfree(sg);
kfree(sg);
}

/**
2 changes: 1 addition & 1 deletion mm/compaction.c
@@ -1780,7 +1780,7 @@ static int sysctl_compact_unevictable_allowed __read_mostly = CONFIG_COMPACT_UNE
* aggressively the kernel should compact memory in the
* background. It takes values in the range [0, 100].
*/
static unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
static unsigned int __read_mostly sysctl_compaction_proactiveness;
static int sysctl_extfrag_threshold = 500;
static int __read_mostly sysctl_compact_memory;

1 change: 1 addition & 0 deletions mm/internal.h
@@ -421,6 +421,7 @@ extern void prep_compound_page(struct page *page, unsigned int order);
extern void post_alloc_hook(struct page *page, unsigned int order,
gfp_t gfp_flags);
extern int user_min_free_kbytes;
extern atomic_long_t kswapd_waiters;

extern void free_unref_page(struct page *page, unsigned int order);
extern void free_unref_page_list(struct list_head *list);
4 changes: 4 additions & 0 deletions mm/list_lru.c
@@ -178,6 +178,7 @@ EXPORT_SYMBOL_GPL(list_lru_isolate_move);
unsigned long list_lru_count_one(struct list_lru *lru,
int nid, struct mem_cgroup *memcg)
{
#if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB)
struct list_lru_one *l;
long count;

@@ -190,6 +191,9 @@ unsigned long list_lru_count_one(struct list_lru *lru,
count = 0;

return count;
#else
return READ_ONCE(lru->node[nid].lru.nr_items);
#endif
}
EXPORT_SYMBOL_GPL(list_lru_count_one);

42 changes: 33 additions & 9 deletions mm/page_alloc.c
@@ -204,6 +204,8 @@ EXPORT_SYMBOL(node_states);

gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;

atomic_long_t kswapd_waiters = ATOMIC_LONG_INIT(0);

/*
* A cached value of the page's pageblock's migratetype, used when the page is
* put on a pcplist. Used to avoid the pageblock migratetype lookup when
@@ -297,7 +299,7 @@ static compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = {

int min_free_kbytes = 1024;
int user_min_free_kbytes = -1;
static int watermark_boost_factor __read_mostly = 15000;
static int watermark_boost_factor __read_mostly;
static int watermark_scale_factor = 10;

/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */
@@ -2152,16 +2154,17 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype,
}

/*
* Obtain a specified number of elements from the buddy allocator, all under
* a single hold of the lock, for efficiency. Add them to the supplied list.
* Returns the number of new pages which were placed at *list.
* Obtain a specified number of elements from the buddy allocator, and relax the
* zone lock when needed. Add them to the supplied list. Returns the number of
* new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, unsigned int alloc_flags)
{
const bool can_resched = !preempt_count() && !irqs_disabled();
unsigned long flags;
int i;
int i, last_mod = 0;

spin_lock_irqsave(&zone->lock, flags);
for (i = 0; i < count; ++i) {
@@ -2170,6 +2173,18 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
if (unlikely(page == NULL))
break;

/* Reschedule and ease the contention on the lock if needed */
if (i + 1 < count && ((can_resched && need_resched()) ||
spin_needbreak(&zone->lock))) {
__mod_zone_page_state(zone, NR_FREE_PAGES,
-((i + 1 - last_mod) << order));
last_mod = i + 1;
spin_unlock_irqrestore(&zone->lock, flags);
if (can_resched)
cond_resched();
spin_lock_irqsave(&zone->lock, flags);
}

/*
* Split buddy pages returned by expand() are received here in
* physical page order. The page is added to the tail of
@@ -2186,7 +2201,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
-(1 << order));
}

__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
__mod_zone_page_state(zone, NR_FREE_PAGES, -((i - last_mod) << order));
spin_unlock_irqrestore(&zone->lock, flags);

return i;
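
[Editor's note] The rmqueue_bulk() hunks above let the batch drop the zone lock mid-way: when another CPU is spinning on the lock (spin_needbreak()) or the task should reschedule, the loop flushes the NR_FREE_PAGES delta accumulated since the last flush, releases the lock, optionally calls cond_resched(), and re-takes the lock before continuing. Below is a minimal user-space sketch of the same batch-with-lock-relaxation pattern; all names are hypothetical, a pthread mutex stands in for the zone spinlock, and there is no per-zone counter bookkeeping.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define POOL_SIZE   1024
#define RELAX_EVERY 64		/* drop the lock every 64 items */

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static int pool_remaining = POOL_SIZE;

/* Take one item from the shared pool; returns -1 when the pool is empty. */
static int take_one(void)
{
	return pool_remaining > 0 ? --pool_remaining : -1;
}

/* Take up to @count items, relaxing the lock between sub-batches. */
static int take_bulk(int count, int *out)
{
	int i;

	pthread_mutex_lock(&pool_lock);
	for (i = 0; i < count; i++) {
		int item = take_one();

		if (item < 0)
			break;
		out[i] = item;

		/* Ease contention: drop and re-take the lock mid-batch. */
		if (i + 1 < count && (i + 1) % RELAX_EVERY == 0) {
			pthread_mutex_unlock(&pool_lock);
			sched_yield();	/* stands in for cond_resched() */
			pthread_mutex_lock(&pool_lock);
		}
	}
	pthread_mutex_unlock(&pool_lock);

	return i;	/* number of items actually taken */
}

int main(void)
{
	int buf[256];
	int got = take_bulk(256, buf);

	printf("took %d items, %d left in the pool\n", got, pool_remaining);
	return 0;
}
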
@@ -3962,6 +3977,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned int cpuset_mems_cookie;
unsigned int zonelist_iter_cookie;
int reserve_flags;
bool woke_kswapd = false;

restart:
compaction_retries = 0;
@@ -4001,8 +4017,13 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto nopage;
}

if (alloc_flags & ALLOC_KSWAPD)
if (alloc_flags & ALLOC_KSWAPD) {
if (!woke_kswapd) {
atomic_long_inc(&kswapd_waiters);
woke_kswapd = true;
}
wake_all_kswapds(order, gfp_mask, ac);
}

/*
* The adjusted alloc_flags might result in immediate success, so try
@@ -4217,9 +4238,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
goto retry;
}
fail:
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
got_pg:
if (woke_kswapd)
atomic_long_dec(&kswapd_waiters);
if (!page)
warn_alloc(gfp_mask, ac->nodemask,
"page allocation failure: order:%u", order);
return page;
}

20 changes: 14 additions & 6 deletions mm/vmscan.c
@@ -6901,7 +6901,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
return 0;
}

static bool allow_direct_reclaim(pg_data_t *pgdat)
static bool allow_direct_reclaim(pg_data_t *pgdat, bool using_kswapd)
{
struct zone *zone;
unsigned long pfmemalloc_reserve = 0;
@@ -6930,6 +6930,10 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)

wmark_ok = free_pages > pfmemalloc_reserve / 2;

/* The throttled direct reclaimer is now a kswapd waiter */
if (unlikely(!using_kswapd && !wmark_ok))
atomic_long_inc(&kswapd_waiters);

/* kswapd must be awake if processes are being throttled */
if (!wmark_ok && waitqueue_active(&pgdat->kswapd_wait)) {
if (READ_ONCE(pgdat->kswapd_highest_zoneidx) > ZONE_NORMAL)
Expand Down Expand Up @@ -6995,7 +6999,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,

/* Throttle based on the first usable node */
pgdat = zone->zone_pgdat;
if (allow_direct_reclaim(pgdat))
if (allow_direct_reclaim(pgdat, gfp_mask & __GFP_KSWAPD_RECLAIM))
goto out;
break;
}
@@ -7017,11 +7021,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
*/
if (!(gfp_mask & __GFP_FS))
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
allow_direct_reclaim(pgdat), HZ);
allow_direct_reclaim(pgdat, true), HZ);
else
/* Throttle until kswapd wakes the process */
wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
allow_direct_reclaim(pgdat));
allow_direct_reclaim(pgdat, true));

if (unlikely(!(gfp_mask & __GFP_KSWAPD_RECLAIM)))
atomic_long_dec(&kswapd_waiters);

if (fatal_signal_pending(current))
return true;
@@ -7519,14 +7526,15 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
allow_direct_reclaim(pgdat))
allow_direct_reclaim(pgdat, true))
wake_up_all(&pgdat->pfmemalloc_wait);

/* Check if kswapd should be suspending */
__fs_reclaim_release(_THIS_IP_);
ret = try_to_freeze();
__fs_reclaim_acquire(_THIS_IP_);
if (ret || kthread_should_stop())
if (ret || kthread_should_stop() ||
!atomic_long_read(&kswapd_waiters))
break;

/*