Skip to content

Commit

Permalink
mm: remove __GFP_COLD
Browse files Browse the repository at this point in the history
As the page free path makes no distinction between cache hot and cold
pages, there is no real useful ordering of pages in the free list that
allocation requests can take advantage of.  Judging from the users of
__GFP_COLD, it is likely that a number of them are the result of copying
other sites instead of actually measuring the impact.  Remove the
__GFP_COLD parameter which simplifies a number of paths in the page
allocator.

This is potentially controversial but bear in mind that the size of the
per-cpu pagelists versus modern cache sizes means that the whole per-cpu
list can often fit in the L3 cache.  Hence, there is only a potential
benefit for microbenchmarks that alloc/free pages in a tight loop.  It's
even worse when THP is taken into account which has little or no chance
of getting a cache-hot page as the per-cpu list is bypassed and the
zeroing of multiple pages will thrash the cache anyway.

The truncate microbenchmarks are not shown as this patch affects the
allocation path and not the free path.  A page fault microbenchmark was
tested but it showed no significant difference which is not surprising
given that the __GFP_COLD branches are a minuscule percentage of the
fault path.

Link: http://lkml.kernel.org/r/20171018075952.10627-9-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
gormanm authored and torvalds committed Nov 16, 2017
1 parent 2d4894b commit 453f85d
Show file tree
Hide file tree
Showing 25 changed files with 32 additions and 62 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/amazon/ena/ena_netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)


rc = ena_alloc_rx_page(rx_ring, rx_info,
__GFP_COLD | GFP_ATOMIC | __GFP_COMP);
GFP_ATOMIC | __GFP_COMP);
if (unlikely(rc < 0)) {
netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev,
"failed to alloc buffer for rx queue %d\n",
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/amd/xgbe/xgbe-desc.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ static int xgbe_alloc_pages(struct xgbe_prv_data *pdata,
order = alloc_order;

/* Try to obtain pages, decreasing order if necessary */
gfp = GFP_ATOMIC | __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
gfp = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
pages = alloc_pages_node(node, gfp, order);
if (pages)
Expand Down
3 changes: 1 addition & 2 deletions drivers/net/ethernet/aquantia/atlantic/aq_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -304,8 +304,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self)
buff->flags = 0U;
buff->len = AQ_CFG_RX_FRAME_MAX;

buff->page = alloc_pages(GFP_ATOMIC | __GFP_COLD |
__GFP_COMP, pages_order);
buff->page = alloc_pages(GFP_ATOMIC | __GFP_COMP, pages_order);
if (!buff->page) {
err = -ENOMEM;
goto err_exit;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/cavium/liquidio/octeon_network.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ static inline void
struct sk_buff *skb;
struct octeon_skb_page_info *skb_pg_info;

page = alloc_page(GFP_ATOMIC | __GFP_COLD);
page = alloc_page(GFP_ATOMIC);
if (unlikely(!page))
return NULL;

Expand Down
5 changes: 2 additions & 3 deletions drivers/net/ethernet/mellanox/mlx4/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)

if (mlx4_en_prepare_rx_desc(priv, ring,
ring->actual_size,
GFP_KERNEL | __GFP_COLD)) {
GFP_KERNEL)) {
if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
en_err(priv, "Failed to allocate enough rx buffers\n");
return -ENOMEM;
Expand Down Expand Up @@ -552,8 +552,7 @@ static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
do {
if (mlx4_en_prepare_rx_desc(priv, ring,
ring->prod & ring->size_mask,
GFP_ATOMIC | __GFP_COLD |
__GFP_MEMALLOC))
GFP_ATOMIC | __GFP_MEMALLOC))
break;
ring->prod++;
} while (likely(--missing));
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/ethernet/netronome/nfp/nfp_net_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
} else {
struct page *page;

page = alloc_page(GFP_KERNEL | __GFP_COLD);
page = alloc_page(GFP_KERNEL);
frag = page ? page_address(page) : NULL;
}
if (!frag) {
Expand All @@ -1212,7 +1212,7 @@ static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
} else {
struct page *page;

page = alloc_page(GFP_ATOMIC | __GFP_COLD);
page = alloc_page(GFP_ATOMIC);
frag = page ? page_address(page) : NULL;
}
if (!frag) {
Expand Down
3 changes: 1 addition & 2 deletions drivers/net/ethernet/qlogic/qlge/qlge_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1092,8 +1092,7 @@ static int ql_get_next_chunk(struct ql_adapter *qdev, struct rx_ring *rx_ring,
{
if (!rx_ring->pg_chunk.page) {
u64 map;
rx_ring->pg_chunk.page = alloc_pages(__GFP_COLD | __GFP_COMP |
GFP_ATOMIC,
rx_ring->pg_chunk.page = alloc_pages(__GFP_COMP | GFP_ATOMIC,
qdev->lbq_buf_order);
if (unlikely(!rx_ring->pg_chunk.page)) {
netif_err(qdev, drv, qdev->ndev,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/sfc/falcon/rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic)
do {
page = ef4_reuse_page(rx_queue);
if (page == NULL) {
page = alloc_pages(__GFP_COLD | __GFP_COMP |
page = alloc_pages(__GFP_COMP |
(atomic ? GFP_ATOMIC : GFP_KERNEL),
efx->rx_buffer_order);
if (unlikely(page == NULL))
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/sfc/rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
do {
page = efx_reuse_page(rx_queue);
if (page == NULL) {
page = alloc_pages(__GFP_COLD | __GFP_COMP |
page = alloc_pages(__GFP_COMP |
(atomic ? GFP_ATOMIC : GFP_KERNEL),
efx->rx_buffer_order);
if (unlikely(page == NULL))
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/synopsys/dwc-xlgmac-desc.c
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ static int xlgmac_alloc_pages(struct xlgmac_pdata *pdata,
dma_addr_t pages_dma;

/* Try to obtain pages, decreasing order if necessary */
gfp |= __GFP_COLD | __GFP_COMP | __GFP_NOWARN;
gfp |= __GFP_COMP | __GFP_NOWARN;
while (order >= 0) {
pages = alloc_pages(gfp, order);
if (pages)
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/ti/netcp_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ static int netcp_allocate_rx_buf(struct netcp_intf *netcp, int fdq)
sw_data[0] = (u32)bufptr;
} else {
/* Allocate a secondary receive queue entry */
page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD);
page = alloc_page(GFP_ATOMIC | GFP_DMA);
if (unlikely(!page)) {
dev_warn_ratelimited(netcp->ndev_dev, "Secondary page alloc failed\n");
goto fail;
Expand Down
1 change: 0 additions & 1 deletion drivers/net/virtio_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,6 @@ static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
int err;
bool oom;

gfp |= __GFP_COLD;
do {
if (vi->mergeable_rx_bufs)
err = add_recvbuf_mergeable(vi, rq, gfp);
Expand Down
2 changes: 1 addition & 1 deletion drivers/staging/lustre/lustre/mdc/mdc_request.c
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ static int mdc_read_page_remote(void *data, struct page *page0)
}

for (npages = 1; npages < max_pages; npages++) {
page = page_cache_alloc_cold(inode->i_mapping);
page = page_cache_alloc(inode->i_mapping);
if (!page)
break;
page_pool[npages] = page;
Expand Down
6 changes: 2 additions & 4 deletions fs/cachefiles/rdwr.c
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,7 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object,
goto backing_page_already_present;

if (!newpage) {
newpage = __page_cache_alloc(cachefiles_gfp |
__GFP_COLD);
newpage = __page_cache_alloc(cachefiles_gfp);
if (!newpage)
goto nomem_monitor;
}
Expand Down Expand Up @@ -493,8 +492,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
goto backing_page_already_present;

if (!newpage) {
newpage = __page_cache_alloc(cachefiles_gfp |
__GFP_COLD);
newpage = __page_cache_alloc(cachefiles_gfp);
if (!newpage)
goto nomem;
}
Expand Down
5 changes: 0 additions & 5 deletions include/linux/gfp.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ struct vm_area_struct;
#define ___GFP_HIGH 0x20u
#define ___GFP_IO 0x40u
#define ___GFP_FS 0x80u
#define ___GFP_COLD 0x100u
#define ___GFP_NOWARN 0x200u
#define ___GFP_RETRY_MAYFAIL 0x400u
#define ___GFP_NOFAIL 0x800u
Expand Down Expand Up @@ -192,16 +191,12 @@ struct vm_area_struct;
/*
* Action modifiers
*
* __GFP_COLD indicates that the caller does not expect to be used in the near
* future. Where possible, a cache-cold page will be returned.
*
* __GFP_NOWARN suppresses allocation failure reports.
*
* __GFP_COMP address compound page metadata.
*
* __GFP_ZERO returns a zeroed page on success.
*/
#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
Expand Down
8 changes: 1 addition & 7 deletions include/linux/pagemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,15 +234,9 @@ static inline struct page *page_cache_alloc(struct address_space *x)
return __page_cache_alloc(mapping_gfp_mask(x));
}

static inline struct page *page_cache_alloc_cold(struct address_space *x)
{
return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
}

static inline gfp_t readahead_gfp_mask(struct address_space *x)
{
return mapping_gfp_mask(x) |
__GFP_COLD | __GFP_NORETRY | __GFP_NOWARN;
return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
}

typedef int filler_t(void *, struct page *);
Expand Down
2 changes: 1 addition & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -2672,7 +2672,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask,
* 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to
* code in gfp_to_alloc_flags that should be enforcing this.
*/
gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC;
gfp_mask |= __GFP_COMP | __GFP_MEMALLOC;

return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order);
}
Expand Down
3 changes: 0 additions & 3 deletions include/linux/slab.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,9 +467,6 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
* Also it is possible to set different flags by OR'ing
* in one or more of the following additional @flags:
*
* %__GFP_COLD - Request cache-cold pages instead of
* trying to return cache-warm pages.
*
* %__GFP_HIGH - This allocation has high priority and may use emergency pools.
*
* %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail
Expand Down
1 change: 0 additions & 1 deletion include/trace/events/mmflags.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
{(unsigned long)__GFP_ATOMIC, "__GFP_ATOMIC"}, \
{(unsigned long)__GFP_IO, "__GFP_IO"}, \
{(unsigned long)__GFP_FS, "__GFP_FS"}, \
{(unsigned long)__GFP_COLD, "__GFP_COLD"}, \
{(unsigned long)__GFP_NOWARN, "__GFP_NOWARN"}, \
{(unsigned long)__GFP_RETRY_MAYFAIL, "__GFP_RETRY_MAYFAIL"}, \
{(unsigned long)__GFP_NOFAIL, "__GFP_NOFAIL"}, \
Expand Down
4 changes: 2 additions & 2 deletions kernel/power/snapshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -1884,7 +1884,7 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
*/
static inline int get_highmem_buffer(int safe_needed)
{
buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
buffer = get_image_page(GFP_ATOMIC, safe_needed);
return buffer ? 0 : -ENOMEM;
}

Expand Down Expand Up @@ -1945,7 +1945,7 @@ static int swsusp_alloc(struct memory_bitmap *copy_bm,
while (nr_pages-- > 0) {
struct page *page;

page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
page = alloc_image_page(GFP_ATOMIC);
if (!page)
goto err_out;
memory_bm_set_bit(copy_bm, page_to_pfn(page));
Expand Down
6 changes: 3 additions & 3 deletions mm/filemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -2272,7 +2272,7 @@ static ssize_t generic_file_buffered_read(struct kiocb *iocb,
* Ok, it wasn't cached, so we need to create a new
* page..
*/
page = page_cache_alloc_cold(mapping);
page = page_cache_alloc(mapping);
if (!page) {
error = -ENOMEM;
goto out;
Expand Down Expand Up @@ -2384,7 +2384,7 @@ static int page_cache_read(struct file *file, pgoff_t offset, gfp_t gfp_mask)
int ret;

do {
page = __page_cache_alloc(gfp_mask|__GFP_COLD);
page = __page_cache_alloc(gfp_mask);
if (!page)
return -ENOMEM;

Expand Down Expand Up @@ -2788,7 +2788,7 @@ static struct page *do_read_cache_page(struct address_space *mapping,
repeat:
page = find_get_page(mapping, index);
if (!page) {
page = __page_cache_alloc(gfp | __GFP_COLD);
page = __page_cache_alloc(gfp);
if (!page)
return ERR_PTR(-ENOMEM);
err = add_to_page_cache_lru(page, mapping, index, gfp);
Expand Down
20 changes: 6 additions & 14 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2336,7 +2336,7 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype)
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
int migratetype, bool cold)
int migratetype)
{
int i, alloced = 0;

Expand All @@ -2358,10 +2358,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
* merge IO requests if the physical pages are ordered
* properly.
*/
if (likely(!cold))
list_add(&page->lru, list);
else
list_add_tail(&page->lru, list);
list_add(&page->lru, list);
list = &page->lru;
alloced++;
if (is_migrate_cma(get_pcppage_migratetype(page)))
Expand Down Expand Up @@ -2795,7 +2792,7 @@ static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)

/* Remove page from the per-cpu list, caller must protect the list */
static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
bool cold, struct per_cpu_pages *pcp,
struct per_cpu_pages *pcp,
struct list_head *list)
{
struct page *page;
Expand All @@ -2804,16 +2801,12 @@ static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
migratetype, cold);
migratetype);
if (unlikely(list_empty(list)))
return NULL;
}

if (cold)
page = list_last_entry(list, struct page, lru);
else
page = list_first_entry(list, struct page, lru);

page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
pcp->count--;
} while (check_new_pcp(page));
Expand All @@ -2828,14 +2821,13 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
{
struct per_cpu_pages *pcp;
struct list_head *list;
bool cold = ((gfp_flags & __GFP_COLD) != 0);
struct page *page;
unsigned long flags;

local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
page = __rmqueue_pcplist(zone, migratetype, pcp, list);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
Expand Down
2 changes: 1 addition & 1 deletion mm/percpu-vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
struct page **pages, int page_start, int page_end)
{
const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD;
const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
unsigned int cpu, tcpu;
int i;

Expand Down
4 changes: 2 additions & 2 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
*/
void *netdev_alloc_frag(unsigned int fragsz)
{
return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
return __netdev_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(netdev_alloc_frag);

Expand All @@ -366,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)

void *napi_alloc_frag(unsigned int fragsz)
{
return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD);
return __napi_alloc_frag(fragsz, GFP_ATOMIC);
}
EXPORT_SYMBOL(napi_alloc_frag);

Expand Down
1 change: 0 additions & 1 deletion tools/perf/builtin-kmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,6 @@ static const struct {
{ "__GFP_ATOMIC", "_A" },
{ "__GFP_IO", "I" },
{ "__GFP_FS", "F" },
{ "__GFP_COLD", "CO" },
{ "__GFP_NOWARN", "NWR" },
{ "__GFP_RETRY_MAYFAIL", "R" },
{ "__GFP_NOFAIL", "NF" },
Expand Down

0 comments on commit 453f85d

Please sign in to comment.